Compare commits

..

2 Commits

Author SHA1 Message Date
fur%netscape.com
1c43d4984f This is a copy of regalloc_code2_BRANCH from Netscape's private repository,
as it existed in January of 1998.


git-svn-id: svn://10.0.0.236/branches/regalloc_code2_BRANCH@22571 18797224-902f-48f8-a5cc-f745e15eee43
1999-03-02 16:12:08 +00:00
(no author)
cfe021ff88 This commit was manufactured by cvs2svn to create branch
'regalloc_code2_BRANCH'.

git-svn-id: svn://10.0.0.236/branches/regalloc_code2_BRANCH@22567 18797224-902f-48f8-a5cc-f745e15eee43
1999-03-02 15:57:58 +00:00
158 changed files with 5324 additions and 46486 deletions

View File

@@ -0,0 +1,134 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#include "Fundamentals.h"
#include "BitSet.h"
// Return the next bit after index set to true or -1 if none.
//
Int32 BitSet::nextOne(Int32 pos) const
{
++pos;
if (pos < 0 || Uint32(pos) >= universeSize)
return -1;
Uint32 offset = getWordOffset(pos);
Uint8 index = getBitOffset(pos);
Word* ptr = &word[offset];
Word currentWord = *ptr++ >> index;
if (currentWord != Word(0)) {
while ((currentWord & Word(1)) == 0) {
++index;
currentWord >>= 1;
}
return (offset << nBitsInWordLog2) + index;
}
Word* limit = &word[getSizeInWords(universeSize)];
while (ptr < limit) {
++offset;
currentWord = *ptr++;
if (currentWord != Word(0)) {
index = 0;
while ((currentWord & Word(1)) == 0) {
++index;
currentWord >>= 1;
}
return (offset << nBitsInWordLog2) + index;
}
}
return -1;
}
// Return the next bit after index set to false or -1 if none.
//
Int32 BitSet::nextZero(Int32 pos) const
{
++pos;
if (pos < 0 || Uint32(pos) >= universeSize)
return -1;
Uint32 offset = getWordOffset(pos);
Uint8 index = getBitOffset(pos);
Word* ptr = &word[offset];
Word currentWord = *ptr++ >> index;
if (currentWord != Word(~0)) {
for (; index < nBitsInWord; ++index) {
if ((currentWord & Word(1)) == 0) {
Int32 ret = (offset << nBitsInWordLog2) + index;
return (Uint32(ret) < universeSize) ? ret : -1;
}
currentWord >>= 1;
}
}
Word* limit = &word[getSizeInWords(universeSize)];
while (ptr < limit) {
++offset;
currentWord = *ptr++;
if (currentWord != Word(~0)) {
for (index = 0; index < nBitsInWord; ++index) {
if ((currentWord & Word(1)) == 0) {
Int32 ret = (offset << nBitsInWordLog2) + index;
return (Uint32(ret) < universeSize) ? ret : -1;
}
currentWord >>= 1;
}
}
}
return -1;
}
#ifdef DEBUG_LOG
// Print the set.
//
void BitSet::printPretty(LogModuleObject log)
{
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("[ "));
for (Int32 i = firstOne(); i != -1; i = nextOne(i)) {
Int32 currentBit = i;
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("%d", currentBit));
Int32 nextBit = nextOne(currentBit);
if (nextBit != currentBit + 1) {
UT_OBJECTLOG(log, PR_LOG_ALWAYS, (" "));
continue;
}
while ((nextBit != -1) && (nextBit == (currentBit + 1))) {
currentBit = nextBit;
nextBit = nextOne(nextBit);
}
if (currentBit > (i+1))
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("-%d ", currentBit));
else
UT_OBJECTLOG(log, PR_LOG_ALWAYS, (" %d ", currentBit));
i = currentBit;
}
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("]\n"));
}
#endif // DEBUG_LOG

View File

@@ -0,0 +1,195 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#ifndef _BITSET_H_
#define _BITSET_H_
#include "Fundamentals.h"
#include "LogModule.h"
#include "Pool.h"
#include <string.h>
//------------------------------------------------------------------------------
// BitSet -
class BitSet
{
private:
#if (PR_BITS_PER_WORD == 64)
typedef Uint64 Word;
#elif (PR_BITS_PER_WORD == 32)
typedef Uint32 Word;
#endif
static const nBitsInWord = PR_BITS_PER_WORD;
static const nBytesInWord = PR_BYTES_PER_WORD;
static const nBitsInWordLog2 = PR_BITS_PER_WORD_LOG2;
static const nBytesInWordLog2 = PR_BYTES_PER_WORD_LOG2;
// Return the number of Word need to store the universe.
static Uint32 getSizeInWords(Uint32 sizeOfUniverse) {return (sizeOfUniverse + (nBitsInWord - 1)) >> nBitsInWordLog2;}
// Return the given element offset in its containing Word.
static Uint32 getBitOffset(Uint32 element) {return element & (nBitsInWord - 1);}
// Return the Word offset for the given element int the universe.
static Uint32 getWordOffset(Uint32 element) {return element >> nBitsInWordLog2;}
// Return the mask for the given bit index.
static Word getMask(Uint8 index) {return Word(1) << index;}
private:
Uint32 universeSize; // Size of the universe
Word* word; // universe memory.
private:
// No copy constructor.
BitSet(const BitSet&);
// Check if the given set's universe is of the same size than this universe.
void checkUniverseCompatibility(const BitSet& set) const {assert(set.universeSize == universeSize);}
// Check if pos is valid for this set's universe.
void checkMember(Int32 pos) const {assert(pos >=0 && Uint32(pos) < universeSize);}
public:
// Create a bitset of universeSize bits.
BitSet(Pool& pool, Uint32 universeSize) : universeSize(universeSize) {word = new(pool) Word[getSizeInWords(universeSize)]; clear();}
// Return the size of this bitset.
Uint32 getSize() const {return universeSize;}
// Clear the bitset.
void clear() {memset(word, 0x00, getSizeInWords(universeSize) << nBytesInWordLog2);}
// Clear the bit at index.
void clear(Uint32 index) {checkMember(index); word[getWordOffset(index)] &= ~getMask(index);}
// Set the bitset.
void set() {memset(word, 0xFF, getSizeInWords(universeSize) << nBytesInWordLog2);}
// Set the bit at index.
void set(Uint32 index) {checkMember(index); word[getWordOffset(index)] |= getMask(index);}
// Return true if the bit at index is set.
bool test(Uint32 index) const {checkMember(index); return (word[getWordOffset(index)] & getMask(index)) != 0;}
// Union with the given bitset.
inline void or(const BitSet& set);
// Intersection with the given bitset.
inline void and(const BitSet& set);
// Difference with the given bitset.
inline void difference(const BitSet& set);
// Copy set.
inline BitSet& operator = (const BitSet& set);
// Return true if the bitset are identical.
friend bool operator == (const BitSet& set1, const BitSet& set2);
// Return true if the bitset are different.
friend bool operator != (const BitSet& set1, const BitSet& set2);
// Logical operators.
BitSet& operator |= (const BitSet& set) {or(set); return *this;}
BitSet& operator &= (const BitSet& set) {and(set); return *this;}
BitSet& operator -= (const BitSet& set) {difference(set); return *this;}
// Return the first bit at set to true or -1 if none.
Int32 firstOne() const {return nextOne(-1);}
// Return the next bit after index set to true or -1 if none.
Int32 nextOne(Int32 pos) const;
// Return the first bit at set to false or -1 if none.
Int32 firstZero() const {return nextZero(-1);}
// Return the next bit after index set to false or -1 if none.
Int32 nextZero(Int32 pos) const;
// Iterator to conform with the set API.
typedef Int32 iterator;
// Return true if the walk is ordered.
static bool isOrdered() {return true;}
// Return the iterator for the first element of this set.
iterator begin() const {return firstOne();}
// Return the next iterator.
iterator advance(iterator pos) const {return nextOne(pos);}
// Return true if the iterator is at the end of the set.
bool done(iterator pos) const {return pos == -1;}
// Return the element corresponding to the given iterator.
Uint32 get(iterator pos) const {return pos;}
#ifdef DEBUG_LOG
// Print the set.
void printPretty(LogModuleObject log);
#endif // DEBUG_LOG
};
// Union with the given bitset.
//
inline void BitSet::or(const BitSet& set)
{
checkUniverseCompatibility(set);
Word* src = set.word;
Word* dst = word;
Word* limit = &src[getSizeInWords(universeSize)];
while (src < limit)
*dst++ |= *src++;
}
// Intersection with the given bitset.
//
inline void BitSet::and(const BitSet& set)
{
checkUniverseCompatibility(set);
Word* src = set.word;
Word* dst = word;
Word* limit = &src[getSizeInWords(universeSize)];
while (src < limit)
*dst++ &= *src++;
}
// Difference with the given bitset.
//
inline void BitSet::difference(const BitSet& set)
{
checkUniverseCompatibility(set);
Word* src = set.word;
Word* dst = word;
Word* limit = &src[getSizeInWords(universeSize)];
while (src < limit)
*dst++ &= ~*src++;
}
// Copy the given set into this set.
//
inline BitSet& BitSet::operator = (const BitSet& set)
{
checkUniverseCompatibility(set);
if (this != &set)
memcpy(word, set.word, getSizeInWords(universeSize) << nBytesInWordLog2);
return *this;
}
// Return true if the given set is identical to this set.
inline bool operator == (const BitSet& set1, const BitSet& set2)
{
set1.checkUniverseCompatibility(set2);
if (&set1 == &set2)
return true;
return memcmp(set1.word, set2.word, BitSet::getSizeInWords(set1.universeSize) << BitSet::nBytesInWordLog2) == 0;
}
inline bool operator != (const BitSet& set1, const BitSet& set2) {return !(set1 == set2);}
#endif // _BITSET_H

View File

@@ -0,0 +1,159 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#ifndef _COALESCING_H_
#define _COALESCING_H_
#include "Fundamentals.h"
#include "Pool.h"
#include "RegisterPressure.h"
#include "InterferenceGraph.h"
#include "ControlGraph.h"
#include "ControlNodes.h"
#include "Instruction.h"
#include "SparseSet.h"
#include "RegisterAllocator.h"
#include "RegisterAllocatorTools.h"
#if 1
// Performing an ultra conservative coalescing meens that when we look at
// candidates (source,destination) for coalescing we need to make sure
// that the combined interference of the source and destination register
// will not exceed the total number of register available for the register
// class.
#define ULTRA_CONSERVATIVE_COALESCING
#else
// If we are not doing an ultra conservative coalescing we have to make sure
// that the total number of neighbor whose degree is greater than the total
// number of register is not greater than the total number of register.
#undef ULTRA_CONSERVATIVE_COALESCING
#endif
template <class RegisterPressure>
struct Coalescing
{
static bool coalesce(RegisterAllocator& registerAllocator);
};
template <class RegisterPressure>
bool Coalescing<RegisterPressure>::coalesce(RegisterAllocator& registerAllocator)
{
Pool& pool = registerAllocator.pool;
// Initialize the lookup table
//
Uint32 rangeCount = registerAllocator.rangeCount;
RegisterName* newRange = new RegisterName[2 * rangeCount];
RegisterName* coalescedRange = &newRange[rangeCount];
RegisterName* name2range = registerAllocator.name2range;
init(coalescedRange, rangeCount);
SparseSet interferences(pool, rangeCount);
InterferenceGraph<RegisterPressure>& iGraph = registerAllocator.iGraph;
bool removedInstructions = false;
ControlGraph& controlGraph = registerAllocator.controlGraph;
ControlNode** nodes = controlGraph.lndList;
Uint32 nNodes = controlGraph.nNodes;
// Walk the nodes in the loop nesting depth list.
for (Int32 n = nNodes - 1; n >= 0; n--) {
InstructionList& instructions = nodes[n]->getInstructions();
InstructionList::iterator it = instructions.begin();
while (!instructions.done(it)) {
Instruction& instruction = instructions.get(it);
it = instructions.advance(it);
if ((instruction.getFlags() & ifCopy) != 0) {
assert(instruction.getInstructionUseBegin() != instruction.getInstructionUseEnd() && instruction.getInstructionUseBegin()[0].isRegister());
assert(instruction.getInstructionDefineBegin() != instruction.getInstructionDefineEnd() && instruction.getInstructionDefineBegin()[0].isRegister());
RegisterName source = findRoot(name2range[instruction.getInstructionUseBegin()[0].getRegisterName()], coalescedRange);
RegisterName destination = findRoot(name2range[instruction.getInstructionDefineBegin()[0].getRegisterName()], coalescedRange);
if (source == destination) {
instruction.remove();
} else if (!iGraph.interfere(source, destination)) {
InterferenceVector* sourceVector = iGraph.getInterferenceVector(source);
InterferenceVector* destinationVector = iGraph.getInterferenceVector(destination);
#ifdef ULTRA_CONSERVATIVE_COALESCING
interferences.clear();
InterferenceVector* vector;
for (vector = sourceVector; vector != NULL; vector = vector->next) {
RegisterName* neighbors = vector->neighbors;
for (Uint32 i = 0; i < vector->count; i++)
interferences.set(findRoot(neighbors[i], coalescedRange));
}
for (vector = destinationVector; vector != NULL; vector = vector->next) {
RegisterName* neighbors = vector->neighbors;
for (Uint32 i = 0; i < vector->count; i++)
interferences.set(findRoot(neighbors[i], coalescedRange));
}
Uint32 count = interferences.getSize();
#else // ULTRA_CONSERVATIVE_COALESCING
trespass("not implemented");
Uint32 count = 0;
#endif // ULTRA_CONSERVATIVE_COALESCING
if (count < 6 /* FIX: should get the number from the class */) {
// Update the interferences vector.
if (sourceVector == NULL) {
iGraph.setInterferenceVector(source, destinationVector);
sourceVector = destinationVector;
} else if (destinationVector == NULL)
iGraph.setInterferenceVector(destination, sourceVector);
else {
InterferenceVector* last = NULL;
for (InterferenceVector* v = sourceVector; v != NULL; v = v->next)
last = v;
assert(last);
last->next = destinationVector;
iGraph.setInterferenceVector(destination, sourceVector);
}
// Update the interference matrix.
for (InterferenceVector* v = sourceVector; v != NULL; v = v->next) {
RegisterName* neighbors = v->neighbors;
for (Uint32 i = 0; i < v->count; i++) {
RegisterName neighbor = findRoot(neighbors[i], coalescedRange);
iGraph.setInterference(neighbor, source);
iGraph.setInterference(neighbor, destination);
}
}
instruction.remove();
coalescedRange[source] = destination;
removedInstructions = true;
}
}
}
}
}
registerAllocator.rangeCount = compress(registerAllocator.name2range, coalescedRange, registerAllocator.nameCount, rangeCount);
delete newRange;
return removedInstructions;
}
#endif // _COALESCING_H_

View File

@@ -0,0 +1,283 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#ifndef NEW_LAURENTM_CODE
#include "Coloring.h"
#include "VirtualRegister.h"
#include "FastBitSet.h"
#include "FastBitMatrix.h"
#include "CpuInfo.h"
bool Coloring::
assignRegisters(FastBitMatrix& interferenceMatrix)
{
PRUint32 *stackPtr = new(pool) PRUint32[vRegManager.count()];
return select(interferenceMatrix, stackPtr, simplify(interferenceMatrix, stackPtr));
}
PRInt32 Coloring::
getLowestSpillCostRegister(FastBitSet& bitset)
{
PRInt32 lowest = bitset.firstOne();
if (lowest != -1)
{
Flt32 cost = vRegManager.getVirtualRegister(lowest).spillInfo.spillCost;
for (PRInt32 r = bitset.nextOne(lowest); r != -1; r = bitset.nextOne(r))
{
VirtualRegister& vReg = vRegManager.getVirtualRegister(r);
if (!vReg.spillInfo.infiniteSpillCost && (vReg.spillInfo.spillCost < cost))
{
cost = vReg.spillInfo.spillCost;
lowest = r;
}
}
}
return lowest;
}
PRUint32* Coloring::
simplify(FastBitMatrix interferenceMatrix, PRUint32* stackPtr)
{
// first we construct the sets low and high. low contains all nodes of degree
// inferior to the number of register available on the processor. All the
// nodes with an high degree and a finite spill cost are placed in high.
// Nodes of high degree and infinite spill cost are not included in either sets.
PRUint32 nRegisters = vRegManager.count();
FastBitSet low(pool, nRegisters);
FastBitSet high(pool, nRegisters);
FastBitSet stack(pool, nRegisters);
for (VirtualRegisterManager::iterator i = vRegManager.begin(); !vRegManager.done(i); i = vRegManager.advance(i))
{
VirtualRegister& vReg = vRegManager.getVirtualRegister(i);
if (vReg.getClass() == vrcStackSlot)
{
stack.set(i);
vReg.colorRegister(nRegisters);
}
else
{
if (vReg.colorInfo.interferenceDegree < NUMBER_OF_REGISTERS)
low.set(i);
else // if (!vReg.spillInfo.infiniteSpillCost)
high.set(i);
// Set coloring info.
vReg.spillInfo.willSpill = false;
switch(vReg.getClass())
{
case vrcInteger:
vReg.colorRegister(LAST_GREGISTER + 1);
break;
case vrcFloatingPoint:
case vrcFixedPoint:
vReg.colorRegister(LAST_FPREGISTER + 1);
break;
default:
PR_ASSERT(false); // Cannot happen.
}
}
}
// push the stack registers
PRInt32 j;
for (j = stack.firstOne(); j != -1; j = stack.nextOne(j))
*stackPtr++ = j;
// simplify
while (true)
{
PRInt32 r;
while ((r = getLowestSpillCostRegister(low)) != -1)
{
VirtualRegister& vReg = vRegManager.getVirtualRegister(r);
/* update low and high */
FastBitSet inter(interferenceMatrix.getRow(r), nRegisters);
for (j = inter.firstOne(); j != -1; j = inter.nextOne(j))
{
VirtualRegister& neighbor = vRegManager.getVirtualRegister(j);
// if the new interference degree of one of his neighbor becomes
// NUMBER_OF_REGISTERS - 1 then it is added to the set 'low'.
PRUint32 maxInterference = 0;
switch (neighbor.getClass())
{
case vrcInteger:
maxInterference = NUMBER_OF_GREGISTERS;
break;
case vrcFloatingPoint:
case vrcFixedPoint:
maxInterference = NUMBER_OF_FPREGISTERS;
break;
default:
PR_ASSERT(false);
}
if ((vRegManager.getVirtualRegister(j).colorInfo.interferenceDegree-- == maxInterference))
{
high.clear(j);
low.set(j);
}
vReg.colorInfo.interferenceDegree--;
interferenceMatrix.clear(r, j);
interferenceMatrix.clear(j, r);
}
low.clear(r);
// Push this register.
*stackPtr++ = r;
}
if ((r = getLowestSpillCostRegister(high)) != -1)
{
high.clear(r);
low.set(r);
}
else
break;
}
return stackPtr;
}
bool Coloring::
select(FastBitMatrix& interferenceMatrix, PRUint32* stackBase, PRUint32* stackPtr)
{
PRUint32 nRegisters = vRegManager.count();
FastBitSet usedRegisters(NUMBER_OF_REGISTERS + 1); // usedRegisters if used for both GR & FPR.
FastBitSet preColoredRegisters(NUMBER_OF_REGISTERS + 1);
FastBitSet usedStack(nRegisters + 1);
bool success = true;
Int32 lastUsedSSR = -1;
// select
while (stackPtr != stackBase)
{
// Pop one register.
PRUint32 r = *--stackPtr;
VirtualRegister& vReg = vRegManager.getVirtualRegister(r);
FastBitSet neighbors(interferenceMatrix.getRow(r), nRegisters);
if (vReg.getClass() == vrcStackSlot)
// Stack slots coloring.
{
usedStack.clear();
for (PRInt32 i = neighbors.firstOne(); i != -1; i = neighbors.nextOne(i))
usedStack.set(vRegManager.getVirtualRegister(i).getColor());
Int32 color = usedStack.firstZero();
vReg.colorRegister(color);
if (color > lastUsedSSR)
lastUsedSSR = color;
}
else
// Integer & Floating point register coloring.
{
usedRegisters.clear();
preColoredRegisters.clear();
for (PRInt32 i = neighbors.firstOne(); i != -1; i = neighbors.nextOne(i))
{
VirtualRegister& nvReg = vRegManager.getVirtualRegister(i);
usedRegisters.set(nvReg.getColor());
if (nvReg.isPreColored())
preColoredRegisters.set(nvReg.getPreColor());
}
if (vReg.hasSpecialInterference)
usedRegisters |= vReg.specialInterference;
PRInt8 c = -1;
PRInt8 maxColor = 0;
PRInt8 firstColor = 0;
switch (vReg.getClass())
{
case vrcInteger:
firstColor = FIRST_GREGISTER;
maxColor = LAST_GREGISTER;
break;
case vrcFloatingPoint:
case vrcFixedPoint:
firstColor = FIRST_FPREGISTER;
maxColor = LAST_FPREGISTER;
break;
default:
PR_ASSERT(false);
}
if (vReg.isPreColored())
{
c = vReg.getPreColor();
if (usedRegisters.test(c))
c = -1;
}
else
{
for (c = usedRegisters.nextZero(firstColor - 1); (c >= 0) && (c <= maxColor) && (preColoredRegisters.test(c));
c = usedRegisters.nextZero(c)) {}
}
if ((c >= 0) && (c <= maxColor))
{
vReg.colorRegister(c);
}
else
{
VirtualRegister& stackRegister = vRegManager.newVirtualRegister(vrcStackSlot);
vReg.equivalentRegister[vrcStackSlot] = &stackRegister;
vReg.spillInfo.willSpill = true;
success = false;
}
}
}
#ifdef DEBUG
if (success)
{
for (VirtualRegisterManager::iterator i = vRegManager.begin(); !vRegManager.done(i); i = vRegManager.advance(i))
{
VirtualRegister& vReg = vRegManager.getVirtualRegister(i);
switch (vReg.getClass())
{
case vrcInteger:
if (vReg.getColor() > LAST_GREGISTER)
PR_ASSERT(false);
break;
case vrcFloatingPoint:
case vrcFixedPoint:
#if NUMBER_OF_FPREGISTERS != 0
if (vReg.getColor() > LAST_FPREGISTER)
PR_ASSERT(false);
#endif
break;
default:
break;
}
}
}
#endif
vRegManager.nUsedStackSlots = lastUsedSSR + 1;
return success;
}
#endif // NEW_LAURENTM_CODE

View File

@@ -0,0 +1,284 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#include "Fundamentals.h"
#include "ControlGraph.h"
#include "ControlNodes.h"
#include "Instruction.h"
#include "RegisterAllocator.h"
#include "VirtualRegister.h"
#include "InterferenceGraph.h"
#include "SparseSet.h"
#include "Spilling.h"
#include "Splits.h"
UT_EXTERN_LOG_MODULE(RegAlloc);
template <class RegisterPressure>
class Coloring
{
private:
static RegisterName* simplify(RegisterAllocator& registerAllocator, RegisterName* coloringStack);
static bool select(RegisterAllocator& registerAllocator, RegisterName* coloringStack, RegisterName* coloringStackPtr);
public:
static bool color(RegisterAllocator& registerAllocator);
static void finalColoring(RegisterAllocator& registerAllocator);
};
template <class RegisterPressure>
void Coloring<RegisterPressure>::finalColoring(RegisterAllocator& registerAllocator)
{
RegisterName* color = registerAllocator.color;
RegisterName* name2range = registerAllocator.name2range;
ControlGraph& controlGraph = registerAllocator.controlGraph;
ControlNode** nodes = controlGraph.dfsList;
Uint32 nNodes = controlGraph.nNodes;
for (Uint32 n = 0; n < nNodes; n++) {
InstructionList& instructions = nodes[n]->getInstructions();
for (InstructionList::iterator i = instructions.begin(); !instructions.done(i); i = instructions.advance(i)) {
Instruction& instruction = instructions.get(i);
InstructionUse* useEnd = instruction.getInstructionUseEnd();
for (InstructionUse* usePtr = instruction.getInstructionUseBegin(); usePtr < useEnd; usePtr++)
if (usePtr->isRegister()) {
usePtr->setRegisterName(color[name2range[usePtr->getRegisterName()]]);
#ifdef DEBUG
RegisterID rid = usePtr->getRegisterID();
setColoredRegister(rid);
usePtr->setRegisterID(rid);
#endif // DEBUG
}
InstructionDefine* defineEnd = instruction.getInstructionDefineEnd();
for (InstructionDefine* definePtr = instruction.getInstructionDefineBegin(); definePtr < defineEnd; definePtr++)
if (definePtr->isRegister()) {
definePtr->setRegisterName(color[name2range[definePtr->getRegisterName()]]);
#ifdef DEBUG
RegisterID rid = definePtr->getRegisterID();
setColoredRegister(rid);
definePtr->setRegisterID(rid);
#endif // DEBUG
}
}
}
}
template <class RegisterPressure>
bool Coloring<RegisterPressure>::select(RegisterAllocator& registerAllocator, RegisterName* coloringStack, RegisterName* coloringStackPtr)
{
Uint32 rangeCount = registerAllocator.rangeCount;
RegisterName* color = new RegisterName[rangeCount];
registerAllocator.color = color;
for (Uint32 r = 1; r < rangeCount; r++)
color[r] = RegisterName(6); // FIX;
// Color the preColored registers.
//
VirtualRegisterManager& vrManager = registerAllocator.vrManager;
RegisterName* name2range = registerAllocator.name2range;
PreColoredRegister* machineEnd = vrManager.getMachineRegistersEnd();
for (PreColoredRegister* machinePtr = vrManager.getMachineRegistersBegin(); machinePtr < machineEnd; machinePtr++)
if (machinePtr->id != invalidID) {
color[name2range[getName(machinePtr->id)]] = machinePtr->color;
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("\twill preColor range %d as %d\n", name2range[getName(machinePtr->id)], machinePtr->color));
}
SpillCost* cost = registerAllocator.spillCost;
Pool& pool = registerAllocator.pool;
SparseSet& spill = *new(pool) SparseSet(pool, rangeCount);
registerAllocator.willSpill = &spill;
SparseSet neighborColors(pool, 6); // FIX
InterferenceGraph<RegisterPressure>& iGraph = registerAllocator.iGraph;
bool coloringFailed = false;
while (coloringStackPtr > coloringStack) {
RegisterName range = *--coloringStackPtr;
if (!cost[range].infinite && cost[range].cost < 0) {
coloringFailed = true;
spill.set(range);
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("\tfailed to color %d, will spill.\n", range));
} else {
neighborColors.clear();
for (InterferenceVector* vector = iGraph.getInterferenceVector(range); vector != NULL; vector = vector->next)
for (Int32 i = vector->count - 1; i >= 0; --i) {
RegisterName neighborColor = color[vector->neighbors[i]];
if (neighborColor < 6) // FIX
neighborColors.set(neighborColor);
}
if (neighborColors.getSize() == 6) { // FIX
coloringFailed = true;
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("\tfailed to color %d, ", range));
if (!Splits<RegisterPressure>::findSplit(registerAllocator, color, range)) {
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("will spill.\n"));
spill.set(range);
} else
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("will split.\n"));
} else {
for (Uint32 i = 0; i < 6; i++) // FIX
if (!neighborColors.test(i)) {
fprintf(stdout, "\twill color %d as %d\n", range, i);
color[range] = RegisterName(i);
break;
}
}
}
}
#ifdef DEBUG_LOG
if (coloringFailed) {
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("Coloring failed:\n"));
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("\twill spill: "));
spill.printPretty(UT_LOG_MODULE(RegAlloc));
} else {
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("Coloring succeeded:\n"));
for (Uint32 i = 1; i < rangeCount; i++)
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("\trange %d colored as %d\n", i, color[i]));
}
#endif
return !coloringFailed;
}
template <class RegisterPressure>
RegisterName* Coloring<RegisterPressure>::simplify(RegisterAllocator& registerAllocator, RegisterName* coloringStack)
{
InterferenceGraph<RegisterPressure>& iGraph = registerAllocator.iGraph;
SpillCost* spillCost = registerAllocator.spillCost;
Uint32 rangeCount = registerAllocator.rangeCount;
Uint32* degree = new Uint32[rangeCount];
for (RegisterName i = RegisterName(1); i < rangeCount; i = RegisterName(i + 1)) {
InterferenceVector* vector = iGraph.getInterferenceVector(i);
degree[i] = (vector != NULL) ? vector->count : 0;
}
Pool& pool = registerAllocator.pool;
SparseSet low(pool, rangeCount);
SparseSet high(pool, rangeCount);
SparseSet highInfinite(pool, rangeCount);
SparseSet preColored(pool, rangeCount);
// Get the precolored registers.
//
VirtualRegisterManager& vrManager = registerAllocator.vrManager;
RegisterName* name2range = registerAllocator.name2range;
PreColoredRegister* machineEnd = vrManager.getMachineRegistersEnd();
for (PreColoredRegister* machinePtr = vrManager.getMachineRegistersBegin(); machinePtr < machineEnd; machinePtr++)
if (machinePtr->id != invalidID)
preColored.set(name2range[getName(machinePtr->id)]);
// Insert the live ranges in the sets.
//
for (Uint32 range = 1; range < rangeCount; range++)
if (!preColored.test(range))
if (degree[range] < 6) // FIX
low.set(range);
else if (!spillCost[range].infinite)
high.set(range);
else
highInfinite.set(range);
#ifdef DEBUG_LOG
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("Coloring sets:\n\tlow = "));
low.printPretty(UT_LOG_MODULE(RegAlloc));
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("\thigh = "));
high.printPretty(UT_LOG_MODULE(RegAlloc));
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("\thighInfinite = "));
highInfinite.printPretty(UT_LOG_MODULE(RegAlloc));
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("\tpreColored = "));
preColored.printPretty(UT_LOG_MODULE(RegAlloc));
#endif // DEBUG_LOG
RegisterName* coloringStackPtr = coloringStack;
while (low.getSize() != 0 || high.getSize() != 0) {
while (low.getSize() != 0) {
RegisterName range = RegisterName(low.getOne());
low.clear(range);
*coloringStackPtr++ = range;
for (InterferenceVector* vector = iGraph.getInterferenceVector(range); vector != NULL; vector = vector->next)
for (Int32 i = (vector->count - 1); i >= 0; --i) {
RegisterName neighbor = vector->neighbors[i];
degree[neighbor]--;
if (degree[neighbor] < 6) // FIX
if (high.test(neighbor)) {
high.clear(neighbor);
low.set(neighbor);
} else if (highInfinite.test(neighbor)) {
highInfinite.clear(neighbor);
low.set(neighbor);
}
}
}
if (high.getSize() != 0) {
RegisterName best = RegisterName(high.getOne());
double bestCost = spillCost[best].cost;
double bestDegree = degree[best];
// Choose the next best candidate.
//
for (SparseSet::iterator i = high.begin(); !high.done(i); i = high.advance(i)) {
RegisterName range = RegisterName(high.get(i));
double thisCost = spillCost[range].cost;
double thisDegree = degree[range];
if (thisCost * bestDegree < bestCost * thisDegree) {
best = range;
bestCost = thisCost;
bestDegree = thisDegree;
}
}
high.clear(best);
low.set(best);
}
}
assert(highInfinite.getSize() == 0);
delete degree;
#ifdef DEBUG_LOG
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("Coloring stack:\n\t"));
for (RegisterName* sp = coloringStack; sp < coloringStackPtr; ++sp)
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("%d ", *sp));
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("\n"));
#endif // DEBUG_LOG
return coloringStackPtr;
}
template <class RegisterPressure>
bool Coloring<RegisterPressure>::color(RegisterAllocator& registerAllocator)
{
RegisterName* coloringStack = new RegisterName[registerAllocator.rangeCount];
return select(registerAllocator, coloringStack, simplify(registerAllocator, coloringStack));
}

View File

@@ -0,0 +1,212 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#include "Fundamentals.h"
#include <string.h>
#include "ControlGraph.h"
#include "ControlNodes.h"
#include "DominatorGraph.h"
DominatorGraph::DominatorGraph(ControlGraph& controlGraph) : controlGraph(controlGraph)
{
Uint32 nNodes = controlGraph.nNodes;
GtoV = new Uint32[nNodes + 1];
VtoG = new Uint32[nNodes + 1];
Uint32 v = 1;
for (Uint32 n = 0; n < nNodes; n++) {
VtoG[v] = n;
GtoV[n] = v++;
}
// Initialize all the 1-based arrays.
//
parent = new Uint32[v];
semi = new Uint32[v];
vertex = new Uint32[v];
label = new Uint32[v];
size = new Uint32[v];
ancestor = new Uint32[v];
child = new Uint32[v];
dom = new Uint32[v];
bucket = new DGLinkedList*[v];
memset(semi, '\0', v * sizeof(Uint32));
memset(bucket, '\0', v * sizeof(DGLinkedList*));
vCount = v;
build();
delete parent;
delete semi;
delete vertex;
delete label;
delete size;
delete ancestor;
delete child;
delete dom;
delete bucket;
}
Uint32 DominatorGraph::DFS(Uint32 vx, Uint32 n)
{
semi[vx] = ++n;
vertex[n] = label[vx] = vx;
ancestor[vx] = child[vx] = 0;
size[vx] = 1;
ControlNode& node = *controlGraph.dfsList[VtoG[vx]];
ControlEdge* successorEnd = node.getSuccessorsEnd();
for (ControlEdge* successorPtr = node.getSuccessorsBegin(); successorPtr < successorEnd; successorPtr++) {
Uint32 w = GtoV[successorPtr->getTarget().dfsNum];
if (semi[w] == 0) {
parent[w] = vx;
n = DFS(w, n);
}
}
return n;
}
void DominatorGraph::LINK(Uint32 vx, Uint32 w)
{
Uint32 s = w;
while (semi[label[w]] < semi[label[child[s]]]) {
if (size[s] + size[child[child[s]]] >= (size[child[s]] << 1)) {
ancestor[child[s]] = s;
child[s] = child[child[s]];
} else {
size[child[s]] = size[s];
s = ancestor[s] = child[s];
}
}
label[s] = label[w];
size[vx] += size[w];
if(size[vx] < (size[w] << 1)) {
Uint32 t = s;
s = child[vx];
child[vx] = t;
}
while( s != 0 ) {
ancestor[s] = vx;
s = child[s];
}
}
void DominatorGraph::COMPRESS(Uint32 vx)
{
if(ancestor[ancestor[vx]] != 0) {
COMPRESS(ancestor[vx]);
if(semi[label[ancestor[vx]]] < semi[label[vx]])
label[vx] = label[ancestor[vx]];
ancestor[vx] = ancestor[ancestor[vx]];
}
}
Uint32 DominatorGraph::EVAL(Uint32 vx)
{
if(ancestor[vx] == 0)
return label[vx];
COMPRESS(vx);
return (semi[label[ancestor[vx]]] >= semi[label[vx]]) ? label[vx] : label[ancestor[vx]];
}
void DominatorGraph::build()
{
Uint32 n = DFS(GtoV[0], 0);
size[0] = label[0] = semi[0];
for (Uint32 i = n; i >= 2; i--) {
Uint32 w = vertex[i];
ControlNode& node = *controlGraph.dfsList[VtoG[w]];
const DoublyLinkedList<ControlEdge>& predecessors = node.getPredecessors();
for (DoublyLinkedList<ControlEdge>::iterator p = predecessors.begin(); !predecessors.done(p); p = predecessors.advance(p)) {
Uint32 vx = GtoV[predecessors.get(p).getSource().dfsNum];
Uint32 u = EVAL(vx);
if(semi[u] < semi[w])
semi[w] = semi[u];
}
DGLinkedList* elem = new DGLinkedList();
elem->next = bucket[vertex[semi[w]]];
elem->index = w;
bucket[vertex[semi[w]]] = elem;
LINK(parent[w], w);
elem = bucket[parent[w]];
while(elem != NULL) {
Uint32 vx = elem->index;
Uint32 u = EVAL(vx);
dom[vx] = (semi[u] < semi[vx]) ? u : parent[w];
elem = elem->next;
}
}
memset(size, '\0', n * sizeof(Uint32));
Pool& pool = controlGraph.pool;
nodes = new(pool) DGNode[n];
for(Uint32 j = 2; j <= n; j++) {
Uint32 w = vertex[j];
Uint32 d = dom[w];
if(d != vertex[semi[w]]) {
d = dom[d];
dom[w] = d;
}
size[d]++;
}
dom[GtoV[0]] = 0;
for (Uint32 k = 1; k <= n; k++) {
DGNode& node = nodes[VtoG[k]];
Uint32 count = size[k];
node.successorsEnd = node.successorsBegin = (count) ? new(pool) Uint32[count] : (Uint32*) 0;
}
for (Uint32 l = 2; l <= n; l++)
*(nodes[VtoG[dom[l]]].successorsEnd)++ = VtoG[l];
}
#ifdef DEBUG_LOG
void DominatorGraph::printPretty(LogModuleObject log)
{
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("Dominator Graph:\n"));
Uint32 nNodes = controlGraph.nNodes;
for (Uint32 i = 0; i < nNodes; i++) {
DGNode& node = nodes[i];
if (node.successorsBegin != node.successorsEnd) {
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("\tN%d dominates ", i));
for (Uint32* successorsPtr = node.successorsBegin; successorsPtr < node.successorsEnd; successorsPtr++)
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("N%d ", *successorsPtr));
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("\n"));
}
}
}
#endif // DEBUG_LOG

View File

@@ -0,0 +1,80 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#ifndef _DOMINATOR_GRAPH_H_
#define _DOMINATOR_GRAPH_H_
#include "LogModule.h"
class ControlGraph;
struct DGNode
{
Uint32* successorsBegin;
Uint32* successorsEnd;
};
struct DGLinkedList
{
DGLinkedList* next;
Uint32 index;
};
class DominatorGraph
{
private:
ControlGraph& controlGraph;
Uint32 vCount;
Uint32* VtoG;
Uint32* GtoV;
Uint32* parent;
Uint32* semi;
Uint32* vertex;
Uint32* label;
Uint32* size;
Uint32* ancestor;
Uint32* child;
Uint32* dom;
DGLinkedList** bucket;
DGNode* nodes;
private:
void build();
Uint32 DFS(Uint32 vx, Uint32 n);
void LINK(Uint32 vx, Uint32 w);
void COMPRESS(Uint32 vx);
Uint32 EVAL(Uint32 vx);
public:
DominatorGraph(ControlGraph& controlGraph);
Uint32* getSuccessorsBegin(Uint32 n) const {return nodes[n].successorsBegin;}
Uint32* getSuccessorsEnd(Uint32 n) const {return nodes[n].successorsEnd;}
#ifdef DEBUG_LOG
void printPretty(LogModuleObject log);
#endif // DEBUG_LOG
};
#endif // _DOMINATOR_GRAPH_H_

View File

@@ -0,0 +1,20 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#include "Fundamentals.h"
#include "HashSet.h"

View File

@@ -0,0 +1,97 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#ifndef _HASH_SET_H_
#define _HASH_SET_H_
#include "Fundamentals.h"
#include "Pool.h"
#include <string.h>
struct HashSetElement
{
Uint32 index;
HashSetElement* next;
};
class HashSet
{
private:
static const hashSize = 64;
// Return the hash code for the given element index.
static Uint32 getHashCode(Uint32 index) {return index & (hashSize - 1);} // Could be better !
private:
Pool& allocationPool;
HashSetElement** bucket;
HashSetElement* free;
private:
// No copy constructor.
HashSet(const HashSet&);
// No copy operator.
void operator = (const HashSet&);
public:
// Create a new HashSet.
inline HashSet(Pool& pool, Uint32 universeSize);
// Clear the hashset.
void clear();
// Clear the element for the given index.
void clear(Uint32 index);
// Set the element for the given index.
void set(Uint32 index);
// Return true if the element at index is a member.
bool test(Uint32 index) const;
// Union with the given hashset.
inline void or(const HashSet& set);
// Intersection with the given hashset.
inline void and(const HashSet& set);
// Difference with the given hashset.
inline void difference(const HashSet& set);
// Logical operators.
HashSet& operator |= (const HashSet& set) {or(set); return *this;}
HashSet& operator &= (const HashSet& set) {and(set); return *this;}
HashSet& operator -= (const HashSet& set) {difference(set); return *this;}
// Iterator to conform with the set API.
typedef HashSetElement* iterator;
// Return the iterator for the first element of this set.
iterator begin() const;
// Return the next iterator.
iterator advance(iterator pos) const;
// Return true if the iterator is at the end of the set.
bool done(iterator pos) const {return pos == NULL;}
};
inline HashSet::HashSet(Pool& pool, Uint32 /*universeSize*/)
: allocationPool(pool), free(NULL)
{
bucket = new(pool) HashSetElement*[hashSize];
memset(bucket, '\0', sizeof(HashSetElement*));
}
#endif // _HASH_SET_H_

View File

@@ -0,0 +1,213 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#ifndef _INDEXED_POOL_H_
#define _INDEXED_POOL_H_
#include "Fundamentals.h"
#include <string.h>
#include <stdlib.h>
//------------------------------------------------------------------------------
// IndexedPool<IndexedObjectSubclass> is an indexed pool of objects. The
// template parameter 'IndexedObjectSubclass' must be a subclass of the struct
// IndexedObject.
//
// When the indexed pool is ask to allocate and initialize a new object (using
// the operator new(anIndexedPool) it will zero the memory used to store the
// object and initialize the field 'index' of this object to its position in
// the pool.
//
// An object allocated by the indexed pool can be freed by calling the method
// IndexedPool::release(IndexedElement& objectIndex).
//
// example:
//
// IndexedPool<IndexedElement> elementPool;
//
// IndexedElement& element1 = *new(elementPool) IndexedElement();
// IndexedElement& element2 = *new(elementPool) IndexedElement();
//
// indexedPool.release(element1);
// IndexedElement& element3 = *new(elementPool) IndexedElement();
//
// At this point element1 is no longer a valid object, element2 is at
// index 2 and element3 is at index 1.
//
//------------------------------------------------------------------------------
// IndexedObject -
//
template<class Object>
struct IndexedObject
{
Uint32 index; // Index in the pool.
Object* next; // Used to link IndexedObject together.
Uint32 getIndex() {return index;}
};
//------------------------------------------------------------------------------
// IndexedPool<IndexedObject> -
//
template <class IndexedObject>
class IndexedPool
{
private:
static const blockSize = 4; // Size of one block.
Uint32 nBlocks; // Number of blocks in the pool.
IndexedObject** block; // Array of block pointers.
IndexedObject* freeObjects; // Chained list of free IndexedObjects.
Uint32 nextIndex; // Index of the next free object in the last block.
private:
void allocateAnotherBlock();
IndexedObject& newObject();
public:
IndexedPool() : nBlocks(0), block(NULL), freeObjects(NULL), nextIndex(1) {}
~IndexedPool();
IndexedObject& get(Uint32 index) const;
void release(IndexedObject& object);
void setSize(Uint32 size) {assert(size < nextIndex); nextIndex = size;}
// Return the universe size.
Uint32 getSize() {return nextIndex;}
friend void* operator new(size_t, IndexedPool<IndexedObject>& pool); // Needs to call newObject().
};
// Free all the memory allocated for this object.
//
template <class IndexedObject>
IndexedPool<IndexedObject>::~IndexedPool()
{
for (Uint32 n = 0; n < nBlocks; n++)
free(&((IndexedObject **) &block[n][n*blockSize])[-(n + 1)]);
}
// Release the given. This object will be iserted in the chained
// list of free IndexedObjects. To minimize the fragmentation the chained list
// is ordered by ascending indexes.
//
template <class IndexedObject>
void IndexedPool<IndexedObject>::release(IndexedObject& object)
{
Uint32 index = object.index;
IndexedObject* list = freeObjects;
assert(&object == &get(index)); // Make sure that object is owned by this pool.
if (list == NULL) { // The list is empty.
freeObjects = &object;
object.next = NULL;
} else { // The list contains at least 1 element.
if (index < list->index) { // insert as first element.
freeObjects = &object;
object.next = list;
} else { // Find this object's place.
while ((list->next) != NULL && (list->next->index < index))
list = list->next;
object.next = list->next;
list->next = &object;
}
}
#ifdef DEBUG
// Sanity check to be sure that the list is correctly ordered.
for (IndexedObject* obj = freeObjects; obj != NULL; obj = obj->next)
if (obj->next != NULL)
assert(obj->index < obj->next->index);
#endif
}
// Create a new block of IndexedObjects. We will allocate the memory to
// store IndexedPool::blockSize IndexedObject and the new Array of block
// pointers.
// The newly created IndexedObjects will not be initialized.
//
template <class IndexedObject>
void IndexedPool<IndexedObject>::allocateAnotherBlock()
{
void* memory = (void *) malloc((nBlocks + 1) * sizeof(Uint32) + blockSize * sizeof(IndexedObject));
memcpy(memory, block, nBlocks * sizeof(Uint32));
block = (IndexedObject **) memory;
IndexedObject* objects = (IndexedObject *) &block[nBlocks + 1];
block[nBlocks] = &objects[-(nBlocks * blockSize)];
nBlocks++;
}
// Return the IndexedObject at the position 'index' in the pool.
//
template <class IndexedObject>
IndexedObject& IndexedPool<IndexedObject>::get(Uint32 index) const
{
Uint32 blockIndex = index / blockSize;
assert(blockIndex < nBlocks);
return block[blockIndex][index];
}
// Return the reference of an unused object in the pool.
//
template <class IndexedObject>
IndexedObject& IndexedPool<IndexedObject>::newObject()
{
if (freeObjects != NULL) {
IndexedObject& newObject = *freeObjects;
freeObjects = newObject.next;
return newObject;
}
Uint32 nextIndex = this->nextIndex++;
Uint32 blockIndex = nextIndex / blockSize;
while (blockIndex >= nBlocks)
allocateAnotherBlock();
IndexedObject& newObject = block[blockIndex][nextIndex];
newObject.index = nextIndex;
return newObject;
}
// Return the address of the next unsused object in the given
// indexed pool. The field index of the newly allocated object
// will be initialized to the corresponding index of this object
// in the pool.
//
template <class IndexedObject>
void* operator new(size_t size, IndexedPool<IndexedObject>& pool)
{
assert(size == sizeof(IndexedObject));
return (void *) &pool.newObject();
}
#endif // _INDEXED_POOL_H_

View File

@@ -0,0 +1,258 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#ifndef _INTERFERENCE_GRAPH_H_
#define _INTERFERENCE_GRAPH_H_
#include "Fundamentals.h"
#include "ControlGraph.h"
#include "Primitives.h"
#include "Instruction.h"
#include "VirtualRegister.h"
#include "RegisterPressure.h"
#include "SparseSet.h"
#include <string.h>
struct InterferenceVector
{
Uint32 count;
InterferenceVector* next;
RegisterName* neighbors;
InterferenceVector() : count(0), next(NULL) {}
};
class RegisterAllocator;
template <class RegisterPressure>
class InterferenceGraph
{
private:
RegisterAllocator& registerAllocator;
RegisterPressure::Set* interferences;
InterferenceVector** vector;
Uint32* offset;
Uint32 rangeCount;
private:
// No copy constructor.
InterferenceGraph(const InterferenceGraph&);
// No copy operator.
void operator = (const InterferenceGraph&);
// Check if reg is a member of the universe.
void checkMember(RegisterName name) {assert(name < rangeCount);}
// Return the edge index for the interference between name1 and name2.
Uint32 getEdgeIndex(RegisterName name1, RegisterName name2);
public:
InterferenceGraph(RegisterAllocator& registerAllocator) : registerAllocator(registerAllocator) {}
// Calculate the interferences.
void build();
// Return true if reg1 and reg2 interfere.
bool interfere(RegisterName name1, RegisterName name2);
// Return the interference vector for the given register or NULL if there is none.
InterferenceVector* getInterferenceVector(RegisterName name) {return vector[name];}
// Set the interference between name1 and name2.
void setInterference(RegisterName name1, RegisterName name2);
// Set the interference vector for the given register.
void setInterferenceVector(RegisterName name, InterferenceVector* v) {vector[name] = v;}
#ifdef DEBUG_LOG
// Print the interferences.
void printPretty(LogModuleObject log);
#endif // DEBUG_LOG
};
template <class RegisterPressure>
void InterferenceGraph<RegisterPressure>::build()
{
Pool& pool = registerAllocator.pool;
Uint32 rangeCount = registerAllocator.rangeCount;
this->rangeCount = rangeCount;
// Initialize the structures.
//
offset = new(pool) Uint32[rangeCount + 1];
vector = new(pool) InterferenceVector*[rangeCount];
memset(vector, '\0', sizeof(InterferenceVector*) * rangeCount);
Uint32 o = 0;
offset[0] = 0;
for (Uint32 i = 1; i <= rangeCount; ++i) {
offset[i] = o;
o += i;
}
interferences = new(pool) RegisterPressure::Set(pool, (rangeCount * rangeCount) / 2);
ControlGraph& controlGraph = registerAllocator.controlGraph;
ControlNode** nodes = controlGraph.dfsList;
Uint32 nNodes = controlGraph.nNodes;
RegisterName* name2range = registerAllocator.name2range;
LivenessInfo<RegisterPressure> liveness = Liveness<RegisterPressure>::analysis(controlGraph, rangeCount, name2range);
registerAllocator.liveness = liveness;
SparseSet currentLive(pool, rangeCount);
for (Uint32 n = 0; n < nNodes; n++) {
ControlNode& node = *nodes[n];
currentLive = liveness.liveOut[n];
InstructionList& instructions = node.getInstructions();
for (InstructionList::iterator i = instructions.end(); !instructions.done(i); i = instructions.retreat(i)) {
Instruction& instruction = instructions.get(i);
InstructionUse* useBegin = instruction.getInstructionUseBegin();
InstructionUse* useEnd = instruction.getInstructionUseEnd();
InstructionUse* usePtr;
InstructionDefine* defineBegin = instruction.getInstructionDefineBegin();
InstructionDefine* defineEnd = instruction.getInstructionDefineEnd();
InstructionDefine* definePtr;
// Handle the copy instruction to avoid unnecessary interference between the 2 registers.
if ((instruction.getFlags() & ifCopy) != 0) {
assert(useBegin != useEnd && useBegin[0].isRegister());
currentLive.clear(name2range[useBegin[0].getRegisterName()]);
}
// Create the interferences.
for (definePtr = defineBegin; definePtr < defineEnd; definePtr++)
if (definePtr->isRegister()) {
RegisterName define = name2range[definePtr->getRegisterName()];
for (SparseSet::iterator e = currentLive.begin(); !currentLive.done(e); e = currentLive.advance(e)) {
RegisterName live = RegisterName(currentLive.get(e));
if ((live != define) && !interfere(live, define) && registerAllocator.canInterfere(live, define)) {
if (vector[define] == NULL)
vector[define] = new(pool) InterferenceVector();
vector[define]->count++;
if (vector[live] == NULL)
vector[live] = new(pool) InterferenceVector();
vector[live]->count++;
setInterference(live, define);
}
}
}
// Now update the liveness.
//
for (definePtr = defineBegin; definePtr < defineEnd; definePtr++)
if (definePtr->isRegister())
currentLive.clear(name2range[definePtr->getRegisterName()]);
for (usePtr = useBegin; usePtr < useEnd; usePtr++)
if (usePtr->isRegister())
currentLive.set(name2range[usePtr->getRegisterName()]);
}
}
// Allocate the memory to store the interferences.
//
for (Uint32 e = 0; e < rangeCount; e++)
if (vector[e] != NULL) {
InterferenceVector& v = *vector[e];
v.neighbors = new(pool) RegisterName[v.count];
v.count = 0;
}
// Initialize the edges.
//
if (RegisterPressure::Set::isOrdered()) {
RegisterName name1 = RegisterName(0);
for (RegisterPressure::Set::iterator i = interferences->begin(); !interferences->done(i); i = interferences->advance(i)) {
Uint32 interferenceIndex = interferences->get(i);
while(interferenceIndex >= offset[name1 + 1])
name1 = RegisterName(name1 + 1);
assert((interferenceIndex >= offset[name1]) && (interferenceIndex < offset[name1 + 1]));
RegisterName name2 = RegisterName(interferenceIndex - offset[name1]);
assert(interfere(name1, name2));
InterferenceVector& vector1 = *vector[name1];
vector1.neighbors[vector1.count++] = name2;
InterferenceVector& vector2 = *vector[name2];
vector2.neighbors[vector2.count++] = name1;
}
} else {
trespass("not Implemented"); // FIX: need one more pass to initialize the vectors.
}
}
template <class RegisterPressure>
Uint32 InterferenceGraph<RegisterPressure>::getEdgeIndex(RegisterName name1, RegisterName name2)
{
checkMember(name1); checkMember(name2);
assert(name1 != name2); // This is not possible.
return (name1 < name2) ? offset[name2] + name1 : offset[name1] + name2;
}
template <class RegisterPressure>
void InterferenceGraph<RegisterPressure>::setInterference(RegisterName name1, RegisterName name2)
{
interferences->set(getEdgeIndex(name1, name2));
}
template <class RegisterPressure>
bool InterferenceGraph<RegisterPressure>::interfere(RegisterName name1, RegisterName name2)
{
return interferences->test(getEdgeIndex(name1, name2));
}
#ifdef DEBUG_LOG
template <class RegisterPressure>
void InterferenceGraph<RegisterPressure>::printPretty(LogModuleObject log)
{
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("Interference Vectors:\n"));
for (Uint32 i = 1; i < rangeCount; i++) {
if (vector[i] != NULL) {
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("\tvr%d: (", i));
for (InterferenceVector* v = vector[i]; v != NULL; v = v->next)
for (Uint32 j = 0; j < v->count; j++) {
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("%d", v->neighbors[j]));
if (v->next != NULL || j != (v->count - 1))
UT_OBJECTLOG(log, PR_LOG_ALWAYS, (","));
}
UT_OBJECTLOG(log, PR_LOG_ALWAYS, (")\n"));
}
}
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("Interference Matrix:\n"));
for (RegisterName name1 = RegisterName(1); name1 < rangeCount; name1 = RegisterName(name1 + 1)) {
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("\t%d:\t", name1));
for (RegisterName name2 = RegisterName(1); name2 < rangeCount; name2 = RegisterName(name2 + 1))
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("%c", ((name1 != name2) && interfere(name1, name2)) ? '1' : '0'));
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("\n"));
}
}
#endif // DEBUG_LOG
#endif // _INTERFERENCE_GRAPH_H_

View File

@@ -0,0 +1,87 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#ifndef _LIVE_RANGE_H_
#define _LIVE_RANGE_H_
#include "Fundamentals.h"
#include "ControlGraph.h"
#include "ControlNodes.h"
#include "Primitives.h"
#include "Instruction.h"
#include "RegisterAllocator.h"
#include "RegisterAllocatorTools.h"
template <class RegisterPressure>
struct LiveRange
{
static void build(RegisterAllocator& registerAllocator);
};
template <class RegisterPressure>
void LiveRange<RegisterPressure>::build(RegisterAllocator& registerAllocator)
{
// Intialize the lookup table.
//
Uint32 nameCount = registerAllocator.nameCount;
RegisterName* nameTable = new(registerAllocator.pool) RegisterName[2*nameCount];
RegisterName* rangeName = &nameTable[nameCount];
init(rangeName, nameCount);
// Walk the graph.
//
ControlGraph& controlGraph = registerAllocator.controlGraph;
ControlNode** nodes = controlGraph.dfsList;
Uint32 nNodes = controlGraph.nNodes;
SparseSet destination(registerAllocator.pool, nameCount);
for (Uint32 n = 0; n < nNodes; n++) {
InstructionList& phiNodes = nodes[n]->getPhiNodeInstructions();
destination.clear();
for (InstructionList::iterator i = phiNodes.begin(); !phiNodes.done(i); i = phiNodes.advance(i)) {
Instruction& phiNode = phiNodes.get(i);
assert(phiNode.getInstructionDefineBegin() != phiNode.getInstructionDefineEnd() && phiNode.getInstructionDefineBegin()[0].isRegister());
destination.set(findRoot(phiNode.getInstructionDefineBegin()[0].getRegisterName(), rangeName));
}
for (InstructionList::iterator p = phiNodes.begin(); !phiNodes.done(p); p = phiNodes.advance(p)) {
Instruction& phiNode = phiNodes.get(p);
assert(phiNode.getInstructionDefineBegin() != phiNode.getInstructionDefineEnd() && phiNode.getInstructionDefineBegin()[0].isRegister());
RegisterName destinationName = phiNode.getInstructionDefineBegin()[0].getRegisterName();
RegisterName destinationRoot = findRoot(destinationName, rangeName);
InstructionUse* useEnd = phiNode.getInstructionUseEnd();
for (InstructionUse* usePtr = phiNode.getInstructionUseBegin(); usePtr < useEnd; usePtr++) {
assert(usePtr->isRegister());
RegisterName sourceName = usePtr->getRegisterName();
RegisterName sourceRoot = findRoot(sourceName, rangeName);
if (sourceRoot != destinationRoot && !destination.test(sourceRoot))
rangeName[sourceRoot] = destinationRoot;
}
}
}
registerAllocator.rangeCount = compress(registerAllocator.name2range, rangeName, nameCount, nameCount);
}
#endif // _LIVE_RANGE_H_

View File

@@ -0,0 +1,163 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#ifndef _LIVE_RANGE_GRAPH_
#define _LIVE_RANGE_GRAPH_
#include "Fundamentals.h"
#include "Pool.h"
#include "ControlGraph.h"
#include "ControlNodes.h"
#include "Instruction.h"
#include "RegisterTypes.h"
class RegisterAllocator;
template <class RegisterPressure>
class LiveRangeGraph
{
private:
RegisterAllocator& registerAllocator;
RegisterPressure::Set* edges;
Uint32 rangeCount;
public:
//
//
LiveRangeGraph(RegisterAllocator& registerAllocator) : registerAllocator(registerAllocator) {}
//
//
void build();
//
//
void addEdge(RegisterName name1, RegisterName name2);
//
//
bool haveEdge(RegisterName name1, RegisterName name2);
#ifdef DEBUG_LOG
//
//
void printPretty(LogModuleObject log);
#endif // DEBUG_LOG
};
template <class RegisterPressure>
void LiveRangeGraph<RegisterPressure>::build()
{
Pool& pool = registerAllocator.pool;
Uint32 rangeCount = registerAllocator.rangeCount;
this->rangeCount = rangeCount;
edges = new(pool) RegisterPressure::Set(pool, rangeCount * rangeCount);
ControlGraph& controlGraph = registerAllocator.controlGraph;
ControlNode** nodes = controlGraph.dfsList;
Uint32 nNodes = controlGraph.nNodes;
RegisterName* name2range = registerAllocator.name2range;
LivenessInfo<RegisterPressure>& liveness = registerAllocator.liveness;
SparseSet currentLive(pool, rangeCount);
for (Uint32 n = 0; n < nNodes; n++) {
ControlNode& node = *nodes[n];
currentLive = liveness.liveOut[n];
InstructionList& instructions = node.getInstructions();
for (InstructionList::iterator i = instructions.end(); !instructions.done(i); i = instructions.retreat(i)) {
Instruction& instruction = instructions.get(i);
InstructionUse* useBegin = instruction.getInstructionUseBegin();
InstructionUse* useEnd = instruction.getInstructionUseEnd();
InstructionUse* usePtr;
InstructionDefine* defineBegin = instruction.getInstructionDefineBegin();
InstructionDefine* defineEnd = instruction.getInstructionDefineEnd();
InstructionDefine* definePtr;
if ((instruction.getFlags() & ifCopy) != 0) {
assert(useBegin != useEnd && useBegin[0].isRegister());
currentLive.clear(name2range[useBegin[0].getRegisterName()]);
}
for (definePtr = defineBegin; definePtr < defineEnd; definePtr++)
if (definePtr->isRegister()) {
RegisterName define = name2range[definePtr->getRegisterName()];
for (SparseSet::iterator l = currentLive.begin(); !currentLive.done(l); l = currentLive.advance(l)) {
RegisterName live = RegisterName(currentLive.get(l));
if (define != live && registerAllocator.canInterfere(define, live))
addEdge(define, live);
}
}
for (definePtr = defineBegin; definePtr < defineEnd; definePtr++)
if (definePtr->isRegister())
currentLive.clear(name2range[definePtr->getRegisterName()]);
for (usePtr = useBegin; usePtr < useEnd; usePtr++)
if (usePtr->isRegister())
currentLive.set(name2range[usePtr->getRegisterName()]);
for (usePtr = useBegin; usePtr < useEnd; usePtr++)
if (usePtr->isRegister()) {
RegisterName use = name2range[usePtr->getRegisterName()];
for (SparseSet::iterator l = currentLive.begin(); !currentLive.done(l); l = currentLive.advance(l)) {
RegisterName live = RegisterName(currentLive.get(l));
if (use != live && registerAllocator.canInterfere(use, live))
addEdge(use, live);
}
}
}
}
}
template <class RegisterPressure>
void LiveRangeGraph<RegisterPressure>::addEdge(RegisterName name1, RegisterName name2)
{
assert(name1 != name2);
edges->set(name1 * rangeCount + name2);
}
template <class RegisterPressure>
bool LiveRangeGraph<RegisterPressure>::haveEdge(RegisterName name1, RegisterName name2)
{
assert(name1 != name2);
return edges->test(name1 * rangeCount + name2);
}
#ifdef DEBUG_LOG
template <class RegisterPressure>
void LiveRangeGraph<RegisterPressure>::printPretty(LogModuleObject log)
{
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("Live ranges graph:\n"));
for (RegisterName name1 = RegisterName(1); name1 < rangeCount; name1 = RegisterName(name1 + 1)) {
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("\t%d:\t", name1));
for (RegisterName name2 = RegisterName(1); name2 < rangeCount; name2 = RegisterName(name2 + 1))
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("%c", ((name1 != name2) && haveEdge(name1, name2)) ? '1' : '0'));
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("\n"));
}
}
#endif // DEBUG_LOG
#endif // _LIVE_RANGE_GRAPH_

View File

@@ -0,0 +1,21 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#include "Fundamentals.h"
#include "Liveness.h"

View File

@@ -0,0 +1,301 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#ifndef _LIVENESS_H_
#define _LIVENESS_H_
#include "Fundamentals.h"
#include "ControlGraph.h"
#include "ControlNodes.h"
#include "Instruction.h"
#include "RegisterTypes.h"
// ----------------------------------------------------------------------------
// LivenessInfo -
template <class RegisterPressure>
struct LivenessInfo
{
RegisterPressure::Set* liveIn;
RegisterPressure::Set* liveOut;
DEBUG_LOG_ONLY(Uint32 size);
#ifdef DEBUG_LOG
void printPretty(LogModuleObject log);
#endif // DEBUG_LOG
};
// ----------------------------------------------------------------------------
// Liveness
//
// The liveness is defined by the following data-flow equations:
//
// LiveIn(n) = LocalLive(n) U (LiveOut(n) - Killed(n)).
// LiveOut(n) = U LiveIn(s) (s a successor of n).
//
// where LocalLive(n) is the set of used registers in the block n, Killed(n)
// is the set of defined registers in the block n, LiveIn(n) is the set of
// live registers at the begining of the block n and LiveOut(n) is the set
// of live registers at the end of the block n.
//
//
// We will compute the liveness analysis in two stages:
//
// 1- Build LocalLive(n) (wich is an approximation of LiveIn(n)) and Killed(n)
// for each block n.
// 2- Perform a backward data-flow analysis to propagate the liveness information
// through the entire control-flow graph.
//
template <class RegisterPressure>
struct Liveness
{
static LivenessInfo<RegisterPressure> analysis(ControlGraph& controlGraph, Uint32 rangeCount, const RegisterName* name2range);
static LivenessInfo<RegisterPressure> analysis(ControlGraph& controlGraph, Uint32 nameCount);
};
template <class RegisterPressure>
LivenessInfo<RegisterPressure> Liveness<RegisterPressure>::analysis(ControlGraph& controlGraph, Uint32 rangeCount, const RegisterName* name2range)
{
Pool& pool = controlGraph.pool;
ControlNode** nodes = controlGraph.dfsList;
Uint32 nNodes = controlGraph.nNodes;
// Allocate the temporary sets.
RegisterPressure::Set* killed = new(pool) RegisterPressure::Set[nNodes](pool, rangeCount);
// Allocate the globals sets.
RegisterPressure::Set* liveIn = new(pool) RegisterPressure::Set[nNodes](pool, rangeCount);
RegisterPressure::Set* liveOut = new(pool) RegisterPressure::Set[nNodes](pool, rangeCount);
// First stage of the liveness analysis: Compute the sets LocalLive(stored in LiveIn) and Killed.
//
for (Uint32 n = 0; n < (nNodes - 1); n++) {
ControlNode& node = *nodes[n];
RegisterPressure::Set& currentLocalLive = liveIn[n];
RegisterPressure::Set& currentKilled = killed[n];
// Find the instructions contributions to the sets LocalLive and Killed.
//
InstructionList& instructions = node.getInstructions();
for (InstructionList::iterator i = instructions.begin(); !instructions.done(i); i = instructions.advance(i)) {
Instruction& instruction = instructions.get(i);
// If a VirtualRegister is 'used' before being 'defined' then we add it to set LocalLive.
InstructionUse* useEnd = instruction.getInstructionUseEnd();
for (InstructionUse* usePtr = instruction.getInstructionUseBegin(); usePtr < useEnd; usePtr++)
if (usePtr->isRegister()) {
Uint32 index = name2range[usePtr->getRegisterName()];
if (!currentKilled.test(index))
currentLocalLive.set(index);
}
// If a Virtualregister is 'defined' then we add it to the set Killed.
InstructionDefine* defineEnd = instruction.getInstructionDefineEnd();
for (InstructionDefine* definePtr = instruction.getInstructionDefineBegin(); definePtr < defineEnd; definePtr++)
if (definePtr->isRegister())
currentKilled.set(name2range[definePtr->getRegisterName()]);
}
}
// Second stage of the liveness analysis: We propagate the LiveIn & LiveOut through the entire
// control-flow graph.
//
RegisterPressure::Set temp(pool, rangeCount);
bool changed;
do {
changed = false;
// For all nodes is this graph except the endNode.
for (Int32 n = (nNodes - 2); n >= 0; n--) {
ControlNode& node = *nodes[n];
RegisterPressure::Set& currentLiveIn = liveIn[n];
RegisterPressure::Set& currentLiveOut = liveOut[n];
// Compute temp = Union of LiveIn(s) (s a successor of this node) | usedByPhiNodes(n).
// temp will be the new LiveOut(n).
Uint32 nSuccessors = node.nSuccessors();
if (nSuccessors != 0) {
temp = liveIn[node.nthSuccessor(0).getTarget().dfsNum];
for (Uint32 s = 1; s < nSuccessors; s++)
temp |= liveIn[node.nthSuccessor(s).getTarget().dfsNum];
} else
temp.clear();
// If temp and LiveOut(n) differ then set LiveOut(n) = temp and recalculate the
// new LiveIn(n).
if (currentLiveOut != temp) {
currentLiveOut = temp;
temp -= killed[n]; // FIX: could be optimized with one call to unionDiff !
temp |= currentLiveIn;
if (currentLiveIn != temp) {
currentLiveIn = temp;
changed = true;
}
}
}
} while(changed);
LivenessInfo<RegisterPressure> liveness;
liveness.liveIn = liveIn;
liveness.liveOut = liveOut;
DEBUG_LOG_ONLY(liveness.size = nNodes);
return liveness;
}
template <class RegisterPressure>
LivenessInfo<RegisterPressure> Liveness<RegisterPressure>::analysis(ControlGraph& controlGraph, Uint32 nameCount)
{
Pool& pool = controlGraph.pool;
ControlNode** nodes = controlGraph.dfsList;
Uint32 nNodes = controlGraph.nNodes;
// Allocate the temporary sets.
RegisterPressure::Set* killed = new(pool) RegisterPressure::Set[nNodes](pool, nameCount);
RegisterPressure::Set* usedByPhiNodes = NULL;
// Allocate the globals sets.
RegisterPressure::Set* liveIn = new(pool) RegisterPressure::Set[nNodes](pool, nameCount);
RegisterPressure::Set* liveOut = new(pool) RegisterPressure::Set[nNodes](pool, nameCount);
// First stage of the liveness analysis: Compute the sets LocalLive(stored in LiveIn) and Killed.
//
for (Uint32 n = 0; n < (nNodes - 1); n++) {
ControlNode& node = *nodes[n];
RegisterPressure::Set& currentLocalLive = liveIn[n];
RegisterPressure::Set& currentKilled = killed[n];
InstructionList& phiNodes = node.getPhiNodeInstructions();
if ((usedByPhiNodes == NULL) && !phiNodes.empty())
usedByPhiNodes = new(pool) RegisterPressure::Set[nNodes](pool, nameCount);
for (InstructionList::iterator p = phiNodes.begin(); !phiNodes.done(p); p = phiNodes.advance(p)) {
Instruction& phiNode = phiNodes.get(p);
InstructionDefine& define = phiNode.getInstructionDefineBegin()[0];
currentKilled.set(define.getRegisterName());
typedef DoublyLinkedList<ControlEdge> ControlEdgeList;
const ControlEdgeList& predecessors = node.getPredecessors();
ControlEdgeList::iterator p = predecessors.begin();
InstructionUse* useEnd = phiNode.getInstructionUseEnd();
for (InstructionUse* usePtr = phiNode.getInstructionUseBegin(); usePtr < useEnd; usePtr++, p = predecessors.advance(p))
if (usePtr->isRegister())
usedByPhiNodes[predecessors.get(p).getSource().dfsNum].set(usePtr->getRegisterName());
}
// Find the instructions contributions to the sets LocalLive and Killed.
//
InstructionList& instructions = node.getInstructions();
for (InstructionList::iterator i = instructions.begin(); !instructions.done(i); i = instructions.advance(i)) {
Instruction& instruction = instructions.get(i);
// If a VirtualRegister is 'used' before being 'defined' then we add it to set LocalLive.
InstructionUse* useEnd = instruction.getInstructionUseEnd();
for (InstructionUse* usePtr = instruction.getInstructionUseBegin(); usePtr < useEnd; usePtr++)
if (usePtr->isRegister()) {
Uint32 index = usePtr->getRegisterName();
if (!currentKilled.test(index))
currentLocalLive.set(index);
}
// If a Virtualregister is 'defined' then we add it to the set Killed.
InstructionDefine* defineEnd = instruction.getInstructionDefineEnd();
for (InstructionDefine* definePtr = instruction.getInstructionDefineBegin(); definePtr < defineEnd; definePtr++)
if (definePtr->isRegister())
currentKilled.set(definePtr->getRegisterName());
}
}
// Second stage of the liveness analysis: We propagate the LiveIn & LiveOut through the entire
// control-flow graph.
//
RegisterPressure::Set temp(pool, nameCount);
bool changed;
do {
changed = false;
// For all nodes is this graph except the endNode.
for (Int32 n = (nNodes - 2); n >= 0; n--) {
ControlNode& node = *nodes[n];
RegisterPressure::Set& currentLiveIn = liveIn[n];
RegisterPressure::Set& currentLiveOut = liveOut[n];
// Compute temp = Union of LiveIn(s) (s a successor of this node) | usedByPhiNodes(n).
// temp will be the new LiveOut(n).
Uint32 nSuccessors = node.nSuccessors();
if (nSuccessors != 0) {
temp = liveIn[node.nthSuccessor(0).getTarget().dfsNum];
for (Uint32 s = 1; s < nSuccessors; s++)
temp |= liveIn[node.nthSuccessor(s).getTarget().dfsNum];
} else
temp.clear();
// Insert the phiNodes contribution.
if (usedByPhiNodes != NULL)
temp |= usedByPhiNodes[n];
// If temp and LiveOut(n) differ then set LiveOut(n) = temp and recalculate the
// new LiveIn(n).
if (currentLiveOut != temp) {
currentLiveOut = temp;
temp -= killed[n]; // FIX: could be optimized with one call to unionDiff !
temp |= currentLiveIn;
if (currentLiveIn != temp) {
currentLiveIn = temp;
changed = true;
}
}
}
} while(changed);
LivenessInfo<RegisterPressure> liveness;
liveness.liveIn = liveIn;
liveness.liveOut = liveOut;
DEBUG_LOG_ONLY(liveness.size = nNodes);
return liveness;
}
#ifdef DEBUG_LOG
template <class RegisterPressure>
void LivenessInfo<RegisterPressure>::printPretty(LogModuleObject log)
{
for (Uint32 n = 0; n < size; n++) {
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("Node N%d:\n\tliveIn = ", n));
liveIn[n].printPretty(log);
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("\tliveOut = "));
liveOut[n].printPretty(log);
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("\n"));
}
}
#endif // DEBUG_LOG
#endif // _LIVENESS_H_

View File

@@ -0,0 +1,40 @@
#! gmake
DEPTH = ../..
MODULE_NAME = RegisterAllocator
include $(DEPTH)/config/config.mk
INCLUDES += \
-I$(DEPTH)/Utilities/General \
-I$(DEPTH)/Utilities/zlib \
-I$(DEPTH)/Runtime/ClassReader \
-I$(DEPTH)/Runtime/NativeMethods \
-I$(DEPTH)/Runtime/System \
-I$(DEPTH)/Runtime/ClassInfo \
-I$(DEPTH)/Runtime/FileReader \
-I$(DEPTH)/Compiler/PrimitiveGraph \
-I$(DEPTH)/Compiler/FrontEnd \
-I$(DEPTH)/Compiler/Optimizer \
-I$(DEPTH)/Compiler/CodeGenerator \
-I$(DEPTH)/Compiler/CodeGenerator/md \
-I$(DEPTH)/Compiler/CodeGenerator/md/$(CPU_ARCH) \
-I$(DEPTH)/Compiler/RegisterAllocator \
-I$(DEPTH)/Driver/StandAloneJava \
-I$(DEPTH)/Debugger \
$(NULL)
CXXSRCS = \
RegisterAllocator.cpp \
RegisterAllocatorTools.cpp \
DominatorGraph.cpp \
VirtualRegister.cpp \
BitSet.cpp \
SparseSet.cpp \
$(NULL)
include $(DEPTH)/config/rules.mk
libs:: $(MODULE)

View File

@@ -0,0 +1,392 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#ifndef _PHI_NODE_REMOVER_H_
#define _PHI_NODE_REMOVER_H_
#include "Fundamentals.h"
#include "Pool.h"
#include "ControlGraph.h"
#include "DominatorGraph.h"
#include "VirtualRegister.h"
#include "RegisterPressure.h"
#include "Liveness.h"
#include "Instruction.h"
#include "InstructionEmitter.h"
#include "SparseSet.h"
#include <string.h>
//------------------------------------------------------------------------------
// RegisterNameNode -
struct RegisterNameNode
{
RegisterNameNode* next;
RegisterName newName;
Uint32 nextPushed;
};
//------------------------------------------------------------------------------
// CopyData -
struct CopyData
{
RegisterName source;
RegisterClassKind classKind;
Uint32 useCount;
bool isLiveOut;
RegisterName sourceNameToUse;
RegisterName temporaryName;
RegisterNameNode* newName;
};
//------------------------------------------------------------------------------
// PhiNodeRemover<RegisterPressure> -
template <class RegisterPressure>
struct PhiNodeRemover
{
// Replace the phi nodes by copy instructions.
static void replacePhiNodes(ControlGraph& controlGraph, VirtualRegisterManager& vrManager, InstructionEmitter& emitter);
};
// Split some of the critical edges and return true if there are still some
// in the graph after that.
//
static bool splitCriticalEdges(ControlGraph& /*cg*/)
{
// FIX: not implemented.
return true;
}
inline void pushName(Pool& pool, RegisterNameNode** stack, SparseSet& pushed, Uint32* nodeListPointer, RegisterName oldName, RegisterName newName)
{
RegisterNameNode& newNode = *new(pool) RegisterNameNode();
if (pushed.test(oldName))
(*stack)->newName = newName;
else {
newNode.newName = newName;
newNode.nextPushed = *nodeListPointer;
*nodeListPointer = oldName;
newNode.next = *stack;
*stack = &newNode;
pushed.set(oldName);
}
}
template <class RegisterPressure>
void PhiNodeRemover<RegisterPressure>::replacePhiNodes(ControlGraph& controlGraph, VirtualRegisterManager& vrManager, InstructionEmitter& emitter)
{
Pool& pool = controlGraph.pool;
ControlNode** nodes = controlGraph.dfsList;
Uint32 nNodes = controlGraph.nNodes;
// Initialize the local variables.
//
// When we insert the copies we will also need to create new VirtualRegisters for
// the insertion of temporaries. The maximum number of temporary register will not
// exceed the number of phiNodes in the primitive graph.
Uint32 nameCount = vrManager.getSize();
Uint32 maxNameCount = nameCount;
for (Uint32 n = 0; n < nNodes; n++)
maxNameCount += nodes[n]->getPhiNodes().length();
// If the CFG contains some critical edges (backward edge which source has more than one
// outgoing edge and destination has more than one incomimg edge) then we need the liveness
// information to be able to insert temporary copies.
RegisterPressure::Set* liveOut = NULL;
if (splitCriticalEdges(controlGraph))
liveOut = Liveness<LowRegisterPressure>::analysis(controlGraph, nameCount).liveOut;
DominatorGraph dGraph(controlGraph);
SparseSet pushed(pool, maxNameCount);
SparseSet destinationList(pool, maxNameCount);
SparseSet workList(pool, maxNameCount);
CopyData* copyStats = new(pool) CopyData[maxNameCount];
memset(copyStats, '\0', maxNameCount*sizeof(CopyData));
struct NodeStack {
Uint32* next;
Uint32* limit;
Uint32 pushedList;
};
// Allocate the node stack and initialize the node stack pointer.
NodeStack* nodeStack = new(pool) NodeStack[nNodes + 1];
NodeStack* nodeStackPtr = nodeStack;
// We start by the begin node.
Uint32 startNode = 0;
Uint32* next = &startNode;
Uint32* limit = &startNode + 1;
while (true) {
if (next == limit) {
// If there are no more node in the sibling, we have to pop the current
// frame from the stack and update the copyStats of the pushed nodes.
//
if (nodeStackPtr == nodeStack)
// We are at the bottom of the stack and there are no more nodes
// to look at. We are done !
break;
--nodeStackPtr;
// We are done with all the children of this node in the dominator tree.
// We need to update the copy information of all the new names pushed
// during the walk over this node.
Uint32 pushedList = nodeStackPtr->pushedList;
while (pushedList != 0) {
Uint32 nextName = copyStats[pushedList].newName->nextPushed;
copyStats[pushedList].newName = copyStats[pushedList].newName->next;
pushedList = nextName;
}
// restore the previous frame.
next = nodeStackPtr->next;
limit = nodeStackPtr->limit;
} else {
Uint32 currentNode = *next++;
Uint32 pushedList = 0;
// Initialize the sets.
pushed.clear();
destinationList.clear();
// STEP1:
// Walk the instruction list and to replace all the instruction uses with their new name.
// If the instruction is a phi node and its defined register is alive at the end of this
// block then we push the defined register into the stack.
//
ControlNode& node = *nodes[currentNode];
RegisterPressure::Set* currentLiveOut = (liveOut != NULL) ? &liveOut[currentNode] : (RegisterPressure::Set*) 0;
InstructionList& phiNodes = node.getPhiNodeInstructions();
for (InstructionList::iterator p = phiNodes.begin(); !phiNodes.done(p); p = phiNodes.advance(p)) {
Instruction& phiNode = phiNodes.get(p);
InstructionUse* useEnd = phiNode.getInstructionUseEnd();
for (InstructionUse* usePtr = phiNode.getInstructionUseBegin(); usePtr < useEnd; usePtr++) {
assert(usePtr->isRegister());
RegisterName name = usePtr->getRegisterName();
if (copyStats[name].newName != NULL && copyStats[name].newName->newName != name)
usePtr->setRegisterName(copyStats[name].newName->newName);
}
if (currentLiveOut != NULL) {
// This is a phi node and we have to push its defined name if it is live
// at the end of the node. We only need to do this if the CFG has critical edges.
assert(phiNode.getInstructionDefineBegin() != phiNode.getInstructionDefineEnd() && phiNode.getInstructionDefineBegin()[0].isRegister());
RegisterName name = phiNode.getInstructionDefineBegin()[0].getRegisterName();
if (currentLiveOut->test(name))
pushName(pool, &(copyStats[name].newName), pushed, &pushedList, name, name);
}
}
InstructionList& instructions = node.getInstructions();
for (InstructionList::iterator i = instructions.begin(); !instructions.done(i); i = instructions.advance(i)) {
Instruction& instruction = instructions.get(i);
InstructionUse* useEnd = instruction.getInstructionUseEnd();
for (InstructionUse* usePtr = instruction.getInstructionUseBegin(); usePtr < useEnd; usePtr++)
if (usePtr->isRegister()) {
RegisterName name = usePtr->getRegisterName();
if (copyStats[name].newName != NULL && copyStats[name].newName->newName != name)
usePtr->setRegisterName(copyStats[name].newName->newName);
}
}
// STEP2:
// Look at this node's successors' phiNodes. We keep track of the number of time
// a VR will be used by another copy instruction and insert each definition into the
// destinationList. This is the only pass over this node's successors as we will
// get all the information we need in the CopyData structures.
//
ControlEdge* successorEdgeEnd = node.getSuccessorsEnd();
for (ControlEdge* successorEdgePtr = node.getSuccessorsBegin(); successorEdgePtr < successorEdgeEnd; successorEdgePtr++) {
Uint32 useIndex = successorEdgePtr->getIndex();
ControlNode& successor = successorEdgePtr->getTarget();
// Look at its phi nodes. The phi nodes are at the top of the instruction list. We exit
// as soon as we find an instruction which is not a phi node
InstructionList& phiNodes = successor.getPhiNodeInstructions();
for (InstructionList::iterator p = phiNodes.begin(); !phiNodes.done(p); p = phiNodes.advance(p)) {
Instruction& phiNode = phiNodes.get(p);
assert((phiNode.getInstructionUseBegin() + useIndex) < phiNode.getInstructionUseEnd());
assert(phiNode.getInstructionDefineBegin() != phiNode.getInstructionDefineEnd());
InstructionUse& source = phiNode.getInstructionUseBegin()[useIndex];
InstructionDefine& destination = phiNode.getInstructionDefineBegin()[0];
assert(source.isRegister() && destination.isRegister());
RegisterName sourceName = source.getRegisterName();
RegisterName destinationName = destination.getRegisterName();
// Get the correct name for the source.
if (copyStats[sourceName].newName != NULL)
sourceName = copyStats[sourceName].newName->newName;
// Update the CopyData structures.
if ((sourceName != rnInvalid) && (sourceName != destinationName)) {
copyStats[destinationName].source = sourceName;
copyStats[destinationName].classKind = destination.getRegisterClass();
copyStats[destinationName].isLiveOut = (currentLiveOut != NULL) ? currentLiveOut->test(destinationName) : false;
copyStats[destinationName].sourceNameToUse = destinationName;
copyStats[sourceName].sourceNameToUse = sourceName;
copyStats[sourceName].useCount++;
destinationList.set(destinationName);
}
}
}
// STEP3:
// Insert into the worklist only the destination registers that will be not used in
// another copy instruction in this block.
//
assert(workList.getSize() == 0);
for (SparseSet::iterator d = destinationList.begin(); !destinationList.done(d); d = destinationList.advance(d)) {
Uint32 dest = destinationList.get(d);
if (copyStats[dest].useCount == 0)
workList.set(dest);
}
// STEP4:
// Insert the copy instructions.
//
Uint32 destinationListSize = destinationList.getSize();
InstructionList::iterator endOfTheNode = instructions.end();
// Find the right place to insert the copy instructions.
if (destinationListSize != 0)
while (instructions.get(endOfTheNode).getFlags() & ifControl)
endOfTheNode = instructions.retreat(endOfTheNode);
while (destinationListSize != 0) {
while(workList.getSize()) {
RegisterName destinationName = RegisterName(workList.getOne());
RegisterName sourceName = copyStats[destinationName].source;
workList.clear(destinationName);
if (copyStats[destinationName].isLiveOut && !copyStats[destinationName].temporaryName) {
// Lost copy problem.
copyStats[destinationName].isLiveOut = false;
RegisterName sourceName = destinationName;
RegisterClassKind classKind = copyStats[sourceName].classKind;
RegisterName destinationName = getName(vrManager.newVirtualRegister(classKind));
assert(destinationName < maxNameCount);
copyStats[destinationName].classKind = classKind;
copyStats[sourceName].useCount = 0;
// We need to insert a copy to a temporary register to keep the
// source register valid at the end of the node defining it.
// This copy will be inserted right after the phi node defining it.
RegisterName from = copyStats[sourceName].sourceNameToUse;
Instruction* definingPhiNode = vrManager.getVirtualRegister(from).getDefiningInstruction();
assert(definingPhiNode && (definingPhiNode->getFlags() & ifPhiNode) != 0);
RegisterID fromID = buildRegisterID(from, classKind);
RegisterID toID = buildRegisterID(destinationName, classKind);
Instruction& copy = emitter.newCopy(*definingPhiNode->getPrimitive(), fromID, toID);
vrManager.getVirtualRegister(destinationName).setDefiningInstruction(copy);
definingPhiNode->getPrimitive()->getContainer()->getInstructions().addFirst(copy);
copyStats[sourceName].temporaryName = destinationName;
copyStats[sourceName].sourceNameToUse = destinationName;
pushName(pool, &(copyStats[sourceName].newName), pushed, &pushedList, sourceName, destinationName);
}
// Insert the copy instruction at the end of the current node.
RegisterName from = copyStats[sourceName].sourceNameToUse;
RegisterClassKind classKind = copyStats[destinationName].classKind;
RegisterID fromID = buildRegisterID(from, classKind);
RegisterID toID = buildRegisterID(destinationName, classKind);
Instruction& copy = emitter.newCopy(*vrManager.getVirtualRegister(from).getDefiningInstruction()->getPrimitive(), fromID, toID);
instructions.insertAfter(copy, endOfTheNode);
endOfTheNode = instructions.advance(endOfTheNode);
copyStats[sourceName].useCount = 0;
if (destinationList.test(sourceName) && copyStats[sourceName].isLiveOut)
pushName(pool, &(copyStats[sourceName].newName), pushed, &pushedList, sourceName, destinationName);
copyStats[sourceName].isLiveOut = false;
copyStats[sourceName].sourceNameToUse = destinationName;
if (destinationList.test(sourceName))
workList.set(sourceName);
destinationList.clear(destinationName);
}
destinationListSize = destinationList.getSize();
if (destinationListSize != 0) {
RegisterName sourceName = RegisterName(destinationList.getOne());
RegisterName destinationName;
if (!copyStats[sourceName].temporaryName) {
// Cycle problem.
RegisterClassKind classKind = copyStats[sourceName].classKind;
destinationName = getName(vrManager.newVirtualRegister(classKind));
assert(destinationName < maxNameCount);
copyStats[destinationName].classKind = classKind;
copyStats[sourceName].temporaryName = destinationName;
// Insert the copy instruction at the end of the current node.
RegisterName from = copyStats[sourceName].sourceNameToUse;
RegisterID fromID = buildRegisterID(from, classKind);
RegisterID toID = buildRegisterID(destinationName, classKind);
Instruction& copy = emitter.newCopy(*vrManager.getVirtualRegister(from).getDefiningInstruction()->getPrimitive(), fromID, toID);
vrManager.getVirtualRegister(destinationName).setDefiningInstruction(copy);
instructions.insertAfter(copy, endOfTheNode);
endOfTheNode = instructions.advance(endOfTheNode);
} else
destinationName = copyStats[sourceName].temporaryName;
copyStats[sourceName].useCount = 0;
copyStats[sourceName].isLiveOut = false;
copyStats[sourceName].sourceNameToUse = destinationName;
pushName(pool, &(copyStats[sourceName].newName), pushed, &pushedList, sourceName, destinationName);
workList.set(sourceName);
}
}
nodeStackPtr->pushedList = pushedList;
nodeStackPtr->next = next;
nodeStackPtr->limit = limit;
++nodeStackPtr;
next = dGraph.getSuccessorsBegin(currentNode);
limit = dGraph.getSuccessorsEnd(currentNode);
}
}
}
#endif // _PHI_NODE_REMOVER_H_

View File

@@ -0,0 +1,155 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#include "Fundamentals.h"
#include "LogModule.h"
#include "RegisterAllocator.h"
#include "RegisterPressure.h"
#include "RegisterAllocatorTools.h"
#include "PhiNodeRemover.h"
#include "LiveRange.h"
#include "Liveness.h"
#include "InterferenceGraph.h"
#include "LiveRangeGraph.h"
#include "Coalescing.h"
#include "Spilling.h"
#include "Coloring.h"
#include "Splits.h"
class Pool;
class ControlGraph;
class VirtualRegisterManager;
class InstructionEmitter;
UT_DEFINE_LOG_MODULE(RegAlloc);
void RegisterAllocator::allocateRegisters(Pool& pool, ControlGraph& controlGraph, VirtualRegisterManager& vrManager, InstructionEmitter& emitter)
{
// Insert the phi node instructions. We want to do this to have a single defined register per instruction.
// If we keep the PhiNode (as a DataNode) and a PhiNode is of DoubleWordKind then we have to execute
// some special code for the high word annotation.
//
RegisterAllocatorTools::insertPhiNodeInstructions(controlGraph, emitter);
// Perform some tests on the instruction graph.
//
DEBUG_ONLY(RegisterAllocatorTools::testTheInstructionGraph(controlGraph, vrManager));
// Replace the phi node instructions by their equivalent copy instructions.
//
PhiNodeRemover<LowRegisterPressure>::replacePhiNodes(controlGraph, vrManager, emitter);
// Do the register allocation.
//
RegisterAllocator registerAllocator(pool, controlGraph, vrManager, emitter);
registerAllocator.doGraphColoring();
}
void RegisterAllocator::doGraphColoring()
{
// Initialize the liverange map.
//
initLiveRanges();
// Build the live ranges. We do this to compress the number of RegisterNames
// used in the insterference graph.
//
LiveRange<LowRegisterPressure>::build(*this);
// Remove unnecessary copies.
//
RegisterAllocatorTools::removeUnnecessaryCopies(*this);
for (Uint8 loop = 0; loop < 10; loop++) {
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("********* RegisterAllocator loop %d *********\n", loop));
while(true) {
// Build the interference graph.
//
iGraph.build();
// Coalesce the copy instructions.
//
if (!Coalescing<LowRegisterPressure>::coalesce(*this))
break;
}
// Print the interference graph.
//
DEBUG_LOG_ONLY(iGraph.printPretty(UT_LOG_MODULE(RegAlloc)));
// Calculate the spill costs.
//
Spilling<LowRegisterPressure>::calculateSpillCosts(*this);
DEBUG_LOG_ONLY(RegisterAllocatorTools::printSpillCosts(*this));
// Calculate the split costs.
//
Splits<LowRegisterPressure>::calculateSplitCosts(*this);
DEBUG_LOG_ONLY(RegisterAllocatorTools::printSplitCosts(*this));
// Build the live range graph.
//
lGraph.build();
DEBUG_LOG_ONLY(lGraph.printPretty(UT_LOG_MODULE(RegAlloc)));
// Color the graph. If it succeeds then we're done with the
// register allocation.
//
if (Coloring<LowRegisterPressure>::color(*this)) {
// Write the final colors in the instruction graph.
//
Coloring<LowRegisterPressure>::finalColoring(*this);
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("********** RegisterAllocator done **********\n"));
DEBUG_LOG_ONLY(RegisterAllocatorTools::printInstructions(*this));
return;
}
// We need to spill some registers.
//
Spilling<LowRegisterPressure>::insertSpillCode(*this);
// Insert the split instructions.
//
Splits<LowRegisterPressure>::insertSplitCode(*this);
// Update the live ranges.
//
// FIX
}
#ifdef DEBUG_LOG
RegisterAllocatorTools::updateInstructionGraph(*this);
RegisterAllocatorTools::printInstructions(*this);
#endif
fprintf(stderr, "!!! Coloring failed after 10 loops !!!\n");
abort();
}
void RegisterAllocator::initLiveRanges()
{
Uint32 count = this->nameCount;
RegisterName* name2range = new(pool) RegisterName[nameCount];
for (RegisterName r = RegisterName(1); r < count; r = RegisterName(r + 1))
name2range[r] = r;
this->name2range = name2range;
rangeCount = count;
}

View File

@@ -0,0 +1,88 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#ifndef _REGISTER_ALLOCATOR_H_
#define _REGISTER_ALLOCATOR_H_
class Pool;
class ControlGraph;
class InstructionEmitter;
struct SpillCost;
struct SplitCost;
#include "Liveness.h"
#include "VirtualRegister.h"
#include "RegisterPressure.h" // This should included by Backend.cpp
#include "InterferenceGraph.h"
#include "LiveRangeGraph.h"
//template <class RegisterPressure>
class RegisterAllocator
{
public:
Pool& pool; //
ControlGraph& controlGraph; //
VirtualRegisterManager& vrManager; //
InstructionEmitter& emitter; //
RegisterName* name2range; //
RegisterName* color; //
SpillCost* spillCost; //
SparseSet* willSpill; //
SplitCost* splitCost; //
NameLinkedList** splitAround; //
InterferenceGraph<LowRegisterPressure> iGraph; //
LiveRangeGraph<LowRegisterPressure> lGraph; //
LivenessInfo<LowRegisterPressure> liveness; //
Uint32 nameCount; //
Uint32 rangeCount; //
bool splitFound; //
private:
//
//
void doGraphColoring();
public:
//
//
inline RegisterAllocator(Pool& pool, ControlGraph& controlGraph, VirtualRegisterManager& vrManager, InstructionEmitter& emitter);
//
//
bool canInterfere(RegisterName /*name1*/, RegisterName /*name2*/) const {return true;}
//
//
void initLiveRanges();
//
//
static void allocateRegisters(Pool& pool, ControlGraph& controlGraph, VirtualRegisterManager& vrManager, InstructionEmitter& emitter);
};
//
//
inline RegisterAllocator::RegisterAllocator(Pool& pool, ControlGraph& controlGraph, VirtualRegisterManager& vrManager, InstructionEmitter& emitter)
: pool(pool), controlGraph(controlGraph), vrManager(vrManager), emitter(emitter), iGraph(*this), lGraph(*this), nameCount(vrManager.getSize()) {}
#endif // _REGISTER_ALLOCATOR_H_

View File

@@ -0,0 +1,355 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#include "Fundamentals.h"
#include "LogModule.h"
#include "RegisterAllocatorTools.h"
#include "Pool.h"
#include "ControlGraph.h"
#include "ControlNodes.h"
#include "Primitives.h"
#include "InstructionEmitter.h"
#include "Instruction.h"
#include "RegisterAllocator.h"
#include "Spilling.h"
#include "Splits.h"
#include "BitSet.h"
UT_EXTERN_LOG_MODULE(RegAlloc);
#ifdef DEBUG
void RegisterAllocatorTools::testTheInstructionGraph(ControlGraph& controlGraph, VirtualRegisterManager& vrManager)
{
// Test the declared VirtualRegisters. The register allocator tries to condense the register universe.
// Any gap in the VirtualRegister names will be a loss of efficiency !!!!
Uint32 nameCount = vrManager.getSize();
BitSet registerSeen(controlGraph.pool, nameCount);
ControlNode** nodes = controlGraph.dfsList;
Uint32 nNodes = controlGraph.nNodes;
for (Uint32 n = 0; n < nNodes; n++) {
InstructionList& instructions = nodes[n]->getInstructions();
for (InstructionList::iterator i = instructions.begin(); !instructions.done(i); i = instructions.advance(i)) {
Instruction& instruction = instructions.get(i);
InstructionUse* useEnd = instruction.getInstructionUseEnd();
for (InstructionUse* usePtr = instruction.getInstructionUseBegin(); usePtr < useEnd; usePtr++)
if (usePtr->isRegister())
registerSeen.set(usePtr->getRegisterName());
InstructionDefine* defineEnd = instruction.getInstructionDefineEnd();
for (InstructionDefine* definePtr = instruction.getInstructionDefineBegin(); definePtr < defineEnd; definePtr++)
if (definePtr->isRegister())
registerSeen.set(definePtr->getRegisterName());
}
InstructionList& phiNodes = nodes[n]->getPhiNodeInstructions();
for (InstructionList::iterator p = phiNodes.begin(); !phiNodes.done(p); p = phiNodes.advance(p)) {
Instruction& instruction = phiNodes.get(p);
InstructionUse* useEnd = instruction.getInstructionUseEnd();
for (InstructionUse* usePtr = instruction.getInstructionUseBegin(); usePtr < useEnd; usePtr++)
if (usePtr->isRegister())
registerSeen.set(usePtr->getRegisterName());
InstructionDefine* defineEnd = instruction.getInstructionDefineEnd();
for (InstructionDefine* definePtr = instruction.getInstructionDefineBegin(); definePtr < defineEnd; definePtr++)
if (definePtr->isRegister())
registerSeen.set(definePtr->getRegisterName());
}
}
bool renameRegisters = false;
for (BitSet::iterator i = registerSeen.nextZero(0); !registerSeen.done(i); i = registerSeen.nextZero(i)) {
renameRegisters = true;
fprintf(stderr,
"WARNING: The VirtualRegister vr%d has been allocated during CodeGeneration but\n"
" is never used nor defined by any instruction in the instruction graph\n"
" PLEASE FIX \n",
i);
}
if (renameRegisters) {
Instruction** definingInstruction = new Instruction*[nameCount];
memset(definingInstruction, '\0', nameCount * sizeof(Instruction*));
RegisterName* newName = new RegisterName[nameCount];
memset(newName, '\0', nameCount * sizeof(RegisterName));
RegisterName nextName = RegisterName(1);
for (Uint32 n = 0; n < nNodes; n++) {
InstructionList& instructions = nodes[n]->getInstructions();
for (InstructionList::iterator i = instructions.begin(); !instructions.done(i); i = instructions.advance(i)) {
Instruction& instruction = instructions.get(i);
InstructionUse* useEnd = instruction.getInstructionUseEnd();
for (InstructionUse* usePtr = instruction.getInstructionUseBegin(); usePtr < useEnd; usePtr++)
if (usePtr->isRegister()) {
RegisterName name = usePtr->getRegisterName();
if (newName[name] == rnInvalid) {
newName[name] = nextName;
definingInstruction[nextName] = vrManager.getVirtualRegister(name).getDefiningInstruction();
nextName = RegisterName(nextName + 1);
}
usePtr->setRegisterName(newName[name]);
}
InstructionDefine* defineEnd = instruction.getInstructionDefineEnd();
for (InstructionDefine* definePtr = instruction.getInstructionDefineBegin(); definePtr < defineEnd; definePtr++)
if (definePtr->isRegister()) {
RegisterName name = definePtr->getRegisterName();
if (newName[name] == rnInvalid) {
newName[name] = nextName;
definingInstruction[nextName] = vrManager.getVirtualRegister(name).getDefiningInstruction();
nextName = RegisterName(nextName + 1);
}
definePtr->setRegisterName(newName[name]);
}
}
InstructionList& phiNodes = nodes[n]->getPhiNodeInstructions();
for (InstructionList::iterator p = phiNodes.begin(); !phiNodes.done(p); p = phiNodes.advance(p)) {
Instruction& instruction = phiNodes.get(p);
InstructionUse* useEnd = instruction.getInstructionUseEnd();
for (InstructionUse* usePtr = instruction.getInstructionUseBegin(); usePtr < useEnd; usePtr++)
if (usePtr->isRegister()) {
RegisterName name = usePtr->getRegisterName();
if (newName[name] == rnInvalid) {
newName[name] = nextName;
definingInstruction[nextName] = vrManager.getVirtualRegister(name).getDefiningInstruction();
nextName = RegisterName(nextName + 1);
}
usePtr->setRegisterName(newName[name]);
}
InstructionDefine* defineEnd = instruction.getInstructionDefineEnd();
for (InstructionDefine* definePtr = instruction.getInstructionDefineBegin(); definePtr < defineEnd; definePtr++)
if (definePtr->isRegister()) {
RegisterName name = definePtr->getRegisterName();
if (newName[name] == rnInvalid) {
newName[name] = nextName;
definingInstruction[nextName] = vrManager.getVirtualRegister(name).getDefiningInstruction();
nextName = RegisterName(nextName + 1);
}
definePtr->setRegisterName(newName[name]);
}
}
}
vrManager.setSize(nextName);
for (RegisterName r = RegisterName(1); r < nextName; r = RegisterName(r + 1))
vrManager.getVirtualRegister(r).definingInstruction = definingInstruction[r];
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("RegisterMap:\n"));
for (Uint32 i = 1; i < nameCount; i++)
if (newName[i] != 0)
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("\tvr%d becomes vr%d.\n", i, newName[i]));
else
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("\tvr%d is dead.\n", i));
delete newName;
delete definingInstruction;
}
}
#endif // DEBUG
void RegisterAllocatorTools::removeUnnecessaryCopies(RegisterAllocator& registerAllocator)
{
ControlGraph& controlGraph = registerAllocator.controlGraph;
ControlNode** nodes = controlGraph.dfsList;
Uint32 nNodes = controlGraph.nNodes;
RegisterName* name2range = registerAllocator.name2range;
for (Uint32 n = 0; n < nNodes; n++) {
InstructionList& instructions = nodes[n]->getInstructions();
for (InstructionList::iterator i = instructions.begin(); !instructions.done(i);) {
Instruction& instruction = instructions.get(i);
i = instructions.advance(i);
if (instruction.getFlags() & ifCopy) {
assert(instruction.getInstructionUseBegin() != instruction.getInstructionUseEnd() && instruction.getInstructionUseBegin()[0].isRegister());
assert(instruction.getInstructionDefineBegin() != instruction.getInstructionDefineEnd() && instruction.getInstructionDefineBegin()[0].isRegister());
RegisterName source = name2range[instruction.getInstructionUseBegin()[0].getRegisterName()];
RegisterName destination = name2range[instruction.getInstructionDefineBegin()[0].getRegisterName()];
if (source == destination)
instruction.remove();
}
}
}
}
void RegisterAllocatorTools::updateInstructionGraph(RegisterAllocator& registerAllocator)
{
ControlGraph& controlGraph = registerAllocator.controlGraph;
ControlNode** nodes = controlGraph.dfsList;
Uint32 nNodes = controlGraph.nNodes;
RegisterName* name2range = registerAllocator.name2range;
for (Uint32 n = 0; n < nNodes; n++) {
InstructionList& instructions = nodes[n]->getInstructions();
for (InstructionList::iterator i = instructions.begin(); !instructions.done(i); i = instructions.advance(i)) {
Instruction& instruction = instructions.get(i);
InstructionUse* useEnd = instruction.getInstructionUseEnd();
for (InstructionUse* usePtr = instruction.getInstructionUseBegin(); usePtr < useEnd; usePtr++)
if (usePtr->isRegister())
usePtr->setRegisterName(name2range[usePtr->getRegisterName()]);
InstructionDefine* defineEnd = instruction.getInstructionDefineEnd();
for (InstructionDefine* definePtr = instruction.getInstructionDefineBegin(); definePtr < defineEnd; definePtr++)
if (definePtr->isRegister())
definePtr->setRegisterName(name2range[definePtr->getRegisterName()]);
}
InstructionList& phiNodes = nodes[n]->getPhiNodeInstructions();
for (InstructionList::iterator p = phiNodes.begin(); !phiNodes.done(p); p = phiNodes.advance(p)) {
Instruction& instruction = phiNodes.get(p);
InstructionUse* useEnd = instruction.getInstructionUseEnd();
for (InstructionUse* usePtr = instruction.getInstructionUseBegin(); usePtr < useEnd; usePtr++)
if (usePtr->isRegister())
usePtr->setRegisterName(name2range[usePtr->getRegisterName()]);
InstructionDefine* defineEnd = instruction.getInstructionDefineEnd();
for (InstructionDefine* definePtr = instruction.getInstructionDefineBegin(); definePtr < defineEnd; definePtr++)
if (definePtr->isRegister())
definePtr->setRegisterName(name2range[definePtr->getRegisterName()]);
}
}
}
void RegisterAllocatorTools::insertPhiNodeInstructions(ControlGraph& controlGraph, InstructionEmitter& emitter)
{
Pool& pool = controlGraph.pool;
ControlNode** nodes = controlGraph.dfsList;
Uint32 nNodes = controlGraph.nNodes;
for (Uint32 n = 0; n < nNodes; n++) {
ControlNode& node = *nodes[n];
DoublyLinkedList<PhiNode>& phiNodes = node.getPhiNodes();
if (!phiNodes.empty()) {
// Set the index of the incoming edges.
Uint32 index = 0;
const DoublyLinkedList<ControlEdge>& predecessors = node.getPredecessors();
for (DoublyLinkedList<ControlEdge>::iterator p = predecessors.begin(); !predecessors.done(p); p = predecessors.advance(p))
predecessors.get(p).setIndex(index++);
// Insert the phi node instruction in the instruction list.
for (DoublyLinkedList<PhiNode>::iterator i = phiNodes.begin(); !phiNodes.done(i); i = phiNodes.advance(i)) {
PhiNode& phiNode = phiNodes.get(i);
ValueKind kind = phiNode.getKind();
if (!isStorableKind(kind))
continue;
RegisterClassKind classKind = rckGeneral; // FIX: get class kind from phi node kind.
Uint32 nInputs = phiNode.nInputs();
PhiNodeInstruction& phiNodeInstruction = *new(pool) PhiNodeInstruction(&phiNode, pool, nInputs);
emitter.defineProducer(phiNode, phiNodeInstruction, 0, classKind, drLow);
for (Uint32 whichInput = 0; whichInput < nInputs; whichInput++)
emitter.useProducer(phiNode.nthInputVariable(whichInput), phiNodeInstruction, whichInput, classKind, drLow);
node.addPhiNodeInstruction(phiNodeInstruction);
if (isDoublewordKind(kind)) {
PhiNodeInstruction& phiNodeInstruction = *new(pool) PhiNodeInstruction(&phiNode, pool, nInputs);
emitter.defineProducer(phiNode, phiNodeInstruction, 0, classKind, drHigh);
for (Uint32 whichInput = 0; whichInput < nInputs; whichInput++)
emitter.useProducer(phiNode.nthInputVariable(whichInput), phiNodeInstruction, whichInput, classKind, drHigh);
node.addPhiNodeInstruction(phiNodeInstruction);
}
}
}
}
}
#ifdef DEBUG_LOG
void RegisterAllocatorTools::printSpillCosts(RegisterAllocator& registerAllocator)
{
LogModuleObject log = UT_LOG_MODULE(RegAlloc);
Uint32 rangeCount = registerAllocator.rangeCount;
SpillCost* cost = registerAllocator.spillCost;
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("Spill costs:\n"));
for (Uint32 i = 1; i < rangeCount; i++) {
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("\trange %d : ", i));
if (cost[i].infinite)
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("infinite\n"));
else
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("%f\n", cost[i].cost));
}
}
void RegisterAllocatorTools::printSplitCosts(RegisterAllocator& registerAllocator)
{
LogModuleObject log = UT_LOG_MODULE(RegAlloc);
Uint32 rangeCount = registerAllocator.rangeCount;
SplitCost* cost = registerAllocator.splitCost;
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("Split costs:\n"));
for (Uint32 i = 1; i < rangeCount; i++) {
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("\trange %d : loads = %f stores = %f\n", i, cost[i].loads, cost[i].stores));
}
}
void RegisterAllocatorTools::printInstructions(RegisterAllocator& registerAllocator)
{
LogModuleObject log = UT_LOG_MODULE(RegAlloc);
ControlNode** nodes = registerAllocator.controlGraph.dfsList;
Uint32 nNodes = registerAllocator.controlGraph.nNodes;
for (Uint32 n = 0; n < nNodes; n++) {
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("N%d:\n", n));
InstructionList& phiNodes = nodes[n]->getPhiNodeInstructions();
InstructionList& instructions = nodes[n]->getInstructions();
if (!phiNodes.empty()) {
UT_OBJECTLOG(log, PR_LOG_ALWAYS, (" PhiNodes:\n", n));
for(InstructionList::iterator i = phiNodes.begin(); !phiNodes.done(i); i = phiNodes.advance(i)) {
phiNodes.get(i).printPretty(log);
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("\n"));
}
if (!instructions.empty())
UT_OBJECTLOG(log, PR_LOG_ALWAYS, (" Instructions:\n", n));
}
for(InstructionList::iterator i = instructions.begin(); !instructions.done(i); i = instructions.advance(i)) {
instructions.get(i).printPretty(log);
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("\n"));
}
}
}
#endif // DEBUG_LOG

View File

@@ -0,0 +1,117 @@
// -*- mode:C++; tab-width:4; truncate-lines:t -*-
//
// CONFIDENTIAL AND PROPRIETARY SOURCE CODE OF
// NETSCAPE COMMUNICATIONS CORPORATION
// Copyright © 1996, 1997 Netscape Communications Corporation. All Rights
// Reserved. Use of this Source Code is subject to the terms of the
// applicable license agreement from Netscape Communications Corporation.
// The copyright notice(s) in this Source Code does not indicate actual or
// intended publication of this Source Code.
//
// $Id: RegisterAllocatorTools.h,v 1.1.2.1 1999-03-02 16:12:05 fur%netscape.com Exp $
//
#ifndef _REGISTER_ALLOCATOR_TOOLS_H_
#define _REGISTER_ALLOCATOR_TOOLS_H_
#include "LogModule.h"
#include "RegisterTypes.h"
#include <string.h>
class RegisterAllocator;
class ControlGraph;
class InstructionEmitter;
class VirtualRegisterManager;
struct RegisterAllocatorTools
{
//
//
static void insertPhiNodeInstructions(ControlGraph& controlGraph, InstructionEmitter& emitter);
//
//
static void updateInstructionGraph(RegisterAllocator& registerAllocator);
//
//
static void removeUnnecessaryCopies(RegisterAllocator& registerAllocator);
#ifdef DEBUG
//
//
static void testTheInstructionGraph(ControlGraph& controlGraph, VirtualRegisterManager& vrManager);
#endif // DEBUG
#ifdef DEBUG_LOG
//
//
static void printInstructions(RegisterAllocator& registerAllocator);
//
//
static void printSpillCosts(RegisterAllocator& registerAllocator);
//
//
static void printSplitCosts(RegisterAllocator& registerAllocator);
#endif // DEBUG_LOG
};
//
// FIX: this should go in a class (LookupTable ?)
//
inline RegisterName findRoot(RegisterName name, RegisterName* table)
{
RegisterName* stack = table;
RegisterName* stackPtr = stack;
RegisterName newName;
while((newName = table[name]) != name) {
*--stackPtr = name;
name = newName;
}
while (stackPtr != stack)
table[*stackPtr++] = name;
return name;
}
inline void init(RegisterName* table, Uint32 nameCount)
{
for (RegisterName r = RegisterName(0); r < nameCount; r = RegisterName(r + 1))
table[r] = r;
}
inline Uint32 compress(RegisterName* name2range, RegisterName* table, Uint32 nameCount, Uint32 tableSize)
{
RegisterName* liveRange = new RegisterName[tableSize];
memset(liveRange, '\0', tableSize * sizeof(RegisterName));
// Update the lookup table.
for (RegisterName r = RegisterName(1); r < tableSize; r = RegisterName(r + 1))
findRoot(r, table);
// Count the liveranges.
Uint32 liveRangeCount = 1;
for (RegisterName s = RegisterName(1); s < tableSize; s = RegisterName(s + 1))
if (table[s] == s)
liveRange[s] = RegisterName(liveRangeCount++);
for (RegisterName t = RegisterName(1); t < nameCount; t = RegisterName(t + 1))
name2range[t] = liveRange[table[name2range[t]]];
return liveRangeCount;
}
inline double doLog10(Uint32 power)
{
double log = 1.0;
while (power--)
log *= 10.0;
return log;
}
#endif // _REGISTER_ALLOCATOR_TOOLS_H_

View File

@@ -0,0 +1,38 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#ifndef _REGISTER_ASSIGNER_H_
#define _REGISTER_ASSIGNER_H_
#include "Fundamentals.h"
#include "VirtualRegister.h"
class FastBitMatrix;
class RegisterAssigner
{
protected:
VirtualRegisterManager& vRegManager;
public:
RegisterAssigner(VirtualRegisterManager& vrMan) : vRegManager(vrMan) {}
virtual bool assignRegisters(FastBitMatrix& interferenceMatrix) = 0;
};
#endif /* _REGISTER_ASSIGNER_H_ */

View File

@@ -0,0 +1,25 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#ifndef _REGISTER_CLASS_H_
#define _REGISTER_CLASS_H_
#include "Fundamentals.h"
#include "RegisterTypes.h"
#endif // _REGISTER_CLASS_H_

View File

@@ -0,0 +1,37 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#ifndef _REGISTER_PRESSURE_H_
#define _REGISTER_PRESSURE_H_
#include "BitSet.h"
#include "HashSet.h"
struct LowRegisterPressure
{
typedef BitSet Set;
static const bool setIsOrdered = true;
};
struct HighRegisterPressure
{
typedef HashSet Set;
static const bool setIsOrdered = false;
};
#endif // _REGISTER_PRESSURE_H_

View File

@@ -0,0 +1,104 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#ifndef _REGISTER_TYPES_H_
#define _REGISTER_TYPES_H_
#include "Fundamentals.h"
//------------------------------------------------------------------------------
// RegisterName -
//
enum RegisterName {
rnInvalid = 0,
};
//------------------------------------------------------------------------------
// RegisterClassKind -
//
enum RegisterClassKind {
rckInvalid = 0,
rckGeneral,
rckStackSlot,
nRegisterClassKind
};
//------------------------------------------------------------------------------
// RegisterID -
//
enum RegisterID {
invalidID = 0
};
//------------------------------------------------------------------------------
// RegisterKind -
//
enum RegisterKind {
rkCallerSave = 0,
rkCalleeSave,
};
struct NameLinkedList {
RegisterName name;
NameLinkedList* next;
};
#ifdef DEBUG
const registerNameMask = 0x03ffffff;
const coloredRegisterMask = 0x04000000;
const machineRegisterMask = 0x08000000;
const registerClassMask = 0xf0000000;
const registerNameShift = 0;
const coloredRegisterShift = 26;
const machineRegisterShift = 27;
const registerClassShift = 28;
#else // DEBUG
const registerNameMask = 0x0fffffff;
const registerClassMask = 0xf0000000;
const registerNameShift = 0;
const registerClassShift = 28;
#endif // DEBUG
inline RegisterClassKind getClass(RegisterID registerID) {return RegisterClassKind((registerID & registerClassMask) >> registerClassShift);}
inline RegisterName getName(RegisterID registerID) {return RegisterName((registerID & registerNameMask) >> registerNameShift);}
inline void setClass(RegisterID& registerID, RegisterClassKind classKind) {registerID = RegisterID((registerID & ~registerClassMask) | ((classKind << registerClassShift) & registerClassMask));}
inline void setName(RegisterID& registerID, RegisterName name) {assert((name & ~registerNameMask) == 0); registerID = RegisterID((registerID & ~registerNameMask) | ((name << registerNameShift) & registerNameMask));}
inline RegisterID buildRegisterID(RegisterName name, RegisterClassKind classKind) {return RegisterID(((classKind << registerClassShift) & registerClassMask) | ((name << registerNameShift) & registerNameMask));}
#ifdef DEBUG
inline bool isMachineRegister(RegisterID rid) {return (rid & machineRegisterMask) != 0;}
inline void setMachineRegister(RegisterID& rid) {rid = RegisterID(rid | machineRegisterMask);}
inline bool isColoredRegister(RegisterID rid) {return (rid & coloredRegisterMask) != 0;}
inline void setColoredRegister(RegisterID& rid) {rid = RegisterID(rid | coloredRegisterMask);}
#endif // DEBUG
#endif // _REGISTER_TYPES_H_

View File

@@ -0,0 +1,32 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#include "Fundamentals.h"
#include "SSATools.h"
#include "ControlGraph.h"
#include "VirtualRegister.h"
#include "Liveness.h"
void replacePhiNodes(ControlGraph& controlGraph, VirtualRegisterManager& vrManager)
{
if (!controlGraph.hasBackEdges)
return;
Liveness liveness(controlGraph.pool);
liveness.buildLivenessAnalysis(controlGraph, vrManager);
}

View File

@@ -0,0 +1,29 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#ifndef _SSA_TOOLS_H_
#define _SSA_TOOLS_H_
#include "Fundamentals.h"
class ControlGraph;
class VirtualRegisterManager;
extern void replacePhiNodes(ControlGraph& controlGraph, VirtualRegisterManager& vrManager);
#endif // _SSA_TOOLS_H_

View File

@@ -0,0 +1,37 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#include "Fundamentals.h"
#include "SparseSet.h"
#include "BitSet.h"
#include "Pool.h"
#ifdef DEBUG_LOG
// Print the set.
//
void SparseSet::printPretty(LogModuleObject log)
{
Pool pool;
BitSet set(pool, universeSize);
for (Uint32 i = 0; i < count; i++)
set.set(node[i].element);
set.printPretty(log);
}
#endif // DEBUG_LOG

View File

@@ -0,0 +1,168 @@
// -*- mode:C++; tab-width:4; truncate-lines:t -*-
//
// CONFIDENTIAL AND PROPRIETARY SOURCE CODE OF
// NETSCAPE COMMUNICATIONS CORPORATION
// Copyright © 1996, 1997 Netscape Communications Corporation. All Rights
// Reserved. Use of this Source Code is subject to the terms of the
// applicable license agreement from Netscape Communications Corporation.
// The copyright notice(s) in this Source Code does not indicate actual or
// intended publication of this Source Code.
//
// $Id: SparseSet.h,v 1.1.2.1 1999-03-02 16:12:07 fur%netscape.com Exp $
//
#ifndef _SPARSE_SET_H_
#define _SPARSE_SET_H_
#include "Fundamentals.h"
#include "Pool.h"
#include "LogModule.h"
#include "BitSet.h"
class SparseSet
{
private:
struct Node {
Uint32 element;
Uint32 stackIndex;
};
Node* node;
Uint32 count;
Uint32 universeSize;
private:
// No copy constructor.
SparseSet(const SparseSet&);
// Check if the given set's universe is of the same size than this universe.
void checkUniverseCompatibility(const SparseSet& set) const {assert(set.universeSize == universeSize);}
// Check if pos is valid for this set's universe.
void checkMember(Int32 pos) const {assert(pos >=0 && Uint32(pos) < universeSize);}
public:
SparseSet(Pool& pool, Uint32 universeSize) : universeSize(universeSize) {node = new(pool) Node[universeSize]; clear();}
// Clear the sparse set.
void clear() {count = 0;}
// Clear the element at index.
inline void clear(Uint32 index);
// Set the element at index.
inline void set(Uint32 index);
// Return true if the element at index is set.
inline bool test(Uint32 index) const;
// Union with the given sparse set.
inline void or(const SparseSet& set);
// Intersection with the given sparse set.
inline void and(const SparseSet& set);
// Difference with the given sparse set.
inline void difference(const SparseSet& set);
// Copy set.
inline SparseSet& operator = (const SparseSet& set);
inline SparseSet& operator = (const BitSet& set);
// Return true if the sparse sets are identical.
friend bool operator == (const SparseSet& set1, const SparseSet& set2);
// Return true if the sparse sets are different.
friend bool operator != (const SparseSet& set1, const SparseSet& set2);
// Logical operators.
SparseSet& operator |= (const SparseSet& set) {or(set); return *this;}
SparseSet& operator &= (const SparseSet& set) {and(set); return *this;}
SparseSet& operator -= (const SparseSet& set) {difference(set); return *this;}
// Iterator to conform with the set API.
typedef Int32 iterator;
// Return the iterator for the first element of this set.
iterator begin() const {return count - 1;}
// Return the next iterator.
iterator advance(iterator pos) const {return --pos;}
// Return true if the iterator is at the end of the set.
bool done(iterator pos) const {return pos < 0;}
// Return the element for the given iterator;
Uint32 get(iterator pos) const {return node[pos].element;}
// Return one element of this set.
Uint32 getOne() const {assert(count > 0); return node[0].element;}
// Return the size of this set.
Uint32 getSize() const {return count;}
#ifdef DEBUG_LOG
// Print the set.
void printPretty(LogModuleObject log);
#endif // DEBUG_LOG
};
inline void SparseSet::clear(Uint32 element)
{
checkMember(element);
Uint32 count = this->count;
Node* node = this->node;
Uint32 stackIndex = node[element].stackIndex;
if ((stackIndex < count) && (node[stackIndex].element == element)) {
Uint32 stackTop = node[count - 1].element;
node[stackIndex].element = stackTop;
node[stackTop].stackIndex = stackIndex;
this->count = count - 1;
}
}
inline void SparseSet::set(Uint32 element)
{
checkMember(element);
Uint32 count = this->count;
Node* node = this->node;
Uint32 stackIndex = node[element].stackIndex;
if ((stackIndex >= count) || (node[stackIndex].element != element)) {
node[count].element = element;
node[element].stackIndex = count;
this->count = count + 1;
}
}
inline bool SparseSet::test(Uint32 element) const
{
checkMember(element);
Node* node = this->node;
Uint32 stackIndex = node[element].stackIndex;
return ((stackIndex < count) && (node[stackIndex].element == element));
}
inline SparseSet& SparseSet::operator = (const SparseSet& set)
{
checkUniverseCompatibility(set);
Uint32 sourceCount = set.getSize();
Node* node = this->node;
memcpy(node, set.node, sourceCount * sizeof(Node));
for (Uint32 i = 0; i < sourceCount; i++) {
Uint32 element = node[i].element;
node[element].stackIndex = i;
}
count = sourceCount;
return *this;
}
inline SparseSet& SparseSet::operator = (const BitSet& set)
{
// FIX: there's room for optimization here.
assert(universeSize == set.getSize());
clear();
for (Int32 i = set.firstOne(); i != -1; i = set.nextOne(i))
this->set(i);
return *this;
}
#endif // _SPARSE_SET_H_

View File

@@ -0,0 +1,270 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#ifndef NEW_LAURENTM_CODE
#define INCLUDE_EMITTER
#include "CpuInfo.h"
#include "Fundamentals.h"
#include "ControlNodes.h"
#include "Instruction.h"
#include "InstructionEmitter.h"
#include "Spilling.h"
void Spilling::
insertSpillCode(ControlNode** dfsList, Uint32 nNodes)
{
PRUint32 nVirtualRegisters = vRegManager.count();
FastBitSet currentLive(vRegManager.pool, nVirtualRegisters);
FastBitSet usedInThisInstruction(vRegManager.pool, nVirtualRegisters);
RegisterFifo grNeedLoad(nVirtualRegisters);
RegisterFifo fpNeedLoad(nVirtualRegisters);
for (PRInt32 n = nNodes - 1; n >= 0; n--)
{
PR_ASSERT(grNeedLoad.empty() & fpNeedLoad.empty());
ControlNode& node = *dfsList[n];
currentLive = node.liveAtEnd;
PRUint32 nGeneralAlive = 0;
PRUint32 nFloatingPointAlive = 0;
// Get the number of registers alive at the end of this node.
for (PRInt32 j = currentLive.firstOne(); j != -1; j = currentLive.nextOne(j))
{
VirtualRegister& vReg = vRegManager.getVirtualRegister(j);
if (vReg.spillInfo.willSpill)
{
currentLive.clear(j);
}
else
{
switch (vReg.getClass())
{
case vrcInteger:
nGeneralAlive++;
break;
case vrcFloatingPoint:
case vrcFixedPoint:
nFloatingPointAlive++;
break;
default:
break;
}
}
}
// if(node.dfsNum == 8) printf("\n________Begin Node %d________\n", node.dfsNum);
InstructionList& instructions = node.getInstructions();
for (InstructionList::iterator i = instructions.end(); !instructions.done(i); i = instructions.retreat(i))
{
Instruction& instruction = instructions.get(i);
InstructionUse* useBegin = instruction.getInstructionUseBegin();
InstructionUse* useEnd = instruction.getInstructionUseEnd();
InstructionUse* usePtr;
InstructionDefine* defBegin = instruction.getInstructionDefineBegin();
InstructionDefine* defEnd = instruction.getInstructionDefineEnd();
InstructionDefine* defPtr;
// if(node.dfsNum == 8) { printf("\n");
// instruction.printPretty(stdout);
// printf("\n"); }
// Handle definitions
for (defPtr = defBegin; defPtr < defEnd; defPtr++)
if (defPtr->isVirtualRegister())
{
VirtualRegister& vReg = defPtr->getVirtualRegister();
currentLive.clear(vReg.getRegisterIndex());
switch (vReg.getClass())
{
case vrcInteger:
nGeneralAlive--;
break;
case vrcFloatingPoint:
case vrcFixedPoint:
nFloatingPointAlive--;
break;
default:
break;
}
}
// Check for deaths
for (usePtr = useBegin; usePtr < useEnd; usePtr++)
if (usePtr->isVirtualRegister())
{
VirtualRegister& vReg = usePtr->getVirtualRegister();
if (!currentLive.test(vReg.getRegisterIndex()))
// This is the last use of this register.
{
currentLive.set(vReg.getRegisterIndex());
switch (vReg.getClass())
{
case vrcInteger:
nGeneralAlive++;
while (/*(nGeneralAlive > NUMBER_OF_GREGISTERS) &&*/ !grNeedLoad.empty())
{
PRUint32 toLoad = grNeedLoad.get();
currentLive.clear(toLoad);
nGeneralAlive--;
VirtualRegister& nReg = vRegManager.getVirtualRegister(toLoad);
Instruction& lastUsingInstruction = *nReg.spillInfo.lastUsingInstruction;
emitter.emitLoadAfter(*lastUsingInstruction.getPrimitive(), lastUsingInstruction.getLinks().prev,
nReg.getAlias(), *nReg.equivalentRegister[vrcStackSlot]);
nReg.releaseSelf();
}
break;
case vrcFloatingPoint:
case vrcFixedPoint:
nFloatingPointAlive++;
while (/*(nFloatingPointAlive > NUMBER_OF_FPREGISTERS) &&*/ !fpNeedLoad.empty())
{
PRUint32 toLoad = fpNeedLoad.get();
currentLive.clear(toLoad);
nFloatingPointAlive--;
VirtualRegister& nReg = vRegManager.getVirtualRegister(toLoad);
Instruction& lastUsingInstruction = *nReg.spillInfo.lastUsingInstruction;
emitter.emitLoadAfter(*lastUsingInstruction.getPrimitive(), lastUsingInstruction.getLinks().prev,
nReg.getAlias(), *nReg.equivalentRegister[vrcStackSlot]);
nReg.releaseSelf();
}
break;
default:
break;
}
}
}
// Handle uses
for (usePtr = useBegin; usePtr < useEnd; usePtr++)
if (usePtr->isVirtualRegister())
{
VirtualRegister& vReg = usePtr->getVirtualRegister();
PRUint32 registerIndex = vReg.getRegisterIndex();
if (vReg.spillInfo.willSpill) {
#if defined(GENERATE_FOR_X86)
if (!instruction.switchUseToSpill((usePtr - useBegin), *vReg.equivalentRegister[vrcStackSlot]))
#endif
{
switch (vReg.getClass())
{
case vrcInteger:
if (!grNeedLoad.test(registerIndex))
{
grNeedLoad.put(registerIndex);
VirtualRegister& alias = vRegManager.newVirtualRegister(vrcInteger);
if (vReg.isPreColored())
alias.preColorRegister(vReg.getPreColor());
/* if (vReg.hasSpecialInterference) {
alias.specialInterference.sizeTo(NUMBER_OF_REGISTERS);
alias.specialInterference = vReg.specialInterference;
alias.hasSpecialInterference = true;
} */
vReg.setAlias(alias);
vReg.retainSelf();
}
break;
case vrcFloatingPoint:
case vrcFixedPoint:
if (!fpNeedLoad.test(registerIndex))
{
fpNeedLoad.put(registerIndex);
VirtualRegister& alias = vRegManager.newVirtualRegister(vReg.getClass());
if (vReg.isPreColored())
alias.preColorRegister(vReg.getPreColor());
/*if (vReg.hasSpecialInterference) {
alias.specialInterference.sizeTo(NUMBER_OF_REGISTERS);
alias.specialInterference = vReg.specialInterference;
alias.hasSpecialInterference = true;
} */
vReg.setAlias(alias);
vReg.retainSelf();
}
break;
default:
break;
}
usePtr->getVirtualRegisterPtr().initialize(vReg.getAlias());
usedInThisInstruction.set(registerIndex);
vReg.spillInfo.lastUsingInstruction = &instruction;
}
currentLive.clear(registerIndex);
} else { // will not spill
currentLive.set(registerIndex);
}
}
// Handle definitions
for (defPtr = defBegin; defPtr < defEnd; defPtr++)
if (defPtr->isVirtualRegister())
{
VirtualRegister& vReg = defPtr->getVirtualRegister();
if (vReg.spillInfo.willSpill)
#if defined(GENERATE_FOR_X86)
if (!instruction.switchDefineToSpill((defPtr - defBegin), *vReg.equivalentRegister[vrcStackSlot]))
#endif
{
if (usedInThisInstruction.test(vReg.getRegisterIndex()))
// this virtualRegister was used in this instruction and is also defined. We need to move
// this virtual register to its alias first and then save it to memory.
{
emitter.emitStoreAfter(*instruction.getPrimitive(), &instruction.getLinks(),
vReg.getAlias(), *vReg.equivalentRegister[vrcStackSlot]);
defPtr->getVirtualRegisterPtr().initialize(vReg.getAlias());
}
else
{
emitter.emitStoreAfter(*instruction.getPrimitive(), &instruction.getLinks(),
vReg, *vReg.equivalentRegister[vrcStackSlot]);
}
}
}
}
while (!grNeedLoad.empty())
{
PRUint32 nl = grNeedLoad.get();
VirtualRegister& nlReg = vRegManager.getVirtualRegister(nl);
Instruction& lastUse = *nlReg.spillInfo.lastUsingInstruction;
emitter.emitLoadAfter(*lastUse.getPrimitive(), lastUse.getLinks().prev,
nlReg.getAlias(), *nlReg.equivalentRegister[vrcStackSlot]);
nlReg.releaseSelf();
}
while (!fpNeedLoad.empty())
{
PRUint32 nl = fpNeedLoad.get();
VirtualRegister& nlReg = vRegManager.getVirtualRegister(nl);
Instruction& lastUse = *nlReg.spillInfo.lastUsingInstruction;
emitter.emitLoadAfter(*lastUse.getPrimitive(), lastUse.getLinks().prev,
nlReg.getAlias(), *nlReg.equivalentRegister[vrcStackSlot]);
nlReg.releaseSelf();
}
// if(node.dfsNum == 8) printf("\n________End Node %d________\n", node.dfsNum);
}
}
#endif

View File

@@ -0,0 +1,269 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#ifndef _SPILLING_H_
#define _SPILLING_H_
#include "Fundamentals.h"
#include <string.h>
#include "RegisterAllocator.h"
#include "RegisterAllocatorTools.h"
#include "ControlGraph.h"
#include "ControlNodes.h"
#include "Instruction.h"
#include "SparseSet.h"
template <class RegisterPressure>
class Spilling
{
private:
static void insertStoreAfter(Instruction& instruction, RegisterName name);
static void insertLoadBefore(Instruction& instruction, RegisterName name);
public:
static void calculateSpillCosts(RegisterAllocator& registerAllocator);
static void insertSpillCode(RegisterAllocator& registerAllocator);
};
struct SpillCost
{
double loads;
double stores;
double copies;
double cost;
bool infinite;
};
template <class RegisterPressure>
void Spilling<RegisterPressure>::insertSpillCode(RegisterAllocator& registerAllocator)
{
Uint32 rangeCount = registerAllocator.rangeCount;
RegisterName* name2range = registerAllocator.name2range;
Pool& pool = registerAllocator.pool;
SparseSet currentLive(pool, rangeCount);
SparseSet needLoad(pool, rangeCount);
SparseSet mustSpill(pool, rangeCount);
SparseSet& willSpill = *registerAllocator.willSpill;
ControlGraph& controlGraph = registerAllocator.controlGraph;
RegisterPressure::Set* liveOut = registerAllocator.liveness.liveOut;
ControlNode** nodes = controlGraph.dfsList;
Uint32 nNodes = controlGraph.nNodes;
for (Uint32 n = 0; n < nNodes; n++) {
needLoad.clear();
currentLive = liveOut[n];
mustSpill = currentLive;
InstructionList& instructions = nodes[n]->getInstructions();
for (InstructionList::iterator i = instructions.end(); !instructions.done(i);) {
Instruction& instruction = instructions.get(i);
i = instructions.retreat(i);
InstructionUse* useBegin = instruction.getInstructionUseBegin();
InstructionUse* useEnd = instruction.getInstructionUseEnd();
InstructionUse* usePtr;
InstructionDefine* defineBegin = instruction.getInstructionDefineBegin();
InstructionDefine* defineEnd = instruction.getInstructionDefineEnd();
InstructionDefine* definePtr;
bool foundLiveDefine = false;
for (definePtr = defineBegin; definePtr < defineEnd; definePtr++)
if (definePtr->isRegister()) {
if (currentLive.test(name2range[definePtr->getRegisterName()])) {
foundLiveDefine = true;
break;
}
} else {
foundLiveDefine = true;
break;
}
if (defineBegin != defineEnd && !foundLiveDefine) {
fprintf(stderr, "!!! Removed instruction because it was only defining unused registers !!!\n");
instruction.remove();
}
for (definePtr = defineBegin; definePtr < defineEnd; definePtr++)
if (definePtr->isRegister()) {
RegisterName range = name2range[definePtr->getRegisterName()];
#ifdef DEBUG
if (needLoad.test(range))
if (!mustSpill.test(range) && registerAllocator.spillCost[range].infinite && willSpill.test(range)) {
fprintf(stderr, "Tried to spill a register with infinite spill cost\n");
abort();
}
#endif // DEBUG
if (willSpill.test(range))
insertStoreAfter(instruction, range);
needLoad.clear(range);
}
if (instruction.getFlags() & ifCopy)
for (usePtr = useBegin; usePtr < useEnd; usePtr++)
if (usePtr->isRegister()) {
RegisterName range = name2range[usePtr->getRegisterName()];
if (!currentLive.test(range))
for (SparseSet::iterator r = needLoad.begin(); !needLoad.done(r); r = needLoad.advance(r)) {
RegisterName load = RegisterName(needLoad.get(r));
if (willSpill.test(load))
insertLoadBefore(instruction, load);
mustSpill.set(load);
}
needLoad.clear();
}
for (definePtr = defineBegin; definePtr < defineEnd; definePtr++)
if (definePtr->isRegister())
currentLive.clear(name2range[definePtr->getRegisterName()]);
for (usePtr = useBegin; usePtr < useEnd; usePtr++)
if (usePtr->isRegister()) {
RegisterName range = name2range[usePtr->getRegisterName()];
currentLive.set(range);
needLoad.set(range);
}
}
for (SparseSet::iterator l = needLoad.begin(); !needLoad.done(l); l = needLoad.advance(l)) {
RegisterName load = RegisterName(needLoad.get(l));
if (willSpill.test(load))
insertLoadBefore(instructions.first(), load);
}
}
}
template <class RegisterPressure>
void Spilling<RegisterPressure>::insertLoadBefore(Instruction& /*instruction*/, RegisterName name)
{
fprintf(stdout, "will insert load for range %d\n", name);
}
template <class RegisterPressure>
void Spilling<RegisterPressure>::insertStoreAfter(Instruction& /*instruction*/, RegisterName name)
{
fprintf(stdout, "will insert store for range %d\n", name);
}
template <class RegisterPressure>
void Spilling<RegisterPressure>::calculateSpillCosts(RegisterAllocator& registerAllocator)
{
Uint32 rangeCount = registerAllocator.rangeCount;
RegisterName* name2range = registerAllocator.name2range;
Pool& pool = registerAllocator.pool;
SparseSet live(pool, rangeCount);
SparseSet needLoad(pool, rangeCount);
SparseSet mustSpill(pool, rangeCount);
SparseSet alreadyStored(pool, rangeCount); // FIX: should get this from previous spilling.
SpillCost* cost = new SpillCost[rangeCount];
memset(cost, '\0', rangeCount * sizeof(SpillCost));
ControlGraph& controlGraph = registerAllocator.controlGraph;
RegisterPressure::Set* liveOut = registerAllocator.liveness.liveOut;
ControlNode** nodes = controlGraph.dfsList;
Uint32 nNodes = controlGraph.nNodes;
for (Uint32 n = 0; n < nNodes; n++) {
ControlNode& node = *nodes[n];
double weight = doLog10(node.loopDepth);
needLoad.clear();
live = liveOut[n];
mustSpill = live;
InstructionList& instructions = nodes[n]->getInstructions();
for (InstructionList::iterator i = instructions.end(); !instructions.done(i); i = instructions.retreat(i)) {
Instruction& instruction = instructions.get(i);
InstructionUse* useBegin = instruction.getInstructionUseBegin();
InstructionUse* useEnd = instruction.getInstructionUseEnd();
InstructionUse* usePtr;
InstructionDefine* defineBegin = instruction.getInstructionDefineBegin();
InstructionDefine* defineEnd = instruction.getInstructionDefineEnd();
InstructionDefine* definePtr;
for (definePtr = defineBegin; definePtr < defineEnd; definePtr++)
if (definePtr->isRegister()) {
RegisterName range = name2range[definePtr->getRegisterName()];
if (needLoad.test(range))
if (!mustSpill.test(range))
cost[range].infinite = true;
if ((false /* !rematerializable(range) */ || !needLoad.test(range)) && !alreadyStored.test(range))
cost[range].stores += weight;
needLoad.clear(range);
}
if (instruction.getFlags() & ifCopy)
for (usePtr = useBegin; usePtr < useEnd; usePtr++)
if (usePtr->isRegister())
if (!live.test(name2range[usePtr->getRegisterName()])) {
for (SparseSet::iterator l = needLoad.begin(); !needLoad.done(l); l = needLoad.advance(l)) {
Uint32 range = needLoad.get(l);
cost[range].loads += weight;
mustSpill.set(range);
}
needLoad.clear();
}
for (definePtr = defineBegin; definePtr < defineEnd; definePtr++)
if (definePtr->isRegister())
live.clear(name2range[definePtr->getRegisterName()]);
for (usePtr = useBegin; usePtr < useEnd; usePtr++)
if (usePtr->isRegister()) {
RegisterName range = name2range[usePtr->getRegisterName()];
live.set(range);
needLoad.set(range);
}
if (instruction.getFlags() & ifCopy) {
assert(useBegin != useEnd && useBegin[0].isRegister());
assert(defineBegin != defineEnd && defineBegin[0].isRegister());
RegisterName source = name2range[useBegin[0].getRegisterName()];
RegisterName destination = name2range[defineBegin[0].getRegisterName()];
cost[source].copies += weight;
cost[destination].copies += weight;
}
}
for (SparseSet::iterator s = needLoad.begin(); !needLoad.done(s); s = needLoad.advance(s))
cost[needLoad.get(s)].loads += weight;
}
for (Uint32 r = 0; r < rangeCount; r++) {
SpillCost& c = cost[r];
c.cost = 2 * (c.loads + c.stores) - c.copies;
}
registerAllocator.spillCost = cost;
}
#endif // _SPILLING_H_

View File

@@ -0,0 +1,239 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#ifndef _SPLITS_H_
#define _SPLITS_H_
#include "Fundamentals.h"
#include <string.h>
#include "Pool.h"
#include "ControlGraph.h"
#include "ControlNodes.h"
#include "Instruction.h"
#include "RegisterAllocator.h"
#include "RegisterAllocatorTools.h"
UT_EXTERN_LOG_MODULE(RegAlloc);
template <class RegisterPressure>
struct Splits
{
static void calculateSplitCosts(RegisterAllocator& registerAllocator);
static bool findSplit(RegisterAllocator& registerAllocator, RegisterName* color, RegisterName range);
static void insertSplitCode(RegisterAllocator& registerAllocator);
};
struct SplitCost
{
double loads;
double stores;
};
template <class RegisterPressure>
void Splits<RegisterPressure>::insertSplitCode(RegisterAllocator& /*registerAllocator*/)
{
// FIX
}
template <class RegisterPressure>
bool Splits<RegisterPressure>::findSplit(RegisterAllocator& registerAllocator, RegisterName* color, RegisterName range)
{
Pool& pool = registerAllocator.pool;
NameLinkedList** neighborsWithColor = new(pool) NameLinkedList*[6]; // FIX
memset(neighborsWithColor, '\0', 6 * sizeof(NameLinkedList*));
InterferenceGraph<RegisterPressure>& iGraph = registerAllocator.iGraph;
for (InterferenceVector* vector = iGraph.getInterferenceVector(range); vector != NULL; vector = vector->next)
for (Int32 i = vector->count - 1; i >=0; --i) {
RegisterName neighbor = vector->neighbors[i];
RegisterName c = color[neighbor];
if (c < 6) { // FIX
NameLinkedList* node = new(pool) NameLinkedList();
node->name = neighbor;
node->next = neighborsWithColor[c];
neighborsWithColor[c] = node;
}
}
bool splitAroundName = true;
LiveRangeGraph<RegisterPressure>& lGraph = registerAllocator.lGraph;
RegisterName bestColor = RegisterName(6); // FIX
double bestCost = registerAllocator.spillCost[range].cost;
SplitCost* splitCost = registerAllocator.splitCost;
for (RegisterName i = RegisterName(0); i < 6; i = RegisterName(i + 1)) { // FIX
double splitAroundNameCost = 0.0;
bool canSplitAroundName = true;
SplitCost& sCost = splitCost[range];
double addedCost = 2.0 * (sCost.stores + sCost.loads);
for (NameLinkedList* node = neighborsWithColor[i]; node != NULL; node = node->next) {
RegisterName neighbor = node->name;
if (lGraph.haveEdge(neighbor, range)) {
canSplitAroundName = false;
break;
} else
splitAroundNameCost += addedCost;
}
if (canSplitAroundName && splitAroundNameCost < bestCost) {
bestCost = splitAroundNameCost;
bestColor = i;
splitAroundName = true;
}
double splitAroundColorCost = 0.0;
bool canSplitAroundColor = true;
for (NameLinkedList* node = neighborsWithColor[i]; node != NULL; node = node->next) {
RegisterName neighbor = node->name;
if (lGraph.haveEdge(range, neighbor)) {
canSplitAroundColor = false;
break;
} else {
SplitCost& sCost = splitCost[neighbor];
double addedCost = 2.0 * (sCost.stores + sCost.loads);
splitAroundColorCost += addedCost;
}
}
if (canSplitAroundColor && splitAroundColorCost < bestCost) {
bestCost = splitAroundColorCost;
bestColor = i;
splitAroundName = false;
}
}
if (bestColor < RegisterName(6)) {
color[range] = bestColor;
registerAllocator.splitFound = true;
NameLinkedList** splitAround = registerAllocator.splitAround;
if (splitAroundName)
for (NameLinkedList* node = neighborsWithColor[bestColor]; node != NULL; node = node->next) {
NameLinkedList* newNode = new(pool) NameLinkedList();
newNode->name = node->name;
newNode->next = splitAround[range];
splitAround[range] = newNode;
}
else
for (NameLinkedList* node = neighborsWithColor[bestColor]; node != NULL; node = node->next) {
NameLinkedList* newNode = new(pool) NameLinkedList();
RegisterName neighbor = node->name;
newNode->name = range;
newNode->next = splitAround[neighbor];
splitAround[neighbor] = newNode;
}
trespass("Found a split");
return true;
}
return false;
}
template <class RegisterPressure>
void Splits<RegisterPressure>::calculateSplitCosts(RegisterAllocator& registerAllocator)
{
Pool& pool = registerAllocator.pool;
Uint32 rangeCount = registerAllocator.rangeCount;
RegisterName* name2range = registerAllocator.name2range;
SplitCost* splitCost = new(pool) SplitCost[rangeCount];
memset(splitCost, '\0', rangeCount * sizeof(SplitCost));
SparseSet live(pool, rangeCount);
RegisterPressure::Set* liveIn = registerAllocator.liveness.liveIn;
RegisterPressure::Set* liveOut = registerAllocator.liveness.liveOut;
ControlGraph& controlGraph = registerAllocator.controlGraph;
ControlNode** nodes = controlGraph.dfsList;
Uint32 nNodes = controlGraph.nNodes;
for (Uint32 n = 0; n < nNodes; n++) {
ControlNode& node = *nodes[n];
double weight = doLog10(node.loopDepth);
live = liveOut[n];
ControlEdge* successorsEnd = node.getSuccessorsEnd();
for (ControlEdge* successorsPtr = node.getSuccessorsBegin(); successorsPtr < successorsEnd; successorsPtr++) {
ControlNode& successor = successorsPtr->getTarget();
if (successor.getControlKind() != ckEnd) {
RegisterPressure::Set& successorLiveIn = liveIn[successor.dfsNum];
for (SparseSet::iterator i = live.begin(); !live.done(i); i = live.advance(i)) {
RegisterName name = RegisterName(live.get(i));
if (!successorLiveIn.test(name))
splitCost[name].loads += doLog10(successor.loopDepth);
}
}
}
InstructionList& instructions = node.getInstructions();
for (InstructionList::iterator i = instructions.end(); !instructions.done(i); i = instructions.retreat(i)) {
Instruction& instruction = instructions.get(i);
InstructionUse* useBegin = instruction.getInstructionUseBegin();
InstructionUse* useEnd = instruction.getInstructionUseEnd();
InstructionUse* usePtr;
InstructionDefine* defineBegin = instruction.getInstructionDefineBegin();
InstructionDefine* defineEnd = instruction.getInstructionDefineEnd();
InstructionDefine* definePtr;
for (definePtr = defineBegin; definePtr < defineEnd; definePtr++)
if (definePtr->isRegister())
splitCost[name2range[definePtr->getRegisterName()]].stores += weight;
for (usePtr = useBegin; usePtr < useEnd; usePtr++)
if (usePtr->isRegister()) {
RegisterName range = name2range[usePtr->getRegisterName()];
if (!live.test(range)) {
if (&instruction != &instructions.last())
splitCost[range].loads += weight;
else {
ControlEdge* successorsEnd = node.getSuccessorsEnd();
for (ControlEdge* successorsPtr = node.getSuccessorsBegin(); successorsPtr < successorsEnd; successorsPtr++)
splitCost[range].loads += doLog10(successorsPtr->getTarget().loopDepth);
}
}
}
for (definePtr = defineBegin; definePtr < defineEnd; definePtr++)
if (definePtr->isRegister())
live.clear(name2range[definePtr->getRegisterName()]);
for (usePtr = useBegin; usePtr < useEnd; usePtr++)
if (usePtr->isRegister())
live.set(name2range[usePtr->getRegisterName()]);
}
}
NameLinkedList** splitAround = new(pool) NameLinkedList*[rangeCount];
memset(splitAround, '\0', rangeCount * sizeof(NameLinkedList*));
registerAllocator.splitAround = splitAround;
registerAllocator.splitCost = splitCost;
registerAllocator.splitFound = false;
}
#endif // _SPLITS_H_

View File

@@ -0,0 +1,186 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#include "Fundamentals.h"
#include "HashTable.h"
#include "Timer.h"
#include "Pool.h"
static Pool pool; // Pool for the Timer class.
static HashTable<TimerEntry*> timerEntries(pool); // Timers hashtable.
const nTimersInABlock = 128; // Number of timers in a block.
static PRTime *timers = new(pool) PRTime[nTimersInABlock]; // A block of timers.
static Uint8 nextTimer = 0; // nextAvailableTimer.
//
// Calibrate the call to PR_Now().
//
static PRTime calibrate()
{
PRTime t = PR_Now();
PRTime& a = *new(pool) PRTime();
// Call 10 times the PR_Now() function.
a = PR_Now(); a = PR_Now(); a = PR_Now(); a = PR_Now(); a = PR_Now(); a = PR_Now();
a = PR_Now(); a = PR_Now(); a = PR_Now(); a = PR_Now(); a = PR_Now(); a = PR_Now();
t = (PR_Now() - t + 9) / 10;
return t;
}
static PRTime adjust = calibrate();
//
// Return the named timer..
//
TimerEntry& Timer::getTimerEntry(const char* name)
{
if (!timerEntries.exists(name)) {
TimerEntry* newEntry = new(pool) TimerEntry();
newEntry->accumulator = 0;
newEntry->running = false;
timerEntries.add(name, newEntry);
}
return *timerEntries[name];
}
//
// Return a reference to a new timer.
//
PRTime& Timer::getNewTimer()
{
if (nextTimer >= nTimersInABlock) {
timers = new(pool) PRTime[nTimersInABlock];
nextTimer = 0;
}
return timers[nextTimer++];
}
static Uint32 timersAreFrozen = 0;
//
// Start the named timer.
//
void Timer::start(const char* name)
{
if (timersAreFrozen)
return;
freezeTimers();
TimerEntry& timer = getTimerEntry(name);
PR_ASSERT(!timer.running);
timer.accumulator = 0;
timer.running = true;
timer.done = false;
unfreezeTimers();
}
//
// Stop the named timer.
//
void Timer::stop(const char* name)
{
if (timersAreFrozen)
return;
freezeTimers();
TimerEntry& timer = getTimerEntry(name);
PR_ASSERT(timer.running);
timer.running = false;
timer.done = true;
unfreezeTimers();
}
//
// Freeze all the running timers.
//
void Timer::freezeTimers()
{
PRTime when = PR_Now() - adjust;
if (timersAreFrozen == 0) {
Vector<TimerEntry*> entries = timerEntries;
Uint32 count = entries.size();
for (Uint32 i = 0; i < count; i++) {
TimerEntry& entry = *entries[i];
if (entry.running) {
entry.accumulator += (when - *entry.startTime);
}
}
}
timersAreFrozen++;
}
//
// Unfreeze all the running timers.
//
void Timer::unfreezeTimers()
{
PR_ASSERT(timersAreFrozen != 0);
timersAreFrozen--;
if (timersAreFrozen == 0) {
Vector<TimerEntry *> entries = timerEntries;
Uint32 count = entries.size();
PRTime& newStart = getNewTimer();
for (Uint32 i = 0; i < count; i++) {
TimerEntry& entry = *entries[i];
if (entry.running) {
entry.startTime = &newStart;
}
}
newStart = PR_Now();
}
}
//
// Print the named timer in the file f.
//
void Timer::print(FILE* f, const char *name)
{
if (timersAreFrozen)
return;
freezeTimers();
TimerEntry& timer = getTimerEntry(name);
PR_ASSERT(timer.done);
PRTime elapsed = timer.accumulator;
if (elapsed >> 32) {
fprintf(f, "[timer %s out of range]\n", name);
} else {
fprintf(f, "[%dus in %s]\n", Uint32(elapsed), name);
}
fflush(f);
unfreezeTimers();
}

View File

@@ -0,0 +1,80 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#ifndef _TIMER_H_
#define _TIMER_H_
#include "Fundamentals.h"
#include "HashTable.h"
#include "prtime.h"
//
// Naming convention:
// As the class Timer contains only static methods, the timer's name should start with the
// module name. Otherwise starting 2 timers with the same name will assert.
//
#ifndef NO_TIMER
struct TimerEntry
{
PRTime *startTime; // Current time when we start the timer.
PRTime accumulator; // Time spent in this timer.
bool running; // True if the timer is running.
bool done; // True if the timer was running and was stopped.
};
class Timer
{
private:
// Return the named timer.
static TimerEntry& getTimerEntry(const char* name);
// Return a reference to a new Timer.
static PRTime& getNewTimer();
public:
// Start the timer.
static void start(const char* name);
// Stop the timer.
static void stop(const char* name);
// Freeze all the running timers.
static void freezeTimers();
// Unfreeze all the running timers.
static void unfreezeTimers();
// Print the timer.
static void print(FILE* f, const char *name);
};
inline void startTimer(const char* name) {Timer::start(name);}
inline void stopTimer(const char* name) {Timer::stop(name); Timer::print(stdout, name);}
#define START_TIMER_SAFE Timer::freezeTimers();
#define END_TIMER_SAFE Timer::unfreezeTimers();
#define TIMER_SAFE(x) START_TIMER_SAFE x; END_TIMER_SAFE
#else /* NO_TIMER */
inline void startTimer(const char* /*name*/) {}
inline void stopTimer(const char* /*name*/) {}
#define START_TIMER_SAFE
#define END_TIMER_SAFE
#define TIMER_SAFE(x) x;
#endif /* NO_TIMER */
#endif /* _TIMER_H_ */

View File

@@ -0,0 +1,40 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#include "Fundamentals.h"
#include "VirtualRegister.h"
#include "Instruction.h"
//------------------------------------------------------------------------------
// VirtualRegister -
#ifdef MANUAL_TEMPLATES
template class IndexedPool<VirtualRegister>;
#endif
// Set the defining instruction.
//
void VirtualRegister::setDefiningInstruction(Instruction& instruction)
{
if (definingInstruction != NULL) {
if ((instruction.getFlags() & ifCopy) && (definingInstruction->getFlags() & ifPhiNode))
return;
}
definingInstruction = &instruction;
}

View File

@@ -0,0 +1,116 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#ifndef _VIRTUAL_REGISTER_H_
#define _VIRTUAL_REGISTER_H_
#include "Fundamentals.h"
#include "IndexedPool.h"
#include <string.h>
#include "RegisterTypes.h"
#include "RegisterClass.h"
//------------------------------------------------------------------------------
// VirtualRegister - 24b
class Instruction;
class VirtualRegister : public IndexedObject<VirtualRegister>
{
public:
Instruction* definingInstruction; // Instruction defining this VR.
// Initialize a VR of the given classKind.
VirtualRegister(RegisterClassKind /*classKind*/) : definingInstruction(NULL) {}
// Return the defining instruction for this VR.
Instruction* getDefiningInstruction() const {return definingInstruction;}
// Set the defining instruction.
void setDefiningInstruction(Instruction& insn);
};
// Return true if the VirtualRegisters are equals. The only way 2 VRs can be equal is if
// they have the same index. If they have the same index then they are at the same
// address in the indexed pool.
//
inline bool operator == (const VirtualRegister& regA, const VirtualRegister& regB) {return &regA == &regB;}
//------------------------------------------------------------------------------
// VirtualRegisterManager -
struct PreColoredRegister
{
RegisterID id;
RegisterName color;
};
class VirtualRegisterManager
{
private:
IndexedPool<VirtualRegister> registerPool;
PreColoredRegister machineRegister[6];
public:
VirtualRegisterManager()
{
for (Uint32 i = 0; i < 6; i++)
machineRegister[i].id = invalidID;
}
// Return the VirtualRegister at the given index.
VirtualRegister& getVirtualRegister(RegisterName name) const {return registerPool.get(name);}
// Return a new VirtualRegister.
RegisterID newVirtualRegister(RegisterClassKind classKind)
{
VirtualRegister& vReg = *new(registerPool) VirtualRegister(classKind);
RegisterID rid;
setName(rid, RegisterName(vReg.getIndex()));
setClass(rid, classKind);
return rid;
}
RegisterID newMachineRegister(RegisterName name, RegisterClassKind classKind)
{
RegisterID rid = machineRegister[name].id;
if (rid == invalidID) {
rid = newVirtualRegister(classKind);
DEBUG_ONLY(setMachineRegister(rid));
machineRegister[name].id = rid;
machineRegister[name].color = name;
}
return rid;
}
PreColoredRegister* getMachineRegistersBegin() const {return (PreColoredRegister*) machineRegister;} // FIX
PreColoredRegister* getMachineRegistersEnd() const {return (PreColoredRegister*) &machineRegister[6];} // FIX
// Return the VirtualRegister universe size.
Uint32 getSize() {return registerPool.getSize();}
void setSize(Uint32 size) {registerPool.setSize(size);}
};
#endif // _VIRTUAL_REGISTER_H_

View File

@@ -1,152 +0,0 @@
#
# ***** BEGIN LICENSE BLOCK *****
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# The Original Code is the Netscape security libraries.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1994-2000
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Dr Vipul Gupta <vipul.gupta@sun.com> and
# Douglas Stebila <douglas@stebila.ca>, Sun Microsystems
# Laboratories
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
#
# ***** END LICENSE BLOCK *****
CORE_DEPTH = ../../..
MODULE = nss
ifndef FREEBL_RECURSIVE_BUILD
LIBRARY_NAME = freebl
else
ifdef USE_PURE_32
CORE_DEPTH = ../../../..
LIBRARY_NAME = freebl_pure32
else
LIBRARY_NAME = freebl_hybrid
endif
endif
# same version as rest of freebl
LIBRARY_VERSION = _3
DEFINES += -DSHLIB_SUFFIX=\"$(DLL_SUFFIX)\" -DSHLIB_PREFIX=\"$(DLL_PREFIX)\"
REQUIRES =
EXPORTS = \
blapit.h \
shsign.h \
ecl-exp.h \
$(NULL)
PRIVATE_EXPORTS = \
blapi.h \
secmpi.h \
secrng.h \
ec.h \
ecl.h \
ecl-curve.h \
$(NULL)
MPI_HDRS = mpi-config.h mpi.h mpi-priv.h mplogic.h mpprime.h logtab.h mp_gf2m.h
MPI_SRCS = mpprime.c mpmontg.c mplogic.c mpi.c mp_gf2m.c mpcpucache.c
ECL_HDRS = ecl-exp.h ecl.h ec2.h ecp.h ecl-priv.h
ifdef NSS_ENABLE_ECC
ECL_SRCS = ecl.c ecl_curve.c ecl_mult.c ecl_gf.c \
ec2_aff.c ec2_mont.c ec2_proj.c \
ec2_163.c ec2_193.c ec2_233.c \
ecp_aff.c ecp_jac.c ecp_mont.c \
ecp_192.c ecp_224.c \
ec_naf.c ecp_jm.c
else
ECL_SRCS = $(NULL)
endif
CSRCS = \
ldvector.c \
prng_fips1861.c \
sysrand.c \
sha_fast.c \
md2.c \
md5.c \
sha512.c \
alg2268.c \
arcfour.c \
arcfive.c \
desblapi.c \
des.c \
rijndael.c \
aeskeywrap.c \
dh.c \
ec.c \
pqg.c \
dsa.c \
rsa.c \
shvfy.c \
$(MPI_SRCS) \
$(ECL_SRCS) \
$(NULL)
ALL_CSRCS := $(CSRCS)
ALL_HDRS = \
blapi.h \
blapit.h \
des.h \
ec.h \
loader.h \
rijndael.h \
secmpi.h \
sha.h \
sha_fast.h \
shsign.h \
vis_proto.h \
$(NULL)
ifdef NSS_ENABLE_ECC
DEFINES += -DNSS_ENABLE_ECC
endif
ifdef AES_GEN_TBL
DEFINES += -DRIJNDAEL_GENERATE_TABLES
else
ifdef AES_GEN_TBL_M
DEFINES += -DRIJNDAEL_GENERATE_TABLES_MACRO
else
ifdef AES_GEN_VAL
DEFINES += -DRIJNDAEL_GENERATE_VALUES
else
ifdef AES_GEN_VAL_M
DEFINES += -DRIJNDAEL_GENERATE_VALUES_MACRO
else
DEFINES += -DRIJNDAEL_INCLUDE_TABLES
endif
endif
endif
endif

View File

@@ -1,278 +0,0 @@
#
# Makefile for MPI library
# ***** BEGIN LICENSE BLOCK *****
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
#
# The Initial Developer of the Original Code is
# Michael J. Fromberger <sting@linguist.dartmouth.edu>.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Netscape Communications Corporation
# Richard C. Swift (swift@netscape.com)
# Douglas Stebila <douglas@stebila.ca>, Sun Microsystems Laboratories
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
#
# ***** END LICENSE BLOCK *****
#
# $Id: Makefile,v 1.21 2005-02-02 22:28:22 gerv%gerv.net Exp $
#
## Define CC to be the C compiler you wish to use. The GNU cc
## compiler (gcc) should work, at the very least
#CC=cc
#CC=gcc
##
## Define PERL to point to your local Perl interpreter. It
## should be Perl 5.x, although it's conceivable that Perl 4
## might work ... I haven't tested it.
##
#PERL=/usr/bin/perl
PERL=perl
include target.mk
CFLAGS+= $(XCFLAGS)
##
## Define LIBS to include any libraries you need to link against.
## If NO_TABLE is define, LIBS should include '-lm' or whatever is
## necessary to bring in the math library. Otherwise, it can be
## left alone, unless your system has other peculiar requirements.
##
LIBS=#-lmalloc#-lefence#-lm
##
## Define RANLIB to be the library header randomizer; you might not
## need this on some systems (just set it to 'echo' on these systems,
## such as IRIX)
##
RANLIB=echo
##
## This is the version string used for the documentation and
## building the distribution tarball. Don't mess with it unless
## you are releasing a new version
VERS=1.7p6
## ----------------------------------------------------------------------
## You probably don't need to change anything below this line...
##
##
## This is the list of source files that need to be packed into
## the distribution file
SRCS= mpi.c mpprime.c mplogic.c mp_gf2m.c mpmontg.c mpi-test.c primes.c tests/ \
utils/gcd.c utils/invmod.c utils/lap.c \
utils/ptab.pl utils/sieve.c utils/isprime.c\
utils/dec2hex.c utils/hex2dec.c utils/bbs_rand.c \
utils/bbsrand.c utils/prng.c utils/primegen.c \
utils/basecvt.c utils/makeprime.c\
utils/fact.c utils/exptmod.c utils/pi.c utils/metime.c \
utils/mpi.h utils/mpprime.h mulsqr.c \
make-test-arrays test-arrays.txt all-tests make-logtab \
types.pl stats timetest multest
## These are the header files that go into the distribution file
HDRS=mpi.h mpi-config.h utils/mpi.h utils/mpi-config.h mpprime.h mplogic.h mp_gf2m.h \
mp_gf2m-priv.h utils/bbs_rand.h tests/mpi.h tests/mpprime.h
## These are the documentation files that go into the distribution file
DOCS=README doc utils/README utils/PRIMES
## This is the list of tools built by 'make tools'
TOOLS=gcd invmod isprime lap dec2hex hex2dec primegen prng \
basecvt fact exptmod pi makeprime identest
LIBOBJS = mpprime.o mpmontg.o mplogic.o mp_gf2m.o mpi.o $(AS_OBJS)
LIBHDRS = mpi-config.h mpi-priv.h mpi.h
APPHDRS = mpi-config.h mpi.h mplogic.h mp_gf2m.h mpprime.h
help:
@ echo ""
@ echo "The following targets can be built with this Makefile:"
@ echo ""
@ echo "libmpi.a - arithmetic and prime testing library"
@ echo "mpi-test - test driver (requires MP_IOFUNC)"
@ echo "tools - command line tools"
@ echo "doc - manual pages for tools"
@ echo "clean - clean up objects and such"
@ echo "distclean - get ready for distribution"
@ echo "dist - distribution tarball"
@ echo ""
.SUFFIXES: .c .o .i
.c.i:
$(CC) $(CFLAGS) -E $< > $@
#.c.o: $*.h $*.c
# $(CC) $(CFLAGS) -c $<
#---------------------------------------
$(LIBOBJS): $(LIBHDRS)
logtab.h: make-logtab
$(PERL) make-logtab > logtab.h
mpi.o: mpi.c logtab.h $(LIBHDRS)
mplogic.o: mplogic.c mpi-priv.h mplogic.h $(LIBHDRS)
mp_gf2m.o: mp_gf2m.c mpi-priv.h mp_gf2m.h mp_gf2m-priv.h $(LIBHDRS)
mpmontg.o: mpmontg.c mpi-priv.h mplogic.h mpprime.h $(LIBHDRS)
mpprime.o: mpprime.c mpi-priv.h mpprime.h mplogic.h primes.c $(LIBHDRS)
mpi_mips.o: mpi_mips.s
$(CC) -o $@ $(ASFLAGS) -c mpi_mips.s
mpi_sparc.o : montmulf.h
mpv_sparcv9.s: vis_64.il mpv_sparc.c
$(CC) -o $@ $(SOLARIS_FPU_FLAGS) -S vis_64.il mpv_sparc.c
mpv_sparcv8.s: vis_64.il mpv_sparc.c
$(CC) -o $@ $(SOLARIS_FPU_FLAGS) -S vis_32.il mpv_sparc.c
montmulfv8.o montmulfv9.o mpv_sparcv8.o mpv_sparcv9.o : %.o : %.s
$(CC) -o $@ $(SOLARIS_ASM_FLAGS) -c $<
# This rule is used to build the .s sources, which are then hand optimized.
#montmulfv8.s montmulfv9.s : montmulf%.s : montmulf%.il montmulf.c montmulf.h
# $(CC) -o $@ $(SOLARIS_ASM_FLAGS) -S montmulf$*.il montmulf.c
libmpi.a: $(LIBOBJS)
ar -cvr libmpi.a $(LIBOBJS)
$(RANLIB) libmpi.a
lib libs: libmpi.a
mpi.i: mpi.h
#---------------------------------------
MPTESTOBJS = mptest1.o mptest2.o mptest3.o mptest3a.o mptest4.o mptest4a.o \
mptest4b.o mptest6.o mptest7.o mptest8.o mptest9.o mptestb.o
MPTESTS = $(MPTESTOBJS:.o=)
$(MPTESTOBJS): mptest%.o: tests/mptest-%.c $(LIBHDRS)
$(CC) $(CFLAGS) -o $@ -c $<
$(MPTESTS): mptest%: mptest%.o libmpi.a
$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
tests: mptest1 mptest2 mptest3 mptest3a mptest4 mptest4a mptest4b mptest6 \
mptestb bbsrand
utests: mptest7 mptest8 mptest9
#---------------------------------------
EXTRAOBJS = bbsrand.o bbs_rand.o prng.o
UTILOBJS = primegen.o metime.o identest.o basecvt.o fact.o exptmod.o pi.o \
makeprime.o gcd.o invmod.o lap.o isprime.o \
dec2hex.o hex2dec.o
UTILS = $(UTILOBJS:.o=)
$(UTILS): % : %.o libmpi.a
$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
$(UTILOBJS) $(EXTRAOBJS): %.o : utils/%.c $(LIBHDRS)
$(CC) $(CFLAGS) -o $@ -c $<
prng: prng.o bbs_rand.o libmpi.a
$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
bbsrand: bbsrand.o bbs_rand.o libmpi.a
$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
utils: $(UTILS) prng bbsrand
#---------------------------------------
test-info.c: test-arrays.txt
$(PERL) make-test-arrays test-arrays.txt > test-info.c
mpi-test.o: mpi-test.c test-info.c $(LIBHDRS)
$(CC) $(CFLAGS) -o $@ -c $<
mpi-test: mpi-test.o libmpi.a
$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
mdxptest.o: mdxptest.c $(LIBHDRS) mpi-priv.h
mdxptest: mdxptest.o libmpi.a
$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
mulsqr.o: mulsqr.c logtab.h mpi.h mpi-config.h mpprime.h
$(CC) $(CFLAGS) -DMP_SQUARE=1 -o $@ -c mulsqr.c
mulsqr: mulsqr.o libmpi.a
$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
#---------------------------------------
alltests: tests utests mpi-test
tools: $(TOOLS)
doc:
(cd doc; ./build)
clean:
rm -f *.o *.a *.i
rm -f core
rm -f *~ .*~
rm -f utils/*.o
rm -f utils/core
rm -f utils/*~ utils/.*~
clobber: clean
rm -f $(TOOLS) $(UTILS)
distclean: clean
rm -f mptest? mpi-test metime mulsqr karatsuba
rm -f mptest?a mptest?b
rm -f utils/mptest?
rm -f test-info.c logtab.h
rm -f libmpi.a
rm -f $(TOOLS)
dist: Makefile $(HDRS) $(SRCS) $(DOCS)
tar -cvf mpi-$(VERS).tar Makefile $(HDRS) $(SRCS) $(DOCS)
pgps -ab mpi-$(VERS).tar
chmod +r mpi-$(VERS).tar.asc
gzip -9 mpi-$(VERS).tar
# END

View File

@@ -1,280 +0,0 @@
#
# Makefile.win - gmake Makefile for building MPI with VACPP on OS/2
#
# ***** BEGIN LICENSE BLOCK *****
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
#
# The Initial Developer of the Original Code is
# Michael J. Fromberger <sting@linguist.dartmouth.edu>.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Netscape Communications Corporation
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
#
# ***** END LICENSE BLOCK *****
#
# $Id: Makefile.os2,v 1.3 2005-02-02 22:28:22 gerv%gerv.net Exp $
#
## Define CC to be the C compiler you wish to use. The GNU cc
## compiler (gcc) should work, at the very least
#CC=cc
#CC=gcc
CC=icc.exe
AS=alp.exe
##
## Define PERL to point to your local Perl interpreter. It
## should be Perl 5.x, although it's conceivable that Perl 4
## might work ... I haven't tested it.
##
#PERL=/usr/bin/perl
PERL=perl
##
## Define CFLAGS to contain any local options your compiler
## setup requires.
##
## Conditional compilation options are no longer here; see
## the file 'mpi-config.h' instead.
##
MPICMN = -I. -DMP_API_COMPATIBLE -DMP_IOFUNC -DMP_USE_UINT_DIGIT -DMP_NO_MP_WORD
#OS/2
AS_SRCS = mpi_x86.asm
MPICMN += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE -DMP_ASSEMBLY_DIV_2DX1D
#CFLAGS= -Od -Z7 -MD -W3 -nologo -D_X86_ -DXP_PC \
-DDEBUG -D_DEBUG -UNDEBUG -DWIN32 -D_WINDOWS -DWIN95 $(MPICMN)
#CFLAGS = -O2 -MD -W3 -nologo -D_X86_ -DXP_PC -UDEBUG -U_DEBUG -DNDEBUG \
-DWIN32 -D_WINDOWS -DWIN95 $(MPICMN)
#CFLAGS = -Od -Z7 -MD -W3 -nologo -D_X86_ -DXP_PC -UDEBUG -U_DEBUG -DNDEBUG \
-DWIN32 -D_WINDOWS -DWIN95 $(MPICMN)
CFLAGS = /Ti+ -D_X86_ -DXP_PC -UDEBUG -U_DEBUG -DNDEBUG \
$(MPICMN)
ASFLAGS =
##
## Define LIBS to include any libraries you need to link against.
## If NO_TABLE is define, LIBS should include '-lm' or whatever is
## necessary to bring in the math library. Otherwise, it can be
## left alone, unless your system has other peculiar requirements.
##
LIBS=#-lmalloc#-lefence#-lm
##
## Define RANLIB to be the library header randomizer; you might not
## need this on some systems (just set it to 'echo' on these systems,
## such as IRIX)
##
RANLIB=echo
##
## This is the version string used for the documentation and
## building the distribution tarball. Don't mess with it unless
## you are releasing a new version
VERS=1.7p6
## ----------------------------------------------------------------------
## You probably don't need to change anything below this line...
##
##
## This is the list of source files that need to be packed into
## the distribution file
SRCS= mpi.c mpprime.c mplogic.c mpmontg.c mpi-test.c primes.c tests/ \
utils/gcd.c utils/invmod.c utils/lap.c \
utils/ptab.pl utils/sieve.c utils/isprime.c\
utils/dec2hex.c utils/hex2dec.c utils/bbs_rand.c \
utils/bbsrand.c utils/prng.c utils/primegen.c \
utils/basecvt.c utils/makeprime.c\
utils/fact.c utils/exptmod.c utils/pi.c utils/metime.c \
utils/mpi.h utils/mpprime.h mulsqr.c \
make-test-arrays test-arrays.txt all-tests make-logtab \
types.pl stats timetest multest
## These are the header files that go into the distribution file
HDRS=mpi.h mpi-config.h utils/mpi.h utils/mpi-config.h mpprime.h mplogic.h \
utils/bbs_rand.h tests/mpi.h tests/mpprime.h
## These are the documentation files that go into the distribution file
DOCS=README doc utils/README utils/PRIMES
## This is the list of tools built by 'make tools'
TOOLS=gcd.exe invmod.exe isprime.exe lap.exe dec2hex.exe hex2dec.exe \
primegen.exe prng.exe basecvt.exe fact.exe exptmod.exe pi.exe makeprime.exe
AS_OBJS = $(AS_SRCS:.asm=.obj)
LIBOBJS = mpprime.obj mpmontg.obj mplogic.obj mpi.obj $(AS_OBJS)
LIBHDRS = mpi-config.h mpi-priv.h mpi.h
APPHDRS = mpi-config.h mpi.h mplogic.h mpprime.h
help:
@ echo ""
@ echo "The following targets can be built with this Makefile:"
@ echo ""
@ echo "mpi.lib - arithmetic and prime testing library"
@ echo "mpi-test.exe - test driver (requires MP_IOFUNC)"
@ echo "tools - command line tools"
@ echo "doc - manual pages for tools"
@ echo "clean - clean up objects and such"
@ echo "distclean - get ready for distribution"
@ echo "dist - distribution tarball"
@ echo ""
.SUFFIXES: .c .obj .i .lib .exe .asm
.c.i:
$(CC) $(CFLAGS) -E $< > $@
.c.obj:
$(CC) $(CFLAGS) -c $<
.asm.obj:
$(AS) $(ASFLAGS) $<
.obj.exe:
$(CC) $(CFLAGS) -Fo$@ $<
#---------------------------------------
$(LIBOBJS): $(LIBHDRS)
logtab.h: make-logtab
$(PERL) make-logtab > logtab.h
mpi.obj: mpi.c logtab.h $(LIBHDRS)
mplogic.obj: mplogic.c mpi-priv.h mplogic.h $(LIBHDRS)
mpmontg.obj: mpmontg.c mpi-priv.h mplogic.h mpprime.h $(LIBHDRS)
mpprime.obj: mpprime.c mpi-priv.h mpprime.h mplogic.h primes.c $(LIBHDRS)
mpi_mips.obj: mpi_mips.s
$(CC) -Fo$@ $(ASFLAGS) -c mpi_mips.s
mpi.lib: $(LIBOBJS)
ilib /out:mpi.lib $(LIBOBJS)
$(RANLIB) mpi.lib
lib libs: mpi.lib
#---------------------------------------
MPTESTOBJS = mptest1.obj mptest2.obj mptest3.obj mptest3a.obj mptest4.obj \
mptest4a.obj mptest4b.obj mptest6.obj mptest7.obj mptest8.obj mptest9.obj
MPTESTS = $(MPTESTOBJS:.obj=.exe)
$(MPTESTOBJS): mptest%.obj: tests/mptest-%.c $(LIBHDRS)
$(CC) $(CFLAGS) -Fo$@ -c $<
$(MPTESTS): mptest%.exe: mptest%.obj mpi.lib $(LIBS)
$(CC) $(CFLAGS) -Fo$@ $^
tests: mptest1.exe mptest2.exe mptest3.exe mptest3a.exe mptest4.exe \
mptest4a.exe mptest4b.exe mptest6.exe bbsrand.exe
utests: mptest7.exe mptest8.exe mptest9.exe
#---------------------------------------
EXTRAOBJS = bbsrand.obj bbs_rand.obj prng.obj
UTILOBJS = primegen.obj metime.obj identest.obj basecvt.obj fact.obj \
exptmod.obj pi.obj makeprime.obj karatsuba.obj gcd.obj invmod.obj lap.obj \
isprime.obj dec2hex.obj hex2dec.obj
UTILS = $(UTILOBJS:.obj=.exe)
$(UTILS): %.exe : %.obj mpi.lib $(LIBS)
$(CC) $(CFLAGS) -Fo$@ $^
$(UTILOBJS) $(EXTRAOBJS): %.obj : utils/%.c $(LIBHDRS)
$(CC) $(CFLAGS) -Fo$@ -c $<
prng.exe: prng.obj bbs_rand.obj mpi.lib $(LIBS)
$(CC) $(CFLAGS) -Fo$@ $^
bbsrand.exe: bbsrand.obj bbs_rand.obj mpi.lib $(LIBS)
$(CC) $(CFLAGS) -Fo$@ $^
utils: $(UTILS) prng.exe bbsrand.exe
#---------------------------------------
test-info.c: test-arrays.txt
$(PERL) make-test-arrays test-arrays.txt > test-info.c
mpi-test.obj: mpi-test.c test-info.c $(LIBHDRS)
$(CC) $(CFLAGS) -Fo$@ -c $<
mpi-test.exe: mpi-test.obj mpi.lib $(LIBS)
$(CC) $(CFLAGS) -Fo$@ $^
mdxptest.obj: mdxptest.c $(LIBHDRS) mpi-priv.h
mdxptest.exe: mdxptest.obj mpi.lib $(LIBS)
$(CC) $(CFLAGS) -Fo$@ $^
mulsqr.obj: mulsqr.c logtab.h mpi.h mpi-config.h mpprime.h
$(CC) $(CFLAGS) -DMP_SQUARE=1 -Fo$@ -c mulsqr.c
mulsqr.exe: mulsqr.obj mpi.lib $(LIBS)
$(CC) $(CFLAGS) -Fo$@ $^
#---------------------------------------
alltests: tests utests mpi-test.exe
tools: $(TOOLS)
doc:
(cd doc; ./build)
clean:
rm -f *.obj *.lib *.pdb *.ilk
cd utils; rm -f *.obj *.lib *.pdb *.ilk
distclean: clean
rm -f mptest? mpi-test metime mulsqr karatsuba
rm -f mptest?a mptest?b
rm -f utils/mptest?
rm -f test-info.c logtab.h
rm -f mpi.lib
rm -f $(TOOLS)
dist: Makefile $(HDRS) $(SRCS) $(DOCS)
tar -cvf mpi-$(VERS).tar Makefile $(HDRS) $(SRCS) $(DOCS)
pgps -ab mpi-$(VERS).tar
chmod +r mpi-$(VERS).tar.asc
gzip -9 mpi-$(VERS).tar
print:
@echo LIBOBJS = $(LIBOBJS)
# END

View File

@@ -1,280 +0,0 @@
#
# Makefile.win - gmake Makefile for building MPI with MSVC on NT
# ***** BEGIN LICENSE BLOCK *****
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
#
# The Initial Developer of the Original Code is
# Michael J. Fromberger <sting@linguist.dartmouth.edu>.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Netscape Communications Corporation
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
#
# ***** END LICENSE BLOCK *****
#
# $Id: Makefile.win,v 1.3 2005-02-02 22:28:22 gerv%gerv.net Exp $
#
## Define CC to be the C compiler you wish to use. The GNU cc
## compiler (gcc) should work, at the very least
#CC=cc
#CC=gcc
CC=cl.exe
AS=ml.exe
##
## Define PERL to point to your local Perl interpreter. It
## should be Perl 5.x, although it's conceivable that Perl 4
## might work ... I haven't tested it.
##
#PERL=/usr/bin/perl
PERL=perl
##
## Define CFLAGS to contain any local options your compiler
## setup requires.
##
## Conditional compilation options are no longer here; see
## the file 'mpi-config.h' instead.
##
MPICMN = -I. -DMP_API_COMPATIBLE -DMP_IOFUNC
#NT
AS_SRCS = mpi_x86.asm
MPICMN += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE -DMP_ASSEMBLY_DIV_2DX1D
#CFLAGS= -Od -Z7 -MD -W3 -nologo -D_X86_ -DXP_PC \
-DDEBUG -D_DEBUG -UNDEBUG -DWIN32 -D_WINDOWS -DWIN95 $(MPICMN)
#CFLAGS = -O2 -MD -W3 -nologo -D_X86_ -DXP_PC -UDEBUG -U_DEBUG -DNDEBUG \
-DWIN32 -D_WINDOWS -DWIN95 $(MPICMN)
#CFLAGS = -Od -Z7 -MD -W3 -nologo -D_X86_ -DXP_PC -UDEBUG -U_DEBUG -DNDEBUG \
-DWIN32 -D_WINDOWS -DWIN95 $(MPICMN)
CFLAGS = -O2 -Z7 -MD -W3 -nologo -D_X86_ -DXP_PC -UDEBUG -U_DEBUG -DNDEBUG \
-DWIN32 -D_WINDOWS -DWIN95 $(MPICMN)
ASFLAGS = -Cp -Sn -Zi -coff -I.
##
## Define LIBS to include any libraries you need to link against.
## If NO_TABLE is define, LIBS should include '-lm' or whatever is
## necessary to bring in the math library. Otherwise, it can be
## left alone, unless your system has other peculiar requirements.
##
LIBS=#-lmalloc#-lefence#-lm
##
## Define RANLIB to be the library header randomizer; you might not
## need this on some systems (just set it to 'echo' on these systems,
## such as IRIX)
##
RANLIB=echo
##
## This is the version string used for the documentation and
## building the distribution tarball. Don't mess with it unless
## you are releasing a new version
VERS=1.7p6
## ----------------------------------------------------------------------
## You probably don't need to change anything below this line...
##
##
## This is the list of source files that need to be packed into
## the distribution file
SRCS= mpi.c mpprime.c mplogic.c mpmontg.c mpi-test.c primes.c tests/ \
utils/gcd.c utils/invmod.c utils/lap.c \
utils/ptab.pl utils/sieve.c utils/isprime.c\
utils/dec2hex.c utils/hex2dec.c utils/bbs_rand.c \
utils/bbsrand.c utils/prng.c utils/primegen.c \
utils/basecvt.c utils/makeprime.c\
utils/fact.c utils/exptmod.c utils/pi.c utils/metime.c \
utils/mpi.h utils/mpprime.h mulsqr.c \
make-test-arrays test-arrays.txt all-tests make-logtab \
types.pl stats timetest multest
## These are the header files that go into the distribution file
HDRS=mpi.h mpi-config.h utils/mpi.h utils/mpi-config.h mpprime.h mplogic.h \
utils/bbs_rand.h tests/mpi.h tests/mpprime.h
## These are the documentation files that go into the distribution file
DOCS=README doc utils/README utils/PRIMES
## This is the list of tools built by 'make tools'
TOOLS=gcd.exe invmod.exe isprime.exe lap.exe dec2hex.exe hex2dec.exe \
primegen.exe prng.exe basecvt.exe fact.exe exptmod.exe pi.exe makeprime.exe
AS_OBJS = $(AS_SRCS:.asm=.obj)
LIBOBJS = mpprime.obj mpmontg.obj mplogic.obj mpi.obj $(AS_OBJS)
LIBHDRS = mpi-config.h mpi-priv.h mpi.h
APPHDRS = mpi-config.h mpi.h mplogic.h mpprime.h
help:
@ echo ""
@ echo "The following targets can be built with this Makefile:"
@ echo ""
@ echo "mpi.lib - arithmetic and prime testing library"
@ echo "mpi-test - test driver (requires MP_IOFUNC)"
@ echo "tools - command line tools"
@ echo "doc - manual pages for tools"
@ echo "clean - clean up objects and such"
@ echo "distclean - get ready for distribution"
@ echo "dist - distribution tarball"
@ echo ""
.SUFFIXES: .c .obj .i .lib .exe .asm
.c.i:
$(CC) $(CFLAGS) -E $< > $@
.c.obj:
$(CC) $(CFLAGS) -c $<
.asm.obj:
$(AS) $(ASFLAGS) -c $<
.obj.exe:
$(CC) $(CFLAGS) -Fo$@ $<
#---------------------------------------
$(LIBOBJS): $(LIBHDRS)
logtab.h: make-logtab
$(PERL) make-logtab > logtab.h
mpi.obj: mpi.c logtab.h $(LIBHDRS)
mplogic.obj: mplogic.c mpi-priv.h mplogic.h $(LIBHDRS)
mpmontg.obj: mpmontg.c mpi-priv.h mplogic.h mpprime.h $(LIBHDRS)
mpprime.obj: mpprime.c mpi-priv.h mpprime.h mplogic.h primes.c $(LIBHDRS)
mpi_mips.obj: mpi_mips.s
$(CC) -Fo$@ $(ASFLAGS) -c mpi_mips.s
mpi.lib: $(LIBOBJS)
ar -cvr mpi.lib $(LIBOBJS)
$(RANLIB) mpi.lib
lib libs: mpi.lib
#---------------------------------------
MPTESTOBJS = mptest1.obj mptest2.obj mptest3.obj mptest3a.obj mptest4.obj \
mptest4a.obj mptest4b.obj mptest6.obj mptest7.obj mptest8.obj mptest9.obj
MPTESTS = $(MPTESTOBJS:.obj=.exe)
$(MPTESTOBJS): mptest%.obj: tests/mptest-%.c $(LIBHDRS)
$(CC) $(CFLAGS) -Fo$@ -c $<
$(MPTESTS): mptest%.exe: mptest%.obj mpi.lib $(LIBS)
$(CC) $(CFLAGS) -Fo$@ $^
tests: mptest1.exe mptest2.exe mptest3.exe mptest3a.exe mptest4.exe \
mptest4a.exe mptest4b.exe mptest6.exe bbsrand.exe
utests: mptest7.exe mptest8.exe mptest9.exe
#---------------------------------------
EXTRAOBJS = bbsrand.obj bbs_rand.obj prng.obj
UTILOBJS = primegen.obj metime.obj identest.obj basecvt.obj fact.obj \
exptmod.obj pi.obj makeprime.obj karatsuba.obj gcd.obj invmod.obj lap.obj \
isprime.obj dec2hex.obj hex2dec.obj
UTILS = $(UTILOBJS:.obj=.exe)
$(UTILS): %.exe : %.obj mpi.lib $(LIBS)
$(CC) $(CFLAGS) -Fo$@ $^
$(UTILOBJS) $(EXTRAOBJS): %.obj : utils/%.c $(LIBHDRS)
$(CC) $(CFLAGS) -Fo$@ -c $<
prng.exe: prng.obj bbs_rand.obj mpi.lib $(LIBS)
$(CC) $(CFLAGS) -Fo$@ $^
bbsrand.exe: bbsrand.obj bbs_rand.obj mpi.lib $(LIBS)
$(CC) $(CFLAGS) -Fo$@ $^
utils: $(UTILS) prng.exe bbsrand.exe
#---------------------------------------
test-info.c: test-arrays.txt
$(PERL) make-test-arrays test-arrays.txt > test-info.c
mpi-test.obj: mpi-test.c test-info.c $(LIBHDRS)
$(CC) $(CFLAGS) -Fo$@ -c $<
mpi-test.exe: mpi-test.obj mpi.lib $(LIBS)
$(CC) $(CFLAGS) -Fo$@ $^
mdxptest.obj: mdxptest.c $(LIBHDRS) mpi-priv.h
mdxptest.exe: mdxptest.obj mpi.lib $(LIBS)
$(CC) $(CFLAGS) -Fo$@ $^
mulsqr.obj: mulsqr.c logtab.h mpi.h mpi-config.h mpprime.h
$(CC) $(CFLAGS) -DMP_SQUARE=1 -Fo$@ -c mulsqr.c
mulsqr.exe: mulsqr.obj mpi.lib $(LIBS)
$(CC) $(CFLAGS) -Fo$@ $^
#---------------------------------------
alltests: tests utests mpi-test.exe
tools: $(TOOLS)
doc:
(cd doc; ./build)
clean:
rm -f *.obj *.lib *.pdb *.ilk
cd utils; rm -f *.obj *.lib *.pdb *.ilk
distclean: clean
rm -f mptest? mpi-test metime mulsqr karatsuba
rm -f mptest?a mptest?b
rm -f utils/mptest?
rm -f test-info.c logtab.h
rm -f mpi.lib
rm -f $(TOOLS)
dist: Makefile $(HDRS) $(SRCS) $(DOCS)
tar -cvf mpi-$(VERS).tar Makefile $(HDRS) $(SRCS) $(DOCS)
pgps -ab mpi-$(VERS).tar
chmod +r mpi-$(VERS).tar.asc
gzip -9 mpi-$(VERS).tar
print:
@echo LIBOBJS = $(LIBOBJS)
# END

View File

@@ -1,799 +0,0 @@
***** BEGIN LICENSE BLOCK *****
Version: MPL 1.1/GPL 2.0/LGPL 2.1
The contents of this file are subject to the Mozilla Public License Version
1.1 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.mozilla.org/MPL/
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
for the specific language governing rights and limitations under the
License.
The Original Code is the MPI Arbitrary Precision Integer Arithmetic
library.
The Initial Developer of the Original Code is
Michael J. Fromberger <sting@linguist.dartmouth.edu>
Portions created by the Initial Developer are Copyright (C) 1997-2000
the Initial Developer. All Rights Reserved.
Contributor(s):
Alternatively, the contents of this file may be used under the terms of
either the GNU General Public License Version 2 or later (the "GPL"), or
the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
in which case the provisions of the GPL or the LGPL are applicable instead
of those above. If you wish to allow use of your version of this file only
under the terms of either the GPL or the LGPL, and not to allow others to
use your version of this file under the terms of the MPL, indicate your
decision by deleting the provisions above and replace them with the notice
and other provisions required by the GPL or the LGPL. If you do not delete
the provisions above, a recipient may use your version of this file under
the terms of any one of the MPL, the GPL or the LGPL.
***** END LICENSE BLOCK *****
About the MPI Library
---------------------
The files 'mpi.h' and 'mpi.c' define a simple, arbitrary precision
signed integer arithmetic package. The implementation is not the most
efficient possible, but the code is small and should be fairly easily
portable to just about any machine that supports an ANSI C compiler,
as long as it is capable of at least 16-bit arithmetic (but also see
below for more on this).
This library was written with an eye to cryptographic applications;
thus, some care is taken to make sure that temporary values are not
left lying around in memory when they are no longer in use. This adds
some overhead for zeroing buffers before they are released back into
the free pool; however, it gives you the assurance that there is only
one copy of your important values residing in your process's address
space at a time. Obviously, it is difficult to guarantee anything, in
a pre-emptive multitasking environment, but this at least helps you
keep a lid on the more obvious ways your data can get spread around in
memory.
Using the Library
-----------------
To use the MPI library in your program, you must include the header:
#include "mpi.h"
This header provides all the type and function declarations you'll
need to use the library. Almost all the names defined by the library
begin with the prefix 'mp_', so it should be easy to keep them from
clashing with your program's namespace (he says, glibly, knowing full
well there are always pathological cases).
There are a few things you may want to configure about the library.
By default, the MPI library uses an unsigned short for its digit type,
and an unsigned int for its word type. The word type must be big
enough to contain at least two digits, for the primitive arithmetic to
work out. On my machine, a short is 2 bytes and an int is 4 bytes --
but if you have 64-bit ints, you might want to use a 4-byte digit and
an 8-byte word. I have tested the library using 1-byte digits and
2-byte words, as well. Whatever you choose to do, the things you need
to change are:
(1) The type definitions for mp_digit and mp_word.
(2) The macro DIGIT_FMT which tells mp_print() how to display a
single digit. This is just a printf() format string, so you
can adjust it appropriately.
(3) The macros DIGIT_MAX and MP_WORD_MAX, which specify the
largest value expressible in an mp_digit and an mp_word,
respectively.
Both the mp_digit and mp_word should be UNSIGNED integer types. The
code relies on having the full positive precision of the type used for
digits and words.
The remaining type definitions should be left alone, for the most
part. The code in the library does not make any significant
assumptions about the sizes of things, but there is little if any
reason to change the other parameters, so I would recommend you leave
them as you found them.
The library comes with a Perl script, 'types.pl', which will scan your
current Makefile settings, and attempt to find good definitions for
these types. It relies on a Unix sort of build environment, so it
probably won't work under MacOS or Windows, but it can be convenient
if you're porting to a new flavour of Unix. Just run 'types.pl' at
the command line, and it will spit out its results to the standard
output.
Conventions
-----------
Most functions in the library return a value of type mp_err. This
permits the library to communicate success or various kinds of failure
to the calling program. The return values currently defined are:
MP_OKAY - okay, operation succeeded, all's well
MP_YES - okay, the answer is yes (same as MP_OKAY)
MP_NO - okay, but answer is no (not MP_OKAY)
MP_MEM - operation ran out of memory
MP_RANGE - input parameter was out of range
MP_BADARG - an invalid input parameter was provided
MP_UNDEF - no output value is defined for this input
The only function which currently uses MP_UNDEF is mp_invmod().
Division by zero is undefined, but the division functions will return
MP_RANGE for a zero divisor. MP_BADARG usually means you passed a
bogus mp_int structure to the function. MP_YES and MP_NO are not used
by the library itself; they're defined so you can use them in your own
extensions.
If you need a readable interpretation of these error codes in your
program, you may also use the mp_strerror() function. This function
takes an mp_err as input, and returns a pointer to a human-readable
string describing the meaning of the error. These strings are stored
as constants within the library, so the caller should not attempt to
modify or free the memory associated with these strings.
The library represents values in signed-magnitude format. Values
strictly less than zero are negative, all others are considered
positive (zero is positive by fiat). You can access the 'sign' member
of the mp_int structure directly, but better is to use the mp_cmp_z()
function, to find out which side of zero the value lies on.
Most arithmetic functions have a single-digit variant, as well as the
full arbitrary-precision. An mp_digit is an unsigned value between 0
and DIGIT_MAX inclusive. The radix is available as RADIX. The number
of bits in a given digit is given as DIGIT_BIT.
Generally, input parameters are given before output parameters.
Unless otherwise specified, any input parameter can be re-used as an
output parameter, without confusing anything.
The basic numeric type defined by the library is an mp_int. Virtually
all the functions in the library take a pointer to an mp_int as one of
their parameters. An explanation of how to create and use these
<HR>
<A NAME="p23">
<H3>Problem 23:</H3>
structures follows. And so, without further ado...
Initialization and Cleanup
--------------------------
The basic numeric type defined by the library is an 'mp_int'.
However, it is not sufficient to simply declare a variable of type
mp_int in your program. These variables also need to be initialized
before they can be used, to allocate the internal storage they require
for computation.
This is done using one of the following functions:
mp_init(mp_int *mp);
mp_init_copy(mp_int *mp, mp_int *from);
mp_init_size(mp_int *mp, mp_size p);
Each of these requires a pointer to a structure of type mp_int. The
basic mp_init() simply initializes the mp_int to a default size, and
sets its value to zero. If you would like to initialize a copy of an
existing mp_int, use mp_init_copy(), where the 'from' parameter is the
mp_int you'd like to make a copy of. The third function,
mp_init_size(), permits you to specify how many digits of precision
should be preallocated for your mp_int. This can help the library
avoid unnecessary re-allocations later on.
The default precision used by mp_init() can be retrieved using:
precision = mp_get_prec();
This returns the number of digits that will be allocated. You can
change this value by using:
mp_set_prec(unsigned int prec);
Any positive value is acceptable -- if you pass zero, the default
precision will be re-set to the compiled-in library default (this is
specified in the header file 'mpi-config.h', and typically defaults to
8 or 16).
Just as you must allocate an mp_int before you can use it, you must
clean up the structure when you are done with it. This is performed
using the mp_clear() function. Remember that any mp_int that you
create as a local variable in a function must be mp_clear()'d before
that function exits, or else the memory allocated to that mp_int will
be orphaned and unrecoverable.
To set an mp_int to a given value, the following functions are given:
mp_set(mp_int *mp, mp_digit d);
mp_set_int(mp_int *mp, long z);
The mp_set() function sets the mp_int to a single digit value, while
mp_set_int() sets the mp_int to a signed long integer value.
To set an mp_int to zero, use:
mp_zero(mp_int *mp);
Copying and Moving
------------------
If you have two initialized mp_int's, and you want to copy the value
of one into the other, use:
mp_copy(from, to)
This takes care of clearing the old value of 'to', and copies the new
value into it. If 'to' is not yet initialized, use mp_init_copy()
instead (see above).
Note: The library tries, whenever possible, to avoid allocating
---- new memory. Thus, mp_copy() tries first to satisfy the needs
of the copy by re-using the memory already allocated to 'to'.
Only if this proves insufficient will mp_copy() actually
allocate new memory.
For this reason, if you know a priori that 'to' has enough
available space to hold 'from', you don't need to check the
return value of mp_copy() for memory failure. The USED()
macro tells you how many digits are used by an mp_int, and
the ALLOC() macro tells you how many are allocated.
If you have two initialized mp_int's, and you want to exchange their
values, use:
mp_exch(a, b)
This is better than using mp_copy() with a temporary, since it will
not (ever) touch the memory allocator -- it just swaps the exact
contents of the two structures. The mp_exch() function cannot fail;
if you pass it an invalid structure, it just ignores it, and does
nothing.
Basic Arithmetic
----------------
Once you have initialized your integers, you can operate on them. The
basic arithmetic functions on full mp_int values are:
mp_add(a, b, c) - computes c = a + b
mp_sub(a, b, c) - computes c = a - b
mp_mul(a, b, c) - computes c = a * b
mp_sqr(a, b) - computes b = a * a
mp_div(a, b, q, r) - computes q, r such that a = bq + r
mp_div_2d(a, d, q, r) - computes q = a / 2^d, r = a % 2^d
mp_expt(a, b, c) - computes c = a ** b
mp_2expt(a, k) - computes a = 2^k
mp_sqrt(a, c) - computes c = floor(sqrt(a))
The mp_div_2d() function efficiently computes division by powers of
two. Either the q or r parameter may be NULL, in which case that
portion of the computation will be discarded.
The algorithms used for some of the computations here are described in
the following files which are included with this distribution:
mul.txt Describes the multiplication algorithm
div.txt Describes the division algorithm
expt.txt Describes the exponentiation algorithm
sqrt.txt Describes the square-root algorithm
square.txt Describes the squaring algorithm
There are single-digit versions of most of these routines, as well.
In the following prototypes, 'd' is a single mp_digit:
mp_add_d(a, d, c) - computes c = a + d
mp_sub_d(a, d, c) - computes c = a - d
mp_mul_d(a, d, c) - computes c = a * d
mp_mul_2(a, c) - computes c = a * 2
mp_div_d(a, d, q, r) - computes q, r such that a = bq + r
mp_div_2(a, c) - computes c = a / 2
mp_expt_d(a, d, c) - computes c = a ** d
The mp_mul_2() and mp_div_2() functions take advantage of the internal
representation of an mp_int to do multiplication by two more quickly
than mp_mul_d() would. Other basic functions of an arithmetic variety
include:
mp_zero(a) - assign 0 to a
mp_neg(a, c) - negate a: c = -a
mp_abs(a, c) - absolute value: c = |a|
Comparisons
-----------
Several comparison functions are provided. Each of these, unless
otherwise specified, returns zero if the comparands are equal, < 0 if
the first is less than the second, and > 0 if the first is greater
than the second:
mp_cmp_z(a) - compare a <=> 0
mp_cmp_d(a, d) - compare a <=> d, d is a single digit
mp_cmp(a, b) - compare a <=> b
mp_cmp_mag(a, b) - compare |a| <=> |b|
mp_cmp_int(a, z) - compare a <=> z, z is a signed long integer
mp_isodd(a) - return nonzero if odd, zero otherwise
mp_iseven(a) - return nonzero if even, zero otherwise
Modular Arithmetic
------------------
Modular variations of the basic arithmetic functions are also
supported. These are available if the MP_MODARITH parameter in
mpi-config.h is turned on (it is by default). The modular arithmetic
functions are:
mp_mod(a, m, c) - compute c = a (mod m), 0 <= c < m
mp_mod_d(a, d, c) - compute c = a (mod d), 0 <= c < d (see below)
mp_addmod(a, b, m, c) - compute c = (a + b) mod m
mp_submod(a, b, m, c) - compute c = (a - b) mod m
mp_mulmod(a, b, m, c) - compute c = (a * b) mod m
mp_sqrmod(a, m, c) - compute c = (a * a) mod m
mp_exptmod(a, b, m, c) - compute c = (a ** b) mod m
mp_exptmod_d(a, d, m, c)- compute c = (a ** d) mod m
The mp_sqr() function squares its input argument. A call to mp_sqr(a,
c) is identical in meaning to mp_mul(a, a, c); however, if the
MP_SQUARE variable is set true in mpi-config.h (see below), then it
will be implemented with a different algorithm, that is supposed to
take advantage of the redundant computation that takes place during
squaring. Unfortunately, some compilers result in worse performance
on this code, so you can change the behaviour at will. There is a
utility program "mulsqr.c" that lets you test which does better on
your system.
The mp_sqrmod() function is analogous to the mp_sqr() function; it
uses the mp_sqr() function rather than mp_mul(), and then performs the
modular reduction. This probably won't help much unless you are doing
a lot of them.
See the file 'square.txt' for a synopsis of the algorithm used.
Note: The mp_mod_d() function computes a modular reduction around
---- a single digit d. The result is a single digit c.
Because an inverse is defined for a (mod m) if and only if (a, m) = 1
(that is, if a and m are relatively prime), mp_invmod() may not be
able to compute an inverse for the arguments. In this case, it
returns the value MP_UNDEF, and does not modify c. If an inverse is
defined, however, it returns MP_OKAY, and sets c to the value of the
inverse (mod m).
See the file 'redux.txt' for a description of the modular reduction
algorithm used by mp_exptmod().
Greatest Common Divisor
-----------------------
If The greates common divisor of two values can be found using one of the
following functions:
mp_gcd(a, b, c) - compute c = (a, b) using binary algorithm
mp_lcm(a, b, c) - compute c = [a, b] = ab / (a, b)
mp_xgcd(a, b, g, x, y) - compute g, x, y so that ax + by = g = (a, b)
Also provided is a function to compute modular inverses, if they
exist:
mp_invmod(a, m, c) - compute c = a^-1 (mod m), if it exists
The function mp_xgcd() computes the greatest common divisor, and also
returns values of x and y satisfying Bezout's identity. This is used
by mp_invmod() to find modular inverses. However, if you do not need
these values, you will find that mp_gcd() is MUCH more efficient,
since it doesn't need all the intermediate values that mp_xgcd()
requires in order to compute x and y.
The mp_gcd() (and mp_xgcd()) functions use the binary (extended) GCD
algorithm due to Josef Stein.
Input & Output Functions
------------------------
The following basic I/O routines are provided. These are present at
all times:
mp_read_radix(mp, str, r) - convert a string in radix r to an mp_int
mp_read_raw(mp, s, len) - convert a string of bytes to an mp_int
mp_radix_size(mp, r) - return length of buffer needed by mp_toradix()
mp_raw_size(mp) - return length of buffer needed by mp_toraw()
mp_toradix(mp, str, r) - convert an mp_int to a string of radix r
digits
mp_toraw(mp, str) - convert an mp_int to a string of bytes
mp_tovalue(ch, r) - convert ch to its value when taken as
a radix r digit, or -1 if invalid
mp_strerror(err) - get a string describing mp_err value 'err'
If you compile the MPI library with MP_IOFUNC defined, you will also
have access to the following additional I/O function:
mp_print(mp, ofp) - print an mp_int as text to output stream ofp
Note that mp_radix_size() returns a size in bytes guaranteed to be AT
LEAST big enough for the digits output by mp_toradix(). Because it
uses an approximation technique to figure out how many digits will be
needed, it may return a figure which is larger than necessary. Thus,
the caller should not rely on the value to determine how many bytes
will actually be written by mp_toradix(). The string mp_toradix()
creates will be NUL terminated, so the standard C library function
strlen() should be able to ascertain this for you, if you need it.
The mp_read_radix() and mp_toradix() functions support bases from 2 to
64 inclusive. If you require more general radix conversion facilities
than this, you will need to write them yourself (that's why mp_div_d()
is provided, after all).
Note: mp_read_radix() will accept as digits either capital or
---- lower-case letters. However, the current implementation of
mp_toradix() only outputs upper-case letters, when writing
bases betwee 10 and 36. The underlying code supports using
lower-case letters, but the interface stub does not have a
selector for it. You can add one yourself if you think it
is worthwhile -- I do not. Bases from 36 to 64 use lower-
case letters as distinct from upper-case. Bases 63 and
64 use the characters '+' and '/' as digits.
Note also that compiling with MP_IOFUNC defined will cause
inclusion of <stdio.h>, so if you are trying to write code
which does not depend on the standard C library, you will
probably want to avoid this option. This is needed because
the mp_print() function takes a standard library FILE * as
one of its parameters, and uses the fprintf() function.
The mp_toraw() function converts the integer to a sequence of bytes,
in big-endian ordering (most-significant byte first). Assuming your
bytes are 8 bits wide, this corresponds to base 256. The sign is
encoded as a single leading byte, whose value is 0 for zero or
positive values, or 1 for negative values. The mp_read_raw() function
reverses this process -- it takes a buffer of bytes, interprets the
first as a sign indicator (0 = zero/positive, nonzero = negative), and
the rest as a sequence of 1-byte digits in big-endian ordering.
The mp_raw_size() function returns the exact number of bytes required
to store the given integer in "raw" format (as described in the
previous paragraph). Zero is returned in case of error; a valid
integer will require at least three bytes of storage.
In previous versions of the MPI library, an "external representation
format" was supported. This was removed, however, because I found I
was never using it, it was not as portable as I would have liked, and
I decided it was a waste of space.
Other Functions
---------------
The files 'mpprime.h' and 'mpprime.c' define some routines which are
useful for divisibility testing and probabilistic primality testing.
The routines defined are:
mpp_divis(a, b) - is a divisible by b?
mpp_divis_d(a, d) - is a divisible by digit d?
mpp_random(a) - set a to random value at current precision
mpp_random_size(a, prec) - set a to random value at given precision
Note: The mpp_random() and mpp_random_size() functions use the C
---- library's rand() function to generate random values. It is
up to the caller to seed this generator before it is called.
These functions are not suitable for generating quantities
requiring cryptographic-quality randomness; they are intended
primarily for use in primality testing.
Note too that the MPI library does not call srand(), so your
application should do this, if you ever want the sequence
to change.
mpp_divis_vector(a, v, s, w) - is a divisible by any of the s digits
in v? If so, let w be the index of
that digit
mpp_divis_primes(a, np) - is a divisible by any of the first np
primes? If so, set np to the prime
which divided a.
mpp_fermat(a, d) - test if w^a = w (mod a). If so,
returns MP_YES, otherwise MP_NO.
mpp_pprime(a, nt) - perform nt iterations of the Rabin-
Miller probabilistic primality test
on a. Returns MP_YES if all tests
passed, or MP_NO if any test fails.
The mpp_fermat() function works based on Fermat's little theorem, a
consequence of which is that if p is a prime, and (w, p) = 1, then:
w^p = w (mod p)
Put another way, if w^p != w (mod p), then p is not prime. The test
is expensive to compute, but it helps to quickly eliminate an enormous
class of composite numbers prior to Rabin-Miller testing.
Building the Library
--------------------
The MPI library is designed to be as self-contained as possible. You
should be able to compile it with your favourite ANSI C compiler, and
link it into your program directly. If you are on a Unix system using
the GNU C compiler (gcc), the following should work:
% gcc -ansi -pedantic -Wall -O2 -c mpi.c
The file 'mpi-config.h' defines several configurable parameters for
the library, which you can adjust to suit your application. At the
time of this writing, the available options are:
MP_IOFUNC - Define true to include the mp_print() function,
which is moderately useful for debugging. This
implicitly includes <stdio.h>.
MP_MODARITH - Define true to include the modular arithmetic
functions. If you don't need modular arithmetic
in your application, you can set this to zero to
leave out all the modular routines.
MP_NUMTH - Define true to include number theoretic functions
such as mp_gcd(), mp_lcm(), and mp_invmod().
MP_LOGTAB - If true, the file "logtab.h" is included, which
is basically a static table of base 2 logarithms.
These are used to compute how big the buffers for
radix conversion need to be. If you set this false,
the library includes <math.h> and uses log(). This
typically forces you to link against math libraries.
MP_MEMSET - If true, use memset() to zero buffers. If you run
into weird alignment related bugs, set this to zero
and an explicit loop will be used.
MP_MEMCPY - If true, use memcpy() to copy buffers. If you run
into weird alignment bugs, set this to zero and an
explicit loop will be used.
MP_CRYPTO - If true, whenever arrays of digits are free'd, they
are zeroed first. This is useful if you're using
the library in a cryptographic environment; however,
it does add overhead to each free operation. For
performance, if you don't care about zeroing your
buffers, set this to false.
MP_ARGCHK - Set to 0, 1, or 2. This defines how the argument
checking macro, ARGCHK(), gets expanded. If this
is set to zero, ARGCHK() expands to nothing; no
argument checks are performed. If this is 1, the
ARGCHK() macro expands to code that returns MP_BADARG
or similar at runtime. If it is 2, ARGCHK() expands
to an assert() call that aborts the program on a
bad input.
MP_DEBUG - Turns on debugging output. This is probably not at
all useful unless you are debugging the library. It
tends to spit out a LOT of output.
MP_DEFPREC - The default precision of a newly-created mp_int, in
digits. The precision can be changed at runtime by
the mp_set_prec() function, but this is its initial
value.
MP_SQUARE - If this is set to a nonzero value, the mp_sqr()
function will use an alternate algorithm that takes
advantage of the redundant inner product computation
when both multiplicands are identical. Unfortunately,
with some compilers this is actually SLOWER than just
calling mp_mul() with the same argument twice. So
if you set MP_SQUARE to zero, mp_sqr() will be expan-
ded into a call to mp_mul(). This applies to all
the uses of mp_sqr(), including mp_sqrmod() and the
internal calls to s_mp_sqr() inside mpi.c
The program 'mulsqr' (mulsqr.c) can be used to test
which works best for your configuration. Set up the
CC and CFLAGS variables in the Makefile, then type:
make mulsqr
Invoke it with arguments similar to the following:
mulsqr 25000 1024
That is, 25000 products computed on 1024-bit values.
The output will compare the two timings, and recommend
a setting for MP_SQUARE. It is off by default.
If you would like to use the mp_print() function (see above), be sure
to define MP_IOFUNC in mpi-config.h. Many of the test drivers in the
'tests' subdirectory expect this to be defined (although the test
driver 'mpi-test' doesn't need it)
The Makefile which comes with the library should take care of building
the library for you, if you have set the CC and CFLAGS variables at
the top of the file appropriately. By default, they are set up to
use the GNU C compiler:
CC=gcc
CFLAGS=-ansi -pedantic -Wall -O2
If all goes well, the library should compile without warnings using
this combination. You should, of course, make whatever adjustments
you find necessary.
The MPI library distribution comes with several additional programs
which are intended to demonstrate the use of the library, and provide
a framework for testing it. There are a handful of test driver
programs, in the files named 'mptest-X.c', where X is a digit. Also,
there are some simple command-line utilities (in the 'utils'
directory) for manipulating large numbers. These include:
basecvt.c A radix-conversion program, supporting bases from
2 to 64 inclusive.
bbsrand.c A BBS (quadratic residue) pseudo-random number
generator. The file 'bbsrand.c' is just the driver
for the program; the real code lives in the files
'bbs_rand.h' and 'bbs_rand.c'
dec2hex.c Converts decimal to hexadecimal
gcd.c Computes the greatest common divisor of two values.
If invoked as 'xgcd', also computes constants x and
y such that (a, b) = ax + by, in accordance with
Bezout's identity.
hex2dec.c Converts hexadecimal to decimal
invmod.c Computes modular inverses
isprime.c Performs the Rabin-Miller probabilistic primality
test on a number. Values which fail this test are
definitely composite, and those which pass are very
likely to be prime (although there are no guarantees)
lap.c Computes the order (least annihilating power) of
a value v modulo m. Very dumb algorithm.
primegen.c Generates large (probable) primes.
prng.c A pseudo-random number generator based on the
BBS generator code in 'bbs_rand.c'
sieve.c Implements the Sieve of Eratosthenes, using a big
bitmap, to generate a list of prime numbers.
fact.c Computes the factorial of an arbitrary precision
integer (iterative).
exptmod.c Computes arbitrary precision modular exponentiation
from the command line (exptmod a b m -> a^b (mod m))
Most of these can be built from the Makefile that comes with the
library. Try 'make tools', if your environment supports it. (If you
are compiling on a Macintosh, I'm afraid you'll have to build them by
hand -- fortunately, this is not difficult -- the library itself
should compile just fine under Metrowerks CodeWarrior).
Testing the Library
-------------------
Automatic test vectors are included, in the form of a program called
'mpi-test'. To build this program and run all the tests, simply
invoke the shell script 'all-tests'. If all the tests pass, you
should see a message:
All tests passed
If something went wrong, you'll get:
One or more tests failed.
If this happens, scan back through the preceding lines, to see which
test failed. Any failure indicates a bug in the library, which needs
to be fixed before it will give accurate results. If you get any such
thing, please let me know, and I'll try to fix it. Please let me know
what platform and compiler you were using, as well as which test
failed. If a reason for failure was given, please send me that text
as well.
If you're on a system such as the Macintosh, where the standard Unix
build tools don't work, you can build the 'mpi-test' program manually,
and run it by hand. This is tedious and obnoxious, sorry.
Further manual testing can be performed by building the manual testing
programs, whose source is found in the 'tests' subdirectory. Each
test is in a source file called 'mptest-X.c'. The Makefile contains a
target to build all of them at once:
make tests
Read the comments at the top of each source file to see what the
driver is supposed to test. You probably don't need to do this; these
programs were only written to help me as I was developing the library.
The relevant files are:
mpi-test.c The source for the test driver
make-test-arrays A Perl script to generate some of the internal
data structures used by mpi-test.c
test-arrays.txt The source file for make-test-arrays
all-tests A Bourne shell script which runs all the
tests in the mpi-test suite
Running 'make mpi-test' should build the mpi-test program. If you
cannot use make, here is what needs to be done:
(1) Use 'make-test-arrays' to generate the file 'test-info.c' from
the 'test-arrays.txt' file. Since Perl can be found everywhere,
even on the Macintosh, this should be no trouble. Under Unix,
this looks like:
make-test-arrays test-arrays.txt > test-info.c
(2) Build the MPI library:
gcc -ansi -pedantic -Wall -c mpi.c
(3) Build the mpi-test program:
gcc -ansi -pedantic -Wall -o mpi-test mpi.o mpi-test.c
When you've got mpi-test, you can use 'all-tests' to run all the tests
made available by mpi-test. If any of them fail, there should be a
diagnostic indicating what went wrong. These are fairly high-level
diagnostics, and won't really help you debug the problem; they're
simply intended to help you isolate which function caused the problem.
If you encounter a problem of this sort, feel free to e-mail me, and I
will certainly attempt to help you debug it.
Note: Several of the tests hard-wired into 'mpi-test' operate under
---- the assumption that you are using at least a 16-bit mp_digit
type. If that is not true, several tests might fail, because
of range problems with the maximum digit value.
If you are using an 8-bit digit, you will also need to
modify the code for mp_read_raw(), which assumes that
multiplication by 256 can be done with mp_mul_d(), a
fact that fails when DIGIT_MAX is 255. You can replace
the call with s_mp_lshd(), which will give you the same
effect, and without doing as much work. :)
Acknowledgements:
----------------
The algorithms used in this library were drawn primarily from Volume
2 of Donald Knuth's magnum opus, _The Art of Computer Programming_,
"Semi-Numerical Methods". Barrett's algorithm for modular reduction
came from Menezes, Oorschot, and Vanstone's _Handbook of Applied
Cryptography_, Chapter 14.
Thanks are due to Tom St. Denis, for finding an obnoxious sign-related
bug in mp_read_raw() that made things break on platforms which use
signed chars.
About the Author
----------------
This software was written by Michael J. Fromberger. You can contact
the author as follows:
E-mail: <sting@linguist.dartmouth.edu>
Postal: 8000 Cummings Hall, Thayer School of Engineering
Dartmouth College, Hanover, New Hampshire, USA
PGP key: http://linguist.dartmouth.edu/~sting/keys/mjf.html
9736 188B 5AFA 23D6 D6AA BE0D 5856 4525 289D 9907
Last updated: 16-Jan-2000

View File

@@ -1,115 +0,0 @@
#!/bin/sh
# ***** BEGIN LICENSE BLOCK *****
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
#
# The Initial Developer of the Original Code is
# Michael J. Fromberger <sting@linguist.dartmouth.edu>.
# Portions created by the Initial Developer are Copyright (C) 1997
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
#
# ***** END LICENSE BLOCK *****
ECHO=/bin/echo
MAKE=gmake
$ECHO "\n** Running unit tests for MPI library\n"
# Build the mpi-test program, which comprises all the unit tests for
# the MPI library...
$ECHO "Bringing mpi-test up to date ... "
if $MAKE mpi-test ; then
:
else
$ECHO " "
$ECHO "Make failed to build mpi-test."
$ECHO " "
exit 1
fi
if [ ! -x mpi-test ] ; then
$ECHO " "
$ECHO "Cannot find 'mpi-test' program, testing cannot continue."
$ECHO " "
exit 1
fi
# Get the list of available test suites...
tests=`mpi-test list | awk '{print $1}'`
errs=0
# Run each test suite and check the result code of mpi-test
for test in $tests ; do
$ECHO "$test ... \c"
if mpi-test $test ; then
$ECHO "passed"
else
$ECHO "FAILED"
errs=1
fi
done
# If any tests failed, we'll stop at this point
if [ "$errs" = "0" ] ; then
$ECHO "All unit tests passed"
else
$ECHO "One or more tests failed"
exit 1
fi
# Now try to build the 'pi' program, and see if it can compute the
# first thousand digits of pi correctly
$ECHO "\n** Running other tests\n"
$ECHO "Bringing 'pi' up to date ... "
if $MAKE pi ; then
:
else
$ECHO "\nMake failed to build pi.\n"
exit 1
fi
if [ ! -x pi ] ; then
$ECHO "\nCannot find 'pi' program; testing cannot continue.\n"
exit 1
fi
./pi 2000 > /tmp/pi.tmp.$$
if cmp tests/pi2k.txt /tmp/pi.tmp.$$ ; then
$ECHO "Okay! The pi test passes."
else
$ECHO "Oops! The pi test failed. :("
exit 1
fi
rm -f /tmp/pi.tmp.$$
exit 0
# Here there be dragons

View File

@@ -1,11 +0,0 @@
Within this directory, each of the file listed below is licensed under
the terms given in the file LICENSE-MPL, also in this directory.
basecvt.pod
gcd.pod
invmod.pod
isprime.pod
lap.pod
mpi-test.pod
prime.txt
prng.pod

View File

@@ -1,32 +0,0 @@
The contents of this file are subject to the Mozilla Public
License Version 1.1 (the "License"); you may not use this file
except in compliance with the License. You may obtain a copy of
the License at http://www.mozilla.org/MPL/
Software distributed under the License is distributed on an "AS
IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
implied. See the License for the specific language governing
rights and limitations under the License.
The Original Code is the Netscape security libraries.
The Initial Developer of the Original Code is Netscape
Communications Corporation. Portions created by Netscape are
Copyright (C) 1994-2000 Netscape Communications Corporation. All
Rights Reserved.
Contributor(s):
Alternatively, the contents of this file may be used under the
terms of the GNU General Public License Version 2 or later (the
"GPL"), in which case the provisions of the GPL are applicable
instead of those above. If you wish to allow use of your
version of this file only under the terms of the GPL and not to
allow others to use your version of this file under the MPL,
indicate your decision by deleting the provisions above and
replace them with the notice and other provisions required by
the GPL. If you do not delete the provisions above, a recipient
may use your version of this file under either the MPL or the
GPL.

View File

@@ -1,63 +0,0 @@
=head1 NAME
basecvt - radix conversion for arbitrary precision integers
=head1 SYNOPSIS
basecvt <ibase> <obase> [values]
=head1 DESCRIPTION
The B<basecvt> program is a command-line tool for converting integers
of arbitrary precision from one radix to another. The current version
supports radix values from 2 (binary) to 64, inclusive. The first two
command line arguments specify the input and output radix, in base 10.
Any further arguments are taken to be integers notated in the input
radix, and these are converted to the output radix. The output is
written, one integer per line, to standard output.
When reading integers, only digits considered "valid" for the input
radix are considered. Processing of an integer terminates when an
invalid input digit is encountered. So, for example, if you set the
input radix to 10 and enter '10ACF', B<basecvt> would assume that you
had entered '10' and ignore the rest of the string.
If no values are provided, no output is written, but the program
simply terminates with a zero exit status. Error diagnostics are
written to standard error in the event of out-of-range radix
specifications. Regardless of the actual values of the input and
output radix, the radix arguments are taken to be in base 10 (decimal)
notation.
=head1 DIGITS
For radices from 2-10, standard ASCII decimal digits 0-9 are used for
both input and output. For radices from 11-36, the ASCII letters A-Z
are also included, following the convention used in hexadecimal. In
this range, input is accepted in either upper or lower case, although
on output only lower-case letters are used.
For radices from 37-62, the output includes both upper- and lower-case
ASCII letters, and case matters. In this range, case is distinguished
both for input and for output values.
For radices 63 and 64, the characters '+' (plus) and '/' (forward
solidus) are also used. These are derived from the MIME base64
encoding scheme. The overall encoding is not the same as base64,
because the ASCII digits are used for the bottom of the range, and the
letters are shifted upward; however, the output will consist of the
same character set.
This input and output behaviour is inherited from the MPI library used
by B<basecvt>, and so is not configurable at runtime.
=head1 SEE ALSO
dec2hex(1), hex2dec(1)
=head1 AUTHOR
Michael J. Fromberger <sting@linguist.dartmouth.edu>
Thayer School of Engineering, Hanover, New Hampshire, USA
$Date: 2000-07-14 00:44:31 $

View File

@@ -1,66 +0,0 @@
#!/bin/sh
# ***** BEGIN LICENSE BLOCK *****
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
#
# The Initial Developer of the Original Code is
# Michael J. Fromberger <sting@linguist.dartmouth.edu>.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Netscape Communications Corporation
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
#
# ***** END LICENSE BLOCK *****
# $Id: build,v 1.3 2005-02-02 22:28:22 gerv%gerv.net Exp $
#
VERS="1.7p6"
SECT="1"
NAME="MPI Tools"
echo "Building manual pages ..."
case $# in
0)
files=`ls *.pod`
;;
*)
files=$*
;;
esac
for name in $files
do
echo -n "$name ... "
# sname=`noext $name`
sname=`basename $name .pod`
pod2man --section="$SECT" --center="$NAME" --release="$VERS" $name > $sname.$SECT
echo "(done)"
done
echo "Finished building."

View File

@@ -1,101 +0,0 @@
Division
This describes the division algorithm used by the MPI library.
Input: a, b; a > b
Compute: Q, R; a = Qb + R
The input numbers are normalized so that the high-order digit of b is
at least half the radix. This guarantees that we have a reasonable
way to guess at the digits of the quotient (this method was taken from
Knuth, vol. 2, with adaptations).
To normalize, test the high-order digit of b. If it is less than half
the radix, multiply both a and b by d, where:
radix - 1
d = -----------
bmax + 1
...where bmax is the high-order digit of b. Otherwise, set d = 1.
Given normalize values for a and b, let the notation a[n] denote the
nth digit of a. Let #a be the number of significant figures of a (not
including any leading zeroes).
Let R = 0
Let p = #a - 1
while(p >= 0)
do
R = (R * radix) + a[p]
p = p - 1
while(R < b and p >= 0)
if(R < b)
break
q = (R[#R - 1] * radix) + R[#R - 2]
q = q / b[#b - 1]
T = b * q
while(T > L)
q = q - 1
T = T - b
endwhile
L = L - T
Q = (Q * radix) + q
endwhile
At this point, Q is the quotient, and R is the normalized remainder.
To denormalize R, compute:
R = (R / d)
At this point, you are finished.
------------------------------------------------------------------
***** BEGIN LICENSE BLOCK *****
Version: MPL 1.1/GPL 2.0/LGPL 2.1
The contents of this file are subject to the Mozilla Public License Version
1.1 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.mozilla.org/MPL/
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
for the specific language governing rights and limitations under the
License.
The Original Code is the MPI Arbitrary Precision Integer Arithmetic
library.
The Initial Developer of the Original Code is
Michael J. Fromberger <sting@linguist.dartmouth.edu>
Portions created by the Initial Developer are Copyright (C) 1998, 2000
the Initial Developer. All Rights Reserved.
Contributor(s):
Alternatively, the contents of this file may be used under the terms of
either the GNU General Public License Version 2 or later (the "GPL"), or
the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
in which case the provisions of the GPL or the LGPL are applicable instead
of those above. If you wish to allow use of your version of this file only
under the terms of either the GPL or the LGPL, and not to allow others to
use your version of this file under the terms of the MPL, indicate your
decision by deleting the provisions above and replace them with the notice
and other provisions required by the GPL or the LGPL. If you do not delete
the provisions above, a recipient may use your version of this file under
the terms of any one of the MPL, the GPL or the LGPL.
***** END LICENSE BLOCK *****
$Id: div.txt,v 1.2 2005-02-02 22:28:22 gerv%gerv.net Exp $

View File

@@ -1,132 +0,0 @@
Exponentiation
For exponentiation, the MPI library uses a simple and fairly standard
square-and-multiply method. The algorithm is this:
Input: a, b
Output: a ** b
s = 1
while(b != 0)
if(b is odd)
s = s * a
endif
b = b / 2
x = x * x
endwhile
return s
The modular exponentiation is done the same way, except replacing:
s = s * a
with
s = (s * a) mod m
and replacing
x = x * x
with
x = (x * x) mod m
Here is a sample exponentiation using the MPI library, as compared to
the same problem solved by the Unix 'bc' program on my system:
Computation of 2,381,283 ** 235
'bc' says:
4385CA4A804D199FBEAD95FAD0796FAD0D0B51FC9C16743C45568C789666985DB719\
4D90E393522F74C9601262C0514145A49F3B53D00983F95FDFCEA3D0043ECEF6227E\
6FB59C924C3EE74447B359B5BF12A555D46CB819809EF423F004B55C587D6F0E8A55\
4988036A42ACEF9F71459F97CEF6E574BD7373657111648626B1FF8EE15F663B2C0E\
6BBE5082D4CDE8E14F263635AE8F35DB2C280819517BE388B5573B84C5A19C871685\
FD408A6471F9D6AFAF5129A7548EAE926B40874B340285F44765BF5468CE20A13267\
CD88CE6BC786ACED36EC7EA50F67FF27622575319068A332C3C0CB23E26FB55E26F4\
5F732753A52B8E2FB4D4F42D894242613CA912A25486C3DEC9C66E5DB6182F6C1761\
CF8CD0D255BE64B93836B27D452AE38F950EB98B517D4CF50D48F0165EF0CCCE1F5C\
49BF18219FDBA0EEDD1A7E8B187B70C2BAED5EC5C6821EF27FAFB1CFF70111C52235\
5E948B93A015AA1AE152B110BB5658CB14D3E45A48BFE7F082C1182672A455A695CD\
A1855E8781E625F25B41B516E77F589FA420C3B058861EA138CF7A2C58DB3C7504FD\
D29554D78237834CC5AE710D403CC4F6973D5012B7E117A8976B14A0B5AFA889BD47\
92C461F0F96116F00A97AE9E83DC5203680CAF9A18A062566C145650AB86BE4F907F\
A9F7AB4A700B29E1E5BACCD6DCBFA513E10832815F710807EED2E279081FEC61D619\
AB270BEB3D3A1787B35A9DD41A8766CF21F3B5C693B3BAB1C2FA14A4ED202BC35743\
E5CBE2391624D4F8C9BFBBC78D69764E7C6C5B11BF005677BFAD17D9278FFC1F158F\
1B3683FF7960FA0608103792C4163DC0AF3E06287BB8624F8FE3A0FFBDF82ACECA2F\
CFFF2E1AC93F3CA264A1B
MPI says:
4385CA4A804D199FBEAD95FAD0796FAD0D0B51FC9C16743C45568C789666985DB719\
4D90E393522F74C9601262C0514145A49F3B53D00983F95FDFCEA3D0043ECEF6227E\
6FB59C924C3EE74447B359B5BF12A555D46CB819809EF423F004B55C587D6F0E8A55\
4988036A42ACEF9F71459F97CEF6E574BD7373657111648626B1FF8EE15F663B2C0E\
6BBE5082D4CDE8E14F263635AE8F35DB2C280819517BE388B5573B84C5A19C871685\
FD408A6471F9D6AFAF5129A7548EAE926B40874B340285F44765BF5468CE20A13267\
CD88CE6BC786ACED36EC7EA50F67FF27622575319068A332C3C0CB23E26FB55E26F4\
5F732753A52B8E2FB4D4F42D894242613CA912A25486C3DEC9C66E5DB6182F6C1761\
CF8CD0D255BE64B93836B27D452AE38F950EB98B517D4CF50D48F0165EF0CCCE1F5C\
49BF18219FDBA0EEDD1A7E8B187B70C2BAED5EC5C6821EF27FAFB1CFF70111C52235\
5E948B93A015AA1AE152B110BB5658CB14D3E45A48BFE7F082C1182672A455A695CD\
A1855E8781E625F25B41B516E77F589FA420C3B058861EA138CF7A2C58DB3C7504FD\
D29554D78237834CC5AE710D403CC4F6973D5012B7E117A8976B14A0B5AFA889BD47\
92C461F0F96116F00A97AE9E83DC5203680CAF9A18A062566C145650AB86BE4F907F\
A9F7AB4A700B29E1E5BACCD6DCBFA513E10832815F710807EED2E279081FEC61D619\
AB270BEB3D3A1787B35A9DD41A8766CF21F3B5C693B3BAB1C2FA14A4ED202BC35743\
E5CBE2391624D4F8C9BFBBC78D69764E7C6C5B11BF005677BFAD17D9278FFC1F158F\
1B3683FF7960FA0608103792C4163DC0AF3E06287BB8624F8FE3A0FFBDF82ACECA2F\
CFFF2E1AC93F3CA264A1B
Diff says:
% diff bc.txt mp.txt
%
------------------------------------------------------------------
***** BEGIN LICENSE BLOCK *****
Version: MPL 1.1/GPL 2.0/LGPL 2.1
The contents of this file are subject to the Mozilla Public License Version
1.1 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.mozilla.org/MPL/
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
for the specific language governing rights and limitations under the
License.
The Original Code is the MPI Arbitrary Precision Integer Arithmetic
library.
The Initial Developer of the Original Code is
Michael J. Fromberger <sting@linguist.dartmouth.edu>
Portions created by the Initial Developer are Copyright (C) 1998, 2000
the Initial Developer. All Rights Reserved.
Contributor(s):
Alternatively, the contents of this file may be used under the terms of
either the GNU General Public License Version 2 or later (the "GPL"), or
the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
in which case the provisions of the GPL or the LGPL are applicable instead
of those above. If you wish to allow use of your version of this file only
under the terms of either the GPL or the LGPL, and not to allow others to
use your version of this file under the terms of the MPL, indicate your
decision by deleting the provisions above and replace them with the notice
and other provisions required by the GPL or the LGPL. If you do not delete
the provisions above, a recipient may use your version of this file under
the terms of any one of the MPL, the GPL or the LGPL.
***** END LICENSE BLOCK *****
$Id: expt.txt,v 1.2 2005-02-02 22:28:22 gerv%gerv.net Exp $

View File

@@ -1,27 +0,0 @@
=head1 NAME
gcd - compute greatest common divisor of two integers
=head1 SYNOPSIS
gcd <a> <b>
=head1 DESCRIPTION
The B<gcd> program computes the greatest common divisor of two
arbitrary-precision integers I<a> and I<b>. The result is written in
standard decimal notation to the standard output.
If I<b> is zero, B<gcd> will print an error message and exit.
=head1 SEE ALSO
invmod(1), isprime(1), lap(1)
=head1 AUTHOR
Michael J. Fromberger <sting@linguist.dartmouth.edu>
Thayer School of Engineering, Hanover, New Hampshire, USA
$Date: 2000-07-14 00:44:32 $

View File

@@ -1,33 +0,0 @@
=head1 NAME
invmod - compute modular inverse of an integer
=head1 SYNOPSIS
invmod <a> <m>
=head1 DESCRIPTION
The B<invmod> program computes the inverse of I<a>, modulo I<m>, if
that inverse exists. Both I<a> and I<m> are arbitrary-precision
integers in decimal notation. The result is written in standard
decimal notation to the standard output.
If there is no inverse, the message:
No inverse
...will be printed to the standard output (an inverse exists if and
only if the greatest common divisor of I<a> and I<m> is 1).
=head1 SEE ALSO
gcd(1), isprime(1), lap(1)
=head1 AUTHOR
Michael J. Fromberger <sting@linguist.dartmouth.edu>
Thayer School of Engineering, Hanover, New Hampshire, USA
$Date: 2000-07-14 00:44:33 $

View File

@@ -1,62 +0,0 @@
=head1 NAME
isprime - probabilistic primality testing
=head1 SYNOPSIS
isprime <a>
=head1 DESCRIPTION
The B<isprime> program attempts to determine whether the arbitrary
precision integer I<a> is prime. It first tests I<a> for divisibility
by the first 170 or so small primes, and assuming I<a> is not
divisible by any of these, applies 15 iterations of the Rabin-Miller
probabilistic primality test.
If the program discovers that the number is composite, it will print:
Not prime (reason)
Where I<reason> is either:
divisible by small prime x
Or:
failed nth pseudoprime test
In the first case, I<x> indicates the first small prime factor that
was found. In the second case, I<n> indicates which of the
pseudoprime tests failed (numbered from 1)
If this happens, the number is definitely not prime. However, if the
number succeeds, this message results:
Probably prime, 1 in 4^15 chance of false positive
If this happens, the number is prime with very high probability, but
its primality has not been absolutely proven, only demonstrated to a
very convincing degree.
The value I<a> can be input in standard decimal notation, or, if it is
prefixed with I<Ox>, it will be read as hexadecimal.
=head1 ENVIRONMENT
You can control how many iterations of Rabin-Miller are performed on
the candidate number by setting the I<RM_TESTS> environment variable
to an integer value before starting up B<isprime>. This will change
the output slightly if the number passes all the tests.
=head1 SEE ALSO
gcd(1), invmod(1), lap(1)
=head1 AUTHOR
Michael J. Fromberger <sting@linguist.dartmouth.edu>
Thayer School of Engineering, Hanover, New Hampshire, USA
$Date: 2000-07-14 00:44:33 $

View File

@@ -1,35 +0,0 @@
=head1 NAME
lap - compute least annihilating power of a number
=head1 SYNOPSIS
lap <a> <m>
=head1 DESCRIPTION
The B<lap> program computes the order of I<a> modulo I<m>, for
arbitrary precision integers I<a> and I<m>. The B<order> of I<a>
modulo I<m> is defined as the smallest positive value I<n> for which
I<a> raised to the I<n>th power, modulo I<m>, is equal to 1. The
order may not exist, if I<m> is composite.
=head1 RESTRICTIONS
This program is very slow, especially for large moduli. It is
intended as a way to help find primitive elements in a modular field,
but it does not do so in a particularly inefficient manner. It was
written simply to help verify that a particular candidate does not
have an obviously short cycle mod I<m>.
=head1 SEE ALSO
gcd(1), invmod(1), isprime(1)
=head1 AUTHOR
Michael J. Fromberger <sting@linguist.dartmouth.edu>
Thayer School of Engineering, Hanover, New Hampshire, USA
$Date: 2000-07-14 00:44:34 $

View File

@@ -1,49 +0,0 @@
=head1 NAME
mpi-test - automated test program for MPI library
=head1 SYNOPSIS
mpi-test <suite-name> [quiet]
mpi-test list
mpi-test help
=head1 DESCRIPTION
The B<mpi-test> program is a general unit test driver for the MPI
library. It is used to verify that the library works as it is
supposed to on your architecture. As with most such things, passing
all the tests in B<mpi-test> does not guarantee the code is correct,
but if any of them fail, there are certainly problems.
Each major function of the library can be tested individually. For a
list of the test suites understood by B<mpi-test>, run it with the
I<list> command line option:
mpi-test list
This will display a list of the available test suites and a brief
synopsis of what each one does. For a brief overview of this
document, run B<mpi-test> I<help>.
B<mpi-test> exits with a zero status if the selected test succeeds, or
a nonzero status if it fails. If a I<suite-name> which is not
understood by B<mpi-test> is given, a diagnostic is printed to the
standard error, and the program exits with a result code of 2. If a
test fails, the result code will be 1, and a diagnostic is ordinarily
printed to the standard error. However, if the I<quiet> option is
provided, these diagnostics will be suppressed.
=head1 RESTRICTIONS
Only a few canned test cases are provided. The solutions have been
verified using the GNU bc(1) program, so bugs there may cause problems
here; however, this is very unlikely, so if a test fails, it is almost
certainly my fault, not bc(1)'s.
=head1 AUTHOR
Michael J. Fromberger <sting@linguist.dartmouth.edu>
Thayer School of Engineering, Hanover, New Hampshire, USA
$Date: 2000-07-14 00:44:34 $

View File

@@ -1,114 +0,0 @@
Multiplication
This describes the multiplication algorithm used by the MPI library.
This is basically a standard "schoolbook" algorithm. It is slow --
O(mn) for m = #a, n = #b -- but easy to implement and verify.
Basically, we run two nested loops, as illustrated here (R is the
radix):
k = 0
for j <- 0 to (#b - 1)
for i <- 0 to (#a - 1)
w = (a[j] * b[i]) + k + c[i+j]
c[i+j] = w mod R
k = w div R
endfor
c[i+j] = k;
k = 0;
endfor
It is necessary that 'w' have room for at least two radix R digits.
The product of any two digits in radix R is at most:
(R - 1)(R - 1) = R^2 - 2R + 1
Since a two-digit radix-R number can hold R^2 - 1 distinct values,
this insures that the product will fit into the two-digit register.
To insure that two digits is enough for w, we must also show that
there is room for the carry-in from the previous multiplication, and
the current value of the product digit that is being recomputed.
Assuming each of these may be as big as R - 1 (and no larger,
certainly), two digits will be enough if and only if:
(R^2 - 2R + 1) + 2(R - 1) <= R^2 - 1
Solving this equation shows that, indeed, this is the case:
R^2 - 2R + 1 + 2R - 2 <= R^2 - 1
R^2 - 1 <= R^2 - 1
This suggests that a good radix would be one more than the largest
value that can be held in half a machine word -- so, for example, as
in this implementation, where we used a radix of 65536 on a machine
with 4-byte words. Another advantage of a radix of this sort is that
binary-level operations are easy on numbers in this representation.
Here's an example multiplication worked out longhand in radix-10,
using the above algorithm:
a = 999
b = x 999
-------------
p = 98001
w = (a[jx] * b[ix]) + kin + c[ix + jx]
c[ix+jx] = w % RADIX
k = w / RADIX
product
ix jx a[jx] b[ix] kin w c[i+j] kout 000000
0 0 9 9 0 81+0+0 1 8 000001
0 1 9 9 8 81+8+0 9 8 000091
0 2 9 9 8 81+8+0 9 8 000991
8 0 008991
1 0 9 9 0 81+0+9 0 9 008901
1 1 9 9 9 81+9+9 9 9 008901
1 2 9 9 9 81+9+8 8 9 008901
9 0 098901
2 0 9 9 0 81+0+9 0 9 098001
2 1 9 9 9 81+9+8 8 9 098001
2 2 9 9 9 81+9+9 9 9 098001
------------------------------------------------------------------
***** BEGIN LICENSE BLOCK *****
Version: MPL 1.1/GPL 2.0/LGPL 2.1
The contents of this file are subject to the Mozilla Public License Version
1.1 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.mozilla.org/MPL/
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
for the specific language governing rights and limitations under the
License.
The Original Code is the MPI Arbitrary Precision Integer Arithmetic
library.
The Initial Developer of the Original Code is
Michael J. Fromberger <sting@linguist.dartmouth.edu>
Portions created by the Initial Developer are Copyright (C) 1998, 2000
the Initial Developer. All Rights Reserved.
Contributor(s):
Alternatively, the contents of this file may be used under the terms of
either the GNU General Public License Version 2 or later (the "GPL"), or
the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
in which case the provisions of the GPL or the LGPL are applicable instead
of those above. If you wish to allow use of your version of this file only
under the terms of either the GPL or the LGPL, and not to allow others to
use your version of this file under the terms of the MPL, indicate your
decision by deleting the provisions above and replace them with the notice
and other provisions required by the GPL or the LGPL. If you do not delete
the provisions above, a recipient may use your version of this file under
the terms of any one of the MPL, the GPL or the LGPL.
***** END LICENSE BLOCK *****
$Id: mul.txt,v 1.2 2005-02-02 22:28:22 gerv%gerv.net Exp $

View File

@@ -1,90 +0,0 @@
This file describes how pi is computed by the program in 'pi.c' (see
the utils subdirectory).
Basically, we use Machin's formula, which is what everyone in the
world uses as a simple method for computing approximations to pi.
This works for up to a few thousand digits without too much effort.
Beyond that, though, it gets too slow.
Machin's formula states:
pi := 16 * arctan(1/5) - 4 * arctan(1/239)
We compute this in integer arithmetic by first multiplying everything
through by 10^d, where 'd' is the number of digits of pi we wanted to
compute. It turns out, the last few digits will be wrong, but the
number that are wrong is usually very small (ordinarly only 2-3).
Having done this, we compute the arctan() function using the formula:
1 1 1 1 1
arctan(1/x) := --- - ----- + ----- - ----- + ----- - ...
x 3 x^3 5 x^5 7 x^7 9 x^9
This is done iteratively by computing the first term manually, and
then iteratively dividing x^2 and k, where k = 3, 5, 7, ... out of the
current figure. This is then added to (or subtracted from) a running
sum, as appropriate. The iteration continues until we overflow our
available precision and the current figure goes to zero under integer
division. At that point, we're finished.
Actually, we get a couple extra bits of precision out of the fact that
we know we're computing y * arctan(1/x), by setting up the multiplier
as:
y * 10^d
... instead of just 10^d. There is also a bit of cleverness in how
the loop is constructed, to avoid special-casing the first term.
Check out the code for arctan() in 'pi.c', if you are interested in
seeing how it is set up.
Thanks to Jason P. for this algorithm, which I assembled from notes
and programs found on his cool "Pile of Pi Programs" page, at:
http://www.isr.umd.edu/~jasonp/pipage.html
Thanks also to Henrik Johansson <Henrik.Johansson@Nexus.Comm.SE>, from
whose pi program I borrowed the clever idea of pre-multiplying by x in
order to avoid a special case on the loop iteration.
------------------------------------------------------------------
***** BEGIN LICENSE BLOCK *****
Version: MPL 1.1/GPL 2.0/LGPL 2.1
The contents of this file are subject to the Mozilla Public License Version
1.1 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.mozilla.org/MPL/
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
for the specific language governing rights and limitations under the
License.
The Original Code is the MPI Arbitrary Precision Integer Arithmetic
library.
The Initial Developer of the Original Code is
Michael J. Fromberger <sting@linguist.dartmouth.edu>
Portions created by the Initial Developer are Copyright (C) 1998, 2000
the Initial Developer. All Rights Reserved.
Contributor(s):
Alternatively, the contents of this file may be used under the terms of
either the GNU General Public License Version 2 or later (the "GPL"), or
the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
in which case the provisions of the GPL or the LGPL are applicable instead
of those above. If you wish to allow use of your version of this file only
under the terms of either the GPL or the LGPL, and not to allow others to
use your version of this file under the terms of the MPL, indicate your
decision by deleting the provisions above and replace them with the notice
and other provisions required by the GPL or the LGPL. If you do not delete
the provisions above, a recipient may use your version of this file under
the terms of any one of the MPL, the GPL or the LGPL.
***** END LICENSE BLOCK *****
$Id: pi.txt,v 1.2 2005-02-02 22:28:22 gerv%gerv.net Exp $

File diff suppressed because it is too large Load Diff

View File

@@ -1,41 +0,0 @@
=head1 NAME
prng - pseudo-random number generator
=head1 SYNOPSIS
prng [count]
=head1 DESCRIPTION
B<Prng> generates 32-bit pseudo-random integers using the
Blum-Blum-Shub (BBS) quadratic residue generator. It is seeded using
the standard C library's rand() function, which itself seeded from the
system clock and the process ID number. Thus, the values generated
are not particularly useful for cryptographic applications, but they
are in general much better than the typical output of the usual
multiplicative congruency generator used by most runtime libraries.
You may optionally specify how many random values should be generated
by giving a I<count> argument on the command line. If you do not
specify a count, only one random value will be generated. The results
are output to the standard output in decimal notation, one value per
line.
=head1 RESTRICTIONS
As stated above, B<prng> uses the C library's rand() function to seed
the generator, so it is not terribly suitable for cryptographic
applications. Also note that each time you run the program, a new
seed is generated, so it is better to run it once with a I<count>
parameter than it is to run it multiple times to generate several
values.
=head1 AUTHOR
Michael J. Fromberger <sting@linguist.dartmouth.edu>
Copyright (C) 1998 Michael J. Fromberger, All Rights Reserved
Thayer School of Engineering, Dartmouth College, Hanover, NH USA
$Date: 2000-07-14 00:44:36 $

View File

@@ -1,121 +0,0 @@
Modular Reduction
Usually, modular reduction is accomplished by long division, using the
mp_div() or mp_mod() functions. However, when performing modular
exponentiation, you spend a lot of time reducing by the same modulus
again and again. For this purpose, doing a full division for each
multiplication is quite inefficient.
For this reason, the mp_exptmod() function does not perform modular
reductions in the usual way, but instead takes advantage of an
algorithm due to Barrett, as described by Menezes, Oorschot and
VanStone in their book _Handbook of Applied Cryptography_, published
by the CRC Press (see Chapter 14 for details). This method reduces
most of the computation of reduction to efficient shifting and masking
operations, and avoids the multiple-precision division entirely.
Here is a brief synopsis of Barrett reduction, as it is implemented in
this library.
Let b denote the radix of the computation (one more than the maximum
value that can be denoted by an mp_digit). Let m be the modulus, and
let k be the number of significant digits of m. Let x be the value to
be reduced modulo m. By the Division Theorem, there exist unique
integers Q and R such that:
x = Qm + R, 0 <= R < m
Barrett reduction takes advantage of the fact that you can easily
approximate Q to within two, given a value M such that:
2k
b
M = floor( ----- )
m
Computation of M requires a full-precision division step, so if you
are only doing a single reduction by m, you gain no advantage.
However, when multiple reductions by the same m are required, this
division need only be done once, beforehand. Using this, we can use
the following equation to compute Q', an approximation of Q:
x
floor( ------ ) M
k-1
b
Q' = floor( ----------------- )
k+1
b
The divisions by b^(k-1) and b^(k+1) and the floor() functions can be
efficiently implemented with shifts and masks, leaving only a single
multiplication to be performed to get this approximation. It can be
shown that Q - 2 <= Q' <= Q, so in the worst case, we can get out with
two additional subtractions to bring the value into line with the
actual value of Q.
Once we've got Q', we basically multiply that by m and subtract from
x, yielding:
x - Q'm = Qm + R - Q'm
Since we know the constraint on Q', this is one of:
R
m + R
2m + R
Since R < m by the Division Theorem, we can simply subtract off m
until we get a value in the correct range, which will happen with no
more than 2 subtractions:
v = x - Q'm
while(v >= m)
v = v - m
endwhile
In random performance trials, modular exponentiation using this method
of reduction gave around a 40% speedup over using the division for
reduction.
------------------------------------------------------------------
***** BEGIN LICENSE BLOCK *****
Version: MPL 1.1/GPL 2.0/LGPL 2.1
The contents of this file are subject to the Mozilla Public License Version
1.1 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.mozilla.org/MPL/
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
for the specific language governing rights and limitations under the
License.
The Original Code is the MPI Arbitrary Precision Integer Arithmetic
library.
The Initial Developer of the Original Code is
Michael J. Fromberger <sting@linguist.dartmouth.edu>
Portions created by the Initial Developer are Copyright (C) 1998, 2000
the Initial Developer. All Rights Reserved.
Contributor(s):
Alternatively, the contents of this file may be used under the terms of
either the GNU General Public License Version 2 or later (the "GPL"), or
the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
in which case the provisions of the GPL or the LGPL are applicable instead
of those above. If you wish to allow use of your version of this file only
under the terms of either the GPL or the LGPL, and not to allow others to
use your version of this file under the terms of the MPL, indicate your
decision by deleting the provisions above and replace them with the notice
and other provisions required by the GPL or the LGPL. If you do not delete
the provisions above, a recipient may use your version of this file under
the terms of any one of the MPL, the GPL or the LGPL.
***** END LICENSE BLOCK *****
$Id: redux.txt,v 1.2 2005-02-02 22:28:22 gerv%gerv.net Exp $

View File

@@ -1,87 +0,0 @@
Square Root
A simple iterative algorithm is used to compute the greatest integer
less than or equal to the square root. Essentially, this is Newton's
linear approximation, computed by finding successive values of the
equation:
x[k]^2 - V
x[k+1] = x[k] - ------------
2 x[k]
...where V is the value for which the square root is being sought. In
essence, what is happening here is that we guess a value for the
square root, then figure out how far off we were by squaring our guess
and subtracting the target. Using this value, we compute a linear
approximation for the error, and adjust the "guess". We keep doing
this until the precision gets low enough that the above equation
yields a quotient of zero. At this point, our last guess is one
greater than the square root we're seeking.
The initial guess is computed by dividing V by 4, which is a heuristic
I have found to be fairly good on average. This also has the
advantage of being very easy to compute efficiently, even for large
values.
So, the resulting algorithm works as follows:
x = V / 4 /* compute initial guess */
loop
t = (x * x) - V /* Compute absolute error */
u = 2 * x /* Adjust by tangent slope */
t = t / u
/* Loop is done if error is zero */
if(t == 0)
break
/* Adjust guess by error term */
x = x - t
end
x = x - 1
The result of the computation is the value of x.
------------------------------------------------------------------
***** BEGIN LICENSE BLOCK *****
Version: MPL 1.1/GPL 2.0/LGPL 2.1
The contents of this file are subject to the Mozilla Public License Version
1.1 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.mozilla.org/MPL/
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
for the specific language governing rights and limitations under the
License.
The Original Code is the MPI Arbitrary Precision Integer Arithmetic
library.
The Initial Developer of the Original Code is
Michael J. Fromberger <sting@linguist.dartmouth.edu>
Portions created by the Initial Developer are Copyright (C) 1998, 2000
the Initial Developer. All Rights Reserved.
Contributor(s):
Alternatively, the contents of this file may be used under the terms of
either the GNU General Public License Version 2 or later (the "GPL"), or
the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
in which case the provisions of the GPL or the LGPL are applicable instead
of those above. If you wish to allow use of your version of this file only
under the terms of either the GPL or the LGPL, and not to allow others to
use your version of this file under the terms of the MPL, indicate your
decision by deleting the provisions above and replace them with the notice
and other provisions required by the GPL or the LGPL. If you do not delete
the provisions above, a recipient may use your version of this file under
the terms of any one of the MPL, the GPL or the LGPL.
***** END LICENSE BLOCK *****
$Id: sqrt.txt,v 1.2 2005-02-02 22:28:22 gerv%gerv.net Exp $

View File

@@ -1,109 +0,0 @@
Squaring Algorithm
When you are squaring a value, you can take advantage of the fact that
half the multiplications performed by the more general multiplication
algorithm (see 'mul.txt' for a description) are redundant when the
multiplicand equals the multiplier.
In particular, the modified algorithm is:
k = 0
for j <- 0 to (#a - 1)
w = c[2*j] + (a[j] ^ 2);
k = w div R
for i <- j+1 to (#a - 1)
w = (2 * a[j] * a[i]) + k + c[i+j]
c[i+j] = w mod R
k = w div R
endfor
c[i+j] = k;
k = 0;
endfor
On the surface, this looks identical to the multiplication algorithm;
however, note the following differences:
- precomputation of the leading term in the outer loop
- i runs from j+1 instead of from zero
- doubling of a[i] * a[j] in the inner product
Unfortunately, the construction of the inner product is such that we
need more than two digits to represent the inner product, in some
cases. In a C implementation, this means that some gymnastics must be
performed in order to handle overflow, for which C has no direct
abstraction. We do this by observing the following:
If we have multiplied a[i] and a[j], and the product is more than half
the maximum value expressible in two digits, then doubling this result
will overflow into a third digit. If this occurs, we take note of the
overflow, and double it anyway -- C integer arithmetic ignores
overflow, so the two digits we get back should still be valid, modulo
the overflow.
Having doubled this value, we now have to add in the remainders and
the digits already computed by earlier steps. If we did not overflow
in the previous step, we might still cause an overflow here. That
will happen whenever the maximum value expressible in two digits, less
the amount we have to add, is greater than the result of the previous
step. Thus, the overflow computation is:
u = 0
w = a[i] * a[j]
if(w > (R - 1)/ 2)
u = 1;
w = w * 2
v = c[i + j] + k
if(u == 0 && (R - 1 - v) < w)
u = 1
If there is an overflow, u will be 1, otherwise u will be 0. The rest
of the parameters are the same as they are in the above description.
------------------------------------------------------------------
***** BEGIN LICENSE BLOCK *****
Version: MPL 1.1/GPL 2.0/LGPL 2.1
The contents of this file are subject to the Mozilla Public License Version
1.1 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.mozilla.org/MPL/
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
for the specific language governing rights and limitations under the
License.
The Original Code is the MPI Arbitrary Precision Integer Arithmetic
library.
The Initial Developer of the Original Code is
Michael J. Fromberger <sting@linguist.dartmouth.edu>
Portions created by the Initial Developer are Copyright (C) 1998, 2000
the Initial Developer. All Rights Reserved.
Contributor(s):
Alternatively, the contents of this file may be used under the terms of
either the GNU General Public License Version 2 or later (the "GPL"), or
the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
in which case the provisions of the GPL or the LGPL are applicable instead
of those above. If you wish to allow use of your version of this file only
under the terms of either the GPL or the LGPL, and not to allow others to
use your version of this file under the terms of the MPL, indicate your
decision by deleting the provisions above and replace them with the notice
and other provisions required by the GPL or the LGPL. If you do not delete
the provisions above, a recipient may use your version of this file under
the terms of any one of the MPL, the GPL or the LGPL.
***** END LICENSE BLOCK *****
$Id: square.txt,v 1.2 2005-02-02 22:28:22 gerv%gerv.net Exp $

View File

@@ -1,250 +0,0 @@
MPI Library Timing Tests
Hardware/OS
(A) SGI O2 1 x MIPS R10000 250MHz IRIX 6.5.3
(B) IBM RS/6000 43P-240 1 x PowerPC 603e 223MHz AIX 4.3
(C) Dell GX1/L+ 1 x Pentium III 550MHz Linux 2.2.12-20
(D) PowerBook G3 1 x PowerPC 750 266MHz LinuxPPC 2.2.6-15apmac
(E) PowerBook G3 1 x PowerPC 750 266MHz MacOS 8.5.1
(F) PowerBook G3 1 x PowerPC 750 400MHz MacOS 9.0.2
Compiler
(1) MIPSpro C 7.2.1 -O3 optimizations
(2) GCC 2.95.1 -O3 optimizations
(3) IBM AIX xlc -O3 optimizations (version unknown)
(4) EGCS 2.91.66 -O3 optimizations
(5) Metrowerks CodeWarrior 5.0 C, all optimizations
(6) MIPSpro C 7.30 -O3 optimizations
(7) same as (6), with optimized libmalloc.so
Timings are given in seconds, computed using the C library's clock()
function. The first column gives the hardware and compiler
configuration used for the test. The second column indicates the
number of tests that were aggregated to get the statistics for that
size. These were compiled using 16 bit digits.
Source data were generated randomly using a fixed seed, so they should
be internally consistent, but may vary on different systems depending
on the C library. Also, since the resolution of the timer accessed by
clock() varies, there may be some variance in the precision of these
measurements.
Prime Generation (primegen)
128 bits:
A1 200 min=0.03, avg=0.19, max=0.72, sum=38.46
A2 200 min=0.02, avg=0.16, max=0.62, sum=32.55
B3 200 min=0.01, avg=0.07, max=0.22, sum=13.29
C4 200 min=0.00, avg=0.03, max=0.20, sum=6.14
D4 200 min=0.00, avg=0.05, max=0.33, sum=9.70
A6 200 min=0.01, avg=0.09, max=0.36, sum=17.48
A7 200 min=0.00, avg=0.05, max=0.24, sum=10.07
192 bits:
A1 200 min=0.05, avg=0.45, max=3.13, sum=89.96
A2 200 min=0.04, avg=0.39, max=2.61, sum=77.55
B3 200 min=0.02, avg=0.18, max=1.25, sum=36.97
C4 200 min=0.01, avg=0.09, max=0.33, sum=18.24
D4 200 min=0.02, avg=0.15, max=0.54, sum=29.63
A6 200 min=0.02, avg=0.24, max=1.70, sum=47.84
A7 200 min=0.01, avg=0.15, max=1.05, sum=30.88
256 bits:
A1 200 min=0.08, avg=0.92, max=6.13, sum=184.79
A2 200 min=0.06, avg=0.76, max=5.03, sum=151.11
B3 200 min=0.04, avg=0.41, max=2.68, sum=82.35
C4 200 min=0.02, avg=0.19, max=0.69, sum=37.91
D4 200 min=0.03, avg=0.31, max=1.15, sum=63.00
A6 200 min=0.04, avg=0.48, max=3.13, sum=95.46
A7 200 min=0.03, avg=0.37, max=2.36, sum=73.60
320 bits:
A1 200 min=0.11, avg=1.59, max=6.14, sum=318.81
A2 200 min=0.09, avg=1.27, max=4.93, sum=254.03
B3 200 min=0.07, avg=0.82, max=3.13, sum=163.80
C4 200 min=0.04, avg=0.44, max=1.91, sum=87.59
D4 200 min=0.06, avg=0.73, max=3.22, sum=146.73
A6 200 min=0.07, avg=0.93, max=3.50, sum=185.01
A7 200 min=0.05, avg=0.76, max=2.94, sum=151.78
384 bits:
A1 200 min=0.16, avg=2.69, max=11.41, sum=537.89
A2 200 min=0.13, avg=2.15, max=9.03, sum=429.14
B3 200 min=0.11, avg=1.54, max=6.49, sum=307.78
C4 200 min=0.06, avg=0.81, max=4.84, sum=161.13
D4 200 min=0.10, avg=1.38, max=8.31, sum=276.81
A6 200 min=0.11, avg=1.73, max=7.36, sum=345.55
A7 200 min=0.09, avg=1.46, max=6.12, sum=292.02
448 bits:
A1 200 min=0.23, avg=3.36, max=15.92, sum=672.63
A2 200 min=0.17, avg=2.61, max=12.25, sum=522.86
B3 200 min=0.16, avg=2.10, max=9.83, sum=420.86
C4 200 min=0.09, avg=1.44, max=7.64, sum=288.36
D4 200 min=0.16, avg=2.50, max=13.29, sum=500.17
A6 200 min=0.15, avg=2.31, max=10.81, sum=461.58
A7 200 min=0.14, avg=2.03, max=9.53, sum=405.16
512 bits:
A1 200 min=0.30, avg=6.12, max=22.18, sum=1223.35
A2 200 min=0.25, avg=4.67, max=16.90, sum=933.18
B3 200 min=0.23, avg=4.13, max=14.94, sum=825.45
C4 200 min=0.13, avg=2.08, max=9.75, sum=415.22
D4 200 min=0.24, avg=4.04, max=20.18, sum=808.11
A6 200 min=0.22, avg=4.47, max=16.19, sum=893.83
A7 200 min=0.20, avg=4.03, max=14.65, sum=806.02
Modular Exponentation (metime)
The following results are aggregated from 200 pseudo-randomly
generated tests, based on a fixed seed.
base, exponent, and modulus size (bits)
P/C 128 192 256 320 384 448 512 640 768 896 1024
------- -----------------------------------------------------------------
A1 0.015 0.027 0.047 0.069 0.098 0.133 0.176 0.294 0.458 0.680 1.040
A2 0.013 0.024 0.037 0.053 0.077 0.102 0.133 0.214 0.326 0.476 0.668
B3 0.005 0.011 0.021 0.036 0.056 0.084 0.121 0.222 0.370 0.573 0.840
C4 0.002 0.006 0.011 0.020 0.032 0.048 0.069 0.129 0.223 0.344 0.507
D4 0.004 0.010 0.019 0.034 0.056 0.085 0.123 0.232 0.390 0.609 0.899
E5 0.007 0.015 0.031 0.055 0.088 0.133 0.183 0.342 0.574 0.893 1.317
A6 0.008 0.016 0.038 0.042 0.064 0.093 0.133 0.239 0.393 0.604 0.880
A7 0.005 0.011 0.020 0.036 0.056 0.083 0.121 0.223 0.374 0.583 0.855
Multiplication and Squaring tests, (mulsqr)
The following results are aggregated from 500000 pseudo-randomly
generated tests, based on a per-run wall-clock seed. Times are given
in seconds, except where indicated in microseconds (us).
(A1)
bits multiply square ad percent time/mult time/square
64 9.33 9.15 > 1.9 18.7us 18.3us
128 10.88 10.44 > 4.0 21.8us 20.9us
192 13.30 11.89 > 10.6 26.7us 23.8us
256 14.88 12.64 > 15.1 29.8us 25.3us
320 18.64 15.01 > 19.5 37.3us 30.0us
384 23.11 17.70 > 23.4 46.2us 35.4us
448 28.28 20.88 > 26.2 56.6us 41.8us
512 34.09 24.51 > 28.1 68.2us 49.0us
640 47.86 33.25 > 30.5 95.7us 66.5us
768 64.91 43.54 > 32.9 129.8us 87.1us
896 84.49 55.48 > 34.3 169.0us 111.0us
1024 107.25 69.21 > 35.5 214.5us 138.4us
1536 227.97 141.91 > 37.8 456.0us 283.8us
2048 394.05 242.15 > 38.5 788.1us 484.3us
(A2)
bits multiply square ad percent time/mult time/square
64 7.87 7.95 < 1.0 15.7us 15.9us
128 9.40 9.19 > 2.2 18.8us 18.4us
192 11.15 10.59 > 5.0 22.3us 21.2us
256 12.02 11.16 > 7.2 24.0us 22.3us
320 14.62 13.43 > 8.1 29.2us 26.9us
384 17.72 15.80 > 10.8 35.4us 31.6us
448 21.24 18.51 > 12.9 42.5us 37.0us
512 25.36 21.78 > 14.1 50.7us 43.6us
640 34.57 29.00 > 16.1 69.1us 58.0us
768 46.10 37.60 > 18.4 92.2us 75.2us
896 58.94 47.72 > 19.0 117.9us 95.4us
1024 73.76 59.12 > 19.8 147.5us 118.2us
1536 152.00 118.80 > 21.8 304.0us 237.6us
2048 259.41 199.57 > 23.1 518.8us 399.1us
(B3)
bits multiply square ad percent time/mult time/square
64 2.60 2.47 > 5.0 5.20us 4.94us
128 4.43 4.06 > 8.4 8.86us 8.12us
192 7.03 6.10 > 13.2 14.1us 12.2us
256 10.44 8.59 > 17.7 20.9us 17.2us
320 14.44 11.64 > 19.4 28.9us 23.3us
384 19.12 15.08 > 21.1 38.2us 30.2us
448 24.55 19.09 > 22.2 49.1us 38.2us
512 31.03 23.53 > 24.2 62.1us 47.1us
640 45.05 33.80 > 25.0 90.1us 67.6us
768 63.02 46.05 > 26.9 126.0us 92.1us
896 83.74 60.29 > 28.0 167.5us 120.6us
1024 106.73 76.65 > 28.2 213.5us 153.3us
1536 228.94 160.98 > 29.7 457.9us 322.0us
2048 398.08 275.93 > 30.7 796.2us 551.9us
(C4)
bits multiply square ad percent time/mult time/square
64 1.34 1.28 > 4.5 2.68us 2.56us
128 2.76 2.59 > 6.2 5.52us 5.18us
192 4.52 4.16 > 8.0 9.04us 8.32us
256 6.64 5.99 > 9.8 13.3us 12.0us
320 9.20 8.13 > 11.6 18.4us 16.3us
384 12.01 10.58 > 11.9 24.0us 21.2us
448 15.24 13.33 > 12.5 30.5us 26.7us
512 19.02 16.46 > 13.5 38.0us 32.9us
640 27.56 23.54 > 14.6 55.1us 47.1us
768 37.89 31.78 > 16.1 75.8us 63.6us
896 49.24 41.42 > 15.9 98.5us 82.8us
1024 62.59 52.18 > 16.6 125.2us 104.3us
1536 131.66 107.72 > 18.2 263.3us 215.4us
2048 226.45 182.95 > 19.2 453.0us 365.9us
(A7)
bits multiply square ad percent time/mult time/square
64 1.74 1.71 > 1.7 3.48us 3.42us
128 3.48 2.96 > 14.9 6.96us 5.92us
192 5.74 4.60 > 19.9 11.5us 9.20us
256 8.75 6.61 > 24.5 17.5us 13.2us
320 12.5 8.99 > 28.1 25.0us 18.0us
384 16.9 11.9 > 29.6 33.8us 23.8us
448 22.2 15.2 > 31.7 44.4us 30.4us
512 28.3 19.0 > 32.7 56.6us 38.0us
640 42.4 28.0 > 34.0 84.8us 56.0us
768 59.4 38.5 > 35.2 118.8us 77.0us
896 79.5 51.2 > 35.6 159.0us 102.4us
1024 102.6 65.5 > 36.2 205.2us 131.0us
1536 224.3 140.6 > 37.3 448.6us 281.2us
2048 393.4 244.3 > 37.9 786.8us 488.6us
------------------------------------------------------------------
***** BEGIN LICENSE BLOCK *****
Version: MPL 1.1/GPL 2.0/LGPL 2.1
The contents of this file are subject to the Mozilla Public License Version
1.1 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.mozilla.org/MPL/
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
for the specific language governing rights and limitations under the
License.
The Original Code is the MPI Arbitrary Precision Integer Arithmetic
library.
The Initial Developer of the Original Code is
Michael J. Fromberger <sting@linguist.dartmouth.edu>
Portions created by the Initial Developer are Copyright (C) 1998, 2000
the Initial Developer. All Rights Reserved.
Contributor(s):
Alternatively, the contents of this file may be used under the terms of
either the GNU General Public License Version 2 or later (the "GPL"), or
the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
in which case the provisions of the GPL or the LGPL are applicable instead
of those above. If you wish to allow use of your version of this file only
under the terms of either the GPL or the LGPL, and not to allow others to
use your version of this file under the terms of the MPL, indicate your
decision by deleting the provisions above and replace them with the notice
and other provisions required by the GPL or the LGPL. If you do not delete
the provisions above, a recipient may use your version of this file under
the terms of any one of the MPL, the GPL or the LGPL.
***** END LICENSE BLOCK *****
$Id: timing.txt,v 1.2 2005-02-02 22:28:22 gerv%gerv.net Exp $

View File

@@ -1,640 +0,0 @@
/*
* The contents of this file are subject to the Mozilla Public
* License Version 1.1 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
* implied. See the License for the specific language governing
* rights and limitations under the License.
*
* The Original Code is multacc512 multiple-precision integer arithmetic.
*
* The Initial Developer of the Original Code is Hewlett-Packard Company.
* Portions created by Hewlett-Packard Company are
* Copyright (C) March 1999, Hewlett-Packard Company. All Rights Reserved.
*
* Contributor(s):
* coded by: Bill Worley, Hewlett-Packard labs
*
* Alternatively, the contents of this file may be used under the
* terms of the GNU General Public License Version 2 or later (the
* "GPL"), in which case the provisions of the GPL are applicable
* instead of those above. If you wish to allow use of your
* version of this file only under the terms of the GPL and not to
* allow others to use your version of this file under the MPL,
* indicate your decision by deleting the provisions above and
* replace them with the notice and other provisions required by
* the GPL. If you do not delete the provisions above, a recipient
* may use your version of this file under either the MPL or the
* GPL.
*
* This PA-RISC 2.0 function computes the product of two unsigned integers,
* and adds the result to a previously computed integer. The multiplicand
* is a 512-bit (64-byte, eight doubleword) unsigned integer, stored in
* memory in little-double-wordian order. The multiplier is an unsigned
* 64-bit integer. The previously computed integer to which the product is
* added is located in the result ("res") area, and is assumed to be a
* 576-bit (72-byte, nine doubleword) unsigned integer, stored in memory
* in little-double-wordian order. This value normally will be the result
* of a previously computed nine doubleword result. It is not necessary
* to pad the multiplicand with an additional 64-bit zero doubleword.
*
* Multiplicand, multiplier, and addend ideally should be aligned at
* 16-byte boundaries for best performance. The code will function
* correctly for alignment at eight-byte boundaries which are not 16-byte
* boundaries, but the execution may be slightly slower due to even/odd
* bank conflicts on PA-RISC 8000 processors.
*
* This function is designed to accept the same calling sequence as Bill
* Ackerman's "maxpy_little" function. The carry from the ninth doubleword
* of the result is written to the tenth word of the result, as is done by
* Bill Ackerman's function. The final carry also is returned as an
* integer, which may be ignored. The function prototype may be either
* of the following:
*
* void multacc512( int l, chunk* m, const chunk* a, chunk* res );
* or
* int multacc512( int l, chunk* m, const chunk* a, chunk* res );
*
* where: "l" originally denoted vector lengths. This parameter is
* ignored. This function always assumes a multiplicand length of
* 512 bits (eight doublewords), and addend and result lengths of
* 576 bits (nine doublewords).
*
* "m" is a pointer to the doubleword multiplier, ideally aligned
* on a 16-byte boundary.
*
* "a" is a pointer to the eight-doubleword multiplicand, stored
* in little-double-wordian order, and ideally aligned on a 16-byte
* boundary.
*
* "res" is a pointer to the nine doubleword addend, and to the
* nine-doubleword product computed by this function. The result
* also is stored in little-double-wordian order, and ideally is
* aligned on a 16-byte boundary. It is expected that the alignment
* of the "res" area may alternate between even/odd doubleword
* boundaries for successive calls for 512-bit x 512-bit
* multiplications.
*
* The code for this function has been scheduled to use the parallelism
* of the PA-RISC 8000 series microprocessors as well as the author was
* able. Comments and/or suggestions for improvement are welcomed.
*
* The code is "64-bit safe". This means it may be called in either
* the 32ILP context or the 64LP context. All 64-bits of registers are
* saved and restored.
*
* This code is self-contained. It requires no other header files in order
* to compile and to be linkable on a PA-RISC 2.0 machine. Symbolic
* definitions for registers and stack offsets are included within this
* one source file.
*
* This is a leaf routine. As such, minimal use is made of the stack area.
* Of the 192 bytes allocated, 64 bytes are used for saving/restoring eight
* general registers, and 128 bytes are used to move intermediate products
* from the floating-point registers to the general registers. Stack
* protocols assure proper alignment of these areas.
*
*/
/* ====================================================================*/
/* symbolic definitions for PA-RISC registers */
/* in the MIPS style, avoids lots of case shifts */
/* assigments (except t4) preserve register number parity */
/* ====================================================================*/
#define zero %r0 /* permanent zero */
#define t5 %r1 /* temp register, altered by addil */
#define rp %r2 /* return pointer */
#define s1 %r3 /* callee saves register*/
#define s0 %r4 /* callee saves register*/
#define s3 %r5 /* callee saves register*/
#define s2 %r6 /* callee saves register*/
#define s5 %r7 /* callee saves register*/
#define s4 %r8 /* callee saves register*/
#define s7 %r9 /* callee saves register*/
#define s6 %r10 /* callee saves register*/
#define t1 %r19 /* caller saves register*/
#define t0 %r20 /* caller saves register*/
#define t3 %r21 /* caller saves register*/
#define t2 %r22 /* caller saves register*/
#define a3 %r23 /* fourth argument register, high word */
#define a2 %r24 /* third argument register, low word*/
#define a1 %r25 /* second argument register, high word*/
#define a0 %r26 /* first argument register, low word*/
#define v0 %r28 /* high order return value*/
#define v1 %r29 /* low order return value*/
#define sp %r30 /* stack pointer*/
#define t4 %r31 /* temporary register */
#define fa0 %fr4 /* first argument register*/
#define fa1 %fr5 /* second argument register*/
#define fa2 %fr6 /* third argument register*/
#define fa3 %fr7 /* fourth argument register*/
#define fa0r %fr4R /* first argument register*/
#define fa1r %fr5R /* second argument register*/
#define fa2r %fr6R /* third argument register*/
#define fa3r %fr7R /* fourth argument register*/
#define ft0 %fr8 /* caller saves register*/
#define ft1 %fr9 /* caller saves register*/
#define ft2 %fr10 /* caller saves register*/
#define ft3 %fr11 /* caller saves register*/
#define ft0r %fr8R /* caller saves register*/
#define ft1r %fr9R /* caller saves register*/
#define ft2r %fr10R /* caller saves register*/
#define ft3r %fr11R /* caller saves register*/
#define ft4 %fr22 /* caller saves register*/
#define ft5 %fr23 /* caller saves register*/
#define ft6 %fr24 /* caller saves register*/
#define ft7 %fr25 /* caller saves register*/
#define ft8 %fr26 /* caller saves register*/
#define ft9 %fr27 /* caller saves register*/
#define ft10 %fr28 /* caller saves register*/
#define ft11 %fr29 /* caller saves register*/
#define ft12 %fr30 /* caller saves register*/
#define ft13 %fr31 /* caller saves register*/
#define ft4r %fr22R /* caller saves register*/
#define ft5r %fr23R /* caller saves register*/
#define ft6r %fr24R /* caller saves register*/
#define ft7r %fr25R /* caller saves register*/
#define ft8r %fr26R /* caller saves register*/
#define ft9r %fr27R /* caller saves register*/
#define ft10r %fr28R /* caller saves register*/
#define ft11r %fr29R /* caller saves register*/
#define ft12r %fr30R /* caller saves register*/
#define ft13r %fr31R /* caller saves register*/
/* ================================================================== */
/* functional definitions for PA-RISC registers */
/* ================================================================== */
/* general registers */
#define T1 a0 /* temp, (length parameter ignored) */
#define pM a1 /* -> 64-bit multiplier */
#define T2 a1 /* temp, (after fetching multiplier) */
#define pA a2 /* -> multiplicand vector (8 64-bit words) */
#define T3 a2 /* temp, (after fetching multiplicand) */
#define pR a3 /* -> addend vector (8 64-bit doublewords,
result vector (9 64-bit words) */
#define S0 s0 /* callee saves summand registers */
#define S1 s1
#define S2 s2
#define S3 s3
#define S4 s4
#define S5 s5
#define S6 s6
#define S7 s7
#define S8 v0 /* caller saves summand registers */
#define S9 v1
#define S10 t0
#define S11 t1
#define S12 t2
#define S13 t3
#define S14 t4
#define S15 t5
/* floating-point registers */
#define M fa0 /* multiplier double word */
#define MR fa0r /* low order half of multiplier double word */
#define ML fa0 /* high order half of multiplier double word */
#define A0 fa2 /* multiplicand double word 0 */
#define A0R fa2r /* low order half of multiplicand double word */
#define A0L fa2 /* high order half of multiplicand double word */
#define A1 fa3 /* multiplicand double word 1 */
#define A1R fa3r /* low order half of multiplicand double word */
#define A1L fa3 /* high order half of multiplicand double word */
#define A2 ft0 /* multiplicand double word 2 */
#define A2R ft0r /* low order half of multiplicand double word */
#define A2L ft0 /* high order half of multiplicand double word */
#define A3 ft1 /* multiplicand double word 3 */
#define A3R ft1r /* low order half of multiplicand double word */
#define A3L ft1 /* high order half of multiplicand double word */
#define A4 ft2 /* multiplicand double word 4 */
#define A4R ft2r /* low order half of multiplicand double word */
#define A4L ft2 /* high order half of multiplicand double word */
#define A5 ft3 /* multiplicand double word 5 */
#define A5R ft3r /* low order half of multiplicand double word */
#define A5L ft3 /* high order half of multiplicand double word */
#define A6 ft4 /* multiplicand double word 6 */
#define A6R ft4r /* low order half of multiplicand double word */
#define A6L ft4 /* high order half of multiplicand double word */
#define A7 ft5 /* multiplicand double word 7 */
#define A7R ft5r /* low order half of multiplicand double word */
#define A7L ft5 /* high order half of multiplicand double word */
#define P0 ft6 /* product word 0 */
#define P1 ft7 /* product word 0 */
#define P2 ft8 /* product word 0 */
#define P3 ft9 /* product word 0 */
#define P4 ft10 /* product word 0 */
#define P5 ft11 /* product word 0 */
#define P6 ft12 /* product word 0 */
#define P7 ft13 /* product word 0 */
/* ====================================================================== */
/* symbolic definitions for HP-UX stack offsets */
/* symbolic definitions for memory NOPs */
/* ====================================================================== */
#define ST_SZ 192 /* stack area total size */
#define SV0 -192(sp) /* general register save area */
#define SV1 -184(sp)
#define SV2 -176(sp)
#define SV3 -168(sp)
#define SV4 -160(sp)
#define SV5 -152(sp)
#define SV6 -144(sp)
#define SV7 -136(sp)
#define XF0 -128(sp) /* data transfer area */
#define XF1 -120(sp) /* for floating-pt to integer regs */
#define XF2 -112(sp)
#define XF3 -104(sp)
#define XF4 -96(sp)
#define XF5 -88(sp)
#define XF6 -80(sp)
#define XF7 -72(sp)
#define XF8 -64(sp)
#define XF9 -56(sp)
#define XF10 -48(sp)
#define XF11 -40(sp)
#define XF12 -32(sp)
#define XF13 -24(sp)
#define XF14 -16(sp)
#define XF15 -8(sp)
#define mnop proberi (sp),3,zero /* memory NOP */
/* ====================================================================== */
/* assembler formalities */
/* ====================================================================== */
#ifdef __LP64__
.level 2.0W
#else
.level 2.0
#endif
.space $TEXT$
.subspa $CODE$
.align 16
/* ====================================================================== */
/* here to compute 64-bit x 512-bit product + 512-bit addend */
/* ====================================================================== */
multacc512
.PROC
.CALLINFO
.ENTER
fldd 0(pM),M ; multiplier double word
ldo ST_SZ(sp),sp ; push stack
fldd 0(pA),A0 ; multiplicand double word 0
std S1,SV1 ; save s1
fldd 16(pA),A2 ; multiplicand double word 2
std S3,SV3 ; save s3
fldd 32(pA),A4 ; multiplicand double word 4
std S5,SV5 ; save s5
fldd 48(pA),A6 ; multiplicand double word 6
std S7,SV7 ; save s7
std S0,SV0 ; save s0
fldd 8(pA),A1 ; multiplicand double word 1
xmpyu MR,A0L,P0 ; A0 cross 32-bit word products
xmpyu ML,A0R,P2
std S2,SV2 ; save s2
fldd 24(pA),A3 ; multiplicand double word 3
xmpyu MR,A2L,P4 ; A2 cross 32-bit word products
xmpyu ML,A2R,P6
std S4,SV4 ; save s4
fldd 40(pA),A5 ; multiplicand double word 5
std S6,SV6 ; save s6
fldd 56(pA),A7 ; multiplicand double word 7
fstd P0,XF0 ; MR * A0L
xmpyu MR,A0R,P0 ; A0 right 32-bit word product
xmpyu MR,A1L,P1 ; A1 cross 32-bit word product
fstd P2,XF2 ; ML * A0R
xmpyu ML,A0L,P2 ; A0 left 32-bit word product
xmpyu ML,A1R,P3 ; A1 cross 32-bit word product
fstd P4,XF4 ; MR * A2L
xmpyu MR,A2R,P4 ; A2 right 32-bit word product
xmpyu MR,A3L,P5 ; A3 cross 32-bit word product
fstd P6,XF6 ; ML * A2R
xmpyu ML,A2L,P6 ; A2 parallel 32-bit word product
xmpyu ML,A3R,P7 ; A3 cross 32-bit word product
ldd XF0,S0 ; MR * A0L
fstd P1,XF1 ; MR * A1L
ldd XF2,S2 ; ML * A0R
fstd P3,XF3 ; ML * A1R
ldd XF4,S4 ; MR * A2L
fstd P5,XF5 ; MR * A3L
xmpyu MR,A1R,P1 ; A1 parallel 32-bit word products
xmpyu ML,A1L,P3
ldd XF6,S6 ; ML * A2R
fstd P7,XF7 ; ML * A3R
xmpyu MR,A3R,P5 ; A3 parallel 32-bit word products
xmpyu ML,A3L,P7
fstd P0,XF0 ; MR * A0R
ldd XF1,S1 ; MR * A1L
nop
add S0,S2,T1 ; A0 cross product sum
fstd P2,XF2 ; ML * A0L
ldd XF3,S3 ; ML * A1R
add,dc zero,zero,S0 ; A0 cross product sum carry
depd,z T1,31,32,S2 ; A0 cross product sum << 32
fstd P4,XF4 ; MR * A2R
ldd XF5,S5 ; MR * A3L
shrpd S0,T1,32,S0 ; A0 carry | cross product sum >> 32
add S4,S6,T3 ; A2 cross product sum
fstd P6,XF6 ; ML * A2L
ldd XF7,S7 ; ML * A3R
add,dc zero,zero,S4 ; A2 cross product sum carry
depd,z T3,31,32,S6 ; A2 cross product sum << 32
ldd XF0,S8 ; MR * A0R
fstd P1,XF1 ; MR * A1R
xmpyu MR,A4L,P0 ; A4 cross 32-bit word product
xmpyu MR,A5L,P1 ; A5 cross 32-bit word product
ldd XF2,S10 ; ML * A0L
fstd P3,XF3 ; ML * A1L
xmpyu ML,A4R,P2 ; A4 cross 32-bit word product
xmpyu ML,A5R,P3 ; A5 cross 32-bit word product
ldd XF4,S12 ; MR * A2R
fstd P5,XF5 ; MR * A3L
xmpyu MR,A6L,P4 ; A6 cross 32-bit word product
xmpyu MR,A7L,P5 ; A7 cross 32-bit word product
ldd XF6,S14 ; ML * A2L
fstd P7,XF7 ; ML * A3L
xmpyu ML,A6R,P6 ; A6 cross 32-bit word product
xmpyu ML,A7R,P7 ; A7 cross 32-bit word product
fstd P0,XF0 ; MR * A4L
ldd XF1,S9 ; MR * A1R
shrpd S4,T3,32,S4 ; A2 carry | cross product sum >> 32
add S1,S3,T1 ; A1 cross product sum
fstd P2,XF2 ; ML * A4R
ldd XF3,S11 ; ML * A1L
add,dc zero,zero,S1 ; A1 cross product sum carry
depd,z T1,31,32,S3 ; A1 cross product sum << 32
fstd P4,XF4 ; MR * A6L
ldd XF5,S13 ; MR * A3R
shrpd S1,T1,32,S1 ; A1 carry | cross product sum >> 32
add S5,S7,T3 ; A3 cross product sum
fstd P6,XF6 ; ML * A6R
ldd XF7,S15 ; ML * A3L
add,dc zero,zero,S5 ; A3 cross product sum carry
depd,z T3,31,32,S7 ; A3 cross product sum << 32
shrpd S5,T3,32,S5 ; A3 carry | cross product sum >> 32
add S2,S8,S8 ; M * A0 right doubleword, P0 doubleword
add,dc S0,S10,S10 ; M * A0 left doubleword
add S3,S9,S9 ; M * A1 right doubleword
add,dc S1,S11,S11 ; M * A1 left doubleword
add S6,S12,S12 ; M * A2 right doubleword
ldd 24(pR),S3 ; Addend word 3
fstd P1,XF1 ; MR * A5L
add,dc S4,S14,S14 ; M * A2 left doubleword
xmpyu MR,A5R,P1 ; A5 right 32-bit word product
ldd 8(pR),S1 ; Addend word 1
fstd P3,XF3 ; ML * A5R
add S7,S13,S13 ; M * A3 right doubleword
xmpyu ML,A5L,P3 ; A5 left 32-bit word product
ldd 0(pR),S7 ; Addend word 0
fstd P5,XF5 ; MR * A7L
add,dc S5,S15,S15 ; M * A3 left doubleword
xmpyu MR,A7R,P5 ; A7 right 32-bit word product
ldd 16(pR),S5 ; Addend word 2
fstd P7,XF7 ; ML * A7R
add S10,S9,S9 ; P1 doubleword
xmpyu ML,A7L,P7 ; A7 left 32-bit word products
ldd XF0,S0 ; MR * A4L
fstd P1,XF9 ; MR * A5R
add,dc S11,S12,S12 ; P2 doubleword
xmpyu MR,A4R,P0 ; A4 right 32-bit word product
ldd XF2,S2 ; ML * A4R
fstd P3,XF11 ; ML * A5L
add,dc S14,S13,S13 ; P3 doubleword
xmpyu ML,A4L,P2 ; A4 left 32-bit word product
ldd XF6,S6 ; ML * A6R
fstd P5,XF13 ; MR * A7R
add,dc zero,S15,T2 ; P4 partial doubleword
xmpyu MR,A6R,P4 ; A6 right 32-bit word product
ldd XF4,S4 ; MR * A6L
fstd P7,XF15 ; ML * A7L
add S7,S8,S8 ; R0 + P0, new R0 doubleword
xmpyu ML,A6L,P6 ; A6 left 32-bit word product
fstd P0,XF0 ; MR * A4R
ldd XF7,S7 ; ML * A7R
add,dc S1,S9,S9 ; c + R1 + P1, new R1 doubleword
fstd P2,XF2 ; ML * A4L
ldd XF1,S1 ; MR * A5L
add,dc S5,S12,S12 ; c + R2 + P2, new R2 doubleword
fstd P4,XF4 ; MR * A6R
ldd XF5,S5 ; MR * A7L
add,dc S3,S13,S13 ; c + R3 + P3, new R3 doubleword
fstd P6,XF6 ; ML * A6L
ldd XF3,S3 ; ML * A5R
add,dc zero,T2,T2 ; c + partial P4
add S0,S2,T1 ; A4 cross product sum
std S8,0(pR) ; save R0
add,dc zero,zero,S0 ; A4 cross product sum carry
depd,z T1,31,32,S2 ; A4 cross product sum << 32
std S9,8(pR) ; save R1
shrpd S0,T1,32,S0 ; A4 carry | cross product sum >> 32
add S4,S6,T3 ; A6 cross product sum
std S12,16(pR) ; save R2
add,dc zero,zero,S4 ; A6 cross product sum carry
depd,z T3,31,32,S6 ; A6 cross product sum << 32
std S13,24(pR) ; save R3
shrpd S4,T3,32,S4 ; A6 carry | cross product sum >> 32
add S1,S3,T1 ; A5 cross product sum
ldd XF0,S8 ; MR * A4R
add,dc zero,zero,S1 ; A5 cross product sum carry
depd,z T1,31,32,S3 ; A5 cross product sum << 32
ldd XF2,S10 ; ML * A4L
ldd XF9,S9 ; MR * A5R
shrpd S1,T1,32,S1 ; A5 carry | cross product sum >> 32
add S5,S7,T3 ; A7 cross product sum
ldd XF4,S12 ; MR * A6R
ldd XF11,S11 ; ML * A5L
add,dc zero,zero,S5 ; A7 cross product sum carry
depd,z T3,31,32,S7 ; A7 cross product sum << 32
ldd XF6,S14 ; ML * A6L
ldd XF13,S13 ; MR * A7R
shrpd S5,T3,32,S5 ; A7 carry | cross product sum >> 32
add S2,S8,S8 ; M * A4 right doubleword
ldd XF15,S15 ; ML * A7L
add,dc S0,S10,S10 ; M * A4 left doubleword
add S3,S9,S9 ; M * A5 right doubleword
add,dc S1,S11,S11 ; M * A5 left doubleword
add S6,S12,S12 ; M * A6 right doubleword
ldd 32(pR),S0 ; Addend word 4
ldd 40(pR),S1 ; Addend word 5
add,dc S4,S14,S14 ; M * A6 left doubleword
add S7,S13,S13 ; M * A7 right doubleword
ldd 48(pR),S2 ; Addend word 6
ldd 56(pR),S3 ; Addend word 7
add,dc S5,S15,S15 ; M * A7 left doubleword
add S8,T2,S8 ; P4 doubleword
ldd 64(pR),S4 ; Addend word 8
ldd SV5,s5 ; restore s5
add,dc S10,S9,S9 ; P5 doubleword
add,dc S11,S12,S12 ; P6 doubleword
ldd SV6,s6 ; restore s6
ldd SV7,s7 ; restore s7
add,dc S14,S13,S13 ; P7 doubleword
add,dc zero,S15,S15 ; P8 doubleword
add S0,S8,S8 ; new R4 doubleword
ldd SV0,s0 ; restore s0
std S8,32(pR) ; save R4
add,dc S1,S9,S9 ; new R5 doubleword
ldd SV1,s1 ; restore s1
std S9,40(pR) ; save R5
add,dc S2,S12,S12 ; new R6 doubleword
ldd SV2,s2 ; restore s2
std S12,48(pR) ; save R6
add,dc S3,S13,S13 ; new R7 doubleword
ldd SV3,s3 ; restore s3
std S13,56(pR) ; save R7
add,dc S4,S15,S15 ; new R8 doubleword
ldd SV4,s4 ; restore s4
std S15,64(pR) ; save result[8]
add,dc zero,zero,v0 ; return carry from R8
CMPIB,*= 0,v0,$L0 ; if no overflow, exit
LDO 8(pR),pR
$FINAL1 ; Final carry propagation
LDD 64(pR),v0
LDO 8(pR),pR
ADDI 1,v0,v0
CMPIB,*= 0,v0,$FINAL1 ; Keep looping if there is a carry.
STD v0,56(pR)
$L0
bv zero(rp) ; -> caller
ldo -ST_SZ(sp),sp ; pop stack
/* ====================================================================== */
/* end of module */
/* ====================================================================== */
.LEAVE
.PROCEND
.SPACE $TEXT$
.SUBSPA $CODE$
.EXPORT multacc512,ENTRY
.end

View File

@@ -1,929 +0,0 @@
; The contents of this file are subject to the Mozilla Public
; License Version 1.1 (the "License"); you may not use this file
; except in compliance with the License. You may obtain a copy of
; the License at http://www.mozilla.org/MPL/
;
; Software distributed under the License is distributed on an "AS
; IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
; implied. See the License for the specific language governing
; rights and limitations under the License.
;
; The Original Code is MAXPY multiple-precision integer arithmetic.
;
; The Initial Developer of the Original Code is the Hewlett-Packard Company.
; Portions created by Hewlett-Packard Company are
; Copyright (C) 1997 Hewlett-Packard Company. All Rights Reserved.
;
; Contributor(s):
; coded by: William B. Ackerman
;
; Alternatively, the contents of this file may be used under the
; terms of the GNU General Public License Version 2 or later (the
; "GPL"), in which case the provisions of the GPL are applicable
; instead of those above. If you wish to allow use of your
; version of this file only under the terms of the GPL and not to
; allow others to use your version of this file under the MPL,
; indicate your decision by deleting the provisions above and
; replace them with the notice and other provisions required by
; the GPL. If you do not delete the provisions above, a recipient
; may use your version of this file under either the MPL or the
; GPL.
#ifdef __LP64__
.LEVEL 2.0W
#else
; .LEVEL 1.1
; .ALLOW 2.0N
.LEVEL 2.0N
#endif
.SPACE $TEXT$,SORT=8
.SUBSPA $CODE$,QUAD=0,ALIGN=4,ACCESS=0x2c,CODE_ONLY,SORT=24
; ***************************************************************
;
; maxpy_[little/big]
;
; ***************************************************************
; There is no default -- you must specify one or the other.
#define LITTLE_WORDIAN 1
#ifdef LITTLE_WORDIAN
#define EIGHT 8
#define SIXTEEN 16
#define THIRTY_TWO 32
#define UN_EIGHT -8
#define UN_SIXTEEN -16
#define UN_TWENTY_FOUR -24
#endif
#ifdef BIG_WORDIAN
#define EIGHT -8
#define SIXTEEN -16
#define THIRTY_TWO -32
#define UN_EIGHT 8
#define UN_SIXTEEN 16
#define UN_TWENTY_FOUR 24
#endif
; This performs a multiple-precision integer version of "daxpy",
; Using the selected addressing direction. "Little-wordian" means that
; the least significant word of a number is stored at the lowest address.
; "Big-wordian" means that the most significant word is at the lowest
; address. Either way, the incoming address of the vector is that
; of the least significant word. That means that, for little-wordian
; addressing, we move the address upward as we propagate carries
; from the least significant word to the most significant. For
; big-wordian we move the address downward.
; We use the following registers:
;
; r2 return PC, of course
; r26 = arg1 = length
; r25 = arg2 = address of scalar
; r24 = arg3 = multiplicand vector
; r23 = arg4 = result vector
;
; fr9 = scalar loaded once only from r25
; The cycle counts shown in the bodies below are simply the result of a
; scheduling by hand. The actual PCX-U hardware does it differently.
; The intention is that the overall speed is the same.
; The pipeline startup and shutdown code is constructed in the usual way,
; by taking the loop bodies and removing unnecessary instructions.
; We have left the comments describing cycle numbers in the code.
; These are intended for reference when comparing with the main loop,
; and have no particular relationship to actual cycle numbers.
#ifdef LITTLE_WORDIAN
maxpy_little
#else
maxpy_big
#endif
.PROC
.CALLINFO FRAME=120,ENTRY_GR=%r4
.ENTER
; Of course, real men don't use the sissy "enter" and "leave" commands.
; They write their own stack manipulation stuff. Unfortunately,
; that doesn't generate complete unwind info, whereas "enter" and
; "leave" (if the documentation is to be believed) do so. Therefore,
; we use the sissy commands. We have verified (by real-man methods)
; that the above command generates what we want:
; STW,MA %r3,128(%sp)
; STW %r4,-124(%sp)
ADDIB,< -1,%r26,$L0 ; If N = 0, exit immediately.
FLDD 0(%r25),%fr9 ; fr9 = scalar
; First startup
FLDD 0(%r24),%fr24 ; Cycle 1
XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3
XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4
XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5
CMPIB,> 3,%r26,$N_IS_SMALL ; Pick out cases N = 1, 2, or 3
XMPYU %fr9L,%fr24R,%fr24 ; Cycle 6
FLDD EIGHT(%r24),%fr28 ; Cycle 8
XMPYU %fr9L,%fr28R,%fr31 ; Cycle 10
FSTD %fr24,-96(%sp)
XMPYU %fr9R,%fr28L,%fr30 ; Cycle 11
FSTD %fr25,-80(%sp)
LDO SIXTEEN(%r24),%r24 ; Cycle 12
FSTD %fr31,-64(%sp)
XMPYU %fr9R,%fr28R,%fr29 ; Cycle 13
FSTD %fr27,-48(%sp)
; Second startup
XMPYU %fr9L,%fr28L,%fr28 ; Cycle 1
FSTD %fr30,-56(%sp)
FLDD 0(%r24),%fr24
FSTD %fr26,-88(%sp) ; Cycle 2
XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3
FSTD %fr28,-104(%sp)
XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4
LDD -96(%sp),%r3
FSTD %fr29,-72(%sp)
XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5
LDD -64(%sp),%r19
LDD -80(%sp),%r21
XMPYU %fr9L,%fr24R,%fr24 ; Cycle 6
LDD -56(%sp),%r20
ADD %r21,%r3,%r3
ADD,DC %r20,%r19,%r19 ; Cycle 7
LDD -88(%sp),%r4
SHRPD %r3,%r0,32,%r21
LDD -48(%sp),%r1
FLDD EIGHT(%r24),%fr28 ; Cycle 8
LDD -104(%sp),%r31
ADD,DC %r0,%r0,%r20
SHRPD %r19,%r3,32,%r3
LDD -72(%sp),%r29 ; Cycle 9
SHRPD %r20,%r19,32,%r20
ADD %r21,%r1,%r1
XMPYU %fr9L,%fr28R,%fr31 ; Cycle 10
ADD,DC %r3,%r4,%r4
FSTD %fr24,-96(%sp)
XMPYU %fr9R,%fr28L,%fr30 ; Cycle 11
ADD,DC %r0,%r20,%r20
LDD 0(%r23),%r3
FSTD %fr25,-80(%sp)
LDO SIXTEEN(%r24),%r24 ; Cycle 12
FSTD %fr31,-64(%sp)
XMPYU %fr9R,%fr28R,%fr29 ; Cycle 13
ADD %r0,%r0,%r0 ; clear the carry bit
ADDIB,<= -4,%r26,$ENDLOOP ; actually happens in cycle 12
FSTD %fr27,-48(%sp)
; MFCTL %cr16,%r21 ; for timing
; STD %r21,-112(%sp)
; Here is the loop.
$LOOP XMPYU %fr9L,%fr28L,%fr28 ; Cycle 1
ADD,DC %r29,%r4,%r4
FSTD %fr30,-56(%sp)
FLDD 0(%r24),%fr24
LDO SIXTEEN(%r23),%r23 ; Cycle 2
ADD,DC %r0,%r20,%r20
FSTD %fr26,-88(%sp)
XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3
ADD %r3,%r1,%r1
FSTD %fr28,-104(%sp)
LDD UN_EIGHT(%r23),%r21
XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4
ADD,DC %r21,%r4,%r28
FSTD %fr29,-72(%sp)
LDD -96(%sp),%r3
XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5
ADD,DC %r20,%r31,%r22
LDD -64(%sp),%r19
LDD -80(%sp),%r21
XMPYU %fr9L,%fr24R,%fr24 ; Cycle 6
ADD %r21,%r3,%r3
LDD -56(%sp),%r20
STD %r1,UN_SIXTEEN(%r23)
ADD,DC %r20,%r19,%r19 ; Cycle 7
SHRPD %r3,%r0,32,%r21
LDD -88(%sp),%r4
LDD -48(%sp),%r1
ADD,DC %r0,%r0,%r20 ; Cycle 8
SHRPD %r19,%r3,32,%r3
FLDD EIGHT(%r24),%fr28
LDD -104(%sp),%r31
SHRPD %r20,%r19,32,%r20 ; Cycle 9
ADD %r21,%r1,%r1
STD %r28,UN_EIGHT(%r23)
LDD -72(%sp),%r29
XMPYU %fr9L,%fr28R,%fr31 ; Cycle 10
ADD,DC %r3,%r4,%r4
FSTD %fr24,-96(%sp)
XMPYU %fr9R,%fr28L,%fr30 ; Cycle 11
ADD,DC %r0,%r20,%r20
FSTD %fr25,-80(%sp)
LDD 0(%r23),%r3
LDO SIXTEEN(%r24),%r24 ; Cycle 12
FSTD %fr31,-64(%sp)
XMPYU %fr9R,%fr28R,%fr29 ; Cycle 13
ADD %r22,%r1,%r1
ADDIB,> -2,%r26,$LOOP ; actually happens in cycle 12
FSTD %fr27,-48(%sp)
$ENDLOOP
; Shutdown code, first stage.
; MFCTL %cr16,%r21 ; for timing
; STD %r21,UN_SIXTEEN(%r23)
; LDD -112(%sp),%r21
; STD %r21,UN_EIGHT(%r23)
XMPYU %fr9L,%fr28L,%fr28 ; Cycle 1
ADD,DC %r29,%r4,%r4
CMPIB,= 0,%r26,$ONEMORE
FSTD %fr30,-56(%sp)
LDO SIXTEEN(%r23),%r23 ; Cycle 2
ADD,DC %r0,%r20,%r20
FSTD %fr26,-88(%sp)
ADD %r3,%r1,%r1 ; Cycle 3
FSTD %fr28,-104(%sp)
LDD UN_EIGHT(%r23),%r21
ADD,DC %r21,%r4,%r28 ; Cycle 4
FSTD %fr29,-72(%sp)
STD %r28,UN_EIGHT(%r23) ; moved up from cycle 9
LDD -96(%sp),%r3
ADD,DC %r20,%r31,%r22 ; Cycle 5
STD %r1,UN_SIXTEEN(%r23)
$JOIN4
LDD -64(%sp),%r19
LDD -80(%sp),%r21
ADD %r21,%r3,%r3 ; Cycle 6
LDD -56(%sp),%r20
ADD,DC %r20,%r19,%r19 ; Cycle 7
SHRPD %r3,%r0,32,%r21
LDD -88(%sp),%r4
LDD -48(%sp),%r1
ADD,DC %r0,%r0,%r20 ; Cycle 8
SHRPD %r19,%r3,32,%r3
LDD -104(%sp),%r31
SHRPD %r20,%r19,32,%r20 ; Cycle 9
ADD %r21,%r1,%r1
LDD -72(%sp),%r29
ADD,DC %r3,%r4,%r4 ; Cycle 10
ADD,DC %r0,%r20,%r20 ; Cycle 11
LDD 0(%r23),%r3
ADD %r22,%r1,%r1 ; Cycle 13
; Shutdown code, second stage.
ADD,DC %r29,%r4,%r4 ; Cycle 1
LDO SIXTEEN(%r23),%r23 ; Cycle 2
ADD,DC %r0,%r20,%r20
LDD UN_EIGHT(%r23),%r21 ; Cycle 3
ADD %r3,%r1,%r1
ADD,DC %r21,%r4,%r28 ; Cycle 4
ADD,DC %r20,%r31,%r22 ; Cycle 5
STD %r1,UN_SIXTEEN(%r23); Cycle 6
STD %r28,UN_EIGHT(%r23) ; Cycle 9
LDD 0(%r23),%r3 ; Cycle 11
; Shutdown code, third stage.
LDO SIXTEEN(%r23),%r23
ADD %r3,%r22,%r1
$JOIN1 ADD,DC %r0,%r0,%r21
CMPIB,*= 0,%r21,$L0 ; if no overflow, exit
STD %r1,UN_SIXTEEN(%r23)
; Final carry propagation
$FINAL1 LDO EIGHT(%r23),%r23
LDD UN_SIXTEEN(%r23),%r21
ADDI 1,%r21,%r21
CMPIB,*= 0,%r21,$FINAL1 ; Keep looping if there is a carry.
STD %r21,UN_SIXTEEN(%r23)
B $L0
NOP
; Here is the code that handles the difficult cases N=1, N=2, and N=3.
; We do the usual trick -- branch out of the startup code at appropriate
; points, and branch into the shutdown code.
$N_IS_SMALL
CMPIB,= 0,%r26,$N_IS_ONE
FSTD %fr24,-96(%sp) ; Cycle 10
FLDD EIGHT(%r24),%fr28 ; Cycle 8
XMPYU %fr9L,%fr28R,%fr31 ; Cycle 10
XMPYU %fr9R,%fr28L,%fr30 ; Cycle 11
FSTD %fr25,-80(%sp)
FSTD %fr31,-64(%sp) ; Cycle 12
XMPYU %fr9R,%fr28R,%fr29 ; Cycle 13
FSTD %fr27,-48(%sp)
XMPYU %fr9L,%fr28L,%fr28 ; Cycle 1
CMPIB,= 2,%r26,$N_IS_THREE
FSTD %fr30,-56(%sp)
; N = 2
FSTD %fr26,-88(%sp) ; Cycle 2
FSTD %fr28,-104(%sp) ; Cycle 3
LDD -96(%sp),%r3 ; Cycle 4
FSTD %fr29,-72(%sp)
B $JOIN4
ADD %r0,%r0,%r22
$N_IS_THREE
FLDD SIXTEEN(%r24),%fr24
FSTD %fr26,-88(%sp) ; Cycle 2
XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3
FSTD %fr28,-104(%sp)
XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4
LDD -96(%sp),%r3
FSTD %fr29,-72(%sp)
XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5
LDD -64(%sp),%r19
LDD -80(%sp),%r21
B $JOIN3
ADD %r0,%r0,%r22
$N_IS_ONE
FSTD %fr25,-80(%sp)
FSTD %fr27,-48(%sp)
FSTD %fr26,-88(%sp) ; Cycle 2
B $JOIN5
ADD %r0,%r0,%r22
; We came out of the unrolled loop with wrong parity. Do one more
; single cycle. This is quite tricky, because of the way the
; carry chains and SHRPD chains have been chopped up.
$ONEMORE
FLDD 0(%r24),%fr24
LDO SIXTEEN(%r23),%r23 ; Cycle 2
ADD,DC %r0,%r20,%r20
FSTD %fr26,-88(%sp)
XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3
FSTD %fr28,-104(%sp)
LDD UN_EIGHT(%r23),%r21
ADD %r3,%r1,%r1
XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4
ADD,DC %r21,%r4,%r28
STD %r28,UN_EIGHT(%r23) ; moved from cycle 9
LDD -96(%sp),%r3
FSTD %fr29,-72(%sp)
XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5
ADD,DC %r20,%r31,%r22
LDD -64(%sp),%r19
LDD -80(%sp),%r21
STD %r1,UN_SIXTEEN(%r23); Cycle 6
$JOIN3
XMPYU %fr9L,%fr24R,%fr24
LDD -56(%sp),%r20
ADD %r21,%r3,%r3
ADD,DC %r20,%r19,%r19 ; Cycle 7
LDD -88(%sp),%r4
SHRPD %r3,%r0,32,%r21
LDD -48(%sp),%r1
LDD -104(%sp),%r31 ; Cycle 8
ADD,DC %r0,%r0,%r20
SHRPD %r19,%r3,32,%r3
LDD -72(%sp),%r29 ; Cycle 9
SHRPD %r20,%r19,32,%r20
ADD %r21,%r1,%r1
ADD,DC %r3,%r4,%r4 ; Cycle 10
FSTD %fr24,-96(%sp)
ADD,DC %r0,%r20,%r20 ; Cycle 11
LDD 0(%r23),%r3
FSTD %fr25,-80(%sp)
ADD %r22,%r1,%r1 ; Cycle 13
FSTD %fr27,-48(%sp)
; Shutdown code, stage 1-1/2.
ADD,DC %r29,%r4,%r4 ; Cycle 1
LDO SIXTEEN(%r23),%r23 ; Cycle 2
ADD,DC %r0,%r20,%r20
FSTD %fr26,-88(%sp)
LDD UN_EIGHT(%r23),%r21 ; Cycle 3
ADD %r3,%r1,%r1
ADD,DC %r21,%r4,%r28 ; Cycle 4
STD %r28,UN_EIGHT(%r23) ; moved from cycle 9
ADD,DC %r20,%r31,%r22 ; Cycle 5
STD %r1,UN_SIXTEEN(%r23)
$JOIN5
LDD -96(%sp),%r3 ; moved from cycle 4
LDD -80(%sp),%r21
ADD %r21,%r3,%r3 ; Cycle 6
ADD,DC %r0,%r0,%r19 ; Cycle 7
LDD -88(%sp),%r4
SHRPD %r3,%r0,32,%r21
LDD -48(%sp),%r1
SHRPD %r19,%r3,32,%r3 ; Cycle 8
ADD %r21,%r1,%r1 ; Cycle 9
ADD,DC %r3,%r4,%r4 ; Cycle 10
LDD 0(%r23),%r3 ; Cycle 11
ADD %r22,%r1,%r1 ; Cycle 13
; Shutdown code, stage 2-1/2.
ADD,DC %r0,%r4,%r4 ; Cycle 1
LDO SIXTEEN(%r23),%r23 ; Cycle 2
LDD UN_EIGHT(%r23),%r21 ; Cycle 3
ADD %r3,%r1,%r1
STD %r1,UN_SIXTEEN(%r23)
ADD,DC %r21,%r4,%r1
B $JOIN1
LDO EIGHT(%r23),%r23
; exit
$L0
.LEAVE
; We have verified that the above command generates what we want:
; LDW -124(%sp),%r4
; BVE (%r2)
; LDW,MB -128(%sp),%r3
.PROCEND
; ***************************************************************
;
; add_diag_[little/big]
;
; ***************************************************************
; The arguments are as follows:
; r2 return PC, of course
; r26 = arg1 = length
; r25 = arg2 = vector to square
; r24 = arg3 = result vector
#ifdef LITTLE_WORDIAN
add_diag_little
#else
add_diag_big
#endif
.PROC
.CALLINFO FRAME=120,ENTRY_GR=%r4
.ENTER
ADDIB,< -1,%r26,$Z0 ; If N=0, exit immediately.
NOP
; Startup code
FLDD 0(%r25),%fr7 ; Cycle 2 (alternate body)
XMPYU %fr7R,%fr7R,%fr29 ; Cycle 4
XMPYU %fr7L,%fr7R,%fr27 ; Cycle 5
XMPYU %fr7L,%fr7L,%fr30
LDO SIXTEEN(%r25),%r25 ; Cycle 6
FSTD %fr29,-88(%sp)
FSTD %fr27,-72(%sp) ; Cycle 7
CMPIB,= 0,%r26,$DIAG_N_IS_ONE ; Cycle 1 (main body)
FSTD %fr30,-96(%sp)
FLDD UN_EIGHT(%r25),%fr7 ; Cycle 2
LDD -88(%sp),%r22 ; Cycle 3
LDD -72(%sp),%r31 ; Cycle 4
XMPYU %fr7R,%fr7R,%fr28
XMPYU %fr7L,%fr7R,%fr24 ; Cycle 5
XMPYU %fr7L,%fr7L,%fr31
LDD -96(%sp),%r20 ; Cycle 6
FSTD %fr28,-80(%sp)
ADD %r0,%r0,%r0 ; clear the carry bit
ADDIB,<= -2,%r26,$ENDDIAGLOOP ; Cycle 7
FSTD %fr24,-64(%sp)
; Here is the loop. It is unrolled twice, modelled after the "alternate body" and then the "main body".
$DIAGLOOP
SHRPD %r31,%r0,31,%r3 ; Cycle 1 (alternate body)
LDO SIXTEEN(%r25),%r25
LDD 0(%r24),%r1
FSTD %fr31,-104(%sp)
SHRPD %r0,%r31,31,%r4 ; Cycle 2
ADD,DC %r22,%r3,%r3
FLDD UN_SIXTEEN(%r25),%fr7
ADD,DC %r0,%r20,%r20 ; Cycle 3
ADD %r1,%r3,%r3
XMPYU %fr7R,%fr7R,%fr29 ; Cycle 4
LDD -80(%sp),%r21
STD %r3,0(%r24)
XMPYU %fr7L,%fr7R,%fr27 ; Cycle 5
XMPYU %fr7L,%fr7L,%fr30
LDD -64(%sp),%r29
LDD EIGHT(%r24),%r1
ADD,DC %r4,%r20,%r20 ; Cycle 6
LDD -104(%sp),%r19
FSTD %fr29,-88(%sp)
ADD %r20,%r1,%r1 ; Cycle 7
FSTD %fr27,-72(%sp)
SHRPD %r29,%r0,31,%r4 ; Cycle 1 (main body)
LDO THIRTY_TWO(%r24),%r24
LDD UN_SIXTEEN(%r24),%r28
FSTD %fr30,-96(%sp)
SHRPD %r0,%r29,31,%r3 ; Cycle 2
ADD,DC %r21,%r4,%r4
FLDD UN_EIGHT(%r25),%fr7
STD %r1,UN_TWENTY_FOUR(%r24)
ADD,DC %r0,%r19,%r19 ; Cycle 3
ADD %r28,%r4,%r4
XMPYU %fr7R,%fr7R,%fr28 ; Cycle 4
LDD -88(%sp),%r22
STD %r4,UN_SIXTEEN(%r24)
XMPYU %fr7L,%fr7R,%fr24 ; Cycle 5
XMPYU %fr7L,%fr7L,%fr31
LDD -72(%sp),%r31
LDD UN_EIGHT(%r24),%r28
ADD,DC %r3,%r19,%r19 ; Cycle 6
LDD -96(%sp),%r20
FSTD %fr28,-80(%sp)
ADD %r19,%r28,%r28 ; Cycle 7
FSTD %fr24,-64(%sp)
ADDIB,> -2,%r26,$DIAGLOOP ; Cycle 8
STD %r28,UN_EIGHT(%r24)
$ENDDIAGLOOP
ADD,DC %r0,%r22,%r22
CMPIB,= 0,%r26,$ONEMOREDIAG
SHRPD %r31,%r0,31,%r3
; Shutdown code, first stage.
FSTD %fr31,-104(%sp) ; Cycle 1 (alternate body)
LDD 0(%r24),%r28
SHRPD %r0,%r31,31,%r4 ; Cycle 2
ADD %r3,%r22,%r3
ADD,DC %r0,%r20,%r20 ; Cycle 3
LDD -80(%sp),%r21
ADD %r3,%r28,%r3
LDD -64(%sp),%r29 ; Cycle 4
STD %r3,0(%r24)
LDD EIGHT(%r24),%r1 ; Cycle 5
LDO SIXTEEN(%r25),%r25 ; Cycle 6
LDD -104(%sp),%r19
ADD,DC %r4,%r20,%r20
ADD %r20,%r1,%r1 ; Cycle 7
ADD,DC %r0,%r21,%r21 ; Cycle 8
STD %r1,EIGHT(%r24)
; Shutdown code, second stage.
SHRPD %r29,%r0,31,%r4 ; Cycle 1 (main body)
LDO THIRTY_TWO(%r24),%r24
LDD UN_SIXTEEN(%r24),%r1
SHRPD %r0,%r29,31,%r3 ; Cycle 2
ADD %r4,%r21,%r4
ADD,DC %r0,%r19,%r19 ; Cycle 3
ADD %r4,%r1,%r4
STD %r4,UN_SIXTEEN(%r24); Cycle 4
LDD UN_EIGHT(%r24),%r28 ; Cycle 5
ADD,DC %r3,%r19,%r19 ; Cycle 6
ADD %r19,%r28,%r28 ; Cycle 7
ADD,DC %r0,%r0,%r22 ; Cycle 8
CMPIB,*= 0,%r22,$Z0 ; if no overflow, exit
STD %r28,UN_EIGHT(%r24)
; Final carry propagation
$FDIAG2
LDO EIGHT(%r24),%r24
LDD UN_EIGHT(%r24),%r26
ADDI 1,%r26,%r26
CMPIB,*= 0,%r26,$FDIAG2 ; Keep looping if there is a carry.
STD %r26,UN_EIGHT(%r24)
B $Z0
NOP
; Here is the code that handles the difficult case N=1.
; We do the usual trick -- branch out of the startup code at appropriate
; points, and branch into the shutdown code.
$DIAG_N_IS_ONE
LDD -88(%sp),%r22
LDD -72(%sp),%r31
B $JOINDIAG
LDD -96(%sp),%r20
; We came out of the unrolled loop with wrong parity. Do one more
; single cycle. This is the "alternate body". It will, of course,
; give us opposite registers from the other case, so we need
; completely different shutdown code.
$ONEMOREDIAG
FSTD %fr31,-104(%sp) ; Cycle 1 (alternate body)
LDD 0(%r24),%r28
FLDD 0(%r25),%fr7 ; Cycle 2
SHRPD %r0,%r31,31,%r4
ADD %r3,%r22,%r3
ADD,DC %r0,%r20,%r20 ; Cycle 3
LDD -80(%sp),%r21
ADD %r3,%r28,%r3
LDD -64(%sp),%r29 ; Cycle 4
STD %r3,0(%r24)
XMPYU %fr7R,%fr7R,%fr29
LDD EIGHT(%r24),%r1 ; Cycle 5
XMPYU %fr7L,%fr7R,%fr27
XMPYU %fr7L,%fr7L,%fr30
LDD -104(%sp),%r19 ; Cycle 6
FSTD %fr29,-88(%sp)
ADD,DC %r4,%r20,%r20
FSTD %fr27,-72(%sp) ; Cycle 7
ADD %r20,%r1,%r1
ADD,DC %r0,%r21,%r21 ; Cycle 8
STD %r1,EIGHT(%r24)
; Shutdown code, first stage.
SHRPD %r29,%r0,31,%r4 ; Cycle 1 (main body)
LDO THIRTY_TWO(%r24),%r24
FSTD %fr30,-96(%sp)
LDD UN_SIXTEEN(%r24),%r1
SHRPD %r0,%r29,31,%r3 ; Cycle 2
ADD %r4,%r21,%r4
ADD,DC %r0,%r19,%r19 ; Cycle 3
LDD -88(%sp),%r22
ADD %r4,%r1,%r4
LDD -72(%sp),%r31 ; Cycle 4
STD %r4,UN_SIXTEEN(%r24)
LDD UN_EIGHT(%r24),%r28 ; Cycle 5
LDD -96(%sp),%r20 ; Cycle 6
ADD,DC %r3,%r19,%r19
ADD %r19,%r28,%r28 ; Cycle 7
ADD,DC %r0,%r22,%r22 ; Cycle 8
STD %r28,UN_EIGHT(%r24)
; Shutdown code, second stage.
$JOINDIAG
SHRPD %r31,%r0,31,%r3 ; Cycle 1 (alternate body)
LDD 0(%r24),%r28
SHRPD %r0,%r31,31,%r4 ; Cycle 2
ADD %r3,%r22,%r3
ADD,DC %r0,%r20,%r20 ; Cycle 3
ADD %r3,%r28,%r3
STD %r3,0(%r24) ; Cycle 4
LDD EIGHT(%r24),%r1 ; Cycle 5
ADD,DC %r4,%r20,%r20
ADD %r20,%r1,%r1 ; Cycle 7
ADD,DC %r0,%r0,%r21 ; Cycle 8
CMPIB,*= 0,%r21,$Z0 ; if no overflow, exit
STD %r1,EIGHT(%r24)
; Final carry propagation
$FDIAG1
LDO EIGHT(%r24),%r24
LDD EIGHT(%r24),%r26
ADDI 1,%r26,%r26
CMPIB,*= 0,%r26,$FDIAG1 ; Keep looping if there is a carry.
STD %r26,EIGHT(%r24)
$Z0
.LEAVE
.PROCEND
; .ALLOW
.SPACE $TEXT$
.SUBSPA $CODE$
#ifdef LITTLE_WORDIAN
.EXPORT maxpy_little,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,LONG_RETURN
.EXPORT add_diag_little,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,LONG_RETURN
#else
.EXPORT maxpy_big,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,LONG_RETURN
.EXPORT add_diag_big,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,LONG_RETURN
#endif
.END
; How to use "maxpy_PA20_little" and "maxpy_PA20_big"
;
; The routine "maxpy_PA20_little" or "maxpy_PA20_big"
; performs a 64-bit x any-size multiply, and adds the
; result to an area of memory. That is, it performs
; something like
;
; A B C D
; * Z
; __________
; P Q R S T
;
; and then adds the "PQRST" vector into an area of memory,
; handling all carries.
;
; Digression on nomenclature and endian-ness:
;
; Each of the capital letters in the above represents a 64-bit
; quantity. That is, you could think of the discussion as
; being in terms of radix-16-quintillion arithmetic. The data
; type being manipulated is "unsigned long long int". This
; requires the 64-bit extension of the HP-UX C compiler,
; available at release 10. You need these compiler flags to
; enable these extensions:
;
; -Aa +e +DA2.0 +DS2.0
;
; (The first specifies ANSI C, the second enables the
; extensions, which are beyond ANSI C, and the third and
; fourth tell the compiler to use whatever features of the
; PA2.0 architecture it wishes, in order to made the code more
; efficient. Since the presence of the assembly code will
; make the program unable to run on anything less than PA2.0,
; you might as well gain the performance enhancements in the C
; code as well.)
;
; Questions of "endian-ness" often come up, usually in the
; context of byte ordering in a word. These routines have a
; similar issue, that could be called "wordian-ness".
; Independent of byte ordering (PA is always big-endian), one
; can make two choices when representing extremely large
; numbers as arrays of 64-bit doublewords in memory.
;
; "Little-wordian" layout means that the least significant
; word of a number is stored at the lowest address.
;
; MSW LSW
; | |
; V V
;
; A B C D E
;
; ^ ^ ^
; | | |____ address 0
; | |
; | |_______address 8
; |
; address 32
;
; "Big-wordian" means that the most significant word is at the
; lowest address.
;
; MSW LSW
; | |
; V V
;
; A B C D E
;
; ^ ^ ^
; | | |____ address 32
; | |
; | |_______address 24
; |
; address 0
;
; When you compile the file, you must specify one or the other, with
; a switch "-DLITTLE_WORDIAN" or "-DBIG_WORDIAN".
;
; Incidentally, you assemble this file as part of your
; project with the same C compiler as the rest of the program.
; My "makefile" for a superprecision arithmetic package has
; the following stuff:
;
; # definitions:
; CC = cc -Aa +e -z +DA2.0 +DS2.0 +w1
; CFLAGS = +O3
; LDFLAGS = -L /usr/lib -Wl,-aarchive
;
; # general build rule for ".s" files:
; .s.o:
; $(CC) $(CFLAGS) -c $< -DBIG_WORDIAN
;
; # Now any bind step that calls for pa20.o will assemble pa20.s
;
; End of digression, back to arithmetic:
;
; The way we multiply two huge numbers is, of course, to multiply
; the "ABCD" vector by each of the "WXYZ" doublewords, adding
; the result vectors with increasing offsets, the way we learned
; in school, back before we all used calculators:
;
; A B C D
; * W X Y Z
; __________
; P Q R S T
; E F G H I
; M N O P Q
; + R S T U V
; _______________
; F I N A L S U M
;
; So we call maxpy_PA20_big (in my case; my package is
; big-wordian) repeatedly, giving the W, X, Y, and Z arguments
; in turn as the "scalar", and giving the "ABCD" vector each
; time. We direct it to add its result into an area of memory
; that we have cleared at the start. We skew the exact
; location into that area with each call.
;
; The prototype for the function is
;
; extern void maxpy_PA20_big(
; int length, /* Number of doublewords in the multiplicand vector. */
; const long long int *scalaraddr, /* Address to fetch the scalar. */
; const long long int *multiplicand, /* The multiplicand vector. */
; long long int *result); /* Where to accumulate the result. */
;
; (You should place a copy of this prototype in an include file
; or in your C file.)
;
; Now, IN ALL CASES, the given address for the multiplicand or
; the result is that of the LEAST SIGNIFICANT DOUBLEWORD.
; That word is, of course, the word at which the routine
; starts processing. "maxpy_PA20_little" then increases the
; addresses as it computes. "maxpy_PA20_big" decreases them.
;
; In our example above, "length" would be 4 in each case.
; "multiplicand" would be the "ABCD" vector. Specifically,
; the address of the element "D". "scalaraddr" would be the
; address of "W", "X", "Y", or "Z" on the four calls that we
; would make. (The order doesn't matter, of course.)
; "result" would be the appropriate address in the result
; area. When multiplying by "Z", that would be the least
; significant word. When multiplying by "Y", it would be the
; next higher word (8 bytes higher if little-wordian; 8 bytes
; lower if big-wordian), and so on. The size of the result
; area must be the the sum of the sizes of the multiplicand
; and multiplier vectors, and must be initialized to zero
; before we start.
;
; Whenever the routine adds its partial product into the result
; vector, it follows carry chains as far as they need to go.
;
; Here is the super-precision multiply routine that I use for
; my package. The package is big-wordian. I have taken out
; handling of exponents (it's a floating point package):
;
; static void mul_PA20(
; int size,
; const long long int *arg1,
; const long long int *arg2,
; long long int *result)
; {
; int i;
;
; for (i=0 ; i<2*size ; i++) result[i] = 0ULL;
;
; for (i=0 ; i<size ; i++) {
; maxpy_PA20_big(size, &arg2[i], &arg1[size-1], &result[size+i]);
; }
; }

View File

@@ -1,54 +0,0 @@
#/bin/sh
#
# ***** BEGIN LICENSE BLOCK *****
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# The Original Code is script to change the system id in an object file from PA-RISC 2.0 to 1.1.
#
# The Initial Developer of the Original Code is
# Hewlett-Packard Company.
# Portions created by the Initial Developer are Copyright (C) 1999
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# wrapped by Dennis Handly on Tue Mar 23 15:23:43 1999
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
#
# ***** END LICENSE BLOCK *****
# script to change the system id in an object file from PA-RISC 2.0 to 1.1
adb -w $1 << EOF
?m 0 -1 0
0x0?X
0x0?W (@0x0&~0x40000)|(~@0x0&0x40000)
0?"change checksum"
0x7c?X
0x7c?W (@0x7c&~0x40000)|(~@0x7c&0x40000)
$q
EOF
exit 0

View File

@@ -1,62 +0,0 @@
/*
* logtab.h
*
* Arbitrary precision integer arithmetic library
*
* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
*
* The Initial Developer of the Original Code is
* Michael J. Fromberger.
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
/* $Id: logtab.h,v 1.5 2004-04-27 23:04:36 gerv%gerv.net Exp $ */
const float s_logv_2[] = {
0.000000000f, 0.000000000f, 1.000000000f, 0.630929754f, /* 0 1 2 3 */
0.500000000f, 0.430676558f, 0.386852807f, 0.356207187f, /* 4 5 6 7 */
0.333333333f, 0.315464877f, 0.301029996f, 0.289064826f, /* 8 9 10 11 */
0.278942946f, 0.270238154f, 0.262649535f, 0.255958025f, /* 12 13 14 15 */
0.250000000f, 0.244650542f, 0.239812467f, 0.235408913f, /* 16 17 18 19 */
0.231378213f, 0.227670249f, 0.224243824f, 0.221064729f, /* 20 21 22 23 */
0.218104292f, 0.215338279f, 0.212746054f, 0.210309918f, /* 24 25 26 27 */
0.208014598f, 0.205846832f, 0.203795047f, 0.201849087f, /* 28 29 30 31 */
0.200000000f, 0.198239863f, 0.196561632f, 0.194959022f, /* 32 33 34 35 */
0.193426404f, 0.191958720f, 0.190551412f, 0.189200360f, /* 36 37 38 39 */
0.187901825f, 0.186652411f, 0.185449023f, 0.184288833f, /* 40 41 42 43 */
0.183169251f, 0.182087900f, 0.181042597f, 0.180031327f, /* 44 45 46 47 */
0.179052232f, 0.178103594f, 0.177183820f, 0.176291434f, /* 48 49 50 51 */
0.175425064f, 0.174583430f, 0.173765343f, 0.172969690f, /* 52 53 54 55 */
0.172195434f, 0.171441601f, 0.170707280f, 0.169991616f, /* 56 57 58 59 */
0.169293808f, 0.168613099f, 0.167948779f, 0.167300179f, /* 60 61 62 63 */
0.166666667f
};

View File

@@ -1,64 +0,0 @@
#!/usr/linguist/bin/perl
#
# make-logtab
#
# Generate a table of logarithms of 2 in various bases, for use in
# estimating the output sizes of various bases.
# ***** BEGIN LICENSE BLOCK *****
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# The Original Code is the MPI Arbitrary Precision Integer Arithmetic
# library.
#
# The Initial Developer of the Original Code is
# Michael J. Fromberger <sting@linguist.dartmouth.edu>
# Portions created by the Initial Developer are Copyright (C) 1998, 2000
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
#
# ***** END LICENSE BLOCK *****
# $Id: make-logtab,v 1.4 2005-02-02 22:28:22 gerv%gerv.net Exp $
$ARRAYNAME = $ENV{'ARRAYNAME'} || "s_logv_2";
$ARRAYTYPE = $ENV{'ARRAYTYPE'} || "float";
printf("const %s %s[] = {\n %0.9ff, %0.9ff, ",
$ARRAYTYPE, $ARRAYNAME, 0, 0);
$brk = 2;
for($ix = 2; $ix < 64; $ix++) {
printf("%0.9ff, ", (log(2)/log($ix)));
$brk = ($brk + 1) & 3;
if(!$brk) {
printf(" /* %2d %2d %2d %2d */\n ",
$ix - 3, $ix - 2, $ix - 1, $ix);
}
}
printf("%0.9ff\n};\n\n", (log(2)/log($ix)));
exit 0;

View File

@@ -1,133 +0,0 @@
#!/usr/linguist/bin/perl
#
# make-test-arrays
#
# Given a test-arrays file, which specifies the test suite names, the
# names of the functions which perform those test suites, and
# descriptive comments, this script generates C structures for the
# mpi-test program. The input consists of lines of the form:
#
# suite-name:function-name:comment
#
# The output is written to the standard output. Blank lines are
# ignored, and comments beginning with '#' are stripped.
# ***** BEGIN LICENSE BLOCK *****
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
#
# The Initial Developer of the Original Code is
# Michael J. Fromberger <sting@linguist.dartmouth.edu>.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
#
# ***** END LICENSE BLOCK *****
# $Id: make-test-arrays,v 1.2 2005-02-02 22:28:22 gerv%gerv.net Exp $
#
# Read parameters from the environment, if available
$NAMEVAR = $ENV{'NAMEVAR'} || "g_names";
$COUNTVAR = $ENV{'COUNTVAR'} || "g_count";
$FUNCVAR = $ENV{'FUNCVAR'} || "g_tests";
$DESCVAR = $ENV{'DESCVAR'} || "g_descs";
$FUNCLEN = 13;
$NAMELEN = 18;
$DESCLEN = 45;
#------------------------------------------------------------------------
# Suck in input from the files on the command line, or standard input
while(<>) {
chomp;
s/\#.*$//;
next if /^\s*$/;
($suite, $func, $desc) = split(/:/, $_);
$tmp = { "suite" => $suite,
"func" => $func,
"desc" => $desc };
push(@item, $tmp);
}
$count = scalar(@item);
$last = pop(@item);
#------------------------------------------------------------------------
# Output the table of names
print "/* Table mapping test suite names to index numbers */\n";
printf("const int %s = %d;\n", $COUNTVAR, $count);
printf("const char *%s[] = {\n", $NAMEVAR);
foreach $elt (@item) {
printf(" \"%s\",%s/* %s%s */\n", $elt->{"suite"},
" " x ($NAMELEN - length($elt->{"suite"})),
$elt->{"desc"},
" " x ($DESCLEN - length($elt->{"desc"})));
}
printf(" \"%s\" %s/* %s%s */\n", $last->{"suite"},
" " x ($NAMELEN - length($last->{"suite"})),
$last->{"desc"},
" " x ($DESCLEN - length($last->{"desc"})));
print "};\n\n";
#------------------------------------------------------------------------
# Output the driver function prototypes
print "/* Test function prototypes */\n";
foreach $elt (@item, $last) {
printf("int %s(void);\n", $elt->{"func"});
}
print "\n";
#------------------------------------------------------------------------
# Output the table of functions
print "/* Table mapping index numbers to functions */\n";
printf("int (*%s[])(void) = {\n ", $FUNCVAR);
$brk = 0;
foreach $elt (@item) {
print($elt->{"func"}, ", ",
" " x ($FUNCLEN - length($elt->{"func"})));
$brk = ($brk + 1) & 3;
print "\n " unless($brk);
}
print $last->{"func"}, "\n};\n\n";
#------------------------------------------------------------------------
# Output the table of descriptions
print "/* Table mapping index numbers to descriptions */\n";
printf("const char *%s[] = {\n", $DESCVAR);
foreach $elt (@item) {
printf(" \"%s\",\n", $elt->{"desc"});
}
printf(" \"%s\"\n};\n\n", $last->{"desc"});
exit 0;

View File

@@ -1,342 +0,0 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is the Netscape security libraries.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2000
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <malloc.h>
#include <time.h>
#include "mpi.h"
#include "mpi-priv.h"
/* #define OLD_WAY 1 */
/* This key is the 1024-bit test key used for speed testing of RSA private
** key ops.
*/
#define CONST const
static CONST unsigned char default_n[128] = {
0xc2,0xae,0x96,0x89,0xaf,0xce,0xd0,0x7b,0x3b,0x35,0xfd,0x0f,0xb1,0xf4,0x7a,0xd1,
0x3c,0x7d,0xb5,0x86,0xf2,0x68,0x36,0xc9,0x97,0xe6,0x82,0x94,0x86,0xaa,0x05,0x39,
0xec,0x11,0x51,0xcc,0x5c,0xa1,0x59,0xba,0x29,0x18,0xf3,0x28,0xf1,0x9d,0xe3,0xae,
0x96,0x5d,0x6d,0x87,0x73,0xf6,0xf6,0x1f,0xd0,0x2d,0xfb,0x2f,0x7a,0x13,0x7f,0xc8,
0x0c,0x7a,0xe9,0x85,0xfb,0xce,0x74,0x86,0xf8,0xef,0x2f,0x85,0x37,0x73,0x0f,0x62,
0x4e,0x93,0x17,0xb7,0x7e,0x84,0x9a,0x94,0x11,0x05,0xca,0x0d,0x31,0x4b,0x2a,0xc8,
0xdf,0xfe,0xe9,0x0c,0x13,0xc7,0xf2,0xad,0x19,0x64,0x28,0x3c,0xb5,0x6a,0xc8,0x4b,
0x79,0xea,0x7c,0xce,0x75,0x92,0x45,0x3e,0xa3,0x9d,0x64,0x6f,0x04,0x69,0x19,0x17
};
static CONST unsigned char default_d[128] = {
0x13,0xcb,0xbc,0xf2,0xf3,0x35,0x8c,0x6d,0x7b,0x6f,0xd9,0xf3,0xa6,0x9c,0xbd,0x80,
0x59,0x2e,0x4f,0x2f,0x11,0xa7,0x17,0x2b,0x18,0x8f,0x0f,0xe8,0x1a,0x69,0x5f,0x6e,
0xac,0x5a,0x76,0x7e,0xd9,0x4c,0x6e,0xdb,0x47,0x22,0x8a,0x57,0x37,0x7a,0x5e,0x94,
0x7a,0x25,0xb5,0xe5,0x78,0x1d,0x3c,0x99,0xaf,0x89,0x7d,0x69,0x2e,0x78,0x9d,0x1d,
0x84,0xc8,0xc1,0xd7,0x1a,0xb2,0x6d,0x2d,0x8a,0xd9,0xab,0x6b,0xce,0xae,0xb0,0xa0,
0x58,0x55,0xad,0x5c,0x40,0x8a,0xd6,0x96,0x08,0x8a,0xe8,0x63,0xe6,0x3d,0x6c,0x20,
0x49,0xc7,0xaf,0x0f,0x25,0x73,0xd3,0x69,0x43,0x3b,0xf2,0x32,0xf8,0x3d,0x5e,0xee,
0x7a,0xca,0xd6,0x94,0x55,0xe5,0xbd,0x25,0x34,0x8d,0x63,0x40,0xb5,0x8a,0xc3,0x01
};
#define DEFAULT_ITERS 50
typedef clock_t timetype;
#define gettime(x) *(x) = clock()
#define subtime(a, b) a -= b
#define msec(x) ((clock_t)((double)x * 1000.0 / CLOCKS_PER_SEC))
#define sec(x) (x / CLOCKS_PER_SEC)
struct TimingContextStr {
timetype start;
timetype end;
timetype interval;
int minutes;
int seconds;
int millisecs;
};
typedef struct TimingContextStr TimingContext;
TimingContext *CreateTimingContext(void)
{
return (TimingContext *)malloc(sizeof(TimingContext));
}
void DestroyTimingContext(TimingContext *ctx)
{
free(ctx);
}
void TimingBegin(TimingContext *ctx)
{
gettime(&ctx->start);
}
static void timingUpdate(TimingContext *ctx)
{
ctx->millisecs = msec(ctx->interval) % 1000;
ctx->seconds = sec(ctx->interval);
ctx->minutes = ctx->seconds / 60;
ctx->seconds %= 60;
}
void TimingEnd(TimingContext *ctx)
{
gettime(&ctx->end);
ctx->interval = ctx->end;
subtime(ctx->interval, ctx->start);
timingUpdate(ctx);
}
char *TimingGenerateString(TimingContext *ctx)
{
static char sBuf[4096];
sprintf(sBuf, "%d minutes, %d.%03d seconds", ctx->minutes,
ctx->seconds, ctx->millisecs);
return sBuf;
}
static void
dumpBytes( unsigned char * b, int l)
{
int i;
if (l <= 0)
return;
for (i = 0; i < l; ++i) {
if (i % 16 == 0)
printf("\t");
printf(" %02x", b[i]);
if (i % 16 == 15)
printf("\n");
}
if ((i % 16) != 0)
printf("\n");
printf("\n");
}
static mp_err
testNewFuncs(const unsigned char * modulusBytes, int modulus_len)
{
mp_err mperr = MP_OKAY;
mp_int modulus;
unsigned char buf[512];
mperr = mp_init(&modulus);
mperr = mp_read_unsigned_octets(&modulus, modulusBytes, modulus_len );
mperr = mp_to_fixlen_octets(&modulus, buf, modulus_len);
mperr = mp_to_fixlen_octets(&modulus, buf, modulus_len+1);
mperr = mp_to_fixlen_octets(&modulus, buf, modulus_len+4);
mperr = mp_to_unsigned_octets(&modulus, buf, modulus_len);
mperr = mp_to_signed_octets(&modulus, buf, modulus_len + 1);
mp_clear(&modulus);
return mperr;
}
int
testModExp( const unsigned char * modulusBytes,
const unsigned int expo,
const unsigned char * input,
unsigned char * output,
int modulus_len)
{
mp_err mperr = MP_OKAY;
mp_int modulus;
mp_int base;
mp_int exponent;
mp_int result;
mperr = mp_init(&modulus);
mperr += mp_init(&base);
mperr += mp_init(&exponent);
mperr += mp_init(&result);
/* we initialize all mp_ints unconditionally, even if some fail.
** This guarantees that the DIGITS pointer is valid (even if null).
** So, mp_clear will do the right thing below.
*/
if (mperr == MP_OKAY) {
mperr = mp_read_unsigned_octets(&modulus,
modulusBytes + (sizeof default_n - modulus_len), modulus_len );
mperr += mp_read_unsigned_octets(&base, input, modulus_len );
mp_set(&exponent, expo);
if (mperr == MP_OKAY) {
#if OLD_WAY
mperr = s_mp_exptmod(&base, &exponent, &modulus, &result);
#else
mperr = mp_exptmod(&base, &exponent, &modulus, &result);
#endif
if (mperr == MP_OKAY) {
mperr = mp_to_fixlen_octets(&result, output, modulus_len);
}
}
}
mp_clear(&base);
mp_clear(&result);
mp_clear(&modulus);
mp_clear(&exponent);
return (int)mperr;
}
int
doModExp( const unsigned char * modulusBytes,
const unsigned char * exponentBytes,
const unsigned char * input,
unsigned char * output,
int modulus_len)
{
mp_err mperr = MP_OKAY;
mp_int modulus;
mp_int base;
mp_int exponent;
mp_int result;
mperr = mp_init(&modulus);
mperr += mp_init(&base);
mperr += mp_init(&exponent);
mperr += mp_init(&result);
/* we initialize all mp_ints unconditionally, even if some fail.
** This guarantees that the DIGITS pointer is valid (even if null).
** So, mp_clear will do the right thing below.
*/
if (mperr == MP_OKAY) {
mperr = mp_read_unsigned_octets(&modulus,
modulusBytes + (sizeof default_n - modulus_len), modulus_len );
mperr += mp_read_unsigned_octets(&exponent, exponentBytes, modulus_len );
mperr += mp_read_unsigned_octets(&base, input, modulus_len );
if (mperr == MP_OKAY) {
#if OLD_WAY
mperr = s_mp_exptmod(&base, &exponent, &modulus, &result);
#else
mperr = mp_exptmod(&base, &exponent, &modulus, &result);
#endif
if (mperr == MP_OKAY) {
mperr = mp_to_fixlen_octets(&result, output, modulus_len);
}
}
}
mp_clear(&base);
mp_clear(&result);
mp_clear(&modulus);
mp_clear(&exponent);
return (int)mperr;
}
int
main(int argc, char **argv)
{
TimingContext * timeCtx;
char * progName;
long iters = DEFAULT_ITERS;
unsigned int modulus_len;
int i;
int rv;
unsigned char buf [1024];
unsigned char buf2[1024];
progName = strrchr(argv[0], '/');
if (!progName)
progName = strrchr(argv[0], '\\');
progName = progName ? progName+1 : argv[0];
if (argc >= 2) {
iters = atol(argv[1]);
}
if (argc >= 3) {
modulus_len = atol(argv[2]);
} else
modulus_len = sizeof default_n;
/* no library init function !? */
memset(buf, 0x41, sizeof buf);
if (iters < 2) {
testNewFuncs( default_n, modulus_len);
testNewFuncs( default_n+1, modulus_len - 1);
testNewFuncs( default_n+2, modulus_len - 2);
testNewFuncs( default_n+3, modulus_len - 3);
printf("%lu allocations, %lu frees, %lu copies\n", mp_allocs, mp_frees, mp_copies);
rv = testModExp(default_n, 0, buf, buf2, modulus_len);
dumpBytes((unsigned char *)buf2, modulus_len);
printf("%lu allocations, %lu frees, %lu copies\n", mp_allocs, mp_frees, mp_copies);
rv = testModExp(default_n, 1, buf, buf2, modulus_len);
dumpBytes((unsigned char *)buf2, modulus_len);
printf("%lu allocations, %lu frees, %lu copies\n", mp_allocs, mp_frees, mp_copies);
rv = testModExp(default_n, 2, buf, buf2, modulus_len);
dumpBytes((unsigned char *)buf2, modulus_len);
printf("%lu allocations, %lu frees, %lu copies\n", mp_allocs, mp_frees, mp_copies);
rv = testModExp(default_n, 3, buf, buf2, modulus_len);
dumpBytes((unsigned char *)buf2, modulus_len);
}
printf("%lu allocations, %lu frees, %lu copies\n", mp_allocs, mp_frees, mp_copies);
rv = doModExp(default_n, default_d, buf, buf2, modulus_len);
if (rv != 0) {
fprintf(stderr, "Error in modexp operation:\n");
exit(1);
}
dumpBytes((unsigned char *)buf2, modulus_len);
printf("%lu allocations, %lu frees, %lu copies\n", mp_allocs, mp_frees, mp_copies);
timeCtx = CreateTimingContext();
TimingBegin(timeCtx);
i = iters;
while (i--) {
rv = doModExp(default_n, default_d, buf, buf2, modulus_len);
if (rv != 0) {
fprintf(stderr, "Error in modexp operation\n");
exit(1);
}
}
TimingEnd(timeCtx);
printf("%ld iterations in %s\n", iters, TimingGenerateString(timeCtx));
printf("%lu allocations, %lu frees, %lu copies\n", mp_allocs, mp_frees, mp_copies);
return 0;
}

View File

@@ -1,329 +0,0 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is SPARC optimized Montgomery multiply functions.
*
* The Initial Developer of the Original Code is
* Sun Microsystems Inc.
* Portions created by the Initial Developer are Copyright (C) 1999-2000
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Netscape Communications Corporation
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
/* $Id: montmulf.c,v 1.7 2004-04-27 23:04:36 gerv%gerv.net Exp $ */
#ifdef SOLARIS
#define RF_INLINE_MACROS 1
#endif
static const double TwoTo16=65536.0;
static const double TwoToMinus16=1.0/65536.0;
static const double Zero=0.0;
static const double TwoTo32=65536.0*65536.0;
static const double TwoToMinus32=1.0/(65536.0*65536.0);
#ifdef RF_INLINE_MACROS
double upper32(double);
double lower32(double, double);
double mod(double, double, double);
void i16_to_d16_and_d32x4(const double * /*1/(2^16)*/,
const double * /* 2^16*/,
const double * /* 0 */,
double * /*result16*/,
double * /* result32 */,
float * /*source - should be unsigned int*
converted to float* */);
#else
#ifdef MP_USE_FLOOR
#include <math.h>
#else
#define floor(d) ((double)((unsigned long long)(d)))
#endif
static double upper32(double x)
{
return floor(x*TwoToMinus32);
}
static double lower32(double x, double y)
{
return x-TwoTo32*floor(x*TwoToMinus32);
}
static double mod(double x, double oneoverm, double m)
{
return x-m*floor(x*oneoverm);
}
#endif
static void cleanup(double *dt, int from, int tlen)
{
int i;
double tmp,tmp1,x,x1;
tmp=tmp1=Zero;
/* original code **
for(i=2*from;i<2*tlen-2;i++)
{
x=dt[i];
dt[i]=lower32(x,Zero)+tmp1;
tmp1=tmp;
tmp=upper32(x);
}
dt[tlen-2]+=tmp1;
dt[tlen-1]+=tmp;
**end original code ***/
/* new code ***/
for(i=2*from;i<2*tlen;i+=2)
{
x=dt[i];
x1=dt[i+1];
dt[i]=lower32(x,Zero)+tmp;
dt[i+1]=lower32(x1,Zero)+tmp1;
tmp=upper32(x);
tmp1=upper32(x1);
}
/** end new code **/
}
void conv_d16_to_i32(unsigned int *i32, double *d16, long long *tmp, int ilen)
{
int i;
long long t, t1, a, b, c, d;
t1=0;
a=(long long)d16[0];
b=(long long)d16[1];
for(i=0; i<ilen-1; i++)
{
c=(long long)d16[2*i+2];
t1+=(unsigned int)a;
t=(a>>32);
d=(long long)d16[2*i+3];
t1+=(b&0xffff)<<16;
t+=(b>>16)+(t1>>32);
i32[i]=(unsigned int)t1;
t1=t;
a=c;
b=d;
}
t1+=(unsigned int)a;
t=(a>>32);
t1+=(b&0xffff)<<16;
i32[i]=(unsigned int)t1;
}
void conv_i32_to_d32(double *d32, unsigned int *i32, int len)
{
int i;
#pragma pipeloop(0)
for(i=0;i<len;i++) d32[i]=(double)(i32[i]);
}
void conv_i32_to_d16(double *d16, unsigned int *i32, int len)
{
int i;
unsigned int a;
#pragma pipeloop(0)
for(i=0;i<len;i++)
{
a=i32[i];
d16[2*i]=(double)(a&0xffff);
d16[2*i+1]=(double)(a>>16);
}
}
void conv_i32_to_d32_and_d16(double *d32, double *d16,
unsigned int *i32, int len)
{
int i = 0;
unsigned int a;
#pragma pipeloop(0)
#ifdef RF_INLINE_MACROS
for(;i<len-3;i+=4)
{
i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero,
&(d16[2*i]), &(d32[i]), (float *)(&(i32[i])));
}
#endif
for(;i<len;i++)
{
a=i32[i];
d32[i]=(double)(i32[i]);
d16[2*i]=(double)(a&0xffff);
d16[2*i+1]=(double)(a>>16);
}
}
void adjust_montf_result(unsigned int *i32, unsigned int *nint, int len)
{
long long acc;
int i;
if(i32[len]>0) i=-1;
else
{
for(i=len-1; i>=0; i--)
{
if(i32[i]!=nint[i]) break;
}
}
if((i<0)||(i32[i]>nint[i]))
{
acc=0;
for(i=0;i<len;i++)
{
acc=acc+(unsigned long long)(i32[i])-(unsigned long long)(nint[i]);
i32[i]=(unsigned int)acc;
acc=acc>>32;
}
}
}
/*
** the lengths of the input arrays should be at least the following:
** result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen]
** all of them should be different from one another
**
*/
void mont_mulf_noconv(unsigned int *result,
double *dm1, double *dm2, double *dt,
double *dn, unsigned int *nint,
int nlen, double dn0)
{
int i, j, jj;
int tmp;
double digit, m2j, nextm2j, a, b;
double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0;
pdm1=&(dm1[0]);
pdm2=&(dm2[0]);
pdn=&(dn[0]);
pdm2[2*nlen]=Zero;
if (nlen!=16)
{
for(i=0;i<4*nlen+2;i++) dt[i]=Zero;
a=dt[0]=pdm1[0]*pdm2[0];
digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16);
pdtj=&(dt[0]);
for(j=jj=0;j<2*nlen;j++,jj++,pdtj++)
{
m2j=pdm2[j];
a=pdtj[0]+pdn[0]*digit;
b=pdtj[1]+pdm1[0]*pdm2[j+1]+a*TwoToMinus16;
pdtj[1]=b;
#pragma pipeloop(0)
for(i=1;i<nlen;i++)
{
pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
}
if((jj==30)) {cleanup(dt,j/2+1,2*nlen+1); jj=0;}
digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16);
}
}
else
{
a=dt[0]=pdm1[0]*pdm2[0];
dt[65]= dt[64]= dt[63]= dt[62]= dt[61]= dt[60]=
dt[59]= dt[58]= dt[57]= dt[56]= dt[55]= dt[54]=
dt[53]= dt[52]= dt[51]= dt[50]= dt[49]= dt[48]=
dt[47]= dt[46]= dt[45]= dt[44]= dt[43]= dt[42]=
dt[41]= dt[40]= dt[39]= dt[38]= dt[37]= dt[36]=
dt[35]= dt[34]= dt[33]= dt[32]= dt[31]= dt[30]=
dt[29]= dt[28]= dt[27]= dt[26]= dt[25]= dt[24]=
dt[23]= dt[22]= dt[21]= dt[20]= dt[19]= dt[18]=
dt[17]= dt[16]= dt[15]= dt[14]= dt[13]= dt[12]=
dt[11]= dt[10]= dt[ 9]= dt[ 8]= dt[ 7]= dt[ 6]=
dt[ 5]= dt[ 4]= dt[ 3]= dt[ 2]= dt[ 1]=Zero;
pdn_0=pdn[0];
pdm1_0=pdm1[0];
digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16);
pdtj=&(dt[0]);
for(j=0;j<32;j++,pdtj++)
{
m2j=pdm2[j];
a=pdtj[0]+pdn_0*digit;
b=pdtj[1]+pdm1_0*pdm2[j+1]+a*TwoToMinus16;
pdtj[1]=b;
/**** this loop will be fully unrolled:
for(i=1;i<16;i++)
{
pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
}
*************************************/
pdtj[2]+=pdm1[1]*m2j+pdn[1]*digit;
pdtj[4]+=pdm1[2]*m2j+pdn[2]*digit;
pdtj[6]+=pdm1[3]*m2j+pdn[3]*digit;
pdtj[8]+=pdm1[4]*m2j+pdn[4]*digit;
pdtj[10]+=pdm1[5]*m2j+pdn[5]*digit;
pdtj[12]+=pdm1[6]*m2j+pdn[6]*digit;
pdtj[14]+=pdm1[7]*m2j+pdn[7]*digit;
pdtj[16]+=pdm1[8]*m2j+pdn[8]*digit;
pdtj[18]+=pdm1[9]*m2j+pdn[9]*digit;
pdtj[20]+=pdm1[10]*m2j+pdn[10]*digit;
pdtj[22]+=pdm1[11]*m2j+pdn[11]*digit;
pdtj[24]+=pdm1[12]*m2j+pdn[12]*digit;
pdtj[26]+=pdm1[13]*m2j+pdn[13]*digit;
pdtj[28]+=pdm1[14]*m2j+pdn[14]*digit;
pdtj[30]+=pdm1[15]*m2j+pdn[15]*digit;
/* no need for cleenup, cannot overflow */
digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16);
}
}
conv_d16_to_i32(result,dt+2*nlen,(long long *)dt,nlen+1);
adjust_montf_result(result,nint,nlen);
}

View File

@@ -1,103 +0,0 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is interface file for SPARC Montgomery multiply functions.
*
* The Initial Developer of the Original Code is
* Sun Microsystems Inc.
* Portions created by the Initial Developer are Copyright (C) 1999-2000
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Netscape Communications Corporation
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
/* $Id: montmulf.h,v 1.4 2004-04-27 23:04:36 gerv%gerv.net Exp $ */
/* The functions that are to be called from outside of the .s file have the
* following interfaces and array size requirements:
*/
void conv_i32_to_d32(double *d32, unsigned int *i32, int len);
/* Converts an array of int's to an array of doubles, so that each double
* corresponds to an int. len is the number of items converted.
* Does not allocate the output array.
* The pointers d32 and i32 should point to arrays of size at least len
* (doubles and unsigned ints, respectively)
*/
void conv_i32_to_d16(double *d16, unsigned int *i32, int len);
/* Converts an array of int's to an array of doubles so that each element
* of the int array is converted to a pair of doubles, the first one
* corresponding to the lower (least significant) 16 bits of the int and
* the second one corresponding to the upper (most significant) 16 bits of
* the 32-bit int. len is the number of ints converted.
* Does not allocate the output array.
* The pointer d16 should point to an array of doubles of size at least
* 2*len and i32 should point an array of ints of size at least len
*/
void conv_i32_to_d32_and_d16(double *d32, double *d16,
unsigned int *i32, int len);
/* Does the above two conversions together, it is much faster than doing
* both of those in succession
*/
void mont_mulf_noconv(unsigned int *result,
double *dm1, double *dm2, double *dt,
double *dn, unsigned int *nint,
int nlen, double dn0);
/* Does the Montgomery multiplication of the numbers stored in the arrays
* pointed to by dm1 and dm2, writing the result to the array pointed to by
* result. It uses the array pointed to by dt as a temporary work area.
* nint should point to the modulus in the array-of-integers representation,
* dn should point to its array-of-doubles as obtained as a result of the
* function call conv_i32_to_d32(dn, nint, nlen);
* nlen is the length of the array containing the modulus.
* The representation used for dm1 is the one that is a result of the function
* call conv_i32_to_d32(dm1, m1, nlen), the representation for dm2 is the
* result of the function call conv_i32_to_d16(dm2, m2, nlen).
* Note that m1 and m2 should both be of length nlen, so they should be
* padded with 0's if necessary before the conversion. The result comes in
* this form (int representation, padded with 0's).
* dn0 is the value of the 16 least significant bits of n0'.
* The function does not allocate memory for any of the arrays, so the
* pointers should point to arrays with the following minimal sizes:
* result - nlen+1
* dm1 - nlen
* dm2 - 2*nlen+1 ( the +1 is necessary for technical reasons )
* dt - 4*nlen+2
* dn - nlen
* nint - nlen
* No two arrays should point to overlapping areas of memory.
*/

View File

@@ -1,141 +0,0 @@
!
! ***** BEGIN LICENSE BLOCK *****
! Version: MPL 1.1/GPL 2.0/LGPL 2.1
!
! The contents of this file are subject to the Mozilla Public License Version
! 1.1 (the "License"); you may not use this file except in compliance with
! the License. You may obtain a copy of the License at
! http://www.mozilla.org/MPL/
!
! Software distributed under the License is distributed on an "AS IS" basis,
! WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
! for the specific language governing rights and limitations under the
! License.
!
! The Original Code is inline macros for SPARC Montgomery multiply functions.
!
! The Initial Developer of the Original Code is
! Sun Microsystems Inc.
! Portions created by the Initial Developer are Copyright (C) 1999-2000
! the Initial Developer. All Rights Reserved.
!
! Contributor(s):
!
! Alternatively, the contents of this file may be used under the terms of
! either the GNU General Public License Version 2 or later (the "GPL"), or
! the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
! in which case the provisions of the GPL or the LGPL are applicable instead
! of those above. If you wish to allow use of your version of this file only
! under the terms of either the GPL or the LGPL, and not to allow others to
! use your version of this file under the terms of the MPL, indicate your
! decision by deleting the provisions above and replace them with the notice
! and other provisions required by the GPL or the LGPL. If you do not delete
! the provisions above, a recipient may use your version of this file under
! the terms of any one of the MPL, the GPL or the LGPL.
!
! ***** END LICENSE BLOCK *****
! $Id: montmulf.il,v 1.4 2004-04-27 23:04:36 gerv%gerv.net Exp $
!
! double upper32(double /*frs1*/);
!
.inline upper32,8
std %o0,[%sp+0x48]
ldd [%sp+0x48],%f10
fdtox %f10,%f10
fitod %f10,%f0
.end
!
! double lower32(double /*frs1*/, double /* Zero */);
!
.inline lower32,8
std %o0,[%sp+0x48]
ldd [%sp+0x48],%f10
std %o2,[%sp+0x48]
ldd [%sp+0x48],%f12
fdtox %f10,%f10
fmovs %f12,%f10
fxtod %f10,%f0
.end
!
! double mod(double /*x*/, double /*1/m*/, double /*m*/);
!
.inline mod,12
std %o0,[%sp+0x48]
ldd [%sp+0x48],%f2
std %o2,[%sp+0x48]
ldd [%sp+0x48],%f4
std %o4,[%sp+0x48]
ldd [%sp+0x48],%f6
fmuld %f2,%f4,%f4
fdtox %f4,%f4
fxtod %f4,%f4
fmuld %f4,%f6,%f4
fsubd %f2,%f4,%f0
.end
!
! void i16_to_d16_and_d32x4(double * /*1/(2^16)*/, double * /* 2^16*/,
! double * /* 0 */,
! double * /*result16*/, double * /* result32 */
! float * /*source - should be unsigned int*
! converted to float* */);
!
.inline i16_to_d16_and_d32x4,24
ldd [%o0],%f2 ! 1/(2^16)
ldd [%o1],%f4 ! 2^16
ldd [%o2],%f22
fmovd %f22,%f6
ld [%o5],%f7
fmovd %f22,%f10
ld [%o5+4],%f11
fmovd %f22,%f14
ld [%o5+8],%f15
fmovd %f22,%f18
ld [%o5+12],%f19
fxtod %f6,%f6
std %f6,[%o4]
fxtod %f10,%f10
std %f10,[%o4+8]
fxtod %f14,%f14
std %f14,[%o4+16]
fxtod %f18,%f18
std %f18,[%o4+24]
fmuld %f2,%f6,%f8
fmuld %f2,%f10,%f12
fmuld %f2,%f14,%f16
fmuld %f2,%f18,%f20
fdtox %f8,%f8
fdtox %f12,%f12
fdtox %f16,%f16
fdtox %f20,%f20
fxtod %f8,%f8
std %f8,[%o3+8]
fxtod %f12,%f12
std %f12,[%o3+24]
fxtod %f16,%f16
std %f16,[%o3+40]
fxtod %f20,%f20
std %f20,[%o3+56]
fmuld %f8,%f4,%f8
fmuld %f12,%f4,%f12
fmuld %f16,%f4,%f16
fmuld %f20,%f4,%f20
fsubd %f6,%f8,%f8
std %f8,[%o3]
fsubd %f10,%f12,%f12
std %f12,[%o3+16]
fsubd %f14,%f16,%f16
std %f16,[%o3+32]
fsubd %f18,%f20,%f20
std %f20,[%o3+48]
.end

File diff suppressed because it is too large Load Diff

View File

@@ -1,141 +0,0 @@
!
! ***** BEGIN LICENSE BLOCK *****
! Version: MPL 1.1/GPL 2.0/LGPL 2.1
!
! The contents of this file are subject to the Mozilla Public License Version
! 1.1 (the "License"); you may not use this file except in compliance with
! the License. You may obtain a copy of the License at
! http://www.mozilla.org/MPL/
!
! Software distributed under the License is distributed on an "AS IS" basis,
! WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
! for the specific language governing rights and limitations under the
! License.
!
! The Original Code is inline macros for SPARC Montgomery multiply functions.
!
! The Initial Developer of the Original Code is
! Sun Microsystems Inc.
! Portions created by the Initial Developer are Copyright (C) 1999-2000
! the Initial Developer. All Rights Reserved.
!
! Contributor(s):
!
! Alternatively, the contents of this file may be used under the terms of
! either the GNU General Public License Version 2 or later (the "GPL"), or
! the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
! in which case the provisions of the GPL or the LGPL are applicable instead
! of those above. If you wish to allow use of your version of this file only
! under the terms of either the GPL or the LGPL, and not to allow others to
! use your version of this file under the terms of the MPL, indicate your
! decision by deleting the provisions above and replace them with the notice
! and other provisions required by the GPL or the LGPL. If you do not delete
! the provisions above, a recipient may use your version of this file under
! the terms of any one of the MPL, the GPL or the LGPL.
!
! ***** END LICENSE BLOCK *****
! $Id: montmulfv8.il,v 1.3 2004-04-27 23:04:36 gerv%gerv.net Exp $
!
! double upper32(double /*frs1*/);
!
.inline upper32,8
std %o0,[%sp+0x48]
ldd [%sp+0x48],%f10
fdtox %f10,%f10
fitod %f10,%f0
.end
!
! double lower32(double /*frs1*/, double /* Zero */);
!
.inline lower32,8
std %o0,[%sp+0x48]
ldd [%sp+0x48],%f10
std %o2,[%sp+0x48]
ldd [%sp+0x48],%f12
fdtox %f10,%f10
fmovs %f12,%f10
fxtod %f10,%f0
.end
!
! double mod(double /*x*/, double /*1/m*/, double /*m*/);
!
.inline mod,12
std %o0,[%sp+0x48]
ldd [%sp+0x48],%f2
std %o2,[%sp+0x48]
ldd [%sp+0x48],%f4
std %o4,[%sp+0x48]
ldd [%sp+0x48],%f6
fmuld %f2,%f4,%f4
fdtox %f4,%f4
fxtod %f4,%f4
fmuld %f4,%f6,%f4
fsubd %f2,%f4,%f0
.end
!
! void i16_to_d16_and_d32x4(double * /*1/(2^16)*/, double * /* 2^16*/,
! double * /* 0 */,
! double * /*result16*/, double * /* result32 */
! float * /*source - should be unsigned int*
! converted to float* */);
!
.inline i16_to_d16_and_d32x4,24
ldd [%o0],%f2 ! 1/(2^16)
ldd [%o1],%f4 ! 2^16
ldd [%o2],%f22
fmovd %f22,%f6
ld [%o5],%f7
fmovd %f22,%f10
ld [%o5+4],%f11
fmovd %f22,%f14
ld [%o5+8],%f15
fmovd %f22,%f18
ld [%o5+12],%f19
fxtod %f6,%f6
std %f6,[%o4]
fxtod %f10,%f10
std %f10,[%o4+8]
fxtod %f14,%f14
std %f14,[%o4+16]
fxtod %f18,%f18
std %f18,[%o4+24]
fmuld %f2,%f6,%f8
fmuld %f2,%f10,%f12
fmuld %f2,%f14,%f16
fmuld %f2,%f18,%f20
fdtox %f8,%f8
fdtox %f12,%f12
fdtox %f16,%f16
fdtox %f20,%f20
fxtod %f8,%f8
std %f8,[%o3+8]
fxtod %f12,%f12
std %f12,[%o3+24]
fxtod %f16,%f16
std %f16,[%o3+40]
fxtod %f20,%f20
std %f20,[%o3+56]
fmuld %f8,%f4,%f8
fmuld %f12,%f4,%f12
fmuld %f16,%f4,%f16
fmuld %f20,%f4,%f20
fsubd %f6,%f8,%f8
std %f8,[%o3]
fsubd %f10,%f12,%f12
std %f12,[%o3+16]
fsubd %f14,%f16,%f16
std %f16,[%o3+32]
fsubd %f18,%f20,%f20
std %f20,[%o3+48]
.end

File diff suppressed because it is too large Load Diff

View File

@@ -1,126 +0,0 @@
!
! ***** BEGIN LICENSE BLOCK *****
! Version: MPL 1.1/GPL 2.0/LGPL 2.1
!
! The contents of this file are subject to the Mozilla Public License Version
! 1.1 (the "License"); you may not use this file except in compliance with
! the License. You may obtain a copy of the License at
! http://www.mozilla.org/MPL/
!
! Software distributed under the License is distributed on an "AS IS" basis,
! WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
! for the specific language governing rights and limitations under the
! License.
!
! The Original Code is inline macros for SPARC Montgomery multiply functions.
!
! The Initial Developer of the Original Code is
! Sun Microsystems Inc.
! Portions created by the Initial Developer are Copyright (C) 1999-2000
! the Initial Developer. All Rights Reserved.
!
! Contributor(s):
!
! Alternatively, the contents of this file may be used under the terms of
! either the GNU General Public License Version 2 or later (the "GPL"), or
! the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
! in which case the provisions of the GPL or the LGPL are applicable instead
! of those above. If you wish to allow use of your version of this file only
! under the terms of either the GPL or the LGPL, and not to allow others to
! use your version of this file under the terms of the MPL, indicate your
! decision by deleting the provisions above and replace them with the notice
! and other provisions required by the GPL or the LGPL. If you do not delete
! the provisions above, a recipient may use your version of this file under
! the terms of any one of the MPL, the GPL or the LGPL.
!
! ***** END LICENSE BLOCK *****
! $Id: montmulfv9.il,v 1.3 2004-04-27 23:04:36 gerv%gerv.net Exp $
!
! double upper32(double /*frs1*/);
!
.inline upper32,8
fdtox %f0,%f10
fitod %f10,%f0
.end
!
! double lower32(double /*frs1*/, double /* Zero */);
!
.inline lower32,8
fdtox %f0,%f10
fmovs %f2,%f10
fxtod %f10,%f0
.end
!
! double mod(double /*x*/, double /*1/m*/, double /*m*/);
!
.inline mod,12
fmuld %f0,%f2,%f2
fdtox %f2,%f2
fxtod %f2,%f2
fmuld %f2,%f4,%f2
fsubd %f0,%f2,%f0
.end
!
! void i16_to_d16_and_d32x4(double * /*1/(2^16)*/, double * /* 2^16*/,
! double * /* 0 */,
! double * /*result16*/, double * /* result32 */
! float * /*source - should be unsigned int*
! converted to float* */);
!
.inline i16_to_d16_and_d32x4,24
ldd [%o0],%f2 ! 1/(2^16)
ldd [%o1],%f4 ! 2^16
ldd [%o2],%f22
fmovd %f22,%f6
ld [%o5],%f7
fmovd %f22,%f10
ld [%o5+4],%f11
fmovd %f22,%f14
ld [%o5+8],%f15
fmovd %f22,%f18
ld [%o5+12],%f19
fxtod %f6,%f6
std %f6,[%o4]
fxtod %f10,%f10
std %f10,[%o4+8]
fxtod %f14,%f14
std %f14,[%o4+16]
fxtod %f18,%f18
std %f18,[%o4+24]
fmuld %f2,%f6,%f8
fmuld %f2,%f10,%f12
fmuld %f2,%f14,%f16
fmuld %f2,%f18,%f20
fdtox %f8,%f8
fdtox %f12,%f12
fdtox %f16,%f16
fdtox %f20,%f20
fxtod %f8,%f8
std %f8,[%o3+8]
fxtod %f12,%f12
std %f12,[%o3+24]
fxtod %f16,%f16
std %f16,[%o3+40]
fxtod %f20,%f20
std %f20,[%o3+56]
fmuld %f8,%f4,%f8
fmuld %f12,%f4,%f12
fmuld %f16,%f4,%f16
fmuld %f20,%f4,%f20
fsubd %f6,%f8,%f8
std %f8,[%o3]
fsubd %f10,%f12,%f12
std %f12,[%o3+16]
fsubd %f14,%f16,%f16
std %f16,[%o3+32]
fsubd %f18,%f20,%f20
std %f20,[%o3+48]
.end

File diff suppressed because it is too large Load Diff

View File

@@ -1,102 +0,0 @@
/*
* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is the Multi-precision Binary Polynomial Arithmetic Library.
*
* The Initial Developer of the Original Code is
* Sun Microsystems, Inc.
* Portions created by the Initial Developer are Copyright (C) 2003
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Sheueling Chang Shantz <sheueling.chang@sun.com> and
* Douglas Stebila <douglas@stebila.ca> of Sun Laboratories.
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#ifndef _MP_GF2M_PRIV_H_
#define _MP_GF2M_PRIV_H_
#include "mpi-priv.h"
extern const mp_digit mp_gf2m_sqr_tb[16];
#if defined(MP_USE_UINT_DIGIT)
#define MP_DIGIT_BITS 32
#else
#define MP_DIGIT_BITS 64
#endif
/* Platform-specific macros for fast binary polynomial squaring. */
#if MP_DIGIT_BITS == 32
#define gf2m_SQR1(w) \
mp_gf2m_sqr_tb[(w) >> 28 & 0xF] << 24 | mp_gf2m_sqr_tb[(w) >> 24 & 0xF] << 16 | \
mp_gf2m_sqr_tb[(w) >> 20 & 0xF] << 8 | mp_gf2m_sqr_tb[(w) >> 16 & 0xF]
#define gf2m_SQR0(w) \
mp_gf2m_sqr_tb[(w) >> 12 & 0xF] << 24 | mp_gf2m_sqr_tb[(w) >> 8 & 0xF] << 16 | \
mp_gf2m_sqr_tb[(w) >> 4 & 0xF] << 8 | mp_gf2m_sqr_tb[(w) & 0xF]
#else
#define gf2m_SQR1(w) \
mp_gf2m_sqr_tb[(w) >> 60 & 0xF] << 56 | mp_gf2m_sqr_tb[(w) >> 56 & 0xF] << 48 | \
mp_gf2m_sqr_tb[(w) >> 52 & 0xF] << 40 | mp_gf2m_sqr_tb[(w) >> 48 & 0xF] << 32 | \
mp_gf2m_sqr_tb[(w) >> 44 & 0xF] << 24 | mp_gf2m_sqr_tb[(w) >> 40 & 0xF] << 16 | \
mp_gf2m_sqr_tb[(w) >> 36 & 0xF] << 8 | mp_gf2m_sqr_tb[(w) >> 32 & 0xF]
#define gf2m_SQR0(w) \
mp_gf2m_sqr_tb[(w) >> 28 & 0xF] << 56 | mp_gf2m_sqr_tb[(w) >> 24 & 0xF] << 48 | \
mp_gf2m_sqr_tb[(w) >> 20 & 0xF] << 40 | mp_gf2m_sqr_tb[(w) >> 16 & 0xF] << 32 | \
mp_gf2m_sqr_tb[(w) >> 12 & 0xF] << 24 | mp_gf2m_sqr_tb[(w) >> 8 & 0xF] << 16 | \
mp_gf2m_sqr_tb[(w) >> 4 & 0xF] << 8 | mp_gf2m_sqr_tb[(w) & 0xF]
#endif
/* Multiply two binary polynomials mp_digits a, b.
* Result is a polynomial with degree < 2 * MP_DIGIT_BITS - 1.
* Output in two mp_digits rh, rl.
*/
void s_bmul_1x1(mp_digit *rh, mp_digit *rl, const mp_digit a, const mp_digit b);
/* Compute xor-multiply of two binary polynomials (a1, a0) x (b1, b0)
* result is a binary polynomial in 4 mp_digits r[4].
* The caller MUST ensure that r has the right amount of space allocated.
*/
void s_bmul_2x2(mp_digit *r, const mp_digit a1, const mp_digit a0, const mp_digit b1,
const mp_digit b0);
/* Compute xor-multiply of two binary polynomials (a2, a1, a0) x (b2, b1, b0)
* result is a binary polynomial in 6 mp_digits r[6].
* The caller MUST ensure that r has the right amount of space allocated.
*/
void s_bmul_3x3(mp_digit *r, const mp_digit a2, const mp_digit a1, const mp_digit a0,
const mp_digit b2, const mp_digit b1, const mp_digit b0);
/* Compute xor-multiply of two binary polynomials (a3, a2, a1, a0) x (b3, b2, b1, b0)
* result is a binary polynomial in 8 mp_digits r[8].
* The caller MUST ensure that r has the right amount of space allocated.
*/
void s_bmul_4x4(mp_digit *r, const mp_digit a3, const mp_digit a2, const mp_digit a1,
const mp_digit a0, const mp_digit b3, const mp_digit b2, const mp_digit b1,
const mp_digit b0);
#endif /* _MP_GF2M_PRIV_H_ */

View File

@@ -1,600 +0,0 @@
/*
* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is the Multi-precision Binary Polynomial Arithmetic Library.
*
* The Initial Developer of the Original Code is
* Sun Microsystems, Inc.
* Portions created by the Initial Developer are Copyright (C) 2003
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Sheueling Chang Shantz <sheueling.chang@sun.com> and
* Douglas Stebila <douglas@stebila.ca> of Sun Laboratories.
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include "mp_gf2m.h"
#include "mp_gf2m-priv.h"
#include "mplogic.h"
#include "mpi-priv.h"
const mp_digit mp_gf2m_sqr_tb[16] =
{
0, 1, 4, 5, 16, 17, 20, 21,
64, 65, 68, 69, 80, 81, 84, 85
};
/* Multiply two binary polynomials mp_digits a, b.
* Result is a polynomial with degree < 2 * MP_DIGIT_BITS - 1.
* Output in two mp_digits rh, rl.
*/
#if MP_DIGIT_BITS == 32
void
s_bmul_1x1(mp_digit *rh, mp_digit *rl, const mp_digit a, const mp_digit b)
{
register mp_digit h, l, s;
mp_digit tab[8], top2b = a >> 30;
register mp_digit a1, a2, a4;
a1 = a & (0x3FFFFFFF); a2 = a1 << 1; a4 = a2 << 1;
tab[0] = 0; tab[1] = a1; tab[2] = a2; tab[3] = a1^a2;
tab[4] = a4; tab[5] = a1^a4; tab[6] = a2^a4; tab[7] = a1^a2^a4;
s = tab[b & 0x7]; l = s;
s = tab[b >> 3 & 0x7]; l ^= s << 3; h = s >> 29;
s = tab[b >> 6 & 0x7]; l ^= s << 6; h ^= s >> 26;
s = tab[b >> 9 & 0x7]; l ^= s << 9; h ^= s >> 23;
s = tab[b >> 12 & 0x7]; l ^= s << 12; h ^= s >> 20;
s = tab[b >> 15 & 0x7]; l ^= s << 15; h ^= s >> 17;
s = tab[b >> 18 & 0x7]; l ^= s << 18; h ^= s >> 14;
s = tab[b >> 21 & 0x7]; l ^= s << 21; h ^= s >> 11;
s = tab[b >> 24 & 0x7]; l ^= s << 24; h ^= s >> 8;
s = tab[b >> 27 & 0x7]; l ^= s << 27; h ^= s >> 5;
s = tab[b >> 30 ]; l ^= s << 30; h ^= s >> 2;
/* compensate for the top two bits of a */
if (top2b & 01) { l ^= b << 30; h ^= b >> 2; }
if (top2b & 02) { l ^= b << 31; h ^= b >> 1; }
*rh = h; *rl = l;
}
#else
void
s_bmul_1x1(mp_digit *rh, mp_digit *rl, const mp_digit a, const mp_digit b)
{
register mp_digit h, l, s;
mp_digit tab[16], top3b = a >> 61;
register mp_digit a1, a2, a4, a8;
a1 = a & (0x1FFFFFFFFFFFFFFF); a2 = a1 << 1;
a4 = a2 << 1; a8 = a4 << 1;
tab[ 0] = 0; tab[ 1] = a1; tab[ 2] = a2; tab[ 3] = a1^a2;
tab[ 4] = a4; tab[ 5] = a1^a4; tab[ 6] = a2^a4; tab[ 7] = a1^a2^a4;
tab[ 8] = a8; tab[ 9] = a1^a8; tab[10] = a2^a8; tab[11] = a1^a2^a8;
tab[12] = a4^a8; tab[13] = a1^a4^a8; tab[14] = a2^a4^a8; tab[15] = a1^a2^a4^a8;
s = tab[b & 0xF]; l = s;
s = tab[b >> 4 & 0xF]; l ^= s << 4; h = s >> 60;
s = tab[b >> 8 & 0xF]; l ^= s << 8; h ^= s >> 56;
s = tab[b >> 12 & 0xF]; l ^= s << 12; h ^= s >> 52;
s = tab[b >> 16 & 0xF]; l ^= s << 16; h ^= s >> 48;
s = tab[b >> 20 & 0xF]; l ^= s << 20; h ^= s >> 44;
s = tab[b >> 24 & 0xF]; l ^= s << 24; h ^= s >> 40;
s = tab[b >> 28 & 0xF]; l ^= s << 28; h ^= s >> 36;
s = tab[b >> 32 & 0xF]; l ^= s << 32; h ^= s >> 32;
s = tab[b >> 36 & 0xF]; l ^= s << 36; h ^= s >> 28;
s = tab[b >> 40 & 0xF]; l ^= s << 40; h ^= s >> 24;
s = tab[b >> 44 & 0xF]; l ^= s << 44; h ^= s >> 20;
s = tab[b >> 48 & 0xF]; l ^= s << 48; h ^= s >> 16;
s = tab[b >> 52 & 0xF]; l ^= s << 52; h ^= s >> 12;
s = tab[b >> 56 & 0xF]; l ^= s << 56; h ^= s >> 8;
s = tab[b >> 60 ]; l ^= s << 60; h ^= s >> 4;
/* compensate for the top three bits of a */
if (top3b & 01) { l ^= b << 61; h ^= b >> 3; }
if (top3b & 02) { l ^= b << 62; h ^= b >> 2; }
if (top3b & 04) { l ^= b << 63; h ^= b >> 1; }
*rh = h; *rl = l;
}
#endif
/* Compute xor-multiply of two binary polynomials (a1, a0) x (b1, b0)
* result is a binary polynomial in 4 mp_digits r[4].
* The caller MUST ensure that r has the right amount of space allocated.
*/
void
s_bmul_2x2(mp_digit *r, const mp_digit a1, const mp_digit a0, const mp_digit b1,
const mp_digit b0)
{
mp_digit m1, m0;
/* r[3] = h1, r[2] = h0; r[1] = l1; r[0] = l0 */
s_bmul_1x1(r+3, r+2, a1, b1);
s_bmul_1x1(r+1, r, a0, b0);
s_bmul_1x1(&m1, &m0, a0 ^ a1, b0 ^ b1);
/* Correction on m1 ^= l1 ^ h1; m0 ^= l0 ^ h0; */
r[2] ^= m1 ^ r[1] ^ r[3]; /* h0 ^= m1 ^ l1 ^ h1; */
r[1] = r[3] ^ r[2] ^ r[0] ^ m1 ^ m0; /* l1 ^= l0 ^ h0 ^ m0; */
}
/* Compute xor-multiply of two binary polynomials (a2, a1, a0) x (b2, b1, b0)
* result is a binary polynomial in 6 mp_digits r[6].
* The caller MUST ensure that r has the right amount of space allocated.
*/
void
s_bmul_3x3(mp_digit *r, const mp_digit a2, const mp_digit a1, const mp_digit a0,
const mp_digit b2, const mp_digit b1, const mp_digit b0)
{
mp_digit zm[4];
s_bmul_1x1(r+5, r+4, a2, b2); /* fill top 2 words */
s_bmul_2x2(zm, a1, a2^a0, b1, b2^b0); /* fill middle 4 words */
s_bmul_2x2(r, a1, a0, b1, b0); /* fill bottom 4 words */
zm[3] ^= r[3];
zm[2] ^= r[2];
zm[1] ^= r[1] ^ r[5];
zm[0] ^= r[0] ^ r[4];
r[5] ^= zm[3];
r[4] ^= zm[2];
r[3] ^= zm[1];
r[2] ^= zm[0];
}
/* Compute xor-multiply of two binary polynomials (a3, a2, a1, a0) x (b3, b2, b1, b0)
* result is a binary polynomial in 8 mp_digits r[8].
* The caller MUST ensure that r has the right amount of space allocated.
*/
void s_bmul_4x4(mp_digit *r, const mp_digit a3, const mp_digit a2, const mp_digit a1,
const mp_digit a0, const mp_digit b3, const mp_digit b2, const mp_digit b1,
const mp_digit b0)
{
mp_digit zm[4];
s_bmul_2x2(r+4, a3, a2, b3, b2); /* fill top 4 words */
s_bmul_2x2(zm, a3^a1, a2^a0, b3^b1, b2^b0); /* fill middle 4 words */
s_bmul_2x2(r, a1, a0, b1, b0); /* fill bottom 4 words */
zm[3] ^= r[3] ^ r[7];
zm[2] ^= r[2] ^ r[6];
zm[1] ^= r[1] ^ r[5];
zm[0] ^= r[0] ^ r[4];
r[5] ^= zm[3];
r[4] ^= zm[2];
r[3] ^= zm[1];
r[2] ^= zm[0];
}
/* Compute addition of two binary polynomials a and b,
* store result in c; c could be a or b, a and b could be equal;
* c is the bitwise XOR of a and b.
*/
mp_err
mp_badd(const mp_int *a, const mp_int *b, mp_int *c)
{
mp_digit *pa, *pb, *pc;
mp_size ix;
mp_size used_pa, used_pb;
mp_err res = MP_OKAY;
/* Add all digits up to the precision of b. If b had more
* precision than a initially, swap a, b first
*/
if (MP_USED(a) >= MP_USED(b)) {
pa = MP_DIGITS(a);
pb = MP_DIGITS(b);
used_pa = MP_USED(a);
used_pb = MP_USED(b);
} else {
pa = MP_DIGITS(b);
pb = MP_DIGITS(a);
used_pa = MP_USED(b);
used_pb = MP_USED(a);
}
/* Make sure c has enough precision for the output value */
MP_CHECKOK( s_mp_pad(c, used_pa) );
/* Do word-by-word xor */
pc = MP_DIGITS(c);
for (ix = 0; ix < used_pb; ix++) {
(*pc++) = (*pa++) ^ (*pb++);
}
/* Finish the rest of digits until we're actually done */
for (; ix < used_pa; ++ix) {
*pc++ = *pa++;
}
MP_USED(c) = used_pa;
MP_SIGN(c) = ZPOS;
s_mp_clamp(c);
CLEANUP:
return res;
}
#define s_mp_div2(a) MP_CHECKOK( mpl_rsh((a), (a), 1) );
/* Compute binary polynomial multiply d = a * b */
static void
s_bmul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *d)
{
mp_digit a_i, a0b0, a1b1, carry = 0;
while (a_len--) {
a_i = *a++;
s_bmul_1x1(&a1b1, &a0b0, a_i, b);
*d++ = a0b0 ^ carry;
carry = a1b1;
}
*d = carry;
}
/* Compute binary polynomial xor multiply accumulate d ^= a * b */
static void
s_bmul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *d)
{
mp_digit a_i, a0b0, a1b1, carry = 0;
while (a_len--) {
a_i = *a++;
s_bmul_1x1(&a1b1, &a0b0, a_i, b);
*d++ ^= a0b0 ^ carry;
carry = a1b1;
}
*d ^= carry;
}
/* Compute binary polynomial xor multiply c = a * b.
* All parameters may be identical.
*/
mp_err
mp_bmul(const mp_int *a, const mp_int *b, mp_int *c)
{
mp_digit *pb, b_i;
mp_int tmp;
mp_size ib, a_used, b_used;
mp_err res = MP_OKAY;
MP_DIGITS(&tmp) = 0;
ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
if (a == c) {
MP_CHECKOK( mp_init_copy(&tmp, a) );
if (a == b)
b = &tmp;
a = &tmp;
} else if (b == c) {
MP_CHECKOK( mp_init_copy(&tmp, b) );
b = &tmp;
}
if (MP_USED(a) < MP_USED(b)) {
const mp_int *xch = b; /* switch a and b if b longer */
b = a;
a = xch;
}
MP_USED(c) = 1; MP_DIGIT(c, 0) = 0;
MP_CHECKOK( s_mp_pad(c, USED(a) + USED(b)) );
pb = MP_DIGITS(b);
s_bmul_d(MP_DIGITS(a), MP_USED(a), *pb++, MP_DIGITS(c));
/* Outer loop: Digits of b */
a_used = MP_USED(a);
b_used = MP_USED(b);
MP_USED(c) = a_used + b_used;
for (ib = 1; ib < b_used; ib++) {
b_i = *pb++;
/* Inner product: Digits of a */
if (b_i)
s_bmul_d_add(MP_DIGITS(a), a_used, b_i, MP_DIGITS(c) + ib);
else
MP_DIGIT(c, ib + a_used) = b_i;
}
s_mp_clamp(c);
SIGN(c) = ZPOS;
CLEANUP:
mp_clear(&tmp);
return res;
}
/* Compute modular reduction of a and store result in r.
* r could be a.
* For modular arithmetic, the irreducible polynomial f(t) is represented
* as an array of int[], where f(t) is of the form:
* f(t) = t^p[0] + t^p[1] + ... + t^p[k]
* where m = p[0] > p[1] > ... > p[k] = 0.
*/
mp_err
mp_bmod(const mp_int *a, const unsigned int p[], mp_int *r)
{
int j, k;
int n, dN, d0, d1;
mp_digit zz, *z, tmp;
mp_size used;
mp_err res = MP_OKAY;
/* The algorithm does the reduction in place in r,
* if a != r, copy a into r first so reduction can be done in r
*/
if (a != r) {
MP_CHECKOK( mp_copy(a, r) );
}
z = MP_DIGITS(r);
/* start reduction */
dN = p[0] / MP_DIGIT_BITS;
used = MP_USED(r);
for (j = used - 1; j > dN;) {
zz = z[j];
if (zz == 0) {
j--; continue;
}
z[j] = 0;
for (k = 1; p[k] > 0; k++) {
/* reducing component t^p[k] */
n = p[0] - p[k];
d0 = n % MP_DIGIT_BITS;
d1 = MP_DIGIT_BITS - d0;
n /= MP_DIGIT_BITS;
z[j-n] ^= (zz>>d0);
if (d0)
z[j-n-1] ^= (zz<<d1);
}
/* reducing component t^0 */
n = dN;
d0 = p[0] % MP_DIGIT_BITS;
d1 = MP_DIGIT_BITS - d0;
z[j-n] ^= (zz >> d0);
if (d0)
z[j-n-1] ^= (zz << d1);
}
/* final round of reduction */
while (j == dN) {
d0 = p[0] % MP_DIGIT_BITS;
zz = z[dN] >> d0;
if (zz == 0) break;
d1 = MP_DIGIT_BITS - d0;
/* clear up the top d1 bits */
if (d0) z[dN] = (z[dN] << d1) >> d1;
*z ^= zz; /* reduction t^0 component */
for (k = 1; p[k] > 0; k++) {
/* reducing component t^p[k]*/
n = p[k] / MP_DIGIT_BITS;
d0 = p[k] % MP_DIGIT_BITS;
d1 = MP_DIGIT_BITS - d0;
z[n] ^= (zz << d0);
tmp = zz >> d1;
if (d0 && tmp)
z[n+1] ^= tmp;
}
}
s_mp_clamp(r);
CLEANUP:
return res;
}
/* Compute the product of two polynomials a and b, reduce modulo p,
* Store the result in r. r could be a or b; a could be b.
*/
mp_err
mp_bmulmod(const mp_int *a, const mp_int *b, const unsigned int p[], mp_int *r)
{
mp_err res;
if (a == b) return mp_bsqrmod(a, p, r);
if ((res = mp_bmul(a, b, r) ) != MP_OKAY)
return res;
return mp_bmod(r, p, r);
}
/* Compute binary polynomial squaring c = a*a mod p .
* Parameter r and a can be identical.
*/
mp_err
mp_bsqrmod(const mp_int *a, const unsigned int p[], mp_int *r)
{
mp_digit *pa, *pr, a_i;
mp_int tmp;
mp_size ia, a_used;
mp_err res;
ARGCHK(a != NULL && r != NULL, MP_BADARG);
MP_DIGITS(&tmp) = 0;
if (a == r) {
MP_CHECKOK( mp_init_copy(&tmp, a) );
a = &tmp;
}
MP_USED(r) = 1; MP_DIGIT(r, 0) = 0;
MP_CHECKOK( s_mp_pad(r, 2*USED(a)) );
pa = MP_DIGITS(a);
pr = MP_DIGITS(r);
a_used = MP_USED(a);
MP_USED(r) = 2 * a_used;
for (ia = 0; ia < a_used; ia++) {
a_i = *pa++;
*pr++ = gf2m_SQR0(a_i);
*pr++ = gf2m_SQR1(a_i);
}
MP_CHECKOK( mp_bmod(r, p, r) );
s_mp_clamp(r);
SIGN(r) = ZPOS;
CLEANUP:
mp_clear(&tmp);
return res;
}
/* Compute binary polynomial y/x mod p, y divided by x, reduce modulo p.
* Store the result in r. r could be x or y, and x could equal y.
* Uses algorithm Modular_Division_GF(2^m) from
* Chang-Shantz, S. "From Euclid's GCD to Montgomery Multiplication to
* the Great Divide".
*/
int
mp_bdivmod(const mp_int *y, const mp_int *x, const mp_int *pp,
const unsigned int p[], mp_int *r)
{
mp_int aa, bb, uu;
mp_int *a, *b, *u, *v;
mp_err res = MP_OKAY;
MP_DIGITS(&aa) = 0;
MP_DIGITS(&bb) = 0;
MP_DIGITS(&uu) = 0;
MP_CHECKOK( mp_init_copy(&aa, x) );
MP_CHECKOK( mp_init_copy(&uu, y) );
MP_CHECKOK( mp_init_copy(&bb, pp) );
MP_CHECKOK( s_mp_pad(r, USED(pp)) );
MP_USED(r) = 1; MP_DIGIT(r, 0) = 0;
a = &aa; b= &bb; u=&uu; v=r;
/* reduce x and y mod p */
MP_CHECKOK( mp_bmod(a, p, a) );
MP_CHECKOK( mp_bmod(u, p, u) );
while (!mp_isodd(a)) {
s_mp_div2(a);
if (mp_isodd(u)) {
MP_CHECKOK( mp_badd(u, pp, u) );
}
s_mp_div2(u);
}
do {
if (mp_cmp_mag(b, a) > 0) {
MP_CHECKOK( mp_badd(b, a, b) );
MP_CHECKOK( mp_badd(v, u, v) );
do {
s_mp_div2(b);
if (mp_isodd(v)) {
MP_CHECKOK( mp_badd(v, pp, v) );
}
s_mp_div2(v);
} while (!mp_isodd(b));
}
else if ((MP_DIGIT(a,0) == 1) && (MP_USED(a) == 1))
break;
else {
MP_CHECKOK( mp_badd(a, b, a) );
MP_CHECKOK( mp_badd(u, v, u) );
do {
s_mp_div2(a);
if (mp_isodd(u)) {
MP_CHECKOK( mp_badd(u, pp, u) );
}
s_mp_div2(u);
} while (!mp_isodd(a));
}
} while (1);
MP_CHECKOK( mp_copy(u, r) );
CLEANUP:
return res;
}
/* Convert the bit-string representation of a polynomial a into an array
* of integers corresponding to the bits with non-zero coefficient.
* Up to max elements of the array will be filled. Return value is total
* number of coefficients that would be extracted if array was large enough.
*/
int
mp_bpoly2arr(const mp_int *a, unsigned int p[], int max)
{
int i, j, k;
mp_digit top_bit, mask;
top_bit = 1;
top_bit <<= MP_DIGIT_BIT - 1;
for (k = 0; k < max; k++) p[k] = 0;
k = 0;
for (i = MP_USED(a) - 1; i >= 0; i--) {
mask = top_bit;
for (j = MP_DIGIT_BIT - 1; j >= 0; j--) {
if (MP_DIGITS(a)[i] & mask) {
if (k < max) p[k] = MP_DIGIT_BIT * i + j;
k++;
}
mask >>= 1;
}
}
return k;
}
/* Convert the coefficient array representation of a polynomial to a
* bit-string. The array must be terminated by 0.
*/
mp_err
mp_barr2poly(const unsigned int p[], mp_int *a)
{
mp_err res = MP_OKAY;
int i;
mp_zero(a);
for (i = 0; p[i] > 0; i++) {
MP_CHECKOK( mpl_set_bit(a, p[i], 1) );
}
MP_CHECKOK( mpl_set_bit(a, 0, 1) );
CLEANUP:
return res;
}

View File

@@ -1,63 +0,0 @@
/*
* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is the Multi-precision Binary Polynomial Arithmetic Library.
*
* The Initial Developer of the Original Code is
* Sun Microsystems, Inc.
* Portions created by the Initial Developer are Copyright (C) 2003
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Sheueling Chang Shantz <sheueling.chang@sun.com> and
* Douglas Stebila <douglas@stebila.ca> of Sun Laboratories.
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#ifndef _MP_GF2M_H_
#define _MP_GF2M_H_
#include "mpi.h"
mp_err mp_badd(const mp_int *a, const mp_int *b, mp_int *c);
mp_err mp_bmul(const mp_int *a, const mp_int *b, mp_int *c);
/* For modular arithmetic, the irreducible polynomial f(t) is represented
* as an array of int[], where f(t) is of the form:
* f(t) = t^p[0] + t^p[1] + ... + t^p[k]
* where m = p[0] > p[1] > ... > p[k] = 0.
*/
mp_err mp_bmod(const mp_int *a, const unsigned int p[], mp_int *r);
mp_err mp_bmulmod(const mp_int *a, const mp_int *b, const unsigned int p[],
mp_int *r);
mp_err mp_bsqrmod(const mp_int *a, const unsigned int p[], mp_int *r);
mp_err mp_bdivmod(const mp_int *y, const mp_int *x, const mp_int *pp,
const unsigned int p[], mp_int *r);
int mp_bpoly2arr(const mp_int *a, unsigned int p[], int max);
mp_err mp_barr2poly(const unsigned int p[], mp_int *a);
#endif /* _MP_GF2M_H_ */

View File

@@ -1,760 +0,0 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is the Netscape security libraries.
*
* The Initial Developer of the Original Code is
* Red Hat, Inc
* Portions created by the Initial Developer are Copyright (C) 2005
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Robert Relyea <rrelyea@redhat.com>
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include "mpi.h"
/*
* This file implements a single function: mpi_getProcessorLineSize();
* mpi_getProcessorLineSize() returns the size in bytes of the cache line
* if a cache exists, or zero if there is no cache. If more than one
* cache line exists, it should return the smallest line size (which is
* usually the L1 cache).
*
* mp_modexp uses this information to make sure that private key information
* isn't being leaked through the cache.
*
* Currently the file returns good data for most modern x86 processors, and
* reasonable data on 64-bit ppc processors. All other processors are assumed
* to have a cache line size of 32 bytes unless modified by target.mk.
*
*/
#if defined(i386) || defined(__i386) || defined(__X86__) || defined (_M_IX86)
/* X86 processors have special instructions that tell us about the cache */
#include "string.h"
/* Generic CPUID function */
#ifndef _WIN32
static void cpuid(unsigned long op, unsigned long *eax,
unsigned long *ebx, unsigned long *ecx,
unsigned long *edx)
{
/* sigh GCC isn't smart enough to save the ebx PIC register on it's own
* in this case, so do it by hand. */
__asm__("pushl %%ebx\n\t"
"cpuid\n\t"
"mov %%ebx,%1\n\t"
"popl %%ebx\n\t"
: "=a" (*eax),
"=r" (*ebx),
"=c" (*ecx),
"=d" (*edx)
: "0" (op));
}
/*
* try flipping a processor flag to determine CPU type
*/
static unsigned long changeFlag(unsigned long flag)
{
unsigned long changedFlags, originalFlags;
__asm__("pushfl\n\t" /* get the flags */
"popl %0\n\t"
"movl %0,%1\n\t" /* save the original flags */
"xor %0,%2\n\t" /* flip the but */
"pushl %0\n\t" /* set the flags */
"popfl\n\t"
"pushfl\n\t" /* get the flags again (for return) */
"popl %0\n\t"
"pushl %1\n\t" /* restore the original flags */
"popfl\n\t"
: "=r" (changedFlags),
"=r" (originalFlags)
: "r" (flag));
return changedFlags ^ originalFlags;
}
#else
/*
* windows versions of the above assembler
*/
#define wcpuid __asm __emit 0fh __asm __emit 0a2h
static void cpuid(unsigned long op, unsigned long *Reax, unsigned long *Rebx,
unsigned long *Recx, unsigned long *Redx)
{
unsigned long Leax, Lebx, Lecx, Ledx;
__asm {
pushad
mov eax,op
wcpuid
mov Leax,eax
mov Lebx,ebx
mov Lecx,ecx
mov Ledx,edx
popad
}
*Reax = Leax;
*Rebx = Lebx;
*Recx = Lecx;
*Redx = Ledx;
}
static unsigned long changeFlag(unsigned long flag)
{
unsigned long changedFlags, originalFlags;
__asm {
pushad
pushfd /* get the flags */
pop eax
mov ecx,eax /* save the original flags */
mov originalFlags,ecx /* save the original flags */
mov ebx,flag
xor eax,ebx /* flip the but */
push eax /* set the flags */
popfd
pushfd /* get the flags again (for return) */
pop eax
push ecx /* restore the original flags */
popfd
mov changedFlags,eax
popad
}
return changedFlags ^ originalFlags;
}
#endif
#define AC_FLAG 0x40000
#define ID_FLAG 0x200000
/* 386 processors can't flip the AC_FLAG, intel AP Note AP-485 */
static int is386()
{
return changeFlag(AC_FLAG) == 0;
}
/* 486 processors can't flip the ID_FLAG, intel AP Note AP-485 */
static int is486()
{
return changeFlag(ID_FLAG) == 0;
}
/*
* table for Intel Cache.
* See Intel Application Note AP-485 for more information
*/
typedef enum {
Cache_NONE = 0,
Cache_UNKNOWN = 1,
Cache_TLB = 2,
Cache_Trace = 3,
Cache_L1 = 4,
Cache_L2 = 5 ,
Cache_L3 = 6
} CacheType;
#define DATA_INSTR 1
#define DATA_DATA 2
#define DATA_BOTH 3
#define DATA_TRACE 4
#define DATA_NONE 0
#define TLB_4k 0x01
#define TLB_2M 0x08
#define TLB_4M 0x10
#define TLB_4Mk 0x11
#define TLB_ALL 0x19
#define k * 1024
#define M * (1024*1024)
#define G * (1024*1024*1024)
struct _cache {
CacheType type;
unsigned long data;
#define pageSize size
#define trcuops size
unsigned long size;
unsigned long association;
#define tlbEntries lineSize
unsigned long lineSize;
} CacheMap[] = {
/* 00 */ {Cache_NONE, DATA_NONE, 0, 0, 0 },
/* 01 */ {Cache_TLB, DATA_INSTR, TLB_4k, 4, 32 },
/* 02 */ {Cache_TLB, DATA_INSTR, TLB_4M, 0, 2 },
/* 03 */ {Cache_TLB, DATA_DATA, TLB_4k, 4, 64 },
/* 04 */ {Cache_TLB, DATA_DATA, TLB_4M, 4, 8 },
/* 05 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 06 */ {Cache_L1, DATA_INSTR, 8 k, 4, 32 },
/* 07 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 08 */ {Cache_L1, DATA_INSTR, 16 k, 4, 32 },
/* 09 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 0a */ {Cache_L1, DATA_DATA, 8 k, 4, 32 },
/* 0b */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 0c */ {Cache_L1, DATA_DATA, 16 k, 4, 32 },
/* 0d */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 0e */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 0f */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 10 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 11 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 12 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 13 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 14 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 15 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 16 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 17 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 18 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 19 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 1a */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 1b */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 1c */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 1d */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 1e */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 1f */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 20 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 21 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 22 */ {Cache_L3, DATA_BOTH, 512 k, 8, 64 },
/* 23 */ {Cache_L3, DATA_BOTH, 1 M, 8, 64 },
/* 24 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 25 */ {Cache_L3, DATA_BOTH, 2 M, 8, 64 },
/* 26 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 27 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 28 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 29 */ {Cache_L3, DATA_BOTH, 4 M, 8, 64 },
/* 2a */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 2b */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 2c */ {Cache_L1, DATA_DATA, 32 k, 8, 64 },
/* 2d */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 2e */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 2f */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 30 */ {Cache_L1, DATA_INSTR, 32 k, 8, 64 },
/* 31 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 32 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 33 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 34 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 35 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 36 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 37 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 38 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 39 */ {Cache_L2, DATA_BOTH, 128 k, 4, 64 },
/* 3a */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 3b */ {Cache_L2, DATA_BOTH, 128 k, 2, 64 },
/* 3c */ {Cache_L2, DATA_BOTH, 256 k, 4, 64 },
/* 3d */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 3e */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 3f */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 40 */ {Cache_L2, DATA_NONE, 0, 0, 0 },
/* 41 */ {Cache_L2, DATA_BOTH, 128 k, 4, 32 },
/* 42 */ {Cache_L2, DATA_BOTH, 256 k, 4, 32 },
/* 43 */ {Cache_L2, DATA_BOTH, 512 k, 4, 32 },
/* 44 */ {Cache_L2, DATA_BOTH, 1 M, 4, 32 },
/* 45 */ {Cache_L2, DATA_BOTH, 2 M, 4, 32 },
/* 46 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 47 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 48 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 49 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 4a */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 4b */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 4c */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 4d */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 4e */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 4f */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 50 */ {Cache_TLB, DATA_INSTR, TLB_ALL, 0, 64 },
/* 51 */ {Cache_TLB, DATA_INSTR, TLB_ALL, 0, 128 },
/* 52 */ {Cache_TLB, DATA_INSTR, TLB_ALL, 0, 256 },
/* 53 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 54 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 55 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 56 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 57 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 58 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 59 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 5a */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 5b */ {Cache_TLB, DATA_DATA, TLB_4Mk, 0, 64 },
/* 5c */ {Cache_TLB, DATA_DATA, TLB_4Mk, 0, 128 },
/* 5d */ {Cache_TLB, DATA_DATA, TLB_4Mk, 0, 256 },
/* 5e */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 5f */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 60 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 61 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 62 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 63 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 64 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 65 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 66 */ {Cache_L1, DATA_DATA, 8 k, 4, 64 },
/* 67 */ {Cache_L1, DATA_DATA, 16 k, 4, 64 },
/* 68 */ {Cache_L1, DATA_DATA, 32 k, 4, 64 },
/* 69 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 6a */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 6b */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 6c */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 6d */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 6e */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 6f */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 70 */ {Cache_Trace, DATA_TRACE, 12 k, 8, 1 },
/* 71 */ {Cache_Trace, DATA_TRACE, 16 k, 8, 1 },
/* 72 */ {Cache_Trace, DATA_TRACE, 32 k, 8, 1 },
/* 73 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 74 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 75 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 76 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 77 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 78 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 79 */ {Cache_L2, DATA_BOTH, 128 k, 8, 64 },
/* 7a */ {Cache_L2, DATA_BOTH, 256 k, 8, 64 },
/* 7b */ {Cache_L2, DATA_BOTH, 512 k, 8, 64 },
/* 7c */ {Cache_L2, DATA_BOTH, 1 M, 8, 64 },
/* 7d */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 7e */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 7f */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 80 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 81 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 82 */ {Cache_L2, DATA_BOTH, 256 k, 8, 32 },
/* 83 */ {Cache_L2, DATA_BOTH, 512 k, 8, 32 },
/* 84 */ {Cache_L2, DATA_BOTH, 1 M, 8, 32 },
/* 85 */ {Cache_L2, DATA_BOTH, 2 M, 8, 32 },
/* 86 */ {Cache_L2, DATA_BOTH, 512 k, 4, 64 },
/* 87 */ {Cache_L2, DATA_BOTH, 1 M, 8, 64 },
/* 88 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 89 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 8a */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 8b */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 8c */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 8d */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 8e */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 8f */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 90 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 91 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 92 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 93 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 94 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 95 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 96 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 97 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 98 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 99 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 9a */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 9b */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 9c */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 9d */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 9e */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* 9f */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* a0 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* a1 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* a2 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* a3 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* a4 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* a5 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* a6 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* a7 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* a8 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* a9 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* aa */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* ab */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* ac */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* ad */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* ae */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* af */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* b0 */ {Cache_TLB, DATA_INSTR, TLB_4k, 4, 128 },
/* b1 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* b2 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* b3 */ {Cache_TLB, DATA_DATA, TLB_4k, 4, 128 },
/* b4 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* b5 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* b6 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* b7 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* b8 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* b9 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* ba */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* bb */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* bc */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* bd */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* be */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* bf */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* c0 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* c1 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* c2 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* c3 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* c4 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* c5 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* c6 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* c7 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* c8 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* c9 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* ca */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* cb */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* cc */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* cd */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* ce */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* cf */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* d0 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* d1 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* d2 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* d3 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* d4 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* d5 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* d6 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* d7 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* d8 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* d9 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* da */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* db */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* dc */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* dd */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* de */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* df */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* e0 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* e1 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* e2 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* e3 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* e4 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* e5 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* e6 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* e7 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* e8 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* e9 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* ea */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* eb */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* ec */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* ed */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* ee */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* ef */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* f0 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* f1 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* f2 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* f3 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* f4 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* f5 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* f6 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* f7 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* f8 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* f9 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* fa */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* fb */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* fc */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* fd */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* fe */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
/* ff */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 }
};
/*
* use the above table to determine the CacheEntryLineSize.
*/
static void
getIntelCacheEntryLineSize(unsigned long val, int *level,
unsigned long *lineSize)
{
CacheType type;
type = CacheMap[val].type;
/* only interested in data caches */
/* NOTE val = 0x40 is a special value that means no L2 or L3 cache.
* this data check has the side effect of rejecting that entry. If
* that wasn't the case, we could have to reject it explicitly */
if ((CacheMap[val].data & DATA_DATA) != DATA_DATA) {
return;
}
/* look at the caches, skip types we aren't interested in.
* if we already have a value for a lower level cache, skip the
* current entry */
if (type == Cache_L1) {
*level = 1;
*lineSize = CacheMap[val].lineSize;
} else if ((*level >= 2) && type == Cache_L2) {
*level = 2;
*lineSize = CacheMap[val].lineSize;
} else if ((*level >= 3) && type == Cache_L3) {
*level = 3;
*lineSize = CacheMap[val].lineSize;
}
return;
}
static void
getIntelRegisterCacheLineSize(unsigned long val,
int *level, unsigned long *lineSize)
{
getIntelCacheEntryLineSize(val >> 24 & 0xff, level, lineSize);
getIntelCacheEntryLineSize(val >> 16 & 0xff, level, lineSize);
getIntelCacheEntryLineSize(val >> 8 & 0xff, level, lineSize);
getIntelCacheEntryLineSize(val & 0xff, level, lineSize);
}
/*
* returns '0' if no recognized cache is found, or if the cache
* information is supported by this processor
*/
static unsigned long
getIntelCacheLineSize(int cpuidLevel)
{
int level = 4;
unsigned long lineSize = 0;
unsigned long eax, ebx, ecx, edx;
int repeat, count;
if (cpuidLevel < 2) {
return 0;
}
/* command '2' of the cpuid is intel's cache info call. Each byte of the
* 4 registers contain a potential descriptor for the cache. The CacheMap
* table maps the cache entry with the processor cache. Register 'al'
* contains a count value that cpuid '2' needs to be called in order to
* find all the cache descriptors. Only registers with the high bit set
* to 'zero' have valid descriptors. This code loops through all the
* required calls to cpuid '2' and passes any valid descriptors it finds
* to the getIntelRegisterCacheLineSize code, which breaks the registers
* down into their component descriptors. In the end the lineSize of the
* lowest level cache data cache is returned. */
cpuid(2, &eax, &ebx, &ecx, &edx);
repeat = eax & 0xf;
for (count = 0; count < repeat; count++) {
if ((eax & 0x80000000) == 0) {
getIntelRegisterCacheLineSize(eax & 0xffffff00, &level, &lineSize);
}
if ((ebx & 0x80000000) == 0) {
getIntelRegisterCacheLineSize(ebx, &level, &lineSize);
}
if ((ecx & 0x80000000) == 0) {
getIntelRegisterCacheLineSize(ecx, &level, &lineSize);
}
if ((edx & 0x80000000) == 0) {
getIntelRegisterCacheLineSize(edx, &level, &lineSize);
}
if (count+1 != repeat) {
cpuid(2, &eax, &ebx, &ecx, &edx);
}
}
return lineSize;
}
/*
* returns '0' if the cache info is not supported by this processor.
* This is based on the AMD extended cache commands for cpuid.
* (see "AMD Processor Recognition Application Note" Publication 20734).
* Some other processors use the identical scheme.
* (see "Processor Recognition, Transmeta Corporation").
*/
static unsigned long
getOtherCacheLineSize(unsigned long cpuidLevel)
{
unsigned long lineSize = 0;
unsigned long eax, ebx, ecx, edx;
/* get the Extended CPUID level */
cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
cpuidLevel = eax;
if (cpuidLevel >= 0x80000005) {
cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
lineSize = ecx & 0xff; /* line Size, L1 Data Cache */
}
return lineSize;
}
char *manMap[] = {
#define INTEL 0
"GenuineIntel",
#define AMD 1
"AuthenticAMD",
#define CYRIX 2
"CyrixInstead",
#define CENTAUR 2
"CentaurHauls",
#define NEXGEN 3
"NexGenDriven",
#define TRANSMETA 4
"GenuineTMx86",
#define RISE 5
"RiseRiseRise",
#define UMC 6
"UMC UMC UMC ",
#define SIS 7
"Sis Sis Sis ",
#define NATIONAL 8
"Geode by NSC",
};
int n_manufacturers = sizeof(manMap)/sizeof(manMap[0]);
#define MAN_UNKNOWN 9
unsigned long
mpi_getProcessorLineSize()
{
unsigned long eax, ebx, ecx, edx;
unsigned long cpuidLevel;
unsigned long cacheLineSize = 0;
int manufacturer = MAN_UNKNOWN;
int i;
char string[65];
if (is386()) {
return 0; /* 386 had no cache */
} if (is486()) {
return 32; /* really? need more info */
}
/* Pentium, cpuid command is available */
cpuid(0, &eax, &ebx, &ecx, &edx);
cpuidLevel = eax;
*(int *)string = ebx;
*(int *)&string[4] = edx;
*(int *)&string[8] = ecx;
string[12] = 0;
manufacturer = MAN_UNKNOWN;
for (i=0; i < n_manufacturers; i++) {
if ( strcmp(manMap[i],string) == 0) {
manufacturer = i;
}
}
if (manufacturer == INTEL) {
cacheLineSize = getIntelCacheLineSize(cpuidLevel);
} else {
cacheLineSize = getOtherCacheLineSize(cpuidLevel);
}
/* doesn't support cache info based on cpuid. This means
* an old pentium class processor, which have cache lines of
* 32. If we learn differently, we can use a switch based on
* the Manufacturer id */
if (cacheLineSize == 0) {
cacheLineSize = 32;
}
return cacheLineSize;
}
#define MPI_GET_PROCESSER_LINE_SIZE_DEFINED 1
#endif
#if defined(__ppc64__)
/*
* Sigh, The PPC has some really nice features to help us determine cache
* size, since it had lots of direct control functions to do so. The POWER
* processor even has an instruction to do this, but it was dropped in
* PowerPC. Unfortunately most of them are not available in user mode.
*
* The dcbz function would be a great way to determine cache line size except
* 1) it only works on write-back memory (it throws an exception otherwise),
* and 2) because so many mac programs 'knew' the processor cache size was
* 32 bytes, they used this instruction as a fast 'zero 32 bytes'. Now the new
* G5 processor has 128 byte cache, but dcbz only clears 32 bytes to keep
* these programs happy. dcbzl work if 64 bit instructions are supported.
* If you know 64 bit instructions are supported, and that stack is
* write-back, you can use this code.
*/
#include "memory.h"
/* clear the cache line that contains 'array' */
static inline void dcbzl(char *array)
{
register char *a asm("r2") = array;
__asm__ __volatile__( "dcbzl %0,r0" : "=r" (a): "0"(a) );
}
#define PPC_DO_ALIGN(x,y) ((char *)\
((((long long) (x))+((y)-1))&~((y)-1)))
#define PPC_MAX_LINE_SIZE 256
unsigned long
mpi_getProcessorLineSize()
{
char testArray[2*PPC_MAX_LINE_SIZE+1];
char *test;
int i;
/* align the array on a maximum line size boundary, so we
* know we are starting to clear from the first address */
test = PPC_DO_ALIGN(testArray, PPC_MAX_LINE_SIZE);
/* set all the values to 1's */
memset(test, 0xff, PPC_MAX_LINE_SIZE);
/* clear one cache block starting at 'test' */
dcbzl(test);
/* find the size of the cleared area, that's our block size */
for (i=PPC_MAX_LINE_SIZE; i != 0; i = i/2) {
if (test[i-1] == 0) {
return i;
}
}
return 0;
}
#define MPI_GET_PROCESSER_LINE_SIZE_DEFINED 1
#endif
/*
* put other processor and platform specific cache code here
* return the smallest cache line size in bytes on the processor
* (usually the L1 cache). If the OS has a call, this would be
* a greate place to put it.
*
* If there is no cache, return 0;
*
* define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED so the generic functions
* below aren't compiled.
*
*/
/* target.mk can define LINE_SIZE if it's common for the family or OS */
#if defined(LINE_SIZE) && !defined(MPI_GET_PROCESSOR_LINE_SIZE_DEFINED)
unsigned long
mpi_getProcessorLineSize()
{
return LINE_SIZE;
}
#define MPI_GET_PROCESSER_LINE_SIZE_DEFINED 1
#endif
/* If no way to get the processor cache line size has been defined, assume
* it's 32 bytes (most common value, does not significantly impact performance
*/
#ifndef MPI_GET_PROCESSER_LINE_SIZE_DEFINED
unsigned long
mpi_getProcessorLineSize()
{
return 32;
}
#endif
#ifdef TEST_IT
#include <stdio.h>
main()
{
printf("line size = %d\n", mpi_getProcessorLineSize());
}
#endif

View File

@@ -1,112 +0,0 @@
/* Default configuration for MPI library
*
* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
*
* The Initial Developer of the Original Code is
* Michael J. Fromberger.
* Portions created by the Initial Developer are Copyright (C) 1997
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Netscape Communications Corporation
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
/* $Id: mpi-config.h,v 1.5 2004-04-25 15:03:10 gerv%gerv.net Exp $ */
#ifndef MPI_CONFIG_H_
#define MPI_CONFIG_H_
/*
For boolean options,
0 = no
1 = yes
Other options are documented individually.
*/
#ifndef MP_IOFUNC
#define MP_IOFUNC 0 /* include mp_print() ? */
#endif
#ifndef MP_MODARITH
#define MP_MODARITH 1 /* include modular arithmetic ? */
#endif
#ifndef MP_NUMTH
#define MP_NUMTH 1 /* include number theoretic functions? */
#endif
#ifndef MP_LOGTAB
#define MP_LOGTAB 1 /* use table of logs instead of log()? */
#endif
#ifndef MP_MEMSET
#define MP_MEMSET 1 /* use memset() to zero buffers? */
#endif
#ifndef MP_MEMCPY
#define MP_MEMCPY 1 /* use memcpy() to copy buffers? */
#endif
#ifndef MP_CRYPTO
#define MP_CRYPTO 1 /* erase memory on free? */
#endif
#ifndef MP_ARGCHK
/*
0 = no parameter checks
1 = runtime checks, continue execution and return an error to caller
2 = assertions; dump core on parameter errors
*/
#ifdef DEBUG
#define MP_ARGCHK 2 /* how to check input arguments */
#else
#define MP_ARGCHK 1 /* how to check input arguments */
#endif
#endif
#ifndef MP_DEBUG
#define MP_DEBUG 0 /* print diagnostic output? */
#endif
#ifndef MP_DEFPREC
#define MP_DEFPREC 64 /* default precision, in digits */
#endif
#ifndef MP_MACRO
#define MP_MACRO 0 /* use macros for frequent calls? */
#endif
#ifndef MP_SQUARE
#define MP_SQUARE 1 /* use separate squaring code? */
#endif
#endif /* ifndef MPI_CONFIG_H_ */

View File

@@ -1,289 +0,0 @@
/*
* mpi-priv.h - Private header file for MPI
* Arbitrary precision integer arithmetic library
*
* NOTE WELL: the content of this header file is NOT part of the "public"
* API for the MPI library, and may change at any time.
* Application programs that use libmpi should NOT include this header file.
*
* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
*
* The Initial Developer of the Original Code is
* Michael J. Fromberger.
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Netscape Communications Corporation
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
/* $Id: mpi-priv.h,v 1.18 2005-02-25 04:30:11 julien.pierre.bugs%sun.com Exp $ */
#ifndef _MPI_PRIV_H_
#define _MPI_PRIV_H_ 1
#include "mpi.h"
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#if MP_DEBUG
#include <stdio.h>
#define DIAG(T,V) {fprintf(stderr,T);mp_print(V,stderr);fputc('\n',stderr);}
#else
#define DIAG(T,V)
#endif
/* If we aren't using a wired-in logarithm table, we need to include
the math library to get the log() function
*/
/* {{{ s_logv_2[] - log table for 2 in various bases */
#if MP_LOGTAB
/*
A table of the logs of 2 for various bases (the 0 and 1 entries of
this table are meaningless and should not be referenced).
This table is used to compute output lengths for the mp_toradix()
function. Since a number n in radix r takes up about log_r(n)
digits, we estimate the output size by taking the least integer
greater than log_r(n), where:
log_r(n) = log_2(n) * log_r(2)
This table, therefore, is a table of log_r(2) for 2 <= r <= 36,
which are the output bases supported.
*/
extern const float s_logv_2[];
#define LOG_V_2(R) s_logv_2[(R)]
#else
/*
If MP_LOGTAB is not defined, use the math library to compute the
logarithms on the fly. Otherwise, use the table.
Pick which works best for your system.
*/
#include <math.h>
#define LOG_V_2(R) (log(2.0)/log(R))
#endif /* if MP_LOGTAB */
/* }}} */
/* {{{ Digit arithmetic macros */
/*
When adding and multiplying digits, the results can be larger than
can be contained in an mp_digit. Thus, an mp_word is used. These
macros mask off the upper and lower digits of the mp_word (the
mp_word may be more than 2 mp_digits wide, but we only concern
ourselves with the low-order 2 mp_digits)
*/
#define CARRYOUT(W) (mp_digit)((W)>>DIGIT_BIT)
#define ACCUM(W) (mp_digit)(W)
#define MP_MIN(a,b) (((a) < (b)) ? (a) : (b))
#define MP_MAX(a,b) (((a) > (b)) ? (a) : (b))
#define MP_HOWMANY(a,b) (((a) + (b) - 1)/(b))
#define MP_ROUNDUP(a,b) (MP_HOWMANY(a,b) * (b))
/* }}} */
/* {{{ Comparison constants */
#define MP_LT -1
#define MP_EQ 0
#define MP_GT 1
/* }}} */
/* {{{ private function declarations */
/*
If MP_MACRO is false, these will be defined as actual functions;
otherwise, suitable macro definitions will be used. This works
around the fact that ANSI C89 doesn't support an 'inline' keyword
(although I hear C9x will ... about bloody time). At present, the
macro definitions are identical to the function bodies, but they'll
expand in place, instead of generating a function call.
I chose these particular functions to be made into macros because
some profiling showed they are called a lot on a typical workload,
and yet they are primarily housekeeping.
*/
#if MP_MACRO == 0
void s_mp_setz(mp_digit *dp, mp_size count); /* zero digits */
void s_mp_copy(const mp_digit *sp, mp_digit *dp, mp_size count); /* copy */
void *s_mp_alloc(size_t nb, size_t ni); /* general allocator */
void s_mp_free(void *ptr); /* general free function */
extern unsigned long mp_allocs;
extern unsigned long mp_frees;
extern unsigned long mp_copies;
#else
/* Even if these are defined as macros, we need to respect the settings
of the MP_MEMSET and MP_MEMCPY configuration options...
*/
#if MP_MEMSET == 0
#define s_mp_setz(dp, count) \
{int ix;for(ix=0;ix<(count);ix++)(dp)[ix]=0;}
#else
#define s_mp_setz(dp, count) memset(dp, 0, (count) * sizeof(mp_digit))
#endif /* MP_MEMSET */
#if MP_MEMCPY == 0
#define s_mp_copy(sp, dp, count) \
{int ix;for(ix=0;ix<(count);ix++)(dp)[ix]=(sp)[ix];}
#else
#define s_mp_copy(sp, dp, count) memcpy(dp, sp, (count) * sizeof(mp_digit))
#endif /* MP_MEMCPY */
#define s_mp_alloc(nb, ni) calloc(nb, ni)
#define s_mp_free(ptr) {if(ptr) free(ptr);}
#endif /* MP_MACRO */
mp_err s_mp_grow(mp_int *mp, mp_size min); /* increase allocated size */
mp_err s_mp_pad(mp_int *mp, mp_size min); /* left pad with zeroes */
#if MP_MACRO == 0
void s_mp_clamp(mp_int *mp); /* clip leading zeroes */
#else
#define s_mp_clamp(mp)\
{ mp_size used = MP_USED(mp); \
while (used > 1 && DIGIT(mp, used - 1) == 0) --used; \
MP_USED(mp) = used; \
}
#endif /* MP_MACRO */
void s_mp_exch(mp_int *a, mp_int *b); /* swap a and b in place */
mp_err s_mp_lshd(mp_int *mp, mp_size p); /* left-shift by p digits */
void s_mp_rshd(mp_int *mp, mp_size p); /* right-shift by p digits */
mp_err s_mp_mul_2d(mp_int *mp, mp_digit d); /* multiply by 2^d in place */
void s_mp_div_2d(mp_int *mp, mp_digit d); /* divide by 2^d in place */
void s_mp_mod_2d(mp_int *mp, mp_digit d); /* modulo 2^d in place */
void s_mp_div_2(mp_int *mp); /* divide by 2 in place */
mp_err s_mp_mul_2(mp_int *mp); /* multiply by 2 in place */
mp_err s_mp_norm(mp_int *a, mp_int *b, mp_digit *pd);
/* normalize for division */
mp_err s_mp_add_d(mp_int *mp, mp_digit d); /* unsigned digit addition */
mp_err s_mp_sub_d(mp_int *mp, mp_digit d); /* unsigned digit subtract */
mp_err s_mp_mul_d(mp_int *mp, mp_digit d); /* unsigned digit multiply */
mp_err s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r);
/* unsigned digit divide */
mp_err s_mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu);
/* Barrett reduction */
mp_err s_mp_add(mp_int *a, const mp_int *b); /* magnitude addition */
mp_err s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c);
mp_err s_mp_sub(mp_int *a, const mp_int *b); /* magnitude subtract */
mp_err s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c);
mp_err s_mp_add_offset(mp_int *a, mp_int *b, mp_size offset);
/* a += b * RADIX^offset */
mp_err s_mp_mul(mp_int *a, const mp_int *b); /* magnitude multiply */
#if MP_SQUARE
mp_err s_mp_sqr(mp_int *a); /* magnitude square */
#else
#define s_mp_sqr(a) s_mp_mul(a, a)
#endif
mp_err s_mp_div(mp_int *rem, mp_int *div, mp_int *quot); /* magnitude div */
mp_err s_mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
mp_err s_mp_2expt(mp_int *a, mp_digit k); /* a = 2^k */
int s_mp_cmp(const mp_int *a, const mp_int *b); /* magnitude comparison */
int s_mp_cmp_d(const mp_int *a, mp_digit d); /* magnitude digit compare */
int s_mp_ispow2(const mp_int *v); /* is v a power of 2? */
int s_mp_ispow2d(mp_digit d); /* is d a power of 2? */
int s_mp_tovalue(char ch, int r); /* convert ch to value */
char s_mp_todigit(mp_digit val, int r, int low); /* convert val to digit */
int s_mp_outlen(int bits, int r); /* output length in bytes */
mp_digit s_mp_invmod_radix(mp_digit P); /* returns (P ** -1) mod RADIX */
mp_err s_mp_invmod_odd_m( const mp_int *a, const mp_int *m, mp_int *c);
mp_err s_mp_invmod_2d( const mp_int *a, mp_size k, mp_int *c);
mp_err s_mp_invmod_even_m(const mp_int *a, const mp_int *m, mp_int *c);
/* ------ mpv functions, operate on arrays of digits, not on mp_int's ------ */
#if defined (__OS2__) && defined (__IBMC__)
#define MPI_ASM_DECL __cdecl
#else
#define MPI_ASM_DECL
#endif
#ifdef MPI_AMD64
mp_digit MPI_ASM_DECL s_mpv_mul_set_vec64(mp_digit*, mp_digit *, mp_size, mp_digit);
mp_digit MPI_ASM_DECL s_mpv_mul_add_vec64(mp_digit*, const mp_digit*, mp_size, mp_digit);
/* c = a * b */
#define s_mpv_mul_d(a, a_len, b, c) \
((unsigned long*)c)[a_len] = s_mpv_mul_set_vec64(c, a, a_len, b)
/* c += a * b */
#define s_mpv_mul_d_add(a, a_len, b, c) \
((unsigned long*)c)[a_len] = s_mpv_mul_add_vec64(c, a, a_len, b)
#else
void MPI_ASM_DECL s_mpv_mul_d(const mp_digit *a, mp_size a_len,
mp_digit b, mp_digit *c);
void MPI_ASM_DECL s_mpv_mul_d_add(const mp_digit *a, mp_size a_len,
mp_digit b, mp_digit *c);
#endif
void MPI_ASM_DECL s_mpv_mul_d_add_prop(const mp_digit *a,
mp_size a_len, mp_digit b,
mp_digit *c);
void MPI_ASM_DECL s_mpv_sqr_add_prop(const mp_digit *a,
mp_size a_len,
mp_digit *sqrs);
mp_err MPI_ASM_DECL s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo,
mp_digit divisor, mp_digit *quot, mp_digit *rem);
/* c += a * b * (MP_RADIX ** offset); */
#define s_mp_mul_d_add_offset(a, b, c, off) \
(s_mpv_mul_d_add_prop(MP_DIGITS(a), MP_USED(a), b, MP_DIGITS(c) + off), MP_OKAY)
typedef struct {
mp_int N; /* modulus N */
mp_digit n0prime; /* n0' = - (n0 ** -1) mod MP_RADIX */
mp_size b; /* R == 2 ** b, also b = # significant bits in N */
} mp_mont_modulus;
mp_err s_mp_mul_mont(const mp_int *a, const mp_int *b, mp_int *c,
mp_mont_modulus *mmm);
mp_err s_mp_redc(mp_int *T, mp_mont_modulus *mmm);
/* }}} */
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,349 +0,0 @@
/*
* mpi.h
*
* Arbitrary precision integer arithmetic library
*
* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
*
* The Initial Developer of the Original Code is
* Michael J. Fromberger.
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Netscape Communications Corporation
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
/* $Id: mpi.h,v 1.22.8.1 2005-06-09 20:44:58 relyea%netscape.com Exp $ */
#ifndef _H_MPI_
#define _H_MPI_
#include "mpi-config.h"
#if MP_DEBUG
#undef MP_IOFUNC
#define MP_IOFUNC 1
#endif
#if MP_IOFUNC
#include <stdio.h>
#include <ctype.h>
#endif
#include <limits.h>
#if defined(BSDI)
#undef ULLONG_MAX
#endif
#if defined( macintosh )
#include <Types.h>
#elif defined( _WIN32_WCE)
/* #include <sys/types.h> What do we need here ?? */
#else
#include <sys/types.h>
#endif
#define MP_NEG 1
#define MP_ZPOS 0
#define MP_OKAY 0 /* no error, all is well */
#define MP_YES 0 /* yes (boolean result) */
#define MP_NO -1 /* no (boolean result) */
#define MP_MEM -2 /* out of memory */
#define MP_RANGE -3 /* argument out of range */
#define MP_BADARG -4 /* invalid parameter */
#define MP_UNDEF -5 /* answer is undefined */
#define MP_LAST_CODE MP_UNDEF
typedef unsigned int mp_sign;
typedef unsigned int mp_size;
typedef int mp_err;
#define MP_32BIT_MAX 4294967295U
#if !defined(ULONG_MAX)
#error "ULONG_MAX not defined"
#elif !defined(UINT_MAX)
#error "UINT_MAX not defined"
#elif !defined(USHRT_MAX)
#error "USHRT_MAX not defined"
#endif
#if defined(ULONG_LONG_MAX) /* GCC, HPUX */
#define MP_ULONG_LONG_MAX ULONG_LONG_MAX
#elif defined(ULLONG_MAX) /* Solaris */
#define MP_ULONG_LONG_MAX ULLONG_MAX
/* MP_ULONG_LONG_MAX was defined to be ULLONG_MAX */
#elif defined(ULONGLONG_MAX) /* IRIX, AIX */
#define MP_ULONG_LONG_MAX ULONGLONG_MAX
#endif
/* We only use unsigned long for mp_digit iff long is more than 32 bits. */
#if !defined(MP_USE_UINT_DIGIT) && ULONG_MAX > MP_32BIT_MAX
typedef unsigned long mp_digit;
#define MP_DIGIT_MAX ULONG_MAX
#define MP_DIGIT_FMT "%016lX" /* printf() format for 1 digit */
#define MP_HALF_DIGIT_MAX UINT_MAX
#undef MP_NO_MP_WORD
#define MP_NO_MP_WORD 1
#undef MP_USE_LONG_DIGIT
#define MP_USE_LONG_DIGIT 1
#undef MP_USE_LONG_LONG_DIGIT
#elif !defined(MP_USE_UINT_DIGIT) && defined(MP_ULONG_LONG_MAX)
typedef unsigned long long mp_digit;
#define MP_DIGIT_MAX MP_ULONG_LONG_MAX
#define MP_DIGIT_FMT "%016llX" /* printf() format for 1 digit */
#define MP_HALF_DIGIT_MAX UINT_MAX
#undef MP_NO_MP_WORD
#define MP_NO_MP_WORD 1
#undef MP_USE_LONG_LONG_DIGIT
#define MP_USE_LONG_LONG_DIGIT 1
#undef MP_USE_LONG_DIGIT
#else
typedef unsigned int mp_digit;
#define MP_DIGIT_MAX UINT_MAX
#define MP_DIGIT_FMT "%08X" /* printf() format for 1 digit */
#define MP_HALF_DIGIT_MAX USHRT_MAX
#undef MP_USE_UINT_DIGIT
#define MP_USE_UINT_DIGIT 1
#undef MP_USE_LONG_LONG_DIGIT
#undef MP_USE_LONG_DIGIT
#endif
#if !defined(MP_NO_MP_WORD)
#if defined(MP_USE_UINT_DIGIT) && \
(defined(MP_ULONG_LONG_MAX) || (ULONG_MAX > UINT_MAX))
#if (ULONG_MAX > UINT_MAX)
typedef unsigned long mp_word;
typedef long mp_sword;
#define MP_WORD_MAX ULONG_MAX
#else
typedef unsigned long long mp_word;
typedef long long mp_sword;
#define MP_WORD_MAX MP_ULONG_LONG_MAX
#endif
#else
#define MP_NO_MP_WORD 1
#endif
#endif /* !defined(MP_NO_MP_WORD) */
#if !defined(MP_WORD_MAX) && defined(MP_DEFINE_SMALL_WORD)
typedef unsigned int mp_word;
typedef int mp_sword;
#define MP_WORD_MAX UINT_MAX
#endif
#define MP_DIGIT_BIT (CHAR_BIT*sizeof(mp_digit))
#define MP_WORD_BIT (CHAR_BIT*sizeof(mp_word))
#define MP_RADIX (1+(mp_word)MP_DIGIT_MAX)
#define MP_HALF_DIGIT_BIT (MP_DIGIT_BIT/2)
#define MP_HALF_RADIX (1+(mp_digit)MP_HALF_DIGIT_MAX)
/* MP_HALF_RADIX really ought to be called MP_SQRT_RADIX, but it's named
** MP_HALF_RADIX because it's the radix for MP_HALF_DIGITs, and it's
** consistent with the other _HALF_ names.
*/
/* Macros for accessing the mp_int internals */
#define MP_SIGN(MP) ((MP)->sign)
#define MP_USED(MP) ((MP)->used)
#define MP_ALLOC(MP) ((MP)->alloc)
#define MP_DIGITS(MP) ((MP)->dp)
#define MP_DIGIT(MP,N) (MP)->dp[(N)]
/* This defines the maximum I/O base (minimum is 2) */
#define MP_MAX_RADIX 64
typedef struct {
mp_sign sign; /* sign of this quantity */
mp_size alloc; /* how many digits allocated */
mp_size used; /* how many digits used */
mp_digit *dp; /* the digits themselves */
} mp_int;
/* Default precision */
mp_size mp_get_prec(void);
void mp_set_prec(mp_size prec);
/* Memory management */
mp_err mp_init(mp_int *mp);
mp_err mp_init_size(mp_int *mp, mp_size prec);
mp_err mp_init_copy(mp_int *mp, const mp_int *from);
mp_err mp_copy(const mp_int *from, mp_int *to);
void mp_exch(mp_int *mp1, mp_int *mp2);
void mp_clear(mp_int *mp);
void mp_zero(mp_int *mp);
void mp_set(mp_int *mp, mp_digit d);
mp_err mp_set_int(mp_int *mp, long z);
#define mp_set_long(mp,z) mp_set_int(mp,z)
mp_err mp_set_ulong(mp_int *mp, unsigned long z);
/* Single digit arithmetic */
mp_err mp_add_d(const mp_int *a, mp_digit d, mp_int *b);
mp_err mp_sub_d(const mp_int *a, mp_digit d, mp_int *b);
mp_err mp_mul_d(const mp_int *a, mp_digit d, mp_int *b);
mp_err mp_mul_2(const mp_int *a, mp_int *c);
mp_err mp_div_d(const mp_int *a, mp_digit d, mp_int *q, mp_digit *r);
mp_err mp_div_2(const mp_int *a, mp_int *c);
mp_err mp_expt_d(const mp_int *a, mp_digit d, mp_int *c);
/* Sign manipulations */
mp_err mp_abs(const mp_int *a, mp_int *b);
mp_err mp_neg(const mp_int *a, mp_int *b);
/* Full arithmetic */
mp_err mp_add(const mp_int *a, const mp_int *b, mp_int *c);
mp_err mp_sub(const mp_int *a, const mp_int *b, mp_int *c);
mp_err mp_mul(const mp_int *a, const mp_int *b, mp_int *c);
#if MP_SQUARE
mp_err mp_sqr(const mp_int *a, mp_int *b);
#else
#define mp_sqr(a, b) mp_mul(a, a, b)
#endif
mp_err mp_div(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r);
mp_err mp_div_2d(const mp_int *a, mp_digit d, mp_int *q, mp_int *r);
mp_err mp_expt(mp_int *a, mp_int *b, mp_int *c);
mp_err mp_2expt(mp_int *a, mp_digit k);
mp_err mp_sqrt(const mp_int *a, mp_int *b);
/* Modular arithmetic */
#if MP_MODARITH
mp_err mp_mod(const mp_int *a, const mp_int *m, mp_int *c);
mp_err mp_mod_d(const mp_int *a, mp_digit d, mp_digit *c);
mp_err mp_addmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
mp_err mp_submod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
mp_err mp_mulmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
#if MP_SQUARE
mp_err mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c);
#else
#define mp_sqrmod(a, m, c) mp_mulmod(a, a, m, c)
#endif
mp_err mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
mp_err mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c);
#endif /* MP_MODARITH */
/* Comparisons */
int mp_cmp_z(const mp_int *a);
int mp_cmp_d(const mp_int *a, mp_digit d);
int mp_cmp(const mp_int *a, const mp_int *b);
int mp_cmp_mag(mp_int *a, mp_int *b);
int mp_cmp_int(const mp_int *a, long z);
int mp_isodd(const mp_int *a);
int mp_iseven(const mp_int *a);
/* Number theoretic */
#if MP_NUMTH
mp_err mp_gcd(mp_int *a, mp_int *b, mp_int *c);
mp_err mp_lcm(mp_int *a, mp_int *b, mp_int *c);
mp_err mp_xgcd(const mp_int *a, const mp_int *b, mp_int *g, mp_int *x, mp_int *y);
mp_err mp_invmod(const mp_int *a, const mp_int *m, mp_int *c);
mp_err mp_invmod_xgcd(const mp_int *a, const mp_int *m, mp_int *c);
#endif /* end MP_NUMTH */
/* Input and output */
#if MP_IOFUNC
void mp_print(mp_int *mp, FILE *ofp);
#endif /* end MP_IOFUNC */
/* Base conversion */
mp_err mp_read_raw(mp_int *mp, char *str, int len);
int mp_raw_size(mp_int *mp);
mp_err mp_toraw(mp_int *mp, char *str);
mp_err mp_read_radix(mp_int *mp, const char *str, int radix);
mp_err mp_read_variable_radix(mp_int *a, const char * str, int default_radix);
int mp_radix_size(mp_int *mp, int radix);
mp_err mp_toradix(mp_int *mp, char *str, int radix);
int mp_tovalue(char ch, int r);
#define mp_tobinary(M, S) mp_toradix((M), (S), 2)
#define mp_tooctal(M, S) mp_toradix((M), (S), 8)
#define mp_todecimal(M, S) mp_toradix((M), (S), 10)
#define mp_tohex(M, S) mp_toradix((M), (S), 16)
/* Error strings */
const char *mp_strerror(mp_err ec);
/* Octet string conversion functions */
mp_err mp_read_unsigned_octets(mp_int *mp, const unsigned char *str, mp_size len);
int mp_unsigned_octet_size(const mp_int *mp);
mp_err mp_to_unsigned_octets(const mp_int *mp, unsigned char *str, mp_size maxlen);
mp_err mp_to_signed_octets(const mp_int *mp, unsigned char *str, mp_size maxlen);
mp_err mp_to_fixlen_octets(const mp_int *mp, unsigned char *str, mp_size len);
/* Miscellaneous */
mp_size mp_trailing_zeros(const mp_int *mp);
#define MP_CHECKOK(x) if (MP_OKAY > (res = (x))) goto CLEANUP
#define MP_CHECKERR(x) if (MP_OKAY > (res = (x))) goto CLEANUP
#if defined(MP_API_COMPATIBLE)
#define NEG MP_NEG
#define ZPOS MP_ZPOS
#define DIGIT_MAX MP_DIGIT_MAX
#define DIGIT_BIT MP_DIGIT_BIT
#define DIGIT_FMT MP_DIGIT_FMT
#define RADIX MP_RADIX
#define MAX_RADIX MP_MAX_RADIX
#define SIGN(MP) MP_SIGN(MP)
#define USED(MP) MP_USED(MP)
#define ALLOC(MP) MP_ALLOC(MP)
#define DIGITS(MP) MP_DIGITS(MP)
#define DIGIT(MP,N) MP_DIGIT(MP,N)
#if MP_ARGCHK == 1
#define ARGCHK(X,Y) {if(!(X)){return (Y);}}
#elif MP_ARGCHK == 2
#include <assert.h>
#define ARGCHK(X,Y) assert(X)
#else
#define ARGCHK(X,Y) /* */
#endif
#endif /* defined MP_API_COMPATIBLE */
/*
* mpi_getProcessorLineSize() returns the size in bytes of the cache line
* if a cache exists, or zero if there is no cache. If more than one
* cache line exists, it should return the smallest line size (which is
* usually the L1 cache).
*
* mp_modexp uses this information to make sure that private key information
* isn't being leaked through the cache.
*
* see mpcpucache.c for the implementation.
*/
unsigned long mpi_getProcessorLineSize();
#endif /* end _H_MPI_ */

View File

@@ -1,65 +0,0 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is the Solaris software cryptographic token.
*
* The Initial Developer of the Original Code is
* Sun Microsystems, Inc.
* Portions created by the Initial Developer are Copyright (C) 2005
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Sun Microsystems, Inc.
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#ifndef MPI_AMD64
#error This file only works on AMD64 platforms.
#endif
#include <mpi-priv.h>
/*
* MPI glue
*
*/
/* Presently, this is only used by the Montgomery arithmetic code. */
/* c += a * b */
void MPI_ASM_DECL s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len,
mp_digit b, mp_digit *c)
{
mp_digit w;
mp_digit d;
d = s_mpv_mul_add_vec64(c, a, a_len, b);
c += a_len;
while (d) {
w = c[0] + d;
d = (w < c[0] || w < d);
*c++ = w;
}
}

View File

@@ -1,418 +0,0 @@
# ***** BEGIN LICENSE BLOCK *****
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# The Original Code is the Solaris software cryptographic token.
#
# The Initial Developer of the Original Code is
# Sun Microsystems, Inc.
# Portions created by the Initial Developer are Copyright (C) 2005
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Sun Microsystems, Inc.
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
#
# ***** END LICENSE BLOCK ***** */
# ------------------------------------------------------------------------
#
# Implementation of s_mpv_mul_set_vec which exploits
# the 64X64->128 bit unsigned multiply instruction.
#
# ------------------------------------------------------------------------
# r = a * digit, r and a are vectors of length len
# returns the carry digit
# r and a are 64 bit aligned.
#
# uint64_t
# s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
#
.text; .align 16; .globl s_mpv_mul_set_vec64; .type s_mpv_mul_set_vec64, @function; s_mpv_mul_set_vec64:
xorq %rax, %rax # if (len == 0) return (0)
testq %rdx, %rdx
jz .L17
movq %rdx, %r8 # Use r8 for len; %rdx is used by mul
xorq %r9, %r9 # cy = 0
.L15:
cmpq $8, %r8 # 8 - len
jb .L16
movq 0(%rsi), %rax # rax = a[0]
movq 8(%rsi), %r11 # prefetch a[1]
mulq %rcx # p = a[0] * digit
addq %r9, %rax
adcq $0, %rdx # p += cy
movq %rax, 0(%rdi) # r[0] = lo(p)
movq %rdx, %r9 # cy = hi(p)
movq %r11, %rax
movq 16(%rsi), %r11 # prefetch a[2]
mulq %rcx # p = a[1] * digit
addq %r9, %rax
adcq $0, %rdx # p += cy
movq %rax, 8(%rdi) # r[1] = lo(p)
movq %rdx, %r9 # cy = hi(p)
movq %r11, %rax
movq 24(%rsi), %r11 # prefetch a[3]
mulq %rcx # p = a[2] * digit
addq %r9, %rax
adcq $0, %rdx # p += cy
movq %rax, 16(%rdi) # r[2] = lo(p)
movq %rdx, %r9 # cy = hi(p)
movq %r11, %rax
movq 32(%rsi), %r11 # prefetch a[4]
mulq %rcx # p = a[3] * digit
addq %r9, %rax
adcq $0, %rdx # p += cy
movq %rax, 24(%rdi) # r[3] = lo(p)
movq %rdx, %r9 # cy = hi(p)
movq %r11, %rax
movq 40(%rsi), %r11 # prefetch a[5]
mulq %rcx # p = a[4] * digit
addq %r9, %rax
adcq $0, %rdx # p += cy
movq %rax, 32(%rdi) # r[4] = lo(p)
movq %rdx, %r9 # cy = hi(p)
movq %r11, %rax
movq 48(%rsi), %r11 # prefetch a[6]
mulq %rcx # p = a[5] * digit
addq %r9, %rax
adcq $0, %rdx # p += cy
movq %rax, 40(%rdi) # r[5] = lo(p)
movq %rdx, %r9 # cy = hi(p)
movq %r11, %rax
movq 56(%rsi), %r11 # prefetch a[7]
mulq %rcx # p = a[6] * digit
addq %r9, %rax
adcq $0, %rdx # p += cy
movq %rax, 48(%rdi) # r[6] = lo(p)
movq %rdx, %r9 # cy = hi(p)
movq %r11, %rax
mulq %rcx # p = a[7] * digit
addq %r9, %rax
adcq $0, %rdx # p += cy
movq %rax, 56(%rdi) # r[7] = lo(p)
movq %rdx, %r9 # cy = hi(p)
addq $64, %rsi
addq $64, %rdi
subq $8, %r8
jz .L17
jmp .L15
.L16:
movq 0(%rsi), %rax
mulq %rcx # p = a[0] * digit
addq %r9, %rax
adcq $0, %rdx # p += cy
movq %rax, 0(%rdi) # r[0] = lo(p)
movq %rdx, %r9 # cy = hi(p)
decq %r8
jz .L17
movq 8(%rsi), %rax
mulq %rcx # p = a[1] * digit
addq %r9, %rax
adcq $0, %rdx # p += cy
movq %rax, 8(%rdi) # r[1] = lo(p)
movq %rdx, %r9 # cy = hi(p)
decq %r8
jz .L17
movq 16(%rsi), %rax
mulq %rcx # p = a[2] * digit
addq %r9, %rax
adcq $0, %rdx # p += cy
movq %rax, 16(%rdi) # r[2] = lo(p)
movq %rdx, %r9 # cy = hi(p)
decq %r8
jz .L17
movq 24(%rsi), %rax
mulq %rcx # p = a[3] * digit
addq %r9, %rax
adcq $0, %rdx # p += cy
movq %rax, 24(%rdi) # r[3] = lo(p)
movq %rdx, %r9 # cy = hi(p)
decq %r8
jz .L17
movq 32(%rsi), %rax
mulq %rcx # p = a[4] * digit
addq %r9, %rax
adcq $0, %rdx # p += cy
movq %rax, 32(%rdi) # r[4] = lo(p)
movq %rdx, %r9 # cy = hi(p)
decq %r8
jz .L17
movq 40(%rsi), %rax
mulq %rcx # p = a[5] * digit
addq %r9, %rax
adcq $0, %rdx # p += cy
movq %rax, 40(%rdi) # r[5] = lo(p)
movq %rdx, %r9 # cy = hi(p)
decq %r8
jz .L17
movq 48(%rsi), %rax
mulq %rcx # p = a[6] * digit
addq %r9, %rax
adcq $0, %rdx # p += cy
movq %rax, 48(%rdi) # r[6] = lo(p)
movq %rdx, %r9 # cy = hi(p)
decq %r8
jz .L17
.L17:
movq %r9, %rax
ret
.size s_mpv_mul_set_vec64, [.-s_mpv_mul_set_vec64]
# ------------------------------------------------------------------------
#
# Implementation of s_mpv_mul_add_vec which exploits
# the 64X64->128 bit unsigned multiply instruction.
#
# ------------------------------------------------------------------------
# r += a * digit, r and a are vectors of length len
# returns the carry digit
# r and a are 64 bit aligned.
#
# uint64_t
# s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
#
.text; .align 16; .globl s_mpv_mul_add_vec64; .type s_mpv_mul_add_vec64, @function; s_mpv_mul_add_vec64:
xorq %rax, %rax # if (len == 0) return (0)
testq %rdx, %rdx
jz .L27
movq %rdx, %r8 # Use r8 for len; %rdx is used by mul
xorq %r9, %r9 # cy = 0
.L25:
cmpq $8, %r8 # 8 - len
jb .L26
movq 0(%rsi), %rax # rax = a[0]
movq 0(%rdi), %r10 # r10 = r[0]
movq 8(%rsi), %r11 # prefetch a[1]
mulq %rcx # p = a[0] * digit
addq %r10, %rax
adcq $0, %rdx # p += r[0]
movq 8(%rdi), %r10 # prefetch r[1]
addq %r9, %rax
adcq $0, %rdx # p += cy
movq %rax, 0(%rdi) # r[0] = lo(p)
movq %rdx, %r9 # cy = hi(p)
movq %r11, %rax
movq 16(%rsi), %r11 # prefetch a[2]
mulq %rcx # p = a[1] * digit
addq %r10, %rax
adcq $0, %rdx # p += r[1]
movq 16(%rdi), %r10 # prefetch r[2]
addq %r9, %rax
adcq $0, %rdx # p += cy
movq %rax, 8(%rdi) # r[1] = lo(p)
movq %rdx, %r9 # cy = hi(p)
movq %r11, %rax
movq 24(%rsi), %r11 # prefetch a[3]
mulq %rcx # p = a[2] * digit
addq %r10, %rax
adcq $0, %rdx # p += r[2]
movq 24(%rdi), %r10 # prefetch r[3]
addq %r9, %rax
adcq $0, %rdx # p += cy
movq %rax, 16(%rdi) # r[2] = lo(p)
movq %rdx, %r9 # cy = hi(p)
movq %r11, %rax
movq 32(%rsi), %r11 # prefetch a[4]
mulq %rcx # p = a[3] * digit
addq %r10, %rax
adcq $0, %rdx # p += r[3]
movq 32(%rdi), %r10 # prefetch r[4]
addq %r9, %rax
adcq $0, %rdx # p += cy
movq %rax, 24(%rdi) # r[3] = lo(p)
movq %rdx, %r9 # cy = hi(p)
movq %r11, %rax
movq 40(%rsi), %r11 # prefetch a[5]
mulq %rcx # p = a[4] * digit
addq %r10, %rax
adcq $0, %rdx # p += r[4]
movq 40(%rdi), %r10 # prefetch r[5]
addq %r9, %rax
adcq $0, %rdx # p += cy
movq %rax, 32(%rdi) # r[4] = lo(p)
movq %rdx, %r9 # cy = hi(p)
movq %r11, %rax
movq 48(%rsi), %r11 # prefetch a[6]
mulq %rcx # p = a[5] * digit
addq %r10, %rax
adcq $0, %rdx # p += r[5]
movq 48(%rdi), %r10 # prefetch r[6]
addq %r9, %rax
adcq $0, %rdx # p += cy
movq %rax, 40(%rdi) # r[5] = lo(p)
movq %rdx, %r9 # cy = hi(p)
movq %r11, %rax
movq 56(%rsi), %r11 # prefetch a[7]
mulq %rcx # p = a[6] * digit
addq %r10, %rax
adcq $0, %rdx # p += r[6]
movq 56(%rdi), %r10 # prefetch r[7]
addq %r9, %rax
adcq $0, %rdx # p += cy
movq %rax, 48(%rdi) # r[6] = lo(p)
movq %rdx, %r9 # cy = hi(p)
movq %r11, %rax
mulq %rcx # p = a[7] * digit
addq %r10, %rax
adcq $0, %rdx # p += r[7]
addq %r9, %rax
adcq $0, %rdx # p += cy
movq %rax, 56(%rdi) # r[7] = lo(p)
movq %rdx, %r9 # cy = hi(p)
addq $64, %rsi
addq $64, %rdi
subq $8, %r8
jz .L27
jmp .L25
.L26:
movq 0(%rsi), %rax
movq 0(%rdi), %r10
mulq %rcx # p = a[0] * digit
addq %r10, %rax
adcq $0, %rdx # p += r[0]
addq %r9, %rax
adcq $0, %rdx # p += cy
movq %rax, 0(%rdi) # r[0] = lo(p)
movq %rdx, %r9 # cy = hi(p)
decq %r8
jz .L27
movq 8(%rsi), %rax
movq 8(%rdi), %r10
mulq %rcx # p = a[1] * digit
addq %r10, %rax
adcq $0, %rdx # p += r[1]
addq %r9, %rax
adcq $0, %rdx # p += cy
movq %rax, 8(%rdi) # r[1] = lo(p)
movq %rdx, %r9 # cy = hi(p)
decq %r8
jz .L27
movq 16(%rsi), %rax
movq 16(%rdi), %r10
mulq %rcx # p = a[2] * digit
addq %r10, %rax
adcq $0, %rdx # p += r[2]
addq %r9, %rax
adcq $0, %rdx # p += cy
movq %rax, 16(%rdi) # r[2] = lo(p)
movq %rdx, %r9 # cy = hi(p)
decq %r8
jz .L27
movq 24(%rsi), %rax
movq 24(%rdi), %r10
mulq %rcx # p = a[3] * digit
addq %r10, %rax
adcq $0, %rdx # p += r[3]
addq %r9, %rax
adcq $0, %rdx # p += cy
movq %rax, 24(%rdi) # r[3] = lo(p)
movq %rdx, %r9 # cy = hi(p)
decq %r8
jz .L27
movq 32(%rsi), %rax
movq 32(%rdi), %r10
mulq %rcx # p = a[4] * digit
addq %r10, %rax
adcq $0, %rdx # p += r[4]
addq %r9, %rax
adcq $0, %rdx # p += cy
movq %rax, 32(%rdi) # r[4] = lo(p)
movq %rdx, %r9 # cy = hi(p)
decq %r8
jz .L27
movq 40(%rsi), %rax
movq 40(%rdi), %r10
mulq %rcx # p = a[5] * digit
addq %r10, %rax
adcq $0, %rdx # p += r[5]
addq %r9, %rax
adcq $0, %rdx # p += cy
movq %rax, 40(%rdi) # r[5] = lo(p)
movq %rdx, %r9 # cy = hi(p)
decq %r8
jz .L27
movq 48(%rsi), %rax
movq 48(%rdi), %r10
mulq %rcx # p = a[6] * digit
addq %r10, %rax
adcq $0, %rdx # p += r[6]
addq %r9, %rax
adcq $0, %rdx # p += cy
movq %rax, 48(%rdi) # r[6] = lo(p)
movq %rdx, %r9 # cy = hi(p)
decq %r8
jz .L27
.L27:
movq %r9, %rax
ret
.size s_mpv_mul_add_vec64, [.-s_mpv_mul_add_vec64]

View File

@@ -1,418 +0,0 @@
/ ***** BEGIN LICENSE BLOCK *****
/ Version: MPL 1.1/GPL 2.0/LGPL 2.1
/
/ The contents of this file are subject to the Mozilla Public License Version
/ 1.1 (the "License"); you may not use this file except in compliance with
/ the License. You may obtain a copy of the License at
/ http://www.mozilla.org/MPL/
/
/ Software distributed under the License is distributed on an "AS IS" basis,
/ WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
/ for the specific language governing rights and limitations under the
/ License.
/
/ The Original Code is the Solaris software cryptographic token.
/
/ The Initial Developer of the Original Code is
/ Sun Microsystems, Inc.
/ Portions created by the Initial Developer are Copyright (C) 2005
/ the Initial Developer. All Rights Reserved.
/
/ Contributor(s):
/ Sun Microsystems, Inc.
/
/ Alternatively, the contents of this file may be used under the terms of
/ either the GNU General Public License Version 2 or later (the "GPL"), or
/ the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
/ in which case the provisions of the GPL or the LGPL are applicable instead
/ of those above. If you wish to allow use of your version of this file only
/ under the terms of either the GPL or the LGPL, and not to allow others to
/ use your version of this file under the terms of the MPL, indicate your
/ decision by deleting the provisions above and replace them with the notice
/ and other provisions required by the GPL or the LGPL. If you do not delete
/ the provisions above, a recipient may use your version of this file under
/ the terms of any one of the MPL, the GPL or the LGPL.
/
/ ***** END LICENSE BLOCK ***** */
/ ------------------------------------------------------------------------
/
/ Implementation of s_mpv_mul_set_vec which exploits
/ the 64X64->128 bit unsigned multiply instruction.
/
/ ------------------------------------------------------------------------
/ r = a * digit, r and a are vectors of length len
/ returns the carry digit
/ r and a are 64 bit aligned.
/
/ uint64_t
/ s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
/
.text; .align 16; .globl s_mpv_mul_set_vec64; .type s_mpv_mul_set_vec64, @function; s_mpv_mul_set_vec64:
xorq %rax, %rax / if (len == 0) return (0)
testq %rdx, %rdx
jz .L17
movq %rdx, %r8 / Use r8 for len; %rdx is used by mul
xorq %r9, %r9 / cy = 0
.L15:
cmpq $8, %r8 / 8 - len
jb .L16
movq 0(%rsi), %rax / rax = a[0]
movq 8(%rsi), %r11 / prefetch a[1]
mulq %rcx / p = a[0] * digit
addq %r9, %rax
adcq $0, %rdx / p += cy
movq %rax, 0(%rdi) / r[0] = lo(p)
movq %rdx, %r9 / cy = hi(p)
movq %r11, %rax
movq 16(%rsi), %r11 / prefetch a[2]
mulq %rcx / p = a[1] * digit
addq %r9, %rax
adcq $0, %rdx / p += cy
movq %rax, 8(%rdi) / r[1] = lo(p)
movq %rdx, %r9 / cy = hi(p)
movq %r11, %rax
movq 24(%rsi), %r11 / prefetch a[3]
mulq %rcx / p = a[2] * digit
addq %r9, %rax
adcq $0, %rdx / p += cy
movq %rax, 16(%rdi) / r[2] = lo(p)
movq %rdx, %r9 / cy = hi(p)
movq %r11, %rax
movq 32(%rsi), %r11 / prefetch a[4]
mulq %rcx / p = a[3] * digit
addq %r9, %rax
adcq $0, %rdx / p += cy
movq %rax, 24(%rdi) / r[3] = lo(p)
movq %rdx, %r9 / cy = hi(p)
movq %r11, %rax
movq 40(%rsi), %r11 / prefetch a[5]
mulq %rcx / p = a[4] * digit
addq %r9, %rax
adcq $0, %rdx / p += cy
movq %rax, 32(%rdi) / r[4] = lo(p)
movq %rdx, %r9 / cy = hi(p)
movq %r11, %rax
movq 48(%rsi), %r11 / prefetch a[6]
mulq %rcx / p = a[5] * digit
addq %r9, %rax
adcq $0, %rdx / p += cy
movq %rax, 40(%rdi) / r[5] = lo(p)
movq %rdx, %r9 / cy = hi(p)
movq %r11, %rax
movq 56(%rsi), %r11 / prefetch a[7]
mulq %rcx / p = a[6] * digit
addq %r9, %rax
adcq $0, %rdx / p += cy
movq %rax, 48(%rdi) / r[6] = lo(p)
movq %rdx, %r9 / cy = hi(p)
movq %r11, %rax
mulq %rcx / p = a[7] * digit
addq %r9, %rax
adcq $0, %rdx / p += cy
movq %rax, 56(%rdi) / r[7] = lo(p)
movq %rdx, %r9 / cy = hi(p)
addq $64, %rsi
addq $64, %rdi
subq $8, %r8
jz .L17
jmp .L15
.L16:
movq 0(%rsi), %rax
mulq %rcx / p = a[0] * digit
addq %r9, %rax
adcq $0, %rdx / p += cy
movq %rax, 0(%rdi) / r[0] = lo(p)
movq %rdx, %r9 / cy = hi(p)
decq %r8
jz .L17
movq 8(%rsi), %rax
mulq %rcx / p = a[1] * digit
addq %r9, %rax
adcq $0, %rdx / p += cy
movq %rax, 8(%rdi) / r[1] = lo(p)
movq %rdx, %r9 / cy = hi(p)
decq %r8
jz .L17
movq 16(%rsi), %rax
mulq %rcx / p = a[2] * digit
addq %r9, %rax
adcq $0, %rdx / p += cy
movq %rax, 16(%rdi) / r[2] = lo(p)
movq %rdx, %r9 / cy = hi(p)
decq %r8
jz .L17
movq 24(%rsi), %rax
mulq %rcx / p = a[3] * digit
addq %r9, %rax
adcq $0, %rdx / p += cy
movq %rax, 24(%rdi) / r[3] = lo(p)
movq %rdx, %r9 / cy = hi(p)
decq %r8
jz .L17
movq 32(%rsi), %rax
mulq %rcx / p = a[4] * digit
addq %r9, %rax
adcq $0, %rdx / p += cy
movq %rax, 32(%rdi) / r[4] = lo(p)
movq %rdx, %r9 / cy = hi(p)
decq %r8
jz .L17
movq 40(%rsi), %rax
mulq %rcx / p = a[5] * digit
addq %r9, %rax
adcq $0, %rdx / p += cy
movq %rax, 40(%rdi) / r[5] = lo(p)
movq %rdx, %r9 / cy = hi(p)
decq %r8
jz .L17
movq 48(%rsi), %rax
mulq %rcx / p = a[6] * digit
addq %r9, %rax
adcq $0, %rdx / p += cy
movq %rax, 48(%rdi) / r[6] = lo(p)
movq %rdx, %r9 / cy = hi(p)
decq %r8
jz .L17
.L17:
movq %r9, %rax
ret
.size s_mpv_mul_set_vec64, [.-s_mpv_mul_set_vec64]
/ ------------------------------------------------------------------------
/
/ Implementation of s_mpv_mul_add_vec which exploits
/ the 64X64->128 bit unsigned multiply instruction.
/
/ ------------------------------------------------------------------------
/ r += a * digit, r and a are vectors of length len
/ returns the carry digit
/ r and a are 64 bit aligned.
/
/ uint64_t
/ s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
/
.text; .align 16; .globl s_mpv_mul_add_vec64; .type s_mpv_mul_add_vec64, @function; s_mpv_mul_add_vec64:
xorq %rax, %rax / if (len == 0) return (0)
testq %rdx, %rdx
jz .L27
movq %rdx, %r8 / Use r8 for len; %rdx is used by mul
xorq %r9, %r9 / cy = 0
.L25:
cmpq $8, %r8 / 8 - len
jb .L26
movq 0(%rsi), %rax / rax = a[0]
movq 0(%rdi), %r10 / r10 = r[0]
movq 8(%rsi), %r11 / prefetch a[1]
mulq %rcx / p = a[0] * digit
addq %r10, %rax
adcq $0, %rdx / p += r[0]
movq 8(%rdi), %r10 / prefetch r[1]
addq %r9, %rax
adcq $0, %rdx / p += cy
movq %rax, 0(%rdi) / r[0] = lo(p)
movq %rdx, %r9 / cy = hi(p)
movq %r11, %rax
movq 16(%rsi), %r11 / prefetch a[2]
mulq %rcx / p = a[1] * digit
addq %r10, %rax
adcq $0, %rdx / p += r[1]
movq 16(%rdi), %r10 / prefetch r[2]
addq %r9, %rax
adcq $0, %rdx / p += cy
movq %rax, 8(%rdi) / r[1] = lo(p)
movq %rdx, %r9 / cy = hi(p)
movq %r11, %rax
movq 24(%rsi), %r11 / prefetch a[3]
mulq %rcx / p = a[2] * digit
addq %r10, %rax
adcq $0, %rdx / p += r[2]
movq 24(%rdi), %r10 / prefetch r[3]
addq %r9, %rax
adcq $0, %rdx / p += cy
movq %rax, 16(%rdi) / r[2] = lo(p)
movq %rdx, %r9 / cy = hi(p)
movq %r11, %rax
movq 32(%rsi), %r11 / prefetch a[4]
mulq %rcx / p = a[3] * digit
addq %r10, %rax
adcq $0, %rdx / p += r[3]
movq 32(%rdi), %r10 / prefetch r[4]
addq %r9, %rax
adcq $0, %rdx / p += cy
movq %rax, 24(%rdi) / r[3] = lo(p)
movq %rdx, %r9 / cy = hi(p)
movq %r11, %rax
movq 40(%rsi), %r11 / prefetch a[5]
mulq %rcx / p = a[4] * digit
addq %r10, %rax
adcq $0, %rdx / p += r[4]
movq 40(%rdi), %r10 / prefetch r[5]
addq %r9, %rax
adcq $0, %rdx / p += cy
movq %rax, 32(%rdi) / r[4] = lo(p)
movq %rdx, %r9 / cy = hi(p)
movq %r11, %rax
movq 48(%rsi), %r11 / prefetch a[6]
mulq %rcx / p = a[5] * digit
addq %r10, %rax
adcq $0, %rdx / p += r[5]
movq 48(%rdi), %r10 / prefetch r[6]
addq %r9, %rax
adcq $0, %rdx / p += cy
movq %rax, 40(%rdi) / r[5] = lo(p)
movq %rdx, %r9 / cy = hi(p)
movq %r11, %rax
movq 56(%rsi), %r11 / prefetch a[7]
mulq %rcx / p = a[6] * digit
addq %r10, %rax
adcq $0, %rdx / p += r[6]
movq 56(%rdi), %r10 / prefetch r[7]
addq %r9, %rax
adcq $0, %rdx / p += cy
movq %rax, 48(%rdi) / r[6] = lo(p)
movq %rdx, %r9 / cy = hi(p)
movq %r11, %rax
mulq %rcx / p = a[7] * digit
addq %r10, %rax
adcq $0, %rdx / p += r[7]
addq %r9, %rax
adcq $0, %rdx / p += cy
movq %rax, 56(%rdi) / r[7] = lo(p)
movq %rdx, %r9 / cy = hi(p)
addq $64, %rsi
addq $64, %rdi
subq $8, %r8
jz .L27
jmp .L25
.L26:
movq 0(%rsi), %rax
movq 0(%rdi), %r10
mulq %rcx / p = a[0] * digit
addq %r10, %rax
adcq $0, %rdx / p += r[0]
addq %r9, %rax
adcq $0, %rdx / p += cy
movq %rax, 0(%rdi) / r[0] = lo(p)
movq %rdx, %r9 / cy = hi(p)
decq %r8
jz .L27
movq 8(%rsi), %rax
movq 8(%rdi), %r10
mulq %rcx / p = a[1] * digit
addq %r10, %rax
adcq $0, %rdx / p += r[1]
addq %r9, %rax
adcq $0, %rdx / p += cy
movq %rax, 8(%rdi) / r[1] = lo(p)
movq %rdx, %r9 / cy = hi(p)
decq %r8
jz .L27
movq 16(%rsi), %rax
movq 16(%rdi), %r10
mulq %rcx / p = a[2] * digit
addq %r10, %rax
adcq $0, %rdx / p += r[2]
addq %r9, %rax
adcq $0, %rdx / p += cy
movq %rax, 16(%rdi) / r[2] = lo(p)
movq %rdx, %r9 / cy = hi(p)
decq %r8
jz .L27
movq 24(%rsi), %rax
movq 24(%rdi), %r10
mulq %rcx / p = a[3] * digit
addq %r10, %rax
adcq $0, %rdx / p += r[3]
addq %r9, %rax
adcq $0, %rdx / p += cy
movq %rax, 24(%rdi) / r[3] = lo(p)
movq %rdx, %r9 / cy = hi(p)
decq %r8
jz .L27
movq 32(%rsi), %rax
movq 32(%rdi), %r10
mulq %rcx / p = a[4] * digit
addq %r10, %rax
adcq $0, %rdx / p += r[4]
addq %r9, %rax
adcq $0, %rdx / p += cy
movq %rax, 32(%rdi) / r[4] = lo(p)
movq %rdx, %r9 / cy = hi(p)
decq %r8
jz .L27
movq 40(%rsi), %rax
movq 40(%rdi), %r10
mulq %rcx / p = a[5] * digit
addq %r10, %rax
adcq $0, %rdx / p += r[5]
addq %r9, %rax
adcq $0, %rdx / p += cy
movq %rax, 40(%rdi) / r[5] = lo(p)
movq %rdx, %r9 / cy = hi(p)
decq %r8
jz .L27
movq 48(%rsi), %rax
movq 48(%rdi), %r10
mulq %rcx / p = a[6] * digit
addq %r10, %rax
adcq $0, %rdx / p += r[6]
addq %r9, %rax
adcq $0, %rdx / p += cy
movq %rax, 48(%rdi) / r[6] = lo(p)
movq %rdx, %r9 / cy = hi(p)
decq %r8
jz .L27
.L27:
movq %r9, %rax
ret
.size s_mpv_mul_add_vec64, [.-s_mpv_mul_add_vec64]

View File

@@ -1,115 +0,0 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is the Netscape security libraries.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2000
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
/* $Id: mpi_hp.c,v 1.5 2004-04-27 23:04:36 gerv%gerv.net Exp $ */
/* This file contains routines that perform vector multiplication. */
#include "mpi-priv.h"
#include <unistd.h>
#include <stddef.h>
/* #include <sys/systeminfo.h> */
#include <strings.h>
extern void multacc512(
int length, /* doublewords in multiplicand vector. */
const mp_digit *scalaraddr, /* Address of scalar. */
const mp_digit *multiplicand, /* The multiplicand vector. */
mp_digit * result); /* Where to accumulate the result. */
extern void maxpy_little(
int length, /* doublewords in multiplicand vector. */
const mp_digit *scalaraddr, /* Address of scalar. */
const mp_digit *multiplicand, /* The multiplicand vector. */
mp_digit * result); /* Where to accumulate the result. */
extern void add_diag_little(
int length, /* doublewords in input vector. */
const mp_digit *root, /* The vector to square. */
mp_digit * result); /* Where to accumulate the result. */
void
s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps)
{
add_diag_little(a_len, pa, ps);
}
#define MAX_STACK_DIGITS 258
#define MULTACC512_LEN (512 / MP_DIGIT_BIT)
#define HP_MPY_ADD_FN (a_len == MULTACC512_LEN ? multacc512 : maxpy_little)
/* c = a * b */
void
s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
mp_digit x[MAX_STACK_DIGITS];
mp_digit *px = x;
size_t xSize = 0;
if (a == c) {
if (a_len > MAX_STACK_DIGITS) {
xSize = sizeof(mp_digit) * (a_len + 2);
px = malloc(xSize);
if (!px)
return;
}
memcpy(px, a, a_len * sizeof(*a));
a = px;
}
s_mp_setz(c, a_len + 1);
HP_MPY_ADD_FN(a_len, &b, a, c);
if (px != x && px) {
memset(px, 0, xSize);
free(px);
}
}
/* c += a * b, where a is a_len words long. */
void
s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
c[a_len] = 0; /* so carry propagation stops here. */
HP_MPY_ADD_FN(a_len, &b, a, c);
}
/* c += a * b, where a is y words long. */
void
s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b,
mp_digit *c)
{
HP_MPY_ADD_FN(a_len, &b, a, c);
}

View File

@@ -1,342 +0,0 @@
/
/ The contents of this file are subject to the Mozilla Public
/ License Version 1.1 (the "License"); you may not use this file
/ except in compliance with the License. You may obtain a copy of
/ the License at http://www.mozilla.org/MPL/
/
/ Software distributed under the License is distributed on an "AS
/ IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
/ implied. See the License for the specific language governing
/ rights and limitations under the License.
/
/ The Original Code is the Netscape security libraries.
/
/ The Initial Developer of the Original Code is Netscape
/ Communications Corporation. Portions created by Netscape are
/ Copyright (C) 2001 Netscape Communications Corporation. All
/ Rights Reserved.
/
/ Contributor(s):
/
/ Alternatively, the contents of this file may be used under the
/ terms of the GNU General Public License Version 2 or later (the
/ "GPL"), in which case the provisions of the GPL are applicable
/ instead of those above. If you wish to allow use of your
/ version of this file only under the terms of the GPL and not to
/ allow others to use your version of this file under the MPL,
/ indicate your decision by deleting the provisions above and
/ replace them with the notice and other provisions required by
/ the GPL. If you do not delete the provisions above, a recipient
/ may use your version of this file under either the MPL or the
/ GPL.
/ $Id: mpi_i86pc.s,v 1.1 2001-04-27 20:47:39 nelsonb%netscape.com Exp $
/
.text
/ ebp - 36: caller's esi
/ ebp - 32: caller's edi
/ ebp - 28:
/ ebp - 24:
/ ebp - 20:
/ ebp - 16:
/ ebp - 12:
/ ebp - 8:
/ ebp - 4:
/ ebp + 0: caller's ebp
/ ebp + 4: return address
/ ebp + 8: a argument
/ ebp + 12: a_len argument
/ ebp + 16: b argument
/ ebp + 20: c argument
/ registers:
/ eax:
/ ebx: carry
/ ecx: a_len
/ edx:
/ esi: a ptr
/ edi: c ptr
.globl s_mpv_mul_d
.type s_mpv_mul_d,@function
s_mpv_mul_d:
push %ebp
mov %esp,%ebp
sub $28,%esp
push %edi
push %esi
push %ebx
movl $0,%ebx / carry = 0
mov 12(%ebp),%ecx / ecx = a_len
mov 20(%ebp),%edi
cmp $0,%ecx
je L2 / jmp if a_len == 0
mov 8(%ebp),%esi / esi = a
cld
L1:
lodsl / eax = [ds:esi]; esi += 4
mov 16(%ebp),%edx / edx = b
mull %edx / edx:eax = Phi:Plo = a_i * b
add %ebx,%eax / add carry (%ebx) to edx:eax
adc $0,%edx
mov %edx,%ebx / high half of product becomes next carry
stosl / [es:edi] = ax; edi += 4;
dec %ecx / --a_len
jnz L1 / jmp if a_len != 0
L2:
mov %ebx,0(%edi) / *c = carry
pop %ebx
pop %esi
pop %edi
leave
ret
nop
/ ebp - 36: caller's esi
/ ebp - 32: caller's edi
/ ebp - 28:
/ ebp - 24:
/ ebp - 20:
/ ebp - 16:
/ ebp - 12:
/ ebp - 8:
/ ebp - 4:
/ ebp + 0: caller's ebp
/ ebp + 4: return address
/ ebp + 8: a argument
/ ebp + 12: a_len argument
/ ebp + 16: b argument
/ ebp + 20: c argument
/ registers:
/ eax:
/ ebx: carry
/ ecx: a_len
/ edx:
/ esi: a ptr
/ edi: c ptr
.globl s_mpv_mul_d_add
.type s_mpv_mul_d_add,@function
s_mpv_mul_d_add:
push %ebp
mov %esp,%ebp
sub $28,%esp
push %edi
push %esi
push %ebx
movl $0,%ebx / carry = 0
mov 12(%ebp),%ecx / ecx = a_len
mov 20(%ebp),%edi
cmp $0,%ecx
je L4 / jmp if a_len == 0
mov 8(%ebp),%esi / esi = a
cld
L3:
lodsl / eax = [ds:esi]; esi += 4
mov 16(%ebp),%edx / edx = b
mull %edx / edx:eax = Phi:Plo = a_i * b
add %ebx,%eax / add carry (%ebx) to edx:eax
adc $0,%edx
mov 0(%edi),%ebx / add in current word from *c
add %ebx,%eax
adc $0,%edx
mov %edx,%ebx / high half of product becomes next carry
stosl / [es:edi] = ax; edi += 4;
dec %ecx / --a_len
jnz L3 / jmp if a_len != 0
L4:
mov %ebx,0(%edi) / *c = carry
pop %ebx
pop %esi
pop %edi
leave
ret
nop
/ ebp - 36: caller's esi
/ ebp - 32: caller's edi
/ ebp - 28:
/ ebp - 24:
/ ebp - 20:
/ ebp - 16:
/ ebp - 12:
/ ebp - 8:
/ ebp - 4:
/ ebp + 0: caller's ebp
/ ebp + 4: return address
/ ebp + 8: a argument
/ ebp + 12: a_len argument
/ ebp + 16: b argument
/ ebp + 20: c argument
/ registers:
/ eax:
/ ebx: carry
/ ecx: a_len
/ edx:
/ esi: a ptr
/ edi: c ptr
.globl s_mpv_mul_d_add_prop
.type s_mpv_mul_d_add_prop,@function
s_mpv_mul_d_add_prop:
push %ebp
mov %esp,%ebp
sub $28,%esp
push %edi
push %esi
push %ebx
movl $0,%ebx / carry = 0
mov 12(%ebp),%ecx / ecx = a_len
mov 20(%ebp),%edi
cmp $0,%ecx
je L6 / jmp if a_len == 0
cld
mov 8(%ebp),%esi / esi = a
L5:
lodsl / eax = [ds:esi]; esi += 4
mov 16(%ebp),%edx / edx = b
mull %edx / edx:eax = Phi:Plo = a_i * b
add %ebx,%eax / add carry (%ebx) to edx:eax
adc $0,%edx
mov 0(%edi),%ebx / add in current word from *c
add %ebx,%eax
adc $0,%edx
mov %edx,%ebx / high half of product becomes next carry
stosl / [es:edi] = ax; edi += 4;
dec %ecx / --a_len
jnz L5 / jmp if a_len != 0
L6:
cmp $0,%ebx / is carry zero?
jz L8
mov 0(%edi),%eax / add in current word from *c
add %ebx,%eax
stosl / [es:edi] = ax; edi += 4;
jnc L8
L7:
mov 0(%edi),%eax / add in current word from *c
adc $0,%eax
stosl / [es:edi] = ax; edi += 4;
jc L7
L8:
pop %ebx
pop %esi
pop %edi
leave
ret
nop
/ ebp - 20: caller's esi
/ ebp - 16: caller's edi
/ ebp - 12:
/ ebp - 8: carry
/ ebp - 4: a_len local
/ ebp + 0: caller's ebp
/ ebp + 4: return address
/ ebp + 8: pa argument
/ ebp + 12: a_len argument
/ ebp + 16: ps argument
/ ebp + 20:
/ registers:
/ eax:
/ ebx: carry
/ ecx: a_len
/ edx:
/ esi: a ptr
/ edi: c ptr
.globl s_mpv_sqr_add_prop
.type s_mpv_sqr_add_prop,@function
s_mpv_sqr_add_prop:
push %ebp
mov %esp,%ebp
sub $12,%esp
push %edi
push %esi
push %ebx
movl $0,%ebx / carry = 0
mov 12(%ebp),%ecx / a_len
mov 16(%ebp),%edi / edi = ps
cmp $0,%ecx
je L11 / jump if a_len == 0
cld
mov 8(%ebp),%esi / esi = pa
L10:
lodsl / %eax = [ds:si]; si += 4;
mull %eax
add %ebx,%eax / add "carry"
adc $0,%edx
mov 0(%edi),%ebx
add %ebx,%eax / add low word from result
mov 4(%edi),%ebx
stosl / [es:di] = %eax; di += 4;
adc %ebx,%edx / add high word from result
movl $0,%ebx
mov %edx,%eax
adc $0,%ebx
stosl / [es:di] = %eax; di += 4;
dec %ecx / --a_len
jnz L10 / jmp if a_len != 0
L11:
cmp $0,%ebx / is carry zero?
jz L14
mov 0(%edi),%eax / add in current word from *c
add %ebx,%eax
stosl / [es:edi] = ax; edi += 4;
jnc L14
L12:
mov 0(%edi),%eax / add in current word from *c
adc $0,%eax
stosl / [es:edi] = ax; edi += 4;
jc L12
L14:
pop %ebx
pop %esi
pop %edi
leave
ret
nop
/
/ Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
/ so its high bit is 1. This code is from NSPR.
/
/ mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
/ mp_digit *qp, mp_digit *rp)
/ esp + 0: Caller's ebx
/ esp + 4: return address
/ esp + 8: Nhi argument
/ esp + 12: Nlo argument
/ esp + 16: divisor argument
/ esp + 20: qp argument
/ esp + 24: rp argument
/ registers:
/ eax:
/ ebx: carry
/ ecx: a_len
/ edx:
/ esi: a ptr
/ edi: c ptr
/
.globl s_mpv_div_2dx1d
.type s_mpv_div_2dx1d,@function
s_mpv_div_2dx1d:
push %ebx
mov 8(%esp),%edx
mov 12(%esp),%eax
mov 16(%esp),%ebx
div %ebx
mov 20(%esp),%ebx
mov %eax,0(%ebx)
mov 24(%esp),%ebx
mov %edx,0(%ebx)
xor %eax,%eax / return zero
pop %ebx
ret
nop

View File

@@ -1,502 +0,0 @@
/*
* The contents of this file are subject to the Mozilla Public
* License Version 1.1 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
* implied. See the License for the specific language governing
* rights and limitations under the License.
*
* The Original Code is the Netscape security libraries.
*
* The Initial Developer of the Original Code is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 2000 Netscape Communications Corporation. All
* Rights Reserved.
*
* Contributor(s):
*
* Alternatively, the contents of this file may be used under the
* terms of the GNU General Public License Version 2 or later (the
* "GPL"), in which case the provisions of the GPL are applicable
* instead of those above. If you wish to allow use of your
* version of this file only under the terms of the GPL and not to
* allow others to use your version of this file under the MPL,
* indicate your decision by deleting the provisions above and
* replace them with the notice and other provisions required by
* the GPL. If you do not delete the provisions above, a recipient
* may use your version of this file under either the MPL or the
* GPL.
* $Id: mpi_mips.s,v 1.2 2000-08-31 02:40:32 nelsonb%netscape.com Exp $
*/
#include <regdef.h>
.set noreorder
.set noat
.section .text, 1, 0x00000006, 4, 4
.text:
.section .text
.ent s_mpv_mul_d_add
.globl s_mpv_mul_d_add
s_mpv_mul_d_add:
#/* c += a * b */
#void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
# mp_digit *c)
#{
# mp_digit a0, a1; regs a4, a5
# mp_digit c0, c1; regs a6, a7
# mp_digit cy = 0; reg t2
# mp_word w0, w1; regs t0, t1
#
# if (a_len) {
beq a1,zero,.L.1
move t2,zero # cy = 0
dsll32 a2,a2,0 # "b" is sometimes negative (?!?!)
dsrl32 a2,a2,0 # This clears the upper 32 bits.
# a0 = a[0];
lwu a4,0(a0)
# w0 = ((mp_word)b * a0);
dmultu a2,a4
# if (--a_len) {
addiu a1,a1,-1
beq a1,zero,.L.2
# while (a_len >= 2) {
sltiu t3,a1,2
bne t3,zero,.L.3
# a1 = a[1];
lwu a5,4(a0)
.L.4:
# a_len -= 2;
addiu a1,a1,-2
# c0 = c[0];
lwu a6,0(a3)
# w0 += cy;
mflo t0
daddu t0,t0,t2
# w0 += c0;
daddu t0,t0,a6
# w1 = (mp_word)b * a1;
dmultu a2,a5 #
# cy = CARRYOUT(w0);
dsrl32 t2,t0,0
# c[0] = ACCUM(w0);
sw t0,0(a3)
# a0 = a[2];
lwu a4,8(a0)
# a += 2;
addiu a0,a0,8
# c1 = c[1];
lwu a7,4(a3)
# w1 += cy;
mflo t1
daddu t1,t1,t2
# w1 += c1;
daddu t1,t1,a7
# w0 = (mp_word)b * a0;
dmultu a2,a4 #
# cy = CARRYOUT(w1);
dsrl32 t2,t1,0
# c[1] = ACCUM(w1);
sw t1,4(a3)
# c += 2;
addiu a3,a3,8
sltiu t3,a1,2
beq t3,zero,.L.4
# a1 = a[1];
lwu a5,4(a0)
# }
.L.3:
# c0 = c[0];
lwu a6,0(a3)
# w0 += cy;
# if (a_len) {
mflo t0
beq a1,zero,.L.5
daddu t0,t0,t2
# w1 = (mp_word)b * a1;
dmultu a2,a5
# w0 += c0;
daddu t0,t0,a6 #
# cy = CARRYOUT(w0);
dsrl32 t2,t0,0
# c[0] = ACCUM(w0);
sw t0,0(a3)
# c1 = c[1];
lwu a7,4(a3)
# w1 += cy;
mflo t1
daddu t1,t1,t2
# w1 += c1;
daddu t1,t1,a7
# c[1] = ACCUM(w1);
sw t1,4(a3)
# cy = CARRYOUT(w1);
dsrl32 t2,t1,0
# c += 1;
b .L.6
addiu a3,a3,4
# } else {
.L.5:
# w0 += c0;
daddu t0,t0,a6
# c[0] = ACCUM(w0);
sw t0,0(a3)
# cy = CARRYOUT(w0);
b .L.6
dsrl32 t2,t0,0
# }
# } else {
.L.2:
# c0 = c[0];
lwu a6,0(a3)
# w0 += c0;
mflo t0
daddu t0,t0,a6
# c[0] = ACCUM(w0);
sw t0,0(a3)
# cy = CARRYOUT(w0);
dsrl32 t2,t0,0
# }
.L.6:
# c[1] = cy;
jr ra
sw t2,4(a3)
# }
.L.1:
jr ra
nop
#}
#
.end s_mpv_mul_d_add
.ent s_mpv_mul_d_add_prop
.globl s_mpv_mul_d_add_prop
s_mpv_mul_d_add_prop:
#/* c += a * b */
#void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b,
# mp_digit *c)
#{
# mp_digit a0, a1; regs a4, a5
# mp_digit c0, c1; regs a6, a7
# mp_digit cy = 0; reg t2
# mp_word w0, w1; regs t0, t1
#
# if (a_len) {
beq a1,zero,.M.1
move t2,zero # cy = 0
dsll32 a2,a2,0 # "b" is sometimes negative (?!?!)
dsrl32 a2,a2,0 # This clears the upper 32 bits.
# a0 = a[0];
lwu a4,0(a0)
# w0 = ((mp_word)b * a0);
dmultu a2,a4
# if (--a_len) {
addiu a1,a1,-1
beq a1,zero,.M.2
# while (a_len >= 2) {
sltiu t3,a1,2
bne t3,zero,.M.3
# a1 = a[1];
lwu a5,4(a0)
.M.4:
# a_len -= 2;
addiu a1,a1,-2
# c0 = c[0];
lwu a6,0(a3)
# w0 += cy;
mflo t0
daddu t0,t0,t2
# w0 += c0;
daddu t0,t0,a6
# w1 = (mp_word)b * a1;
dmultu a2,a5 #
# cy = CARRYOUT(w0);
dsrl32 t2,t0,0
# c[0] = ACCUM(w0);
sw t0,0(a3)
# a0 = a[2];
lwu a4,8(a0)
# a += 2;
addiu a0,a0,8
# c1 = c[1];
lwu a7,4(a3)
# w1 += cy;
mflo t1
daddu t1,t1,t2
# w1 += c1;
daddu t1,t1,a7
# w0 = (mp_word)b * a0;
dmultu a2,a4 #
# cy = CARRYOUT(w1);
dsrl32 t2,t1,0
# c[1] = ACCUM(w1);
sw t1,4(a3)
# c += 2;
addiu a3,a3,8
sltiu t3,a1,2
beq t3,zero,.M.4
# a1 = a[1];
lwu a5,4(a0)
# }
.M.3:
# c0 = c[0];
lwu a6,0(a3)
# w0 += cy;
# if (a_len) {
mflo t0
beq a1,zero,.M.5
daddu t0,t0,t2
# w1 = (mp_word)b * a1;
dmultu a2,a5
# w0 += c0;
daddu t0,t0,a6 #
# cy = CARRYOUT(w0);
dsrl32 t2,t0,0
# c[0] = ACCUM(w0);
sw t0,0(a3)
# c1 = c[1];
lwu a7,4(a3)
# w1 += cy;
mflo t1
daddu t1,t1,t2
# w1 += c1;
daddu t1,t1,a7
# c[1] = ACCUM(w1);
sw t1,4(a3)
# cy = CARRYOUT(w1);
dsrl32 t2,t1,0
# c += 1;
b .M.6
addiu a3,a3,8
# } else {
.M.5:
# w0 += c0;
daddu t0,t0,a6
# c[0] = ACCUM(w0);
sw t0,0(a3)
# cy = CARRYOUT(w0);
dsrl32 t2,t0,0
b .M.6
addiu a3,a3,4
# }
# } else {
.M.2:
# c0 = c[0];
lwu a6,0(a3)
# w0 += c0;
mflo t0
daddu t0,t0,a6
# c[0] = ACCUM(w0);
sw t0,0(a3)
# cy = CARRYOUT(w0);
dsrl32 t2,t0,0
addiu a3,a3,4
# }
.M.6:
# while (cy) {
beq t2,zero,.M.1
nop
.M.7:
# mp_word w = (mp_word)*c + cy;
lwu a6,0(a3)
daddu t2,t2,a6
# *c++ = ACCUM(w);
sw t2,0(a3)
# cy = CARRYOUT(w);
dsrl32 t2,t2,0
bne t2,zero,.M.7
addiu a3,a3,4
# }
.M.1:
jr ra
nop
#}
#
.end s_mpv_mul_d_add_prop
.ent s_mpv_mul_d
.globl s_mpv_mul_d
s_mpv_mul_d:
#/* c = a * b */
#void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b,
# mp_digit *c)
#{
# mp_digit a0, a1; regs a4, a5
# mp_digit cy = 0; reg t2
# mp_word w0, w1; regs t0, t1
#
# if (a_len) {
beq a1,zero,.N.1
move t2,zero # cy = 0
dsll32 a2,a2,0 # "b" is sometimes negative (?!?!)
dsrl32 a2,a2,0 # This clears the upper 32 bits.
# a0 = a[0];
lwu a4,0(a0)
# w0 = ((mp_word)b * a0);
dmultu a2,a4
# if (--a_len) {
addiu a1,a1,-1
beq a1,zero,.N.2
# while (a_len >= 2) {
sltiu t3,a1,2
bne t3,zero,.N.3
# a1 = a[1];
lwu a5,4(a0)
.N.4:
# a_len -= 2;
addiu a1,a1,-2
# w0 += cy;
mflo t0
daddu t0,t0,t2
# cy = CARRYOUT(w0);
dsrl32 t2,t0,0
# w1 = (mp_word)b * a1;
dmultu a2,a5
# c[0] = ACCUM(w0);
sw t0,0(a3)
# a0 = a[2];
lwu a4,8(a0)
# a += 2;
addiu a0,a0,8
# w1 += cy;
mflo t1
daddu t1,t1,t2
# cy = CARRYOUT(w1);
dsrl32 t2,t1,0
# w0 = (mp_word)b * a0;
dmultu a2,a4
# c[1] = ACCUM(w1);
sw t1,4(a3)
# c += 2;
addiu a3,a3,8
sltiu t3,a1,2
beq t3,zero,.N.4
# a1 = a[1];
lwu a5,4(a0)
# }
.N.3:
# w0 += cy;
# if (a_len) {
mflo t0
beq a1,zero,.N.5
daddu t0,t0,t2
# w1 = (mp_word)b * a1;
dmultu a2,a5 #
# cy = CARRYOUT(w0);
dsrl32 t2,t0,0
# c[0] = ACCUM(w0);
sw t0,0(a3)
# w1 += cy;
mflo t1
daddu t1,t1,t2
# c[1] = ACCUM(w1);
sw t1,4(a3)
# cy = CARRYOUT(w1);
dsrl32 t2,t1,0
# c += 1;
b .N.6
addiu a3,a3,4
# } else {
.N.5:
# c[0] = ACCUM(w0);
sw t0,0(a3)
# cy = CARRYOUT(w0);
b .N.6
dsrl32 t2,t0,0
# }
# } else {
.N.2:
mflo t0
# c[0] = ACCUM(w0);
sw t0,0(a3)
# cy = CARRYOUT(w0);
dsrl32 t2,t0,0
# }
.N.6:
# c[1] = cy;
jr ra
sw t2,4(a3)
# }
.N.1:
jr ra
nop
#}
#
.end s_mpv_mul_d
.ent s_mpv_sqr_add_prop
.globl s_mpv_sqr_add_prop
#void s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs);
# registers
# a0 *a
# a1 a_len
# a2 *sqr
# a3 digit from *a, a_i
# a4 square of digit from a
# a5,a6 next 2 digits in sqr
# a7,t0 carry
s_mpv_sqr_add_prop:
move a7,zero
move t0,zero
lwu a3,0(a0)
addiu a1,a1,-1 # --a_len
dmultu a3,a3
beq a1,zero,.P.3 # jump if we've already done the only sqr
addiu a0,a0,4 # ++a
.P.2:
lwu a5,0(a2)
lwu a6,4(a2)
addiu a2,a2,8 # sqrs += 2;
dsll32 a6,a6,0
daddu a5,a5,a6
lwu a3,0(a0)
addiu a0,a0,4 # ++a
mflo a4
daddu a6,a5,a4
sltu a7,a6,a5 # a7 = a6 < a5 detect overflow
dmultu a3,a3
daddu a4,a6,t0
sltu t0,a4,a6
add t0,t0,a7
sw a4,-8(a2)
addiu a1,a1,-1 # --a_len
dsrl32 a4,a4,0
bne a1,zero,.P.2 # loop if a_len > 0
sw a4,-4(a2)
.P.3:
lwu a5,0(a2)
lwu a6,4(a2)
addiu a2,a2,8 # sqrs += 2;
dsll32 a6,a6,0
daddu a5,a5,a6
mflo a4
daddu a6,a5,a4
sltu a7,a6,a5 # a7 = a6 < a5 detect overflow
daddu a4,a6,t0
sltu t0,a4,a6
add t0,t0,a7
sw a4,-8(a2)
beq t0,zero,.P.9 # jump if no carry
dsrl32 a4,a4,0
.P.8:
sw a4,-4(a2)
/* propagate final carry */
lwu a5,0(a2)
daddu a6,a5,t0
sltu t0,a6,a5
bne t0,zero,.P.8 # loop if carry persists
addiu a2,a2,4 # sqrs++
.P.9:
jr ra
sw a4,-4(a2)
.end s_mpv_sqr_add_prop

View File

@@ -1,361 +0,0 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is the Netscape security libraries.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2000
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
/* $Id: mpi_sparc.c,v 1.6 2004-04-27 23:04:36 gerv%gerv.net Exp $ */
/* Multiplication performance enhancements for sparc v8+vis CPUs. */
#include "mpi-priv.h"
#include <stddef.h>
#include <sys/systeminfo.h>
#include <strings.h>
/* In the functions below, */
/* vector y must be 8-byte aligned, and n must be even */
/* returns carry out of high order word of result */
/* maximum n is 256 */
/* vector x += vector y * scaler a; where y is of length n words. */
extern mp_digit mul_add_inp(mp_digit *x, const mp_digit *y, int n, mp_digit a);
/* vector z = vector x + vector y * scaler a; where y is of length n words. */
extern mp_digit mul_add(mp_digit *z, const mp_digit *x, const mp_digit *y,
int n, mp_digit a);
/* v8 versions of these functions run on any Sparc v8 CPU. */
/* This trick works on Sparc V8 CPUs with the Workshop compilers. */
#define MP_MUL_DxD(a, b, Phi, Plo) \
{ unsigned long long product = (unsigned long long)a * b; \
Plo = (mp_digit)product; \
Phi = (mp_digit)(product >> MP_DIGIT_BIT); }
/* c = a * b */
static void
v8_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
#if !defined(MP_NO_MP_WORD)
mp_digit d = 0;
/* Inner product: Digits of a */
while (a_len--) {
mp_word w = ((mp_word)b * *a++) + d;
*c++ = ACCUM(w);
d = CARRYOUT(w);
}
*c = d;
#else
mp_digit carry = 0;
while (a_len--) {
mp_digit a_i = *a++;
mp_digit a0b0, a1b1;
MP_MUL_DxD(a_i, b, a1b1, a0b0);
a0b0 += carry;
if (a0b0 < carry)
++a1b1;
*c++ = a0b0;
carry = a1b1;
}
*c = carry;
#endif
}
/* c += a * b */
static void
v8_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
#if !defined(MP_NO_MP_WORD)
mp_digit d = 0;
/* Inner product: Digits of a */
while (a_len--) {
mp_word w = ((mp_word)b * *a++) + *c + d;
*c++ = ACCUM(w);
d = CARRYOUT(w);
}
*c = d;
#else
mp_digit carry = 0;
while (a_len--) {
mp_digit a_i = *a++;
mp_digit a0b0, a1b1;
MP_MUL_DxD(a_i, b, a1b1, a0b0);
a0b0 += carry;
if (a0b0 < carry)
++a1b1;
a0b0 += a_i = *c;
if (a0b0 < a_i)
++a1b1;
*c++ = a0b0;
carry = a1b1;
}
*c = carry;
#endif
}
/* Presently, this is only used by the Montgomery arithmetic code. */
/* c += a * b */
static void
v8_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
#if !defined(MP_NO_MP_WORD)
mp_digit d = 0;
/* Inner product: Digits of a */
while (a_len--) {
mp_word w = ((mp_word)b * *a++) + *c + d;
*c++ = ACCUM(w);
d = CARRYOUT(w);
}
while (d) {
mp_word w = (mp_word)*c + d;
*c++ = ACCUM(w);
d = CARRYOUT(w);
}
#else
mp_digit carry = 0;
while (a_len--) {
mp_digit a_i = *a++;
mp_digit a0b0, a1b1;
MP_MUL_DxD(a_i, b, a1b1, a0b0);
a0b0 += carry;
if (a0b0 < carry)
++a1b1;
a0b0 += a_i = *c;
if (a0b0 < a_i)
++a1b1;
*c++ = a0b0;
carry = a1b1;
}
while (carry) {
mp_digit c_i = *c;
carry += c_i;
*c++ = carry;
carry = carry < c_i;
}
#endif
}
/* vis versions of these functions run only on v8+vis or v9+vis CPUs. */
/* c = a * b */
static void
vis_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
mp_digit d;
mp_digit x[258];
if (a_len <= 256) {
if (a == c || ((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
mp_digit * px;
px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x;
memcpy(px, a, a_len * sizeof(*a));
a = px;
if (a_len & 1) {
px[a_len] = 0;
}
}
s_mp_setz(c, a_len + 1);
d = mul_add_inp(c, a, a_len, b);
c[a_len] = d;
} else {
v8_mpv_mul_d(a, a_len, b, c);
}
}
/* c += a * b, where a is a_len words long. */
static void
vis_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
mp_digit d;
mp_digit x[258];
if (a_len <= 256) {
if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
mp_digit * px;
px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x;
memcpy(px, a, a_len * sizeof(*a));
a = px;
if (a_len & 1) {
px[a_len] = 0;
}
}
d = mul_add_inp(c, a, a_len, b);
c[a_len] = d;
} else {
v8_mpv_mul_d_add(a, a_len, b, c);
}
}
/* c += a * b, where a is y words long. */
static void
vis_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b,
mp_digit *c)
{
mp_digit d;
mp_digit x[258];
if (a_len <= 256) {
if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
mp_digit * px;
px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x;
memcpy(px, a, a_len * sizeof(*a));
a = px;
if (a_len & 1) {
px[a_len] = 0;
}
}
d = mul_add_inp(c, a, a_len, b);
if (d) {
c += a_len;
do {
mp_digit sum = d + *c;
*c++ = sum;
d = sum < d;
} while (d);
}
} else {
v8_mpv_mul_d_add_prop(a, a_len, b, c);
}
}
#if defined(SOLARIS2_5)
static int
isSparcV8PlusVis(void)
{
long buflen;
int rv = 0; /* false */
char buf[256];
buflen = sysinfo(SI_MACHINE, buf, sizeof buf);
if (buflen > 0) {
rv = (!strcmp(buf, "sun4u") || !strcmp(buf, "sun4u1"));
}
return rv;
}
#else /* SunOS2.6or higher has SI_ISALIST */
static int
isSparcV8PlusVis(void)
{
long buflen;
int rv = 0; /* false */
char buf[256];
buflen = sysinfo(SI_ISALIST, buf, sizeof buf);
if (buflen > 0) {
#if defined(MP_USE_LONG_DIGIT)
char * found = strstr(buf, "sparcv9+vis");
#else
char * found = strstr(buf, "sparcv8plus+vis");
#endif
rv = (found != 0);
}
return rv;
}
#endif
typedef void MPVmpy(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c);
/* forward static function declarations */
static MPVmpy sp_mpv_mul_d;
static MPVmpy sp_mpv_mul_d_add;
static MPVmpy sp_mpv_mul_d_add_prop;
static MPVmpy *p_mpv_mul_d = &sp_mpv_mul_d;
static MPVmpy *p_mpv_mul_d_add = &sp_mpv_mul_d_add;
static MPVmpy *p_mpv_mul_d_add_prop = &sp_mpv_mul_d_add_prop;
static void
initPtrs(void)
{
if (isSparcV8PlusVis()) {
p_mpv_mul_d = &vis_mpv_mul_d;
p_mpv_mul_d_add = &vis_mpv_mul_d_add;
p_mpv_mul_d_add_prop = &vis_mpv_mul_d_add_prop;
} else {
p_mpv_mul_d = &v8_mpv_mul_d;
p_mpv_mul_d_add = &v8_mpv_mul_d_add;
p_mpv_mul_d_add_prop = &v8_mpv_mul_d_add_prop;
}
}
static void
sp_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
initPtrs();
(* p_mpv_mul_d)(a, a_len, b, c);
}
static void
sp_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
initPtrs();
(* p_mpv_mul_d_add)(a, a_len, b, c);
}
static void
sp_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
initPtrs();
(* p_mpv_mul_d_add_prop)(a, a_len, b, c);
}
/* This is the external interface */
void
s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
(* p_mpv_mul_d)(a, a_len, b, c);
}
void
s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
(* p_mpv_mul_d_add)(a, a_len, b, c);
}
void
s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
(* p_mpv_mul_d_add_prop)(a, a_len, b, c);
}

View File

@@ -1,356 +0,0 @@
;
; mpi_x86.asm - assembly language implementation of s_mpv_ functions.
;
; ***** BEGIN LICENSE BLOCK *****
; Version: MPL 1.1/GPL 2.0/LGPL 2.1
;
; The contents of this file are subject to the Mozilla Public License Version
; 1.1 (the "License"); you may not use this file except in compliance with
; the License. You may obtain a copy of the License at
; http://www.mozilla.org/MPL/
;
; Software distributed under the License is distributed on an "AS IS" basis,
; WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
; for the specific language governing rights and limitations under the
; License.
;
; The Original Code is the Netscape security libraries.
;
; The Initial Developer of the Original Code is
; Netscape Communications Corporation.
; Portions created by the Initial Developer are Copyright (C) 2000
; the Initial Developer. All Rights Reserved.
;
; Contributor(s):
;
; Alternatively, the contents of this file may be used under the terms of
; either the GNU General Public License Version 2 or later (the "GPL"), or
; the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
; in which case the provisions of the GPL or the LGPL are applicable instead
; of those above. If you wish to allow use of your version of this file only
; under the terms of either the GPL or the LGPL, and not to allow others to
; use your version of this file under the terms of the MPL, indicate your
; decision by deleting the provisions above and replace them with the notice
; and other provisions required by the GPL or the LGPL. If you do not delete
; the provisions above, a recipient may use your version of this file under
; the terms of any one of the MPL, the GPL or the LGPL.
;
; ***** END LICENSE BLOCK *****
; $Id: mpi_x86.asm,v 1.2 2005-02-02 22:28:22 gerv%gerv.net Exp $
.386p
.MODEL FLAT
ASSUME CS: FLAT, DS: FLAT, SS: FLAT
_TEXT SEGMENT
; ebp - 36: caller's esi
; ebp - 32: caller's edi
; ebp - 28:
; ebp - 24:
; ebp - 20:
; ebp - 16:
; ebp - 12:
; ebp - 8:
; ebp - 4:
; ebp + 0: caller's ebp
; ebp + 4: return address
; ebp + 8: a argument
; ebp + 12: a_len argument
; ebp + 16: b argument
; ebp + 20: c argument
; registers:
; eax:
; ebx: carry
; ecx: a_len
; edx:
; esi: a ptr
; edi: c ptr
public _s_mpv_mul_d
_s_mpv_mul_d PROC NEAR
push ebp
mov ebp,esp
sub esp,28
push edi
push esi
push ebx
mov ebx,0 ; carry = 0
mov ecx,[ebp+12] ; ecx = a_len
mov edi,[ebp+20]
cmp ecx,0
je L_2 ; jmp if a_len == 0
mov esi,[ebp+8] ; esi = a
cld
L_1:
lodsd ; eax = [ds:esi]; esi += 4
mov edx,[ebp+16] ; edx = b
mul edx ; edx:eax = Phi:Plo = a_i * b
add eax,ebx ; add carry (ebx) to edx:eax
adc edx,0
mov ebx,edx ; high half of product becomes next carry
stosd ; [es:edi] = ax; edi += 4;
dec ecx ; --a_len
jnz L_1 ; jmp if a_len != 0
L_2:
mov [edi],ebx ; *c = carry
pop ebx
pop esi
pop edi
leave
ret
nop
_s_mpv_mul_d ENDP
; ebp - 36: caller's esi
; ebp - 32: caller's edi
; ebp - 28:
; ebp - 24:
; ebp - 20:
; ebp - 16:
; ebp - 12:
; ebp - 8:
; ebp - 4:
; ebp + 0: caller's ebp
; ebp + 4: return address
; ebp + 8: a argument
; ebp + 12: a_len argument
; ebp + 16: b argument
; ebp + 20: c argument
; registers:
; eax:
; ebx: carry
; ecx: a_len
; edx:
; esi: a ptr
; edi: c ptr
public _s_mpv_mul_d_add
_s_mpv_mul_d_add PROC NEAR
push ebp
mov ebp,esp
sub esp,28
push edi
push esi
push ebx
mov ebx,0 ; carry = 0
mov ecx,[ebp+12] ; ecx = a_len
mov edi,[ebp+20]
cmp ecx,0
je L_4 ; jmp if a_len == 0
mov esi,[ebp+8] ; esi = a
cld
L_3:
lodsd ; eax = [ds:esi]; esi += 4
mov edx,[ebp+16] ; edx = b
mul edx ; edx:eax = Phi:Plo = a_i * b
add eax,ebx ; add carry (ebx) to edx:eax
adc edx,0
mov ebx,[edi] ; add in current word from *c
add eax,ebx
adc edx,0
mov ebx,edx ; high half of product becomes next carry
stosd ; [es:edi] = ax; edi += 4;
dec ecx ; --a_len
jnz L_3 ; jmp if a_len != 0
L_4:
mov [edi],ebx ; *c = carry
pop ebx
pop esi
pop edi
leave
ret
nop
_s_mpv_mul_d_add ENDP
; ebp - 36: caller's esi
; ebp - 32: caller's edi
; ebp - 28:
; ebp - 24:
; ebp - 20:
; ebp - 16:
; ebp - 12:
; ebp - 8:
; ebp - 4:
; ebp + 0: caller's ebp
; ebp + 4: return address
; ebp + 8: a argument
; ebp + 12: a_len argument
; ebp + 16: b argument
; ebp + 20: c argument
; registers:
; eax:
; ebx: carry
; ecx: a_len
; edx:
; esi: a ptr
; edi: c ptr
public _s_mpv_mul_d_add_prop
_s_mpv_mul_d_add_prop PROC NEAR
push ebp
mov ebp,esp
sub esp,28
push edi
push esi
push ebx
mov ebx,0 ; carry = 0
mov ecx,[ebp+12] ; ecx = a_len
mov edi,[ebp+20]
cmp ecx,0
je L_6 ; jmp if a_len == 0
cld
mov esi,[ebp+8] ; esi = a
L_5:
lodsd ; eax = [ds:esi]; esi += 4
mov edx,[ebp+16] ; edx = b
mul edx ; edx:eax = Phi:Plo = a_i * b
add eax,ebx ; add carry (ebx) to edx:eax
adc edx,0
mov ebx,[edi] ; add in current word from *c
add eax,ebx
adc edx,0
mov ebx,edx ; high half of product becomes next carry
stosd ; [es:edi] = ax; edi += 4;
dec ecx ; --a_len
jnz L_5 ; jmp if a_len != 0
L_6:
cmp ebx,0 ; is carry zero?
jz L_8
mov eax,[edi] ; add in current word from *c
add eax,ebx
stosd ; [es:edi] = ax; edi += 4;
jnc L_8
L_7:
mov eax,[edi] ; add in current word from *c
adc eax,0
stosd ; [es:edi] = ax; edi += 4;
jc L_7
L_8:
pop ebx
pop esi
pop edi
leave
ret
nop
_s_mpv_mul_d_add_prop ENDP
; ebp - 20: caller's esi
; ebp - 16: caller's edi
; ebp - 12:
; ebp - 8: carry
; ebp - 4: a_len local
; ebp + 0: caller's ebp
; ebp + 4: return address
; ebp + 8: pa argument
; ebp + 12: a_len argument
; ebp + 16: ps argument
; ebp + 20:
; registers:
; eax:
; ebx: carry
; ecx: a_len
; edx:
; esi: a ptr
; edi: c ptr
public _s_mpv_sqr_add_prop
_s_mpv_sqr_add_prop PROC NEAR
push ebp
mov ebp,esp
sub esp,12
push edi
push esi
push ebx
mov ebx,0 ; carry = 0
mov ecx,[ebp+12] ; a_len
mov edi,[ebp+16] ; edi = ps
cmp ecx,0
je L_11 ; jump if a_len == 0
cld
mov esi,[ebp+8] ; esi = pa
L_10:
lodsd ; eax = [ds:si]; si += 4;
mul eax
add eax,ebx ; add "carry"
adc edx,0
mov ebx,[edi]
add eax,ebx ; add low word from result
mov ebx,[edi+4]
stosd ; [es:di] = eax; di += 4;
adc edx,ebx ; add high word from result
mov ebx,0
mov eax,edx
adc ebx,0
stosd ; [es:di] = eax; di += 4;
dec ecx ; --a_len
jnz L_10 ; jmp if a_len != 0
L_11:
cmp ebx,0 ; is carry zero?
jz L_14
mov eax,[edi] ; add in current word from *c
add eax,ebx
stosd ; [es:edi] = ax; edi += 4;
jnc L_14
L_12:
mov eax,[edi] ; add in current word from *c
adc eax,0
stosd ; [es:edi] = ax; edi += 4;
jc L_12
L_14:
pop ebx
pop esi
pop edi
leave
ret
nop
_s_mpv_sqr_add_prop ENDP
;
; Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
; so its high bit is 1. This code is from NSPR.
;
; mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
; mp_digit *qp, mp_digit *rp)
; Dump of assembler code for function s_mpv_div_2dx1d:
;
; esp + 0: Caller's ebx
; esp + 4: return address
; esp + 8: Nhi argument
; esp + 12: Nlo argument
; esp + 16: divisor argument
; esp + 20: qp argument
; esp + 24: rp argument
; registers:
; eax:
; ebx: carry
; ecx: a_len
; edx:
; esi: a ptr
; edi: c ptr
;
public _s_mpv_div_2dx1d
_s_mpv_div_2dx1d PROC NEAR
push ebx
mov edx,[esp+8]
mov eax,[esp+12]
mov ebx,[esp+16]
div ebx
mov ebx,[esp+20]
mov [ebx],eax
mov ebx,[esp+24]
mov [ebx],edx
xor eax,eax ; return zero
pop ebx
ret
nop
_s_mpv_div_2dx1d ENDP
_TEXT ENDS
END

View File

@@ -1,345 +0,0 @@
#
# The contents of this file are subject to the Mozilla Public
# License Version 1.1 (the "License"); you may not use this file
# except in compliance with the License. You may obtain a copy of
# the License at http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS
# IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
# implied. See the License for the specific language governing
# rights and limitations under the License.
#
# The Original Code is the Netscape security libraries.
#
# The Initial Developer of the Original Code is Netscape
# Communications Corporation. Portions created by Netscape are
# Copyright (C) 2000 Netscape Communications Corporation. All
# Rights Reserved.
#
# Contributor(s):
#
# Alternatively, the contents of this file may be used under the
# terms of the GNU General Public License Version 2 or later (the
# "GPL"), in which case the provisions of the GPL are applicable
# instead of those above. If you wish to allow use of your
# version of this file only under the terms of the GPL and not to
# allow others to use your version of this file under the MPL,
# indicate your decision by deleting the provisions above and
# replace them with the notice and other provisions required by
# the GPL. If you do not delete the provisions above, a recipient
# may use your version of this file under either the MPL or the
# GPL.
# $Id: mpi_x86.s,v 1.4 2003-10-24 04:47:23 wchang0222%aol.com Exp $
#
.text
# ebp - 36: caller's esi
# ebp - 32: caller's edi
# ebp - 28:
# ebp - 24:
# ebp - 20:
# ebp - 16:
# ebp - 12:
# ebp - 8:
# ebp - 4:
# ebp + 0: caller's ebp
# ebp + 4: return address
# ebp + 8: a argument
# ebp + 12: a_len argument
# ebp + 16: b argument
# ebp + 20: c argument
# registers:
# eax:
# ebx: carry
# ecx: a_len
# edx:
# esi: a ptr
# edi: c ptr
.globl s_mpv_mul_d
.type s_mpv_mul_d,@function
s_mpv_mul_d:
push %ebp
mov %esp,%ebp
sub $28,%esp
push %edi
push %esi
push %ebx
movl $0,%ebx # carry = 0
mov 12(%ebp),%ecx # ecx = a_len
mov 20(%ebp),%edi
cmp $0,%ecx
je 2f # jmp if a_len == 0
mov 8(%ebp),%esi # esi = a
cld
1:
lodsl # eax = [ds:esi]; esi += 4
mov 16(%ebp),%edx # edx = b
mull %edx # edx:eax = Phi:Plo = a_i * b
add %ebx,%eax # add carry (%ebx) to edx:eax
adc $0,%edx
mov %edx,%ebx # high half of product becomes next carry
stosl # [es:edi] = ax; edi += 4;
dec %ecx # --a_len
jnz 1b # jmp if a_len != 0
2:
mov %ebx,0(%edi) # *c = carry
pop %ebx
pop %esi
pop %edi
leave
ret
nop
# ebp - 36: caller's esi
# ebp - 32: caller's edi
# ebp - 28:
# ebp - 24:
# ebp - 20:
# ebp - 16:
# ebp - 12:
# ebp - 8:
# ebp - 4:
# ebp + 0: caller's ebp
# ebp + 4: return address
# ebp + 8: a argument
# ebp + 12: a_len argument
# ebp + 16: b argument
# ebp + 20: c argument
# registers:
# eax:
# ebx: carry
# ecx: a_len
# edx:
# esi: a ptr
# edi: c ptr
.globl s_mpv_mul_d_add
.type s_mpv_mul_d_add,@function
s_mpv_mul_d_add:
push %ebp
mov %esp,%ebp
sub $28,%esp
push %edi
push %esi
push %ebx
movl $0,%ebx # carry = 0
mov 12(%ebp),%ecx # ecx = a_len
mov 20(%ebp),%edi
cmp $0,%ecx
je 4f # jmp if a_len == 0
mov 8(%ebp),%esi # esi = a
cld
3:
lodsl # eax = [ds:esi]; esi += 4
mov 16(%ebp),%edx # edx = b
mull %edx # edx:eax = Phi:Plo = a_i * b
add %ebx,%eax # add carry (%ebx) to edx:eax
adc $0,%edx
mov 0(%edi),%ebx # add in current word from *c
add %ebx,%eax
adc $0,%edx
mov %edx,%ebx # high half of product becomes next carry
stosl # [es:edi] = ax; edi += 4;
dec %ecx # --a_len
jnz 3b # jmp if a_len != 0
4:
mov %ebx,0(%edi) # *c = carry
pop %ebx
pop %esi
pop %edi
leave
ret
nop
# ebp - 36: caller's esi
# ebp - 32: caller's edi
# ebp - 28:
# ebp - 24:
# ebp - 20:
# ebp - 16:
# ebp - 12:
# ebp - 8:
# ebp - 4:
# ebp + 0: caller's ebp
# ebp + 4: return address
# ebp + 8: a argument
# ebp + 12: a_len argument
# ebp + 16: b argument
# ebp + 20: c argument
# registers:
# eax:
# ebx: carry
# ecx: a_len
# edx:
# esi: a ptr
# edi: c ptr
.globl s_mpv_mul_d_add_prop
.type s_mpv_mul_d_add_prop,@function
s_mpv_mul_d_add_prop:
push %ebp
mov %esp,%ebp
sub $28,%esp
push %edi
push %esi
push %ebx
movl $0,%ebx # carry = 0
mov 12(%ebp),%ecx # ecx = a_len
mov 20(%ebp),%edi
cmp $0,%ecx
je 6f # jmp if a_len == 0
cld
mov 8(%ebp),%esi # esi = a
5:
lodsl # eax = [ds:esi]; esi += 4
mov 16(%ebp),%edx # edx = b
mull %edx # edx:eax = Phi:Plo = a_i * b
add %ebx,%eax # add carry (%ebx) to edx:eax
adc $0,%edx
mov 0(%edi),%ebx # add in current word from *c
add %ebx,%eax
adc $0,%edx
mov %edx,%ebx # high half of product becomes next carry
stosl # [es:edi] = ax; edi += 4;
dec %ecx # --a_len
jnz 5b # jmp if a_len != 0
6:
cmp $0,%ebx # is carry zero?
jz 8f
mov 0(%edi),%eax # add in current word from *c
add %ebx,%eax
stosl # [es:edi] = ax; edi += 4;
jnc 8f
7:
mov 0(%edi),%eax # add in current word from *c
adc $0,%eax
stosl # [es:edi] = ax; edi += 4;
jc 7b
8:
pop %ebx
pop %esi
pop %edi
leave
ret
nop
# ebp - 20: caller's esi
# ebp - 16: caller's edi
# ebp - 12:
# ebp - 8: carry
# ebp - 4: a_len local
# ebp + 0: caller's ebp
# ebp + 4: return address
# ebp + 8: pa argument
# ebp + 12: a_len argument
# ebp + 16: ps argument
# ebp + 20:
# registers:
# eax:
# ebx: carry
# ecx: a_len
# edx:
# esi: a ptr
# edi: c ptr
.globl s_mpv_sqr_add_prop
.type s_mpv_sqr_add_prop,@function
s_mpv_sqr_add_prop:
push %ebp
mov %esp,%ebp
sub $12,%esp
push %edi
push %esi
push %ebx
movl $0,%ebx # carry = 0
mov 12(%ebp),%ecx # a_len
mov 16(%ebp),%edi # edi = ps
cmp $0,%ecx
je 11f # jump if a_len == 0
cld
mov 8(%ebp),%esi # esi = pa
10:
lodsl # %eax = [ds:si]; si += 4;
mull %eax
add %ebx,%eax # add "carry"
adc $0,%edx
mov 0(%edi),%ebx
add %ebx,%eax # add low word from result
mov 4(%edi),%ebx
stosl # [es:di] = %eax; di += 4;
adc %ebx,%edx # add high word from result
movl $0,%ebx
mov %edx,%eax
adc $0,%ebx
stosl # [es:di] = %eax; di += 4;
dec %ecx # --a_len
jnz 10b # jmp if a_len != 0
11:
cmp $0,%ebx # is carry zero?
jz 14f
mov 0(%edi),%eax # add in current word from *c
add %ebx,%eax
stosl # [es:edi] = ax; edi += 4;
jnc 14f
12:
mov 0(%edi),%eax # add in current word from *c
adc $0,%eax
stosl # [es:edi] = ax; edi += 4;
jc 12b
14:
pop %ebx
pop %esi
pop %edi
leave
ret
nop
#
# Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
# so its high bit is 1. This code is from NSPR.
#
# mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
# mp_digit *qp, mp_digit *rp)
# esp + 0: Caller's ebx
# esp + 4: return address
# esp + 8: Nhi argument
# esp + 12: Nlo argument
# esp + 16: divisor argument
# esp + 20: qp argument
# esp + 24: rp argument
# registers:
# eax:
# ebx: carry
# ecx: a_len
# edx:
# esi: a ptr
# edi: c ptr
#
.globl s_mpv_div_2dx1d
.type s_mpv_div_2dx1d,@function
s_mpv_div_2dx1d:
push %ebx
mov 8(%esp),%edx
mov 12(%esp),%eax
mov 16(%esp),%ebx
div %ebx
mov 20(%esp),%ebx
mov %eax,0(%ebx)
mov 24(%esp),%ebx
mov %edx,0(%ebx)
xor %eax,%eax # return zero
pop %ebx
ret
nop
# Magic indicating no need for an executable stack
.section .note.GNU-stack, "", @progbits
.previous

View File

@@ -1,465 +0,0 @@
/*
* mplogic.c
*
* Bitwise logical operations on MPI values
*
* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
*
* The Initial Developer of the Original Code is
* Michael J. Fromberger.
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
/* $Id: mplogic.c,v 1.15 2004-04-27 23:04:36 gerv%gerv.net Exp $ */
#include "mpi-priv.h"
#include "mplogic.h"
/* {{{ Lookup table for population count */
static unsigned char bitc[] = {
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
};
/* }}} */
/*------------------------------------------------------------------------*/
/*
mpl_not(a, b) - compute b = ~a
mpl_and(a, b, c) - compute c = a & b
mpl_or(a, b, c) - compute c = a | b
mpl_xor(a, b, c) - compute c = a ^ b
*/
/* {{{ mpl_not(a, b) */
mp_err mpl_not(mp_int *a, mp_int *b)
{
mp_err res;
unsigned int ix;
ARGCHK(a != NULL && b != NULL, MP_BADARG);
if((res = mp_copy(a, b)) != MP_OKAY)
return res;
/* This relies on the fact that the digit type is unsigned */
for(ix = 0; ix < USED(b); ix++)
DIGIT(b, ix) = ~DIGIT(b, ix);
s_mp_clamp(b);
return MP_OKAY;
} /* end mpl_not() */
/* }}} */
/* {{{ mpl_and(a, b, c) */
mp_err mpl_and(mp_int *a, mp_int *b, mp_int *c)
{
mp_int *which, *other;
mp_err res;
unsigned int ix;
ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
if(USED(a) <= USED(b)) {
which = a;
other = b;
} else {
which = b;
other = a;
}
if((res = mp_copy(which, c)) != MP_OKAY)
return res;
for(ix = 0; ix < USED(which); ix++)
DIGIT(c, ix) &= DIGIT(other, ix);
s_mp_clamp(c);
return MP_OKAY;
} /* end mpl_and() */
/* }}} */
/* {{{ mpl_or(a, b, c) */
mp_err mpl_or(mp_int *a, mp_int *b, mp_int *c)
{
mp_int *which, *other;
mp_err res;
unsigned int ix;
ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
if(USED(a) >= USED(b)) {
which = a;
other = b;
} else {
which = b;
other = a;
}
if((res = mp_copy(which, c)) != MP_OKAY)
return res;
for(ix = 0; ix < USED(which); ix++)
DIGIT(c, ix) |= DIGIT(other, ix);
return MP_OKAY;
} /* end mpl_or() */
/* }}} */
/* {{{ mpl_xor(a, b, c) */
mp_err mpl_xor(mp_int *a, mp_int *b, mp_int *c)
{
mp_int *which, *other;
mp_err res;
unsigned int ix;
ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
if(USED(a) >= USED(b)) {
which = a;
other = b;
} else {
which = b;
other = a;
}
if((res = mp_copy(which, c)) != MP_OKAY)
return res;
for(ix = 0; ix < USED(which); ix++)
DIGIT(c, ix) ^= DIGIT(other, ix);
s_mp_clamp(c);
return MP_OKAY;
} /* end mpl_xor() */
/* }}} */
/*------------------------------------------------------------------------*/
/*
mpl_rsh(a, b, d) - b = a >> d
mpl_lsh(a, b, d) - b = a << d
*/
/* {{{ mpl_rsh(a, b, d) */
mp_err mpl_rsh(const mp_int *a, mp_int *b, mp_digit d)
{
mp_err res;
ARGCHK(a != NULL && b != NULL, MP_BADARG);
if((res = mp_copy(a, b)) != MP_OKAY)
return res;
s_mp_div_2d(b, d);
return MP_OKAY;
} /* end mpl_rsh() */
/* }}} */
/* {{{ mpl_lsh(a, b, d) */
mp_err mpl_lsh(const mp_int *a, mp_int *b, mp_digit d)
{
mp_err res;
ARGCHK(a != NULL && b != NULL, MP_BADARG);
if((res = mp_copy(a, b)) != MP_OKAY)
return res;
return s_mp_mul_2d(b, d);
} /* end mpl_lsh() */
/* }}} */
/*------------------------------------------------------------------------*/
/*
mpl_num_set(a, num)
Count the number of set bits in the binary representation of a.
Returns MP_OKAY and sets 'num' to be the number of such bits, if
possible. If num is NULL, the result is thrown away, but it is
not considered an error.
mpl_num_clear() does basically the same thing for clear bits.
*/
/* {{{ mpl_num_set(a, num) */
mp_err mpl_num_set(mp_int *a, int *num)
{
unsigned int ix;
int db, nset = 0;
mp_digit cur;
unsigned char reg;
ARGCHK(a != NULL, MP_BADARG);
for(ix = 0; ix < USED(a); ix++) {
cur = DIGIT(a, ix);
for(db = 0; db < sizeof(mp_digit); db++) {
reg = (unsigned char)(cur >> (CHAR_BIT * db));
nset += bitc[reg];
}
}
if(num)
*num = nset;
return MP_OKAY;
} /* end mpl_num_set() */
/* }}} */
/* {{{ mpl_num_clear(a, num) */
mp_err mpl_num_clear(mp_int *a, int *num)
{
unsigned int ix;
int db, nset = 0;
mp_digit cur;
unsigned char reg;
ARGCHK(a != NULL, MP_BADARG);
for(ix = 0; ix < USED(a); ix++) {
cur = DIGIT(a, ix);
for(db = 0; db < sizeof(mp_digit); db++) {
reg = (unsigned char)(cur >> (CHAR_BIT * db));
nset += bitc[UCHAR_MAX - reg];
}
}
if(num)
*num = nset;
return MP_OKAY;
} /* end mpl_num_clear() */
/* }}} */
/*------------------------------------------------------------------------*/
/*
mpl_parity(a)
Determines the bitwise parity of the value given. Returns MP_EVEN
if an even number of digits are set, MP_ODD if an odd number are
set.
*/
/* {{{ mpl_parity(a) */
mp_err mpl_parity(mp_int *a)
{
unsigned int ix;
int par = 0;
mp_digit cur;
ARGCHK(a != NULL, MP_BADARG);
for(ix = 0; ix < USED(a); ix++) {
int shft = (sizeof(mp_digit) * CHAR_BIT) / 2;
cur = DIGIT(a, ix);
/* Compute parity for current digit */
while(shft != 0) {
cur ^= (cur >> shft);
shft >>= 1;
}
cur &= 1;
/* XOR with running parity so far */
par ^= cur;
}
if(par)
return MP_ODD;
else
return MP_EVEN;
} /* end mpl_parity() */
/* }}} */
/*
mpl_set_bit
Returns MP_OKAY or some error code.
Grows a if needed to set a bit to 1.
*/
mp_err mpl_set_bit(mp_int *a, mp_size bitNum, mp_size value)
{
mp_size ix;
mp_err rv;
mp_digit mask;
ARGCHK(a != NULL, MP_BADARG);
ix = bitNum / MP_DIGIT_BIT;
if (ix + 1 > MP_USED(a)) {
rv = s_mp_pad(a, ix + 1);
if (rv != MP_OKAY)
return rv;
}
bitNum = bitNum % MP_DIGIT_BIT;
mask = (mp_digit)1 << bitNum;
if (value)
MP_DIGIT(a,ix) |= mask;
else
MP_DIGIT(a,ix) &= ~mask;
s_mp_clamp(a);
return MP_OKAY;
}
/*
mpl_get_bit
returns 0 or 1 or some (negative) error code.
*/
mp_err mpl_get_bit(const mp_int *a, mp_size bitNum)
{
mp_size bit, ix;
mp_err rv;
ARGCHK(a != NULL, MP_BADARG);
ix = bitNum / MP_DIGIT_BIT;
ARGCHK(ix <= MP_USED(a) - 1, MP_RANGE);
bit = bitNum % MP_DIGIT_BIT;
rv = (mp_err)(MP_DIGIT(a, ix) >> bit) & 1;
return rv;
}
/*
mpl_get_bits
- Extracts numBits bits from a, where the least significant extracted bit
is bit lsbNum. Returns a negative value if error occurs.
- Because sign bit is used to indicate error, maximum number of bits to
be returned is the lesser of (a) the number of bits in an mp_digit, or
(b) one less than the number of bits in an mp_err.
- lsbNum + numbits can be greater than the number of significant bits in
integer a, as long as bit lsbNum is in the high order digit of a.
*/
mp_err mpl_get_bits(const mp_int *a, mp_size lsbNum, mp_size numBits)
{
mp_size rshift = (lsbNum % MP_DIGIT_BIT);
mp_size lsWndx = (lsbNum / MP_DIGIT_BIT);
mp_digit * digit = MP_DIGITS(a) + lsWndx;
mp_digit mask = ((1 << numBits) - 1);
ARGCHK(numBits < CHAR_BIT * sizeof mask, MP_BADARG);
ARGCHK(MP_HOWMANY(lsbNum, MP_DIGIT_BIT) <= MP_USED(a), MP_RANGE);
if ((numBits + lsbNum % MP_DIGIT_BIT <= MP_DIGIT_BIT) ||
(lsWndx + 1 >= MP_USED(a))) {
mask &= (digit[0] >> rshift);
} else {
mask &= ((digit[0] >> rshift) | (digit[1] << (MP_DIGIT_BIT - rshift)));
}
return (mp_err)mask;
}
/*
mpl_significant_bits
returns number of significnant bits in abs(a).
returns 1 if value is zero.
*/
mp_err mpl_significant_bits(const mp_int *a)
{
mp_err bits = 0;
int ix;
ARGCHK(a != NULL, MP_BADARG);
ix = MP_USED(a);
for (ix = MP_USED(a); ix > 0; ) {
mp_digit d;
d = MP_DIGIT(a, --ix);
if (d) {
while (d) {
++bits;
d >>= 1;
}
break;
}
}
bits += ix * MP_DIGIT_BIT;
if (!bits)
bits = 1;
return bits;
}
/*------------------------------------------------------------------------*/
/* HERE THERE BE DRAGONS */

View File

@@ -1,85 +0,0 @@
/*
* mplogic.h
*
* Bitwise logical operations on MPI values
*
* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
*
* The Initial Developer of the Original Code is
* Michael J. Fromberger.
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
/* $Id: mplogic.h,v 1.7 2004-04-27 23:04:36 gerv%gerv.net Exp $ */
#ifndef _H_MPLOGIC_
#define _H_MPLOGIC_
#include "mpi.h"
/*
The logical operations treat an mp_int as if it were a bit vector,
without regard to its sign (an mp_int is represented in a signed
magnitude format). Values are treated as if they had an infinite
string of zeros left of the most-significant bit.
*/
/* Parity results */
#define MP_EVEN MP_YES
#define MP_ODD MP_NO
/* Bitwise functions */
mp_err mpl_not(mp_int *a, mp_int *b); /* one's complement */
mp_err mpl_and(mp_int *a, mp_int *b, mp_int *c); /* bitwise AND */
mp_err mpl_or(mp_int *a, mp_int *b, mp_int *c); /* bitwise OR */
mp_err mpl_xor(mp_int *a, mp_int *b, mp_int *c); /* bitwise XOR */
/* Shift functions */
mp_err mpl_rsh(const mp_int *a, mp_int *b, mp_digit d); /* right shift */
mp_err mpl_lsh(const mp_int *a, mp_int *b, mp_digit d); /* left shift */
/* Bit count and parity */
mp_err mpl_num_set(mp_int *a, int *num); /* count set bits */
mp_err mpl_num_clear(mp_int *a, int *num); /* count clear bits */
mp_err mpl_parity(mp_int *a); /* determine parity */
/* Get & Set the value of a bit */
mp_err mpl_set_bit(mp_int *a, mp_size bitNum, mp_size value);
mp_err mpl_get_bit(const mp_int *a, mp_size bitNum);
mp_err mpl_get_bits(const mp_int *a, mp_size lsbNum, mp_size numBits);
mp_err mpl_significant_bits(const mp_int *a);
#endif /* end _H_MPLOGIC_ */

File diff suppressed because it is too large Load Diff

View File

@@ -1,626 +0,0 @@
/*
* mpprime.c
*
* Utilities for finding and working with prime and pseudo-prime
* integers
*
* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
*
* The Initial Developer of the Original Code is
* Michael J. Fromberger.
* Portions created by the Initial Developer are Copyright (C) 1997
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Netscape Communications Corporation
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include "mpi-priv.h"
#include "mpprime.h"
#include "mplogic.h"
#include <stdlib.h>
#include <string.h>
#define SMALL_TABLE 0 /* determines size of hard-wired prime table */
#define RANDOM() rand()
#include "primes.c" /* pull in the prime digit table */
/*
Test if any of a given vector of digits divides a. If not, MP_NO
is returned; otherwise, MP_YES is returned and 'which' is set to
the index of the integer in the vector which divided a.
*/
mp_err s_mpp_divp(mp_int *a, const mp_digit *vec, int size, int *which);
/* {{{ mpp_divis(a, b) */
/*
mpp_divis(a, b)
Returns MP_YES if a is divisible by b, or MP_NO if it is not.
*/
mp_err mpp_divis(mp_int *a, mp_int *b)
{
mp_err res;
mp_int rem;
if((res = mp_init(&rem)) != MP_OKAY)
return res;
if((res = mp_mod(a, b, &rem)) != MP_OKAY)
goto CLEANUP;
if(mp_cmp_z(&rem) == 0)
res = MP_YES;
else
res = MP_NO;
CLEANUP:
mp_clear(&rem);
return res;
} /* end mpp_divis() */
/* }}} */
/* {{{ mpp_divis_d(a, d) */
/*
mpp_divis_d(a, d)
Return MP_YES if a is divisible by d, or MP_NO if it is not.
*/
mp_err mpp_divis_d(mp_int *a, mp_digit d)
{
mp_err res;
mp_digit rem;
ARGCHK(a != NULL, MP_BADARG);
if(d == 0)
return MP_NO;
if((res = mp_mod_d(a, d, &rem)) != MP_OKAY)
return res;
if(rem == 0)
return MP_YES;
else
return MP_NO;
} /* end mpp_divis_d() */
/* }}} */
/* {{{ mpp_random(a) */
/*
mpp_random(a)
Assigns a random value to a. This value is generated using the
standard C library's rand() function, so it should not be used for
cryptographic purposes, but it should be fine for primality testing,
since all we really care about there is good statistical properties.
As many digits as a currently has are filled with random digits.
*/
mp_err mpp_random(mp_int *a)
{
mp_digit next = 0;
unsigned int ix, jx;
ARGCHK(a != NULL, MP_BADARG);
for(ix = 0; ix < USED(a); ix++) {
for(jx = 0; jx < sizeof(mp_digit); jx++) {
next = (next << CHAR_BIT) | (RANDOM() & UCHAR_MAX);
}
DIGIT(a, ix) = next;
}
return MP_OKAY;
} /* end mpp_random() */
/* }}} */
/* {{{ mpp_random_size(a, prec) */
mp_err mpp_random_size(mp_int *a, mp_size prec)
{
mp_err res;
ARGCHK(a != NULL && prec > 0, MP_BADARG);
if((res = s_mp_pad(a, prec)) != MP_OKAY)
return res;
return mpp_random(a);
} /* end mpp_random_size() */
/* }}} */
/* {{{ mpp_divis_vector(a, vec, size, which) */
/*
mpp_divis_vector(a, vec, size, which)
Determines if a is divisible by any of the 'size' digits in vec.
Returns MP_YES and sets 'which' to the index of the offending digit,
if it is; returns MP_NO if it is not.
*/
mp_err mpp_divis_vector(mp_int *a, const mp_digit *vec, int size, int *which)
{
ARGCHK(a != NULL && vec != NULL && size > 0, MP_BADARG);
return s_mpp_divp(a, vec, size, which);
} /* end mpp_divis_vector() */
/* }}} */
/* {{{ mpp_divis_primes(a, np) */
/*
mpp_divis_primes(a, np)
Test whether a is divisible by any of the first 'np' primes. If it
is, returns MP_YES and sets *np to the value of the digit that did
it. If not, returns MP_NO.
*/
mp_err mpp_divis_primes(mp_int *a, mp_digit *np)
{
int size, which;
mp_err res;
ARGCHK(a != NULL && np != NULL, MP_BADARG);
size = (int)*np;
if(size > prime_tab_size)
size = prime_tab_size;
res = mpp_divis_vector(a, prime_tab, size, &which);
if(res == MP_YES)
*np = prime_tab[which];
return res;
} /* end mpp_divis_primes() */
/* }}} */
/* {{{ mpp_fermat(a, w) */
/*
Using w as a witness, try pseudo-primality testing based on Fermat's
little theorem. If a is prime, and (w, a) = 1, then w^a == w (mod
a). So, we compute z = w^a (mod a) and compare z to w; if they are
equal, the test passes and we return MP_YES. Otherwise, we return
MP_NO.
*/
mp_err mpp_fermat(mp_int *a, mp_digit w)
{
mp_int base, test;
mp_err res;
if((res = mp_init(&base)) != MP_OKAY)
return res;
mp_set(&base, w);
if((res = mp_init(&test)) != MP_OKAY)
goto TEST;
/* Compute test = base^a (mod a) */
if((res = mp_exptmod(&base, a, a, &test)) != MP_OKAY)
goto CLEANUP;
if(mp_cmp(&base, &test) == 0)
res = MP_YES;
else
res = MP_NO;
CLEANUP:
mp_clear(&test);
TEST:
mp_clear(&base);
return res;
} /* end mpp_fermat() */
/* }}} */
/*
Perform the fermat test on each of the primes in a list until
a) one of them shows a is not prime, or
b) the list is exhausted.
Returns: MP_YES if it passes tests.
MP_NO if fermat test reveals it is composite
Some MP error code if some other error occurs.
*/
mp_err mpp_fermat_list(mp_int *a, const mp_digit *primes, mp_size nPrimes)
{
mp_err rv = MP_YES;
while (nPrimes-- > 0 && rv == MP_YES) {
rv = mpp_fermat(a, *primes++);
}
return rv;
}
/* {{{ mpp_pprime(a, nt) */
/*
mpp_pprime(a, nt)
Performs nt iteration of the Miller-Rabin probabilistic primality
test on a. Returns MP_YES if the tests pass, MP_NO if one fails.
If MP_NO is returned, the number is definitely composite. If MP_YES
is returned, it is probably prime (but that is not guaranteed).
*/
mp_err mpp_pprime(mp_int *a, int nt)
{
mp_err res;
mp_int x, amo, m, z; /* "amo" = "a minus one" */
int iter;
unsigned int jx;
mp_size b;
ARGCHK(a != NULL, MP_BADARG);
MP_DIGITS(&x) = 0;
MP_DIGITS(&amo) = 0;
MP_DIGITS(&m) = 0;
MP_DIGITS(&z) = 0;
/* Initialize temporaries... */
MP_CHECKOK( mp_init(&amo));
/* Compute amo = a - 1 for what follows... */
MP_CHECKOK( mp_sub_d(a, 1, &amo) );
b = mp_trailing_zeros(&amo);
if (!b) { /* a was even ? */
res = MP_NO;
goto CLEANUP;
}
MP_CHECKOK( mp_init_size(&x, MP_USED(a)) );
MP_CHECKOK( mp_init(&z) );
MP_CHECKOK( mp_init(&m) );
MP_CHECKOK( mp_div_2d(&amo, b, &m, 0) );
/* Do the test nt times... */
for(iter = 0; iter < nt; iter++) {
/* Choose a random value for x < a */
s_mp_pad(&x, USED(a));
mpp_random(&x);
MP_CHECKOK( mp_mod(&x, a, &x) );
/* Compute z = (x ** m) mod a */
MP_CHECKOK( mp_exptmod(&x, &m, a, &z) );
if(mp_cmp_d(&z, 1) == 0 || mp_cmp(&z, &amo) == 0) {
res = MP_YES;
continue;
}
res = MP_NO; /* just in case the following for loop never executes. */
for (jx = 1; jx < b; jx++) {
/* z = z^2 (mod a) */
MP_CHECKOK( mp_sqrmod(&z, a, &z) );
res = MP_NO; /* previous line set res to MP_YES */
if(mp_cmp_d(&z, 1) == 0) {
break;
}
if(mp_cmp(&z, &amo) == 0) {
res = MP_YES;
break;
}
} /* end testing loop */
/* If the test passes, we will continue iterating, but a failed
test means the candidate is definitely NOT prime, so we will
immediately break out of this loop
*/
if(res == MP_NO)
break;
} /* end iterations loop */
CLEANUP:
mp_clear(&m);
mp_clear(&z);
mp_clear(&x);
mp_clear(&amo);
return res;
} /* end mpp_pprime() */
/* }}} */
/* Produce table of composites from list of primes and trial value.
** trial must be odd. List of primes must not include 2.
** sieve should have dimension >= MAXPRIME/2, where MAXPRIME is largest
** prime in list of primes. After this function is finished,
** if sieve[i] is non-zero, then (trial + 2*i) is composite.
** Each prime used in the sieve costs one division of trial, and eliminates
** one or more values from the search space. (3 eliminates 1/3 of the values
** alone!) Each value left in the search space costs 1 or more modular
** exponentations. So, these divisions are a bargain!
*/
mp_err mpp_sieve(mp_int *trial, const mp_digit *primes, mp_size nPrimes,
unsigned char *sieve, mp_size nSieve)
{
mp_err res;
mp_digit rem;
mp_size ix;
unsigned long offset;
memset(sieve, 0, nSieve);
for(ix = 0; ix < nPrimes; ix++) {
mp_digit prime = primes[ix];
mp_size i;
if((res = mp_mod_d(trial, prime, &rem)) != MP_OKAY)
return res;
if (rem == 0) {
offset = 0;
} else {
offset = prime - (rem / 2);
}
for (i = offset; i < nSieve ; i += prime) {
sieve[i] = 1;
}
}
return MP_OKAY;
}
#define SIEVE_SIZE 32*1024
mp_err mpp_make_prime(mp_int *start, mp_size nBits, mp_size strong,
unsigned long * nTries)
{
mp_digit np;
mp_err res;
int i = 0;
mp_int trial;
mp_int q;
mp_size num_tests;
/*
* Always make sieve the last variabale allocated so that
* Mac builds don't break by adding an extra variable
* on the stack. -javi
*/
#if defined(macintosh) || defined (XP_OS2) \
|| (defined(HPUX) && defined(__ia64))
unsigned char *sieve;
sieve = malloc(SIEVE_SIZE);
ARGCHK(sieve != NULL, MP_MEM);
#else
unsigned char sieve[SIEVE_SIZE];
#endif
ARGCHK(start != 0, MP_BADARG);
ARGCHK(nBits > 16, MP_RANGE);
MP_DIGITS(&trial) = 0;
MP_DIGITS(&q) = 0;
MP_CHECKOK( mp_init(&trial) );
MP_CHECKOK( mp_init(&q) );
/* values taken from table 4.4, HandBook of Applied Cryptography */
if (nBits >= 1300) {
num_tests = 2;
} else if (nBits >= 850) {
num_tests = 3;
} else if (nBits >= 650) {
num_tests = 4;
} else if (nBits >= 550) {
num_tests = 5;
} else if (nBits >= 450) {
num_tests = 6;
} else if (nBits >= 400) {
num_tests = 7;
} else if (nBits >= 350) {
num_tests = 8;
} else if (nBits >= 300) {
num_tests = 9;
} else if (nBits >= 250) {
num_tests = 12;
} else if (nBits >= 200) {
num_tests = 15;
} else if (nBits >= 150) {
num_tests = 18;
} else if (nBits >= 100) {
num_tests = 27;
} else
num_tests = 50;
if (strong)
--nBits;
MP_CHECKOK( mpl_set_bit(start, nBits - 1, 1) );
MP_CHECKOK( mpl_set_bit(start, 0, 1) );
for (i = mpl_significant_bits(start) - 1; i >= nBits; --i) {
MP_CHECKOK( mpl_set_bit(start, i, 0) );
}
/* start sieveing with prime value of 3. */
MP_CHECKOK(mpp_sieve(start, prime_tab + 1, prime_tab_size - 1,
sieve, SIEVE_SIZE) );
#ifdef DEBUG_SIEVE
res = 0;
for (i = 0; i < SIEVE_SIZE; ++i) {
if (!sieve[i])
++res;
}
fprintf(stderr,"sieve found %d potential primes.\n", res);
#define FPUTC(x,y) fputc(x,y)
#else
#define FPUTC(x,y)
#endif
res = MP_NO;
for(i = 0; i < SIEVE_SIZE; ++i) {
if (sieve[i]) /* this number is composite */
continue;
MP_CHECKOK( mp_add_d(start, 2 * i, &trial) );
FPUTC('.', stderr);
/* run a Fermat test */
res = mpp_fermat(&trial, 2);
if (res != MP_OKAY) {
if (res == MP_NO)
continue; /* was composite */
goto CLEANUP;
}
FPUTC('+', stderr);
/* If that passed, run some Miller-Rabin tests */
res = mpp_pprime(&trial, num_tests);
if (res != MP_OKAY) {
if (res == MP_NO)
continue; /* was composite */
goto CLEANUP;
}
FPUTC('!', stderr);
if (!strong)
break; /* success !! */
/* At this point, we have strong evidence that our candidate
is itself prime. If we want a strong prime, we need now
to test q = 2p + 1 for primality...
*/
MP_CHECKOK( mp_mul_2(&trial, &q) );
MP_CHECKOK( mp_add_d(&q, 1, &q) );
/* Test q for small prime divisors ... */
np = prime_tab_size;
res = mpp_divis_primes(&q, &np);
if (res == MP_YES) { /* is composite */
mp_clear(&q);
continue;
}
if (res != MP_NO)
goto CLEANUP;
/* And test with Fermat, as with its parent ... */
res = mpp_fermat(&q, 2);
if (res != MP_YES) {
mp_clear(&q);
if (res == MP_NO)
continue; /* was composite */
goto CLEANUP;
}
/* And test with Miller-Rabin, as with its parent ... */
res = mpp_pprime(&q, num_tests);
if (res != MP_YES) {
mp_clear(&q);
if (res == MP_NO)
continue; /* was composite */
goto CLEANUP;
}
/* If it passed, we've got a winner */
mp_exch(&q, &trial);
mp_clear(&q);
break;
} /* end of loop through sieved values */
if (res == MP_YES)
mp_exch(&trial, start);
CLEANUP:
mp_clear(&trial);
mp_clear(&q);
if (nTries)
*nTries += i;
#if defined(macintosh) || defined(XP_OS2) \
|| (defined(HPUX) && defined(__ia64))
if (sieve != NULL) {
memset(sieve, 0, SIEVE_SIZE);
free (sieve);
}
#endif
return res;
}
/*========================================================================*/
/*------------------------------------------------------------------------*/
/* Static functions visible only to the library internally */
/* {{{ s_mpp_divp(a, vec, size, which) */
/*
Test for divisibility by members of a vector of digits. Returns
MP_NO if a is not divisible by any of them; returns MP_YES and sets
'which' to the index of the offender, if it is. Will stop on the
first digit against which a is divisible.
*/
mp_err s_mpp_divp(mp_int *a, const mp_digit *vec, int size, int *which)
{
mp_err res;
mp_digit rem;
int ix;
for(ix = 0; ix < size; ix++) {
if((res = mp_mod_d(a, vec[ix], &rem)) != MP_OKAY)
return res;
if(rem == 0) {
if(which)
*which = ix;
return MP_YES;
}
}
return MP_NO;
} /* end s_mpp_divp() */
/* }}} */
/*------------------------------------------------------------------------*/
/* HERE THERE BE DRAGONS */

View File

@@ -1,70 +0,0 @@
/*
* mpprime.h
*
* Utilities for finding and working with prime and pseudo-prime
* integers
*
* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
*
* The Initial Developer of the Original Code is
* Michael J. Fromberger.
* Portions created by the Initial Developer are Copyright (C) 1997
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#ifndef _H_MP_PRIME_
#define _H_MP_PRIME_
#include "mpi.h"
extern const int prime_tab_size; /* number of primes available */
extern const mp_digit prime_tab[];
/* Tests for divisibility */
mp_err mpp_divis(mp_int *a, mp_int *b);
mp_err mpp_divis_d(mp_int *a, mp_digit d);
/* Random selection */
mp_err mpp_random(mp_int *a);
mp_err mpp_random_size(mp_int *a, mp_size prec);
/* Pseudo-primality testing */
mp_err mpp_divis_vector(mp_int *a, const mp_digit *vec, int size, int *which);
mp_err mpp_divis_primes(mp_int *a, mp_digit *np);
mp_err mpp_fermat(mp_int *a, mp_digit w);
mp_err mpp_fermat_list(mp_int *a, const mp_digit *primes, mp_size nPrimes);
mp_err mpp_pprime(mp_int *a, int nt);
mp_err mpp_sieve(mp_int *trial, const mp_digit *primes, mp_size nPrimes,
unsigned char *sieve, mp_size nSieve);
mp_err mpp_make_prime(mp_int *start, mp_size nBits, mp_size strong,
unsigned long * nTries);
#endif /* end _H_MP_PRIME_ */

View File

@@ -1,253 +0,0 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is a SPARC/VIS optimized multiply and add function.
*
* The Initial Developer of the Original Code is
* Sun Microsystems Inc.
* Portions created by the Initial Developer are Copyright (C) 1999-2000
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
/* $Id: mpv_sparc.c,v 1.4 2004-04-27 23:04:36 gerv%gerv.net Exp $ */
#include "vis_proto.h"
/***************************************************************/
typedef int t_s32;
typedef unsigned int t_u32;
#if defined(__sparcv9)
typedef long t_s64;
typedef unsigned long t_u64;
#else
typedef long long t_s64;
typedef unsigned long long t_u64;
#endif
typedef double t_d64;
/***************************************************************/
typedef union {
t_d64 d64;
struct {
t_s32 i0;
t_s32 i1;
} i32s;
} d64_2_i32;
/***************************************************************/
#define BUFF_SIZE 256
#define A_BITS 19
#define A_MASK ((1 << A_BITS) - 1)
/***************************************************************/
static t_u64 mask_cnst[] = {
0x8000000080000000ull
};
/***************************************************************/
#define DEF_VARS(N) \
t_d64 *py = (t_d64*)y; \
t_d64 mask = *((t_d64*)mask_cnst); \
t_d64 ca = (1u << 31) - 1; \
t_d64 da = (t_d64)a; \
t_s64 buff[N], s; \
d64_2_i32 dy
/***************************************************************/
#define MUL_U32_S64_2(i) \
dy.d64 = vis_fxnor(mask, py[i]); \
buff[2*(i) ] = (ca - (t_d64)dy.i32s.i0) * da; \
buff[2*(i)+1] = (ca - (t_d64)dy.i32s.i1) * da
#define MUL_U32_S64_2_D(i) \
dy.d64 = vis_fxnor(mask, py[i]); \
d0 = ca - (t_d64)dy.i32s.i0; \
d1 = ca - (t_d64)dy.i32s.i1; \
buff[4*(i) ] = (t_s64)(d0 * da); \
buff[4*(i)+1] = (t_s64)(d0 * db); \
buff[4*(i)+2] = (t_s64)(d1 * da); \
buff[4*(i)+3] = (t_s64)(d1 * db)
/***************************************************************/
#define ADD_S64_U32(i) \
s = buff[i] + x[i] + c; \
z[i] = s; \
c = (s >> 32)
#define ADD_S64_U32_D(i) \
s = buff[2*(i)] +(((t_s64)(buff[2*(i)+1]))<<A_BITS) + x[i] + uc; \
z[i] = s; \
uc = ((t_u64)s >> 32)
/***************************************************************/
#define MUL_U32_S64_8(i) \
MUL_U32_S64_2(i); \
MUL_U32_S64_2(i+1); \
MUL_U32_S64_2(i+2); \
MUL_U32_S64_2(i+3)
#define MUL_U32_S64_D_8(i) \
MUL_U32_S64_2_D(i); \
MUL_U32_S64_2_D(i+1); \
MUL_U32_S64_2_D(i+2); \
MUL_U32_S64_2_D(i+3)
/***************************************************************/
#define ADD_S64_U32_8(i) \
ADD_S64_U32(i); \
ADD_S64_U32(i+1); \
ADD_S64_U32(i+2); \
ADD_S64_U32(i+3); \
ADD_S64_U32(i+4); \
ADD_S64_U32(i+5); \
ADD_S64_U32(i+6); \
ADD_S64_U32(i+7)
#define ADD_S64_U32_D_8(i) \
ADD_S64_U32_D(i); \
ADD_S64_U32_D(i+1); \
ADD_S64_U32_D(i+2); \
ADD_S64_U32_D(i+3); \
ADD_S64_U32_D(i+4); \
ADD_S64_U32_D(i+5); \
ADD_S64_U32_D(i+6); \
ADD_S64_U32_D(i+7)
/***************************************************************/
t_u32 mul_add(t_u32 *z, t_u32 *x, t_u32 *y, int n, t_u32 a)
{
if (a < (1 << A_BITS)) {
if (n == 8) {
DEF_VARS(8);
t_s32 c = 0;
MUL_U32_S64_8(0);
ADD_S64_U32_8(0);
return c;
} else if (n == 16) {
DEF_VARS(16);
t_s32 c = 0;
MUL_U32_S64_8(0);
MUL_U32_S64_8(4);
ADD_S64_U32_8(0);
ADD_S64_U32_8(8);
return c;
} else {
DEF_VARS(BUFF_SIZE);
t_s32 i, c = 0;
#pragma pipeloop(0)
for (i = 0; i < (n+1)/2; i ++) {
MUL_U32_S64_2(i);
}
#pragma pipeloop(0)
for (i = 0; i < n; i ++) {
ADD_S64_U32(i);
}
return c;
}
} else {
if (n == 8) {
DEF_VARS(2*8);
t_d64 d0, d1, db;
t_u32 uc = 0;
da = (t_d64)(a & A_MASK);
db = (t_d64)(a >> A_BITS);
MUL_U32_S64_D_8(0);
ADD_S64_U32_D_8(0);
return uc;
} else if (n == 16) {
DEF_VARS(2*16);
t_d64 d0, d1, db;
t_u32 uc = 0;
da = (t_d64)(a & A_MASK);
db = (t_d64)(a >> A_BITS);
MUL_U32_S64_D_8(0);
MUL_U32_S64_D_8(4);
ADD_S64_U32_D_8(0);
ADD_S64_U32_D_8(8);
return uc;
} else {
DEF_VARS(2*BUFF_SIZE);
t_d64 d0, d1, db;
t_u32 i, uc = 0;
da = (t_d64)(a & A_MASK);
db = (t_d64)(a >> A_BITS);
#pragma pipeloop(0)
for (i = 0; i < (n+1)/2; i ++) {
MUL_U32_S64_2_D(i);
}
#pragma pipeloop(0)
for (i = 0; i < n; i ++) {
ADD_S64_U32_D(i);
}
return uc;
}
}
}
/***************************************************************/
t_u32 mul_add_inp(t_u32 *x, t_u32 *y, int n, t_u32 a)
{
return mul_add(x, x, y, n, a);
}
/***************************************************************/

Some files were not shown because too many files have changed in this diff Show More