Compare commits
2 Commits
BOBS_MPI_E
...
regalloc_c
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1c43d4984f | ||
|
|
cfe021ff88 |
134
mozilla/ef/Compiler/RegisterAllocator/BitSet.cpp
Normal file
134
mozilla/ef/Compiler/RegisterAllocator/BitSet.cpp
Normal file
@@ -0,0 +1,134 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#include "Fundamentals.h"
|
||||
#include "BitSet.h"
|
||||
|
||||
// Return the next bit after index set to true or -1 if none.
|
||||
//
|
||||
Int32 BitSet::nextOne(Int32 pos) const
|
||||
{
|
||||
++pos;
|
||||
|
||||
if (pos < 0 || Uint32(pos) >= universeSize)
|
||||
return -1;
|
||||
|
||||
Uint32 offset = getWordOffset(pos);
|
||||
Uint8 index = getBitOffset(pos);
|
||||
Word* ptr = &word[offset];
|
||||
Word currentWord = *ptr++ >> index;
|
||||
|
||||
if (currentWord != Word(0)) {
|
||||
while ((currentWord & Word(1)) == 0) {
|
||||
++index;
|
||||
currentWord >>= 1;
|
||||
}
|
||||
return (offset << nBitsInWordLog2) + index;
|
||||
}
|
||||
|
||||
Word* limit = &word[getSizeInWords(universeSize)];
|
||||
while (ptr < limit) {
|
||||
++offset;
|
||||
currentWord = *ptr++;
|
||||
if (currentWord != Word(0)) {
|
||||
index = 0;
|
||||
while ((currentWord & Word(1)) == 0) {
|
||||
++index;
|
||||
currentWord >>= 1;
|
||||
}
|
||||
return (offset << nBitsInWordLog2) + index;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Return the next bit after index set to false or -1 if none.
|
||||
//
|
||||
Int32 BitSet::nextZero(Int32 pos) const
|
||||
{
|
||||
++pos;
|
||||
|
||||
if (pos < 0 || Uint32(pos) >= universeSize)
|
||||
return -1;
|
||||
|
||||
Uint32 offset = getWordOffset(pos);
|
||||
Uint8 index = getBitOffset(pos);
|
||||
Word* ptr = &word[offset];
|
||||
Word currentWord = *ptr++ >> index;
|
||||
|
||||
if (currentWord != Word(~0)) {
|
||||
for (; index < nBitsInWord; ++index) {
|
||||
if ((currentWord & Word(1)) == 0) {
|
||||
Int32 ret = (offset << nBitsInWordLog2) + index;
|
||||
return (Uint32(ret) < universeSize) ? ret : -1;
|
||||
}
|
||||
currentWord >>= 1;
|
||||
}
|
||||
}
|
||||
|
||||
Word* limit = &word[getSizeInWords(universeSize)];
|
||||
while (ptr < limit) {
|
||||
++offset;
|
||||
currentWord = *ptr++;
|
||||
if (currentWord != Word(~0)) {
|
||||
for (index = 0; index < nBitsInWord; ++index) {
|
||||
if ((currentWord & Word(1)) == 0) {
|
||||
Int32 ret = (offset << nBitsInWordLog2) + index;
|
||||
return (Uint32(ret) < universeSize) ? ret : -1;
|
||||
}
|
||||
currentWord >>= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
#ifdef DEBUG_LOG
|
||||
|
||||
// Print the set.
|
||||
//
|
||||
void BitSet::printPretty(LogModuleObject log)
|
||||
{
|
||||
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("[ "));
|
||||
|
||||
for (Int32 i = firstOne(); i != -1; i = nextOne(i)) {
|
||||
Int32 currentBit = i;
|
||||
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("%d", currentBit));
|
||||
|
||||
Int32 nextBit = nextOne(currentBit);
|
||||
if (nextBit != currentBit + 1) {
|
||||
UT_OBJECTLOG(log, PR_LOG_ALWAYS, (" "));
|
||||
continue;
|
||||
}
|
||||
|
||||
while ((nextBit != -1) && (nextBit == (currentBit + 1))) {
|
||||
currentBit = nextBit;
|
||||
nextBit = nextOne(nextBit);
|
||||
}
|
||||
|
||||
if (currentBit > (i+1))
|
||||
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("-%d ", currentBit));
|
||||
else
|
||||
UT_OBJECTLOG(log, PR_LOG_ALWAYS, (" %d ", currentBit));
|
||||
|
||||
i = currentBit;
|
||||
}
|
||||
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("]\n"));
|
||||
}
|
||||
|
||||
#endif // DEBUG_LOG
|
||||
195
mozilla/ef/Compiler/RegisterAllocator/BitSet.h
Normal file
195
mozilla/ef/Compiler/RegisterAllocator/BitSet.h
Normal file
@@ -0,0 +1,195 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#ifndef _BITSET_H_
|
||||
#define _BITSET_H_
|
||||
|
||||
#include "Fundamentals.h"
|
||||
#include "LogModule.h"
|
||||
#include "Pool.h"
|
||||
#include <string.h>
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// BitSet -
|
||||
|
||||
class BitSet
|
||||
{
|
||||
private:
|
||||
|
||||
#if (PR_BITS_PER_WORD == 64)
|
||||
typedef Uint64 Word;
|
||||
#elif (PR_BITS_PER_WORD == 32)
|
||||
typedef Uint32 Word;
|
||||
#endif
|
||||
|
||||
static const nBitsInWord = PR_BITS_PER_WORD;
|
||||
static const nBytesInWord = PR_BYTES_PER_WORD;
|
||||
static const nBitsInWordLog2 = PR_BITS_PER_WORD_LOG2;
|
||||
static const nBytesInWordLog2 = PR_BYTES_PER_WORD_LOG2;
|
||||
|
||||
// Return the number of Word need to store the universe.
|
||||
static Uint32 getSizeInWords(Uint32 sizeOfUniverse) {return (sizeOfUniverse + (nBitsInWord - 1)) >> nBitsInWordLog2;}
|
||||
// Return the given element offset in its containing Word.
|
||||
static Uint32 getBitOffset(Uint32 element) {return element & (nBitsInWord - 1);}
|
||||
// Return the Word offset for the given element int the universe.
|
||||
static Uint32 getWordOffset(Uint32 element) {return element >> nBitsInWordLog2;}
|
||||
// Return the mask for the given bit index.
|
||||
static Word getMask(Uint8 index) {return Word(1) << index;}
|
||||
|
||||
private:
|
||||
|
||||
Uint32 universeSize; // Size of the universe
|
||||
Word* word; // universe memory.
|
||||
|
||||
private:
|
||||
|
||||
// No copy constructor.
|
||||
BitSet(const BitSet&);
|
||||
|
||||
// Check if the given set's universe is of the same size than this universe.
|
||||
void checkUniverseCompatibility(const BitSet& set) const {assert(set.universeSize == universeSize);}
|
||||
// Check if pos is valid for this set's universe.
|
||||
void checkMember(Int32 pos) const {assert(pos >=0 && Uint32(pos) < universeSize);}
|
||||
|
||||
public:
|
||||
|
||||
// Create a bitset of universeSize bits.
|
||||
BitSet(Pool& pool, Uint32 universeSize) : universeSize(universeSize) {word = new(pool) Word[getSizeInWords(universeSize)]; clear();}
|
||||
|
||||
// Return the size of this bitset.
|
||||
Uint32 getSize() const {return universeSize;}
|
||||
|
||||
// Clear the bitset.
|
||||
void clear() {memset(word, 0x00, getSizeInWords(universeSize) << nBytesInWordLog2);}
|
||||
// Clear the bit at index.
|
||||
void clear(Uint32 index) {checkMember(index); word[getWordOffset(index)] &= ~getMask(index);}
|
||||
// Set the bitset.
|
||||
void set() {memset(word, 0xFF, getSizeInWords(universeSize) << nBytesInWordLog2);}
|
||||
// Set the bit at index.
|
||||
void set(Uint32 index) {checkMember(index); word[getWordOffset(index)] |= getMask(index);}
|
||||
// Return true if the bit at index is set.
|
||||
bool test(Uint32 index) const {checkMember(index); return (word[getWordOffset(index)] & getMask(index)) != 0;}
|
||||
// Union with the given bitset.
|
||||
inline void or(const BitSet& set);
|
||||
// Intersection with the given bitset.
|
||||
inline void and(const BitSet& set);
|
||||
// Difference with the given bitset.
|
||||
inline void difference(const BitSet& set);
|
||||
// Copy set.
|
||||
inline BitSet& operator = (const BitSet& set);
|
||||
// Return true if the bitset are identical.
|
||||
friend bool operator == (const BitSet& set1, const BitSet& set2);
|
||||
// Return true if the bitset are different.
|
||||
friend bool operator != (const BitSet& set1, const BitSet& set2);
|
||||
|
||||
// Logical operators.
|
||||
BitSet& operator |= (const BitSet& set) {or(set); return *this;}
|
||||
BitSet& operator &= (const BitSet& set) {and(set); return *this;}
|
||||
BitSet& operator -= (const BitSet& set) {difference(set); return *this;}
|
||||
|
||||
// Return the first bit at set to true or -1 if none.
|
||||
Int32 firstOne() const {return nextOne(-1);}
|
||||
// Return the next bit after index set to true or -1 if none.
|
||||
Int32 nextOne(Int32 pos) const;
|
||||
// Return the first bit at set to false or -1 if none.
|
||||
Int32 firstZero() const {return nextZero(-1);}
|
||||
// Return the next bit after index set to false or -1 if none.
|
||||
Int32 nextZero(Int32 pos) const;
|
||||
|
||||
// Iterator to conform with the set API.
|
||||
typedef Int32 iterator;
|
||||
// Return true if the walk is ordered.
|
||||
static bool isOrdered() {return true;}
|
||||
// Return the iterator for the first element of this set.
|
||||
iterator begin() const {return firstOne();}
|
||||
// Return the next iterator.
|
||||
iterator advance(iterator pos) const {return nextOne(pos);}
|
||||
// Return true if the iterator is at the end of the set.
|
||||
bool done(iterator pos) const {return pos == -1;}
|
||||
// Return the element corresponding to the given iterator.
|
||||
Uint32 get(iterator pos) const {return pos;}
|
||||
|
||||
#ifdef DEBUG_LOG
|
||||
// Print the set.
|
||||
void printPretty(LogModuleObject log);
|
||||
#endif // DEBUG_LOG
|
||||
};
|
||||
|
||||
// Union with the given bitset.
|
||||
//
|
||||
inline void BitSet::or(const BitSet& set)
|
||||
{
|
||||
checkUniverseCompatibility(set);
|
||||
Word* src = set.word;
|
||||
Word* dst = word;
|
||||
Word* limit = &src[getSizeInWords(universeSize)];
|
||||
|
||||
while (src < limit)
|
||||
*dst++ |= *src++;
|
||||
}
|
||||
|
||||
// Intersection with the given bitset.
|
||||
//
|
||||
inline void BitSet::and(const BitSet& set)
|
||||
{
|
||||
checkUniverseCompatibility(set);
|
||||
Word* src = set.word;
|
||||
Word* dst = word;
|
||||
Word* limit = &src[getSizeInWords(universeSize)];
|
||||
|
||||
while (src < limit)
|
||||
*dst++ &= *src++;
|
||||
}
|
||||
|
||||
// Difference with the given bitset.
|
||||
//
|
||||
inline void BitSet::difference(const BitSet& set)
|
||||
{
|
||||
checkUniverseCompatibility(set);
|
||||
Word* src = set.word;
|
||||
Word* dst = word;
|
||||
Word* limit = &src[getSizeInWords(universeSize)];
|
||||
|
||||
while (src < limit)
|
||||
*dst++ &= ~*src++;
|
||||
}
|
||||
|
||||
// Copy the given set into this set.
|
||||
//
|
||||
inline BitSet& BitSet::operator = (const BitSet& set)
|
||||
{
|
||||
checkUniverseCompatibility(set);
|
||||
if (this != &set)
|
||||
memcpy(word, set.word, getSizeInWords(universeSize) << nBytesInWordLog2);
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Return true if the given set is identical to this set.
|
||||
inline bool operator == (const BitSet& set1, const BitSet& set2)
|
||||
{
|
||||
set1.checkUniverseCompatibility(set2);
|
||||
|
||||
if (&set1 == &set2)
|
||||
return true;
|
||||
|
||||
return memcmp(set1.word, set2.word, BitSet::getSizeInWords(set1.universeSize) << BitSet::nBytesInWordLog2) == 0;
|
||||
}
|
||||
|
||||
// Return true if the two sets differ; defined as the negation of operator ==.
inline bool operator != (const BitSet& set1, const BitSet& set2)
{
    const bool identical = (set1 == set2);
    return !identical;
}
|
||||
|
||||
#endif // _BITSET_H_
|
||||
159
mozilla/ef/Compiler/RegisterAllocator/Coalescing.h
Normal file
159
mozilla/ef/Compiler/RegisterAllocator/Coalescing.h
Normal file
@@ -0,0 +1,159 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#ifndef _COALESCING_H_
|
||||
#define _COALESCING_H_
|
||||
|
||||
#include "Fundamentals.h"
|
||||
#include "Pool.h"
|
||||
#include "RegisterPressure.h"
|
||||
#include "InterferenceGraph.h"
|
||||
#include "ControlGraph.h"
|
||||
#include "ControlNodes.h"
|
||||
#include "Instruction.h"
|
||||
#include "SparseSet.h"
|
||||
#include "RegisterAllocator.h"
|
||||
#include "RegisterAllocatorTools.h"
|
||||
|
||||
#if 1
|
||||
// Performing an ultra conservative coalescing means that when we look at
|
||||
// candidates (source,destination) for coalescing we need to make sure
|
||||
// that the combined interference of the source and destination register
|
||||
// will not exceed the total number of register available for the register
|
||||
// class.
|
||||
#define ULTRA_CONSERVATIVE_COALESCING
|
||||
#else
|
||||
// If we are not doing an ultra conservative coalescing we have to make sure
|
||||
// that the total number of neighbor whose degree is greater than the total
|
||||
// number of register is not greater than the total number of register.
|
||||
#undef ULTRA_CONSERVATIVE_COALESCING
|
||||
#endif
|
||||
|
||||
template <class RegisterPressure>
|
||||
struct Coalescing
|
||||
{
|
||||
static bool coalesce(RegisterAllocator& registerAllocator);
|
||||
};
|
||||
|
||||
template <class RegisterPressure>
|
||||
bool Coalescing<RegisterPressure>::coalesce(RegisterAllocator& registerAllocator)
|
||||
{
|
||||
Pool& pool = registerAllocator.pool;
|
||||
|
||||
// Initialize the lookup table
|
||||
//
|
||||
Uint32 rangeCount = registerAllocator.rangeCount;
|
||||
RegisterName* newRange = new RegisterName[2 * rangeCount];
|
||||
RegisterName* coalescedRange = &newRange[rangeCount];
|
||||
RegisterName* name2range = registerAllocator.name2range;
|
||||
|
||||
init(coalescedRange, rangeCount);
|
||||
|
||||
SparseSet interferences(pool, rangeCount);
|
||||
InterferenceGraph<RegisterPressure>& iGraph = registerAllocator.iGraph;
|
||||
bool removedInstructions = false;
|
||||
|
||||
ControlGraph& controlGraph = registerAllocator.controlGraph;
|
||||
ControlNode** nodes = controlGraph.lndList;
|
||||
Uint32 nNodes = controlGraph.nNodes;
|
||||
|
||||
|
||||
// Walk the nodes in the loop nesting depth list.
|
||||
for (Int32 n = nNodes - 1; n >= 0; n--) {
|
||||
InstructionList& instructions = nodes[n]->getInstructions();
|
||||
|
||||
InstructionList::iterator it = instructions.begin();
|
||||
while (!instructions.done(it)) {
|
||||
Instruction& instruction = instructions.get(it);
|
||||
it = instructions.advance(it);
|
||||
|
||||
if ((instruction.getFlags() & ifCopy) != 0) {
|
||||
assert(instruction.getInstructionUseBegin() != instruction.getInstructionUseEnd() && instruction.getInstructionUseBegin()[0].isRegister());
|
||||
assert(instruction.getInstructionDefineBegin() != instruction.getInstructionDefineEnd() && instruction.getInstructionDefineBegin()[0].isRegister());
|
||||
|
||||
RegisterName source = findRoot(name2range[instruction.getInstructionUseBegin()[0].getRegisterName()], coalescedRange);
|
||||
RegisterName destination = findRoot(name2range[instruction.getInstructionDefineBegin()[0].getRegisterName()], coalescedRange);
|
||||
|
||||
if (source == destination) {
|
||||
instruction.remove();
|
||||
} else if (!iGraph.interfere(source, destination)) {
|
||||
InterferenceVector* sourceVector = iGraph.getInterferenceVector(source);
|
||||
InterferenceVector* destinationVector = iGraph.getInterferenceVector(destination);
|
||||
|
||||
#ifdef ULTRA_CONSERVATIVE_COALESCING
|
||||
interferences.clear();
|
||||
|
||||
InterferenceVector* vector;
|
||||
for (vector = sourceVector; vector != NULL; vector = vector->next) {
|
||||
RegisterName* neighbors = vector->neighbors;
|
||||
for (Uint32 i = 0; i < vector->count; i++)
|
||||
interferences.set(findRoot(neighbors[i], coalescedRange));
|
||||
}
|
||||
for (vector = destinationVector; vector != NULL; vector = vector->next) {
|
||||
RegisterName* neighbors = vector->neighbors;
|
||||
for (Uint32 i = 0; i < vector->count; i++)
|
||||
interferences.set(findRoot(neighbors[i], coalescedRange));
|
||||
}
|
||||
|
||||
Uint32 count = interferences.getSize();
|
||||
#else // ULTRA_CONSERVATIVE_COALESCING
|
||||
trespass("not implemented");
|
||||
Uint32 count = 0;
|
||||
#endif // ULTRA_CONSERVATIVE_COALESCING
|
||||
|
||||
if (count < 6 /* FIX: should get the number from the class */) {
|
||||
// Update the interferences vector.
|
||||
if (sourceVector == NULL) {
|
||||
iGraph.setInterferenceVector(source, destinationVector);
|
||||
sourceVector = destinationVector;
|
||||
} else if (destinationVector == NULL)
|
||||
iGraph.setInterferenceVector(destination, sourceVector);
|
||||
else {
|
||||
InterferenceVector* last = NULL;
|
||||
for (InterferenceVector* v = sourceVector; v != NULL; v = v->next)
|
||||
last = v;
|
||||
assert(last);
|
||||
last->next = destinationVector;
|
||||
iGraph.setInterferenceVector(destination, sourceVector);
|
||||
}
|
||||
// Update the interference matrix.
|
||||
for (InterferenceVector* v = sourceVector; v != NULL; v = v->next) {
|
||||
RegisterName* neighbors = v->neighbors;
|
||||
for (Uint32 i = 0; i < v->count; i++) {
|
||||
RegisterName neighbor = findRoot(neighbors[i], coalescedRange);
|
||||
iGraph.setInterference(neighbor, source);
|
||||
iGraph.setInterference(neighbor, destination);
|
||||
}
|
||||
}
|
||||
|
||||
instruction.remove();
|
||||
coalescedRange[source] = destination;
|
||||
removedInstructions = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
registerAllocator.rangeCount = compress(registerAllocator.name2range, coalescedRange, registerAllocator.nameCount, rangeCount);
|
||||
delete newRange;
|
||||
|
||||
return removedInstructions;
|
||||
}
|
||||
|
||||
#endif // _COALESCING_H_
|
||||
283
mozilla/ef/Compiler/RegisterAllocator/Coloring.cpp
Normal file
283
mozilla/ef/Compiler/RegisterAllocator/Coloring.cpp
Normal file
@@ -0,0 +1,283 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#ifndef NEW_LAURENTM_CODE
|
||||
|
||||
#include "Coloring.h"
|
||||
#include "VirtualRegister.h"
|
||||
#include "FastBitSet.h"
|
||||
#include "FastBitMatrix.h"
|
||||
#include "CpuInfo.h"
|
||||
|
||||
bool Coloring::
|
||||
assignRegisters(FastBitMatrix& interferenceMatrix)
|
||||
{
|
||||
PRUint32 *stackPtr = new(pool) PRUint32[vRegManager.count()];
|
||||
|
||||
return select(interferenceMatrix, stackPtr, simplify(interferenceMatrix, stackPtr));
|
||||
}
|
||||
|
||||
// Return the index of the register in bitset with the lowest spill cost, or
// -1 if bitset is empty. The first register of the set is the initial
// candidate; a later register replaces it only if its spill cost is finite
// and strictly lower.
PRInt32 Coloring::
getLowestSpillCostRegister(FastBitSet& bitset)
{
    PRInt32 best = bitset.firstOne();
    if (best == -1)
        return best;

    Flt32 bestCost = vRegManager.getVirtualRegister(best).spillInfo.spillCost;

    PRInt32 candidate = bitset.nextOne(best);
    while (candidate != -1)
    {
        VirtualRegister& candidateReg = vRegManager.getVirtualRegister(candidate);
        if (!candidateReg.spillInfo.infiniteSpillCost && (candidateReg.spillInfo.spillCost < bestCost))
        {
            bestCost = candidateReg.spillInfo.spillCost;
            best = candidate;
        }
        candidate = bitset.nextOne(candidate);
    }
    return best;
}
|
||||
|
||||
// Simplify phase: push every virtual register onto the coloring stack and
// return the new top of the stack.
//
// interferenceMatrix is taken by value: edges are cleared from this local
// copy as registers are removed from the graph.
PRUint32* Coloring::
simplify(FastBitMatrix interferenceMatrix, PRUint32* stackPtr)
{
    // Build three sets. 'stack' holds the stack-slot registers. 'low' holds
    // the registers whose interference degree is below NUMBER_OF_REGISTERS.
    // Every other register goes to 'high' (the test on infinite spill cost is
    // commented out in the code, so such registers land in 'high' too).

    PRUint32 nRegisters = vRegManager.count();
    FastBitSet low(pool, nRegisters);
    FastBitSet high(pool, nRegisters);
    FastBitSet stack(pool, nRegisters);

    for (VirtualRegisterManager::iterator it = vRegManager.begin(); !vRegManager.done(it); it = vRegManager.advance(it))
    {
        VirtualRegister& current = vRegManager.getVirtualRegister(it);

        if (current.getClass() == vrcStackSlot)
        {
            stack.set(it);
            current.colorRegister(nRegisters);
            continue;
        }

        // else branch of the original: if (!vReg.spillInfo.infiniteSpillCost)
        (current.colorInfo.interferenceDegree < NUMBER_OF_REGISTERS ? low : high).set(it);

        // Set coloring info: start from the color just past the last
        // register of the class.
        current.spillInfo.willSpill = false;

        switch (current.getClass())
        {
            case vrcInteger:
                current.colorRegister(LAST_GREGISTER + 1);
                break;
            case vrcFloatingPoint:
            case vrcFixedPoint:
                current.colorRegister(LAST_FPREGISTER + 1);
                break;
            default:
                PR_ASSERT(false); // Cannot happen.
        }
    }

    // push the stack registers
    PRInt32 j = stack.firstOne();
    while (j != -1)
    {
        *stackPtr++ = j;
        j = stack.nextOne(j);
    }

    // simplify
    for (;;)
    {
        PRInt32 r;
        while ((r = getLowestSpillCostRegister(low)) != -1)
        {
            VirtualRegister& removed = vRegManager.getVirtualRegister(r);

            /* update low and high */
            FastBitSet inter(interferenceMatrix.getRow(r), nRegisters);
            for (j = inter.firstOne(); j != -1; j = inter.nextOne(j))
            {
                VirtualRegister& neighbor = vRegManager.getVirtualRegister(j);

                // A neighbor whose degree was exactly the number of registers
                // of its class drops below it once r is removed, so it moves
                // from 'high' to 'low'.
                PRUint32 maxInterference = 0;
                switch (neighbor.getClass())
                {
                    case vrcInteger:
                        maxInterference = NUMBER_OF_GREGISTERS;
                        break;
                    case vrcFloatingPoint:
                    case vrcFixedPoint:
                        maxInterference = NUMBER_OF_FPREGISTERS;
                        break;
                    default:
                        PR_ASSERT(false);
                }

                // Compare the degree before the decrement.
                if ((vRegManager.getVirtualRegister(j).colorInfo.interferenceDegree-- == maxInterference))
                {
                    high.clear(j);
                    low.set(j);
                }

                // Drop the edge r <-> j in both directions.
                removed.colorInfo.interferenceDegree--;
                interferenceMatrix.clear(r, j);
                interferenceMatrix.clear(j, r);
            }
            low.clear(r);

            // Push this register.
            *stackPtr++ = r;
        }

        // 'low' is empty: promote the cheapest 'high' register, or stop when
        // nothing is left.
        r = getLowestSpillCostRegister(high);
        if (r == -1)
            break;

        high.clear(r);
        low.set(r);
    }

    return stackPtr;
}
|
||||
|
||||
bool Coloring::
|
||||
select(FastBitMatrix& interferenceMatrix, PRUint32* stackBase, PRUint32* stackPtr)
|
||||
{
|
||||
PRUint32 nRegisters = vRegManager.count();
|
||||
FastBitSet usedRegisters(NUMBER_OF_REGISTERS + 1); // usedRegisters if used for both GR & FPR.
|
||||
FastBitSet preColoredRegisters(NUMBER_OF_REGISTERS + 1);
|
||||
FastBitSet usedStack(nRegisters + 1);
|
||||
bool success = true;
|
||||
Int32 lastUsedSSR = -1;
|
||||
|
||||
// select
|
||||
while (stackPtr != stackBase)
|
||||
{
|
||||
// Pop one register.
|
||||
PRUint32 r = *--stackPtr;
|
||||
VirtualRegister& vReg = vRegManager.getVirtualRegister(r);
|
||||
|
||||
FastBitSet neighbors(interferenceMatrix.getRow(r), nRegisters);
|
||||
|
||||
if (vReg.getClass() == vrcStackSlot)
|
||||
// Stack slots coloring.
|
||||
{
|
||||
usedStack.clear();
|
||||
|
||||
for (PRInt32 i = neighbors.firstOne(); i != -1; i = neighbors.nextOne(i))
|
||||
usedStack.set(vRegManager.getVirtualRegister(i).getColor());
|
||||
|
||||
Int32 color = usedStack.firstZero();
|
||||
vReg.colorRegister(color);
|
||||
if (color > lastUsedSSR)
|
||||
lastUsedSSR = color;
|
||||
}
|
||||
else
|
||||
// Integer & Floating point register coloring.
|
||||
{
|
||||
usedRegisters.clear();
|
||||
preColoredRegisters.clear();
|
||||
|
||||
for (PRInt32 i = neighbors.firstOne(); i != -1; i = neighbors.nextOne(i))
|
||||
{
|
||||
VirtualRegister& nvReg = vRegManager.getVirtualRegister(i);
|
||||
usedRegisters.set(nvReg.getColor());
|
||||
if (nvReg.isPreColored())
|
||||
preColoredRegisters.set(nvReg.getPreColor());
|
||||
}
|
||||
if (vReg.hasSpecialInterference)
|
||||
usedRegisters |= vReg.specialInterference;
|
||||
|
||||
PRInt8 c = -1;
|
||||
PRInt8 maxColor = 0;
|
||||
PRInt8 firstColor = 0;
|
||||
switch (vReg.getClass())
|
||||
{
|
||||
case vrcInteger:
|
||||
firstColor = FIRST_GREGISTER;
|
||||
maxColor = LAST_GREGISTER;
|
||||
break;
|
||||
case vrcFloatingPoint:
|
||||
case vrcFixedPoint:
|
||||
firstColor = FIRST_FPREGISTER;
|
||||
maxColor = LAST_FPREGISTER;
|
||||
break;
|
||||
default:
|
||||
PR_ASSERT(false);
|
||||
}
|
||||
|
||||
if (vReg.isPreColored())
|
||||
{
|
||||
c = vReg.getPreColor();
|
||||
if (usedRegisters.test(c))
|
||||
c = -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (c = usedRegisters.nextZero(firstColor - 1); (c >= 0) && (c <= maxColor) && (preColoredRegisters.test(c));
|
||||
c = usedRegisters.nextZero(c)) {}
|
||||
}
|
||||
|
||||
if ((c >= 0) && (c <= maxColor))
|
||||
{
|
||||
vReg.colorRegister(c);
|
||||
}
|
||||
else
|
||||
{
|
||||
VirtualRegister& stackRegister = vRegManager.newVirtualRegister(vrcStackSlot);
|
||||
vReg.equivalentRegister[vrcStackSlot] = &stackRegister;
|
||||
vReg.spillInfo.willSpill = true;
|
||||
success = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
if (success)
|
||||
{
|
||||
for (VirtualRegisterManager::iterator i = vRegManager.begin(); !vRegManager.done(i); i = vRegManager.advance(i))
|
||||
{
|
||||
VirtualRegister& vReg = vRegManager.getVirtualRegister(i);
|
||||
switch (vReg.getClass())
|
||||
{
|
||||
case vrcInteger:
|
||||
if (vReg.getColor() > LAST_GREGISTER)
|
||||
PR_ASSERT(false);
|
||||
break;
|
||||
case vrcFloatingPoint:
|
||||
case vrcFixedPoint:
|
||||
#if NUMBER_OF_FPREGISTERS != 0
|
||||
if (vReg.getColor() > LAST_FPREGISTER)
|
||||
PR_ASSERT(false);
|
||||
#endif
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
vRegManager.nUsedStackSlots = lastUsedSSR + 1;
|
||||
return success;
|
||||
}
|
||||
#endif // NEW_LAURENTM_CODE
|
||||
284
mozilla/ef/Compiler/RegisterAllocator/Coloring.h
Normal file
284
mozilla/ef/Compiler/RegisterAllocator/Coloring.h
Normal file
@@ -0,0 +1,284 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#include "Fundamentals.h"
|
||||
#include "ControlGraph.h"
|
||||
#include "ControlNodes.h"
|
||||
#include "Instruction.h"
|
||||
#include "RegisterAllocator.h"
|
||||
#include "VirtualRegister.h"
|
||||
#include "InterferenceGraph.h"
|
||||
#include "SparseSet.h"
|
||||
#include "Spilling.h"
|
||||
#include "Splits.h"
|
||||
|
||||
UT_EXTERN_LOG_MODULE(RegAlloc);
|
||||
|
||||
template <class RegisterPressure>
|
||||
class Coloring
|
||||
{
|
||||
private:
|
||||
static RegisterName* simplify(RegisterAllocator& registerAllocator, RegisterName* coloringStack);
|
||||
static bool select(RegisterAllocator& registerAllocator, RegisterName* coloringStack, RegisterName* coloringStackPtr);
|
||||
|
||||
public:
|
||||
static bool color(RegisterAllocator& registerAllocator);
|
||||
static void finalColoring(RegisterAllocator& registerAllocator);
|
||||
};
|
||||
|
||||
|
||||
template <class RegisterPressure>
|
||||
void Coloring<RegisterPressure>::finalColoring(RegisterAllocator& registerAllocator)
|
||||
{
|
||||
RegisterName* color = registerAllocator.color;
|
||||
RegisterName* name2range = registerAllocator.name2range;
|
||||
|
||||
ControlGraph& controlGraph = registerAllocator.controlGraph;
|
||||
ControlNode** nodes = controlGraph.dfsList;
|
||||
Uint32 nNodes = controlGraph.nNodes;
|
||||
|
||||
for (Uint32 n = 0; n < nNodes; n++) {
|
||||
InstructionList& instructions = nodes[n]->getInstructions();
|
||||
|
||||
for (InstructionList::iterator i = instructions.begin(); !instructions.done(i); i = instructions.advance(i)) {
|
||||
Instruction& instruction = instructions.get(i);
|
||||
|
||||
InstructionUse* useEnd = instruction.getInstructionUseEnd();
|
||||
for (InstructionUse* usePtr = instruction.getInstructionUseBegin(); usePtr < useEnd; usePtr++)
|
||||
if (usePtr->isRegister()) {
|
||||
usePtr->setRegisterName(color[name2range[usePtr->getRegisterName()]]);
|
||||
#ifdef DEBUG
|
||||
RegisterID rid = usePtr->getRegisterID();
|
||||
setColoredRegister(rid);
|
||||
usePtr->setRegisterID(rid);
|
||||
#endif // DEBUG
|
||||
}
|
||||
|
||||
InstructionDefine* defineEnd = instruction.getInstructionDefineEnd();
|
||||
for (InstructionDefine* definePtr = instruction.getInstructionDefineBegin(); definePtr < defineEnd; definePtr++)
|
||||
if (definePtr->isRegister()) {
|
||||
definePtr->setRegisterName(color[name2range[definePtr->getRegisterName()]]);
|
||||
#ifdef DEBUG
|
||||
RegisterID rid = definePtr->getRegisterID();
|
||||
setColoredRegister(rid);
|
||||
definePtr->setRegisterID(rid);
|
||||
#endif // DEBUG
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class RegisterPressure>
|
||||
bool Coloring<RegisterPressure>::select(RegisterAllocator& registerAllocator, RegisterName* coloringStack, RegisterName* coloringStackPtr)
|
||||
{
|
||||
Uint32 rangeCount = registerAllocator.rangeCount;
|
||||
RegisterName* color = new RegisterName[rangeCount];
|
||||
registerAllocator.color = color;
|
||||
|
||||
for (Uint32 r = 1; r < rangeCount; r++)
|
||||
color[r] = RegisterName(6); // FIX;
|
||||
|
||||
// Color the preColored registers.
|
||||
//
|
||||
VirtualRegisterManager& vrManager = registerAllocator.vrManager;
|
||||
RegisterName* name2range = registerAllocator.name2range;
|
||||
PreColoredRegister* machineEnd = vrManager.getMachineRegistersEnd();
|
||||
for (PreColoredRegister* machinePtr = vrManager.getMachineRegistersBegin(); machinePtr < machineEnd; machinePtr++)
|
||||
if (machinePtr->id != invalidID) {
|
||||
color[name2range[getName(machinePtr->id)]] = machinePtr->color;
|
||||
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("\twill preColor range %d as %d\n", name2range[getName(machinePtr->id)], machinePtr->color));
|
||||
}
|
||||
|
||||
SpillCost* cost = registerAllocator.spillCost;
|
||||
Pool& pool = registerAllocator.pool;
|
||||
SparseSet& spill = *new(pool) SparseSet(pool, rangeCount);
|
||||
registerAllocator.willSpill = &spill;
|
||||
SparseSet neighborColors(pool, 6); // FIX
|
||||
InterferenceGraph<RegisterPressure>& iGraph = registerAllocator.iGraph;
|
||||
|
||||
bool coloringFailed = false;
|
||||
while (coloringStackPtr > coloringStack) {
|
||||
RegisterName range = *--coloringStackPtr;
|
||||
|
||||
if (!cost[range].infinite && cost[range].cost < 0) {
|
||||
coloringFailed = true;
|
||||
spill.set(range);
|
||||
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("\tfailed to color %d, will spill.\n", range));
|
||||
} else {
|
||||
neighborColors.clear();
|
||||
|
||||
for (InterferenceVector* vector = iGraph.getInterferenceVector(range); vector != NULL; vector = vector->next)
|
||||
for (Int32 i = vector->count - 1; i >= 0; --i) {
|
||||
RegisterName neighborColor = color[vector->neighbors[i]];
|
||||
if (neighborColor < 6) // FIX
|
||||
neighborColors.set(neighborColor);
|
||||
}
|
||||
|
||||
if (neighborColors.getSize() == 6) { // FIX
|
||||
coloringFailed = true;
|
||||
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("\tfailed to color %d, ", range));
|
||||
|
||||
if (!Splits<RegisterPressure>::findSplit(registerAllocator, color, range)) {
|
||||
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("will spill.\n"));
|
||||
spill.set(range);
|
||||
} else
|
||||
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("will split.\n"));
|
||||
} else {
|
||||
for (Uint32 i = 0; i < 6; i++) // FIX
|
||||
if (!neighborColors.test(i)) {
|
||||
fprintf(stdout, "\twill color %d as %d\n", range, i);
|
||||
color[range] = RegisterName(i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef DEBUG_LOG
|
||||
if (coloringFailed) {
|
||||
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("Coloring failed:\n"));
|
||||
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("\twill spill: "));
|
||||
spill.printPretty(UT_LOG_MODULE(RegAlloc));
|
||||
} else {
|
||||
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("Coloring succeeded:\n"));
|
||||
for (Uint32 i = 1; i < rangeCount; i++)
|
||||
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("\trange %d colored as %d\n", i, color[i]));
|
||||
}
|
||||
#endif
|
||||
|
||||
return !coloringFailed;
|
||||
}
|
||||
|
||||
template <class RegisterPressure>
|
||||
RegisterName* Coloring<RegisterPressure>::simplify(RegisterAllocator& registerAllocator, RegisterName* coloringStack)
|
||||
{
|
||||
InterferenceGraph<RegisterPressure>& iGraph = registerAllocator.iGraph;
|
||||
SpillCost* spillCost = registerAllocator.spillCost;
|
||||
Uint32 rangeCount = registerAllocator.rangeCount;
|
||||
|
||||
Uint32* degree = new Uint32[rangeCount];
|
||||
for (RegisterName i = RegisterName(1); i < rangeCount; i = RegisterName(i + 1)) {
|
||||
InterferenceVector* vector = iGraph.getInterferenceVector(i);
|
||||
degree[i] = (vector != NULL) ? vector->count : 0;
|
||||
}
|
||||
|
||||
Pool& pool = registerAllocator.pool;
|
||||
SparseSet low(pool, rangeCount);
|
||||
SparseSet high(pool, rangeCount);
|
||||
SparseSet highInfinite(pool, rangeCount);
|
||||
SparseSet preColored(pool, rangeCount);
|
||||
|
||||
// Get the precolored registers.
|
||||
//
|
||||
VirtualRegisterManager& vrManager = registerAllocator.vrManager;
|
||||
RegisterName* name2range = registerAllocator.name2range;
|
||||
PreColoredRegister* machineEnd = vrManager.getMachineRegistersEnd();
|
||||
for (PreColoredRegister* machinePtr = vrManager.getMachineRegistersBegin(); machinePtr < machineEnd; machinePtr++)
|
||||
if (machinePtr->id != invalidID)
|
||||
preColored.set(name2range[getName(machinePtr->id)]);
|
||||
|
||||
// Insert the live ranges in the sets.
|
||||
//
|
||||
for (Uint32 range = 1; range < rangeCount; range++)
|
||||
if (!preColored.test(range))
|
||||
if (degree[range] < 6) // FIX
|
||||
low.set(range);
|
||||
else if (!spillCost[range].infinite)
|
||||
high.set(range);
|
||||
else
|
||||
highInfinite.set(range);
|
||||
|
||||
#ifdef DEBUG_LOG
|
||||
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("Coloring sets:\n\tlow = "));
|
||||
low.printPretty(UT_LOG_MODULE(RegAlloc));
|
||||
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("\thigh = "));
|
||||
high.printPretty(UT_LOG_MODULE(RegAlloc));
|
||||
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("\thighInfinite = "));
|
||||
highInfinite.printPretty(UT_LOG_MODULE(RegAlloc));
|
||||
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("\tpreColored = "));
|
||||
preColored.printPretty(UT_LOG_MODULE(RegAlloc));
|
||||
#endif // DEBUG_LOG
|
||||
|
||||
RegisterName* coloringStackPtr = coloringStack;
|
||||
|
||||
while (low.getSize() != 0 || high.getSize() != 0) {
|
||||
while (low.getSize() != 0) {
|
||||
RegisterName range = RegisterName(low.getOne());
|
||||
low.clear(range);
|
||||
*coloringStackPtr++ = range;
|
||||
|
||||
for (InterferenceVector* vector = iGraph.getInterferenceVector(range); vector != NULL; vector = vector->next)
|
||||
for (Int32 i = (vector->count - 1); i >= 0; --i) {
|
||||
RegisterName neighbor = vector->neighbors[i];
|
||||
degree[neighbor]--;
|
||||
|
||||
if (degree[neighbor] < 6) // FIX
|
||||
if (high.test(neighbor)) {
|
||||
high.clear(neighbor);
|
||||
low.set(neighbor);
|
||||
} else if (highInfinite.test(neighbor)) {
|
||||
highInfinite.clear(neighbor);
|
||||
low.set(neighbor);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (high.getSize() != 0) {
|
||||
RegisterName best = RegisterName(high.getOne());
|
||||
double bestCost = spillCost[best].cost;
|
||||
double bestDegree = degree[best];
|
||||
|
||||
// Choose the next best candidate.
|
||||
//
|
||||
for (SparseSet::iterator i = high.begin(); !high.done(i); i = high.advance(i)) {
|
||||
RegisterName range = RegisterName(high.get(i));
|
||||
double thisCost = spillCost[range].cost;
|
||||
double thisDegree = degree[range];
|
||||
|
||||
if (thisCost * bestDegree < bestCost * thisDegree) {
|
||||
best = range;
|
||||
bestCost = thisCost;
|
||||
bestDegree = thisDegree;
|
||||
}
|
||||
}
|
||||
|
||||
high.clear(best);
|
||||
low.set(best);
|
||||
}
|
||||
}
|
||||
assert(highInfinite.getSize() == 0);
|
||||
|
||||
delete degree;
|
||||
|
||||
#ifdef DEBUG_LOG
|
||||
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("Coloring stack:\n\t"));
|
||||
for (RegisterName* sp = coloringStack; sp < coloringStackPtr; ++sp)
|
||||
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("%d ", *sp));
|
||||
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("\n"));
|
||||
#endif // DEBUG_LOG
|
||||
|
||||
return coloringStackPtr;
|
||||
}
|
||||
|
||||
|
||||
template <class RegisterPressure>
|
||||
bool Coloring<RegisterPressure>::color(RegisterAllocator& registerAllocator)
|
||||
{
|
||||
RegisterName* coloringStack = new RegisterName[registerAllocator.rangeCount];
|
||||
return select(registerAllocator, coloringStack, simplify(registerAllocator, coloringStack));
|
||||
}
|
||||
212
mozilla/ef/Compiler/RegisterAllocator/DominatorGraph.cpp
Normal file
212
mozilla/ef/Compiler/RegisterAllocator/DominatorGraph.cpp
Normal file
@@ -0,0 +1,212 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#include "Fundamentals.h"
|
||||
#include <string.h>
|
||||
#include "ControlGraph.h"
|
||||
#include "ControlNodes.h"
|
||||
|
||||
#include "DominatorGraph.h"
|
||||
|
||||
// Build the dominator graph of the given control graph.
//
// Vertices are numbered from 1 (0 is used as the "no vertex" value):
// the node at index n of controlGraph.dfsList is vertex n + 1. The work
// arrays needed by build() are allocated here and released before returning;
// only VtoG, GtoV and the resulting nodes array outlive the constructor.
//
DominatorGraph::DominatorGraph(ControlGraph& controlGraph) : controlGraph(controlGraph)
{
    Uint32 nNodes = controlGraph.nNodes;

    GtoV = new Uint32[nNodes + 1];
    VtoG = new Uint32[nNodes + 1];

    Uint32 v = 1;
    for (Uint32 n = 0; n < nNodes; n++) {
        VtoG[v] = n;
        GtoV[n] = v++;
    }

    // Initialize all the 1-based arrays.
    //
    parent = new Uint32[v];
    semi = new Uint32[v];
    vertex = new Uint32[v];
    label = new Uint32[v];
    size = new Uint32[v];
    ancestor = new Uint32[v];
    child = new Uint32[v];
    dom = new Uint32[v];
    bucket = new DGLinkedList*[v];

    // semi[w] == 0 means "not visited yet" in DFS(); the buckets start empty.
    memset(semi, '\0', v * sizeof(Uint32));
    memset(bucket, '\0', v * sizeof(DGLinkedList*));

    vCount = v;

    build();

    // Release the bucket lists that build() allocated with new.
    for (Uint32 b = 0; b < v; b++) {
        DGLinkedList* elem = bucket[b];
        while (elem != NULL) {
            DGLinkedList* next = elem->next;
            delete elem;
            elem = next;
        }
    }

    // All the work arrays were allocated with new[] and must be released
    // with delete[].
    delete[] parent;
    delete[] semi;
    delete[] vertex;
    delete[] label;
    delete[] size;
    delete[] ancestor;
    delete[] child;
    delete[] dom;
    delete[] bucket;
}
|
||||
|
||||
// Depth-first numbering of the vertices reachable from vx.
//
// vx receives the number n + 1 (stored in semi[vx]); its label, ancestor,
// child and size entries are reset. Every successor that has not been
// numbered yet (semi == 0) gets vx as parent and is visited recursively.
// Returns the last number handed out.
//
Uint32 DominatorGraph::DFS(Uint32 vx, Uint32 n)
{
    n += 1;
    semi[vx] = n;
    label[vx] = vx;
    vertex[n] = vx;
    child[vx] = 0;
    ancestor[vx] = 0;
    size[vx] = 1;

    ControlNode& current = *controlGraph.dfsList[VtoG[vx]];
    ControlEdge* const lastEdge = current.getSuccessorsEnd();
    ControlEdge* edge = current.getSuccessorsBegin();

    while (edge < lastEdge) {
        const Uint32 target = GtoV[edge->getTarget().dfsNum];
        if (semi[target] == 0) {
            // First time this vertex is reached: vx is its DFS parent.
            parent[target] = vx;
            n = DFS(target, n);
        }
        edge++;
    }

    return n;
}
|
||||
|
||||
// Add the edge (vx, w) to the forest kept in the ancestor/child/size/label
// arrays, rebalancing the child chain of w by subtree size on the way.
//
void DominatorGraph::LINK(Uint32 vx, Uint32 w)
{
    Uint32 s = w;

    for (;;) {
        const Uint32 c = child[s];
        if (!(semi[label[w]] < semi[label[c]]))
            break;

        if (size[s] + size[child[c]] >= (size[c] << 1)) {
            ancestor[c] = s;
            child[s] = child[c];
        } else {
            size[c] = size[s];
            ancestor[s] = c;
            s = c;
        }
    }

    label[s] = label[w];
    size[vx] += size[w];

    // Keep the larger of the two subtrees as the child chain of vx.
    if (size[vx] < (size[w] << 1)) {
        const Uint32 previousChild = child[vx];
        child[vx] = s;
        s = previousChild;
    }

    // Everything left on the chain starting at s now hangs below vx.
    for (; s != 0; s = child[s])
        ancestor[s] = vx;
}
|
||||
|
||||
|
||||
// Compress the forest path above vx: after the call ancestor[vx] skips one
// level up and label[vx] is the label with the smallest semi value seen on
// the compressed part of the path. Does nothing when the ancestor of vx has
// no ancestor itself.
//
void DominatorGraph::COMPRESS(Uint32 vx)
{
    if (ancestor[ancestor[vx]] == 0)
        return;

    COMPRESS(ancestor[vx]);

    const Uint32 up = ancestor[vx];
    if (semi[label[up]] < semi[label[vx]])
        label[vx] = label[up];
    ancestor[vx] = ancestor[up];
}
|
||||
|
||||
// Return label[vx] if vx has no ancestor in the forest. Otherwise compress
// the path above vx and return whichever of label[vx] and
// label[ancestor[vx]] has the smaller semi value (label[vx] on a tie).
//
Uint32 DominatorGraph::EVAL(Uint32 vx)
{
    if (ancestor[vx] != 0) {
        COMPRESS(vx);

        const Uint32 ownLabel = label[vx];
        const Uint32 ancestorLabel = label[ancestor[vx]];
        if (semi[ancestorLabel] < semi[ownLabel])
            return ancestorLabel;
        return ownLabel;
    }

    return label[vx];
}
|
||||
|
||||
void DominatorGraph::build()
|
||||
{
|
||||
Uint32 n = DFS(GtoV[0], 0);
|
||||
size[0] = label[0] = semi[0];
|
||||
|
||||
for (Uint32 i = n; i >= 2; i--) {
|
||||
Uint32 w = vertex[i];
|
||||
|
||||
ControlNode& node = *controlGraph.dfsList[VtoG[w]];
|
||||
const DoublyLinkedList<ControlEdge>& predecessors = node.getPredecessors();
|
||||
for (DoublyLinkedList<ControlEdge>::iterator p = predecessors.begin(); !predecessors.done(p); p = predecessors.advance(p)) {
|
||||
Uint32 vx = GtoV[predecessors.get(p).getSource().dfsNum];
|
||||
Uint32 u = EVAL(vx);
|
||||
|
||||
if(semi[u] < semi[w])
|
||||
semi[w] = semi[u];
|
||||
}
|
||||
|
||||
DGLinkedList* elem = new DGLinkedList();
|
||||
elem->next = bucket[vertex[semi[w]]];
|
||||
elem->index = w;
|
||||
bucket[vertex[semi[w]]] = elem;
|
||||
|
||||
LINK(parent[w], w);
|
||||
|
||||
elem = bucket[parent[w]];
|
||||
while(elem != NULL) {
|
||||
Uint32 vx = elem->index;
|
||||
Uint32 u = EVAL(vx);
|
||||
dom[vx] = (semi[u] < semi[vx]) ? u : parent[w];
|
||||
elem = elem->next;
|
||||
}
|
||||
}
|
||||
|
||||
memset(size, '\0', n * sizeof(Uint32));
|
||||
Pool& pool = controlGraph.pool;
|
||||
nodes = new(pool) DGNode[n];
|
||||
|
||||
for(Uint32 j = 2; j <= n; j++) {
|
||||
Uint32 w = vertex[j];
|
||||
Uint32 d = dom[w];
|
||||
if(d != vertex[semi[w]]) {
|
||||
d = dom[d];
|
||||
dom[w] = d;
|
||||
}
|
||||
size[d]++;
|
||||
}
|
||||
dom[GtoV[0]] = 0;
|
||||
|
||||
for (Uint32 k = 1; k <= n; k++) {
|
||||
DGNode& node = nodes[VtoG[k]];
|
||||
Uint32 count = size[k];
|
||||
node.successorsEnd = node.successorsBegin = (count) ? new(pool) Uint32[count] : (Uint32*) 0;
|
||||
}
|
||||
|
||||
for (Uint32 l = 2; l <= n; l++)
|
||||
*(nodes[VtoG[dom[l]]].successorsEnd)++ = VtoG[l];
|
||||
}
|
||||
|
||||
#ifdef DEBUG_LOG
|
||||
// Print, for each node that dominates at least one other node, the list of
// the nodes stored in its successor list.
//
void DominatorGraph::printPretty(LogModuleObject log)
{
    UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("Dominator Graph:\n"));

    const Uint32 nodeCount = controlGraph.nNodes;
    for (Uint32 index = 0; index < nodeCount; index++) {
        DGNode& current = nodes[index];

        // Nothing to print for a node with an empty successor list.
        if (current.successorsBegin == current.successorsEnd)
            continue;

        UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("\tN%d dominates ", index));
        Uint32* cursor = current.successorsBegin;
        while (cursor < current.successorsEnd) {
            UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("N%d ", *cursor));
            cursor++;
        }
        UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("\n"));
    }
}
|
||||
|
||||
#endif // DEBUG_LOG
|
||||
|
||||
|
||||
|
||||
80
mozilla/ef/Compiler/RegisterAllocator/DominatorGraph.h
Normal file
80
mozilla/ef/Compiler/RegisterAllocator/DominatorGraph.h
Normal file
@@ -0,0 +1,80 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#ifndef _DOMINATOR_GRAPH_H_
|
||||
#define _DOMINATOR_GRAPH_H_
|
||||
|
||||
#include "LogModule.h"
|
||||
|
||||
class ControlGraph;
|
||||
|
||||
struct DGNode
|
||||
{
|
||||
Uint32* successorsBegin;
|
||||
Uint32* successorsEnd;
|
||||
};
|
||||
|
||||
struct DGLinkedList
|
||||
{
|
||||
DGLinkedList* next;
|
||||
Uint32 index;
|
||||
};
|
||||
|
||||
class DominatorGraph
|
||||
{
|
||||
private:
|
||||
|
||||
ControlGraph& controlGraph;
|
||||
|
||||
Uint32 vCount;
|
||||
|
||||
Uint32* VtoG;
|
||||
Uint32* GtoV;
|
||||
Uint32* parent;
|
||||
Uint32* semi;
|
||||
Uint32* vertex;
|
||||
Uint32* label;
|
||||
Uint32* size;
|
||||
Uint32* ancestor;
|
||||
Uint32* child;
|
||||
Uint32* dom;
|
||||
DGLinkedList** bucket;
|
||||
DGNode* nodes;
|
||||
|
||||
private:
|
||||
|
||||
void build();
|
||||
|
||||
Uint32 DFS(Uint32 vx, Uint32 n);
|
||||
void LINK(Uint32 vx, Uint32 w);
|
||||
void COMPRESS(Uint32 vx);
|
||||
Uint32 EVAL(Uint32 vx);
|
||||
|
||||
public:
|
||||
|
||||
DominatorGraph(ControlGraph& controlGraph);
|
||||
|
||||
Uint32* getSuccessorsBegin(Uint32 n) const {return nodes[n].successorsBegin;}
|
||||
Uint32* getSuccessorsEnd(Uint32 n) const {return nodes[n].successorsEnd;}
|
||||
|
||||
#ifdef DEBUG_LOG
|
||||
void printPretty(LogModuleObject log);
|
||||
#endif // DEBUG_LOG
|
||||
};
|
||||
|
||||
#endif // _DOMINATOR_GRAPH_H_
|
||||
20
mozilla/ef/Compiler/RegisterAllocator/HashSet.cpp
Normal file
20
mozilla/ef/Compiler/RegisterAllocator/HashSet.cpp
Normal file
@@ -0,0 +1,20 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#include "Fundamentals.h"
|
||||
#include "HashSet.h"
|
||||
97
mozilla/ef/Compiler/RegisterAllocator/HashSet.h
Normal file
97
mozilla/ef/Compiler/RegisterAllocator/HashSet.h
Normal file
@@ -0,0 +1,97 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#ifndef _HASH_SET_H_
|
||||
#define _HASH_SET_H_
|
||||
|
||||
#include "Fundamentals.h"
|
||||
#include "Pool.h"
|
||||
#include <string.h>
|
||||
|
||||
struct HashSetElement
|
||||
{
|
||||
Uint32 index;
|
||||
HashSetElement* next;
|
||||
};
|
||||
|
||||
class HashSet
|
||||
{
|
||||
private:
|
||||
|
||||
static const hashSize = 64;
|
||||
|
||||
// Return the hash code for the given element index.
|
||||
static Uint32 getHashCode(Uint32 index) {return index & (hashSize - 1);} // Could be better !
|
||||
|
||||
private:
|
||||
|
||||
Pool& allocationPool;
|
||||
HashSetElement** bucket;
|
||||
HashSetElement* free;
|
||||
|
||||
private:
|
||||
|
||||
// No copy constructor.
|
||||
HashSet(const HashSet&);
|
||||
// No copy operator.
|
||||
void operator = (const HashSet&);
|
||||
|
||||
public:
|
||||
|
||||
// Create a new HashSet.
|
||||
inline HashSet(Pool& pool, Uint32 universeSize);
|
||||
|
||||
// Clear the hashset.
|
||||
void clear();
|
||||
// Clear the element for the given index.
|
||||
void clear(Uint32 index);
|
||||
// Set the element for the given index.
|
||||
void set(Uint32 index);
|
||||
// Return true if the element at index is a member.
|
||||
bool test(Uint32 index) const;
|
||||
// Union with the given hashset.
|
||||
inline void or(const HashSet& set);
|
||||
// Intersection with the given hashset.
|
||||
inline void and(const HashSet& set);
|
||||
// Difference with the given hashset.
|
||||
inline void difference(const HashSet& set);
|
||||
|
||||
// Logical operators.
|
||||
HashSet& operator |= (const HashSet& set) {or(set); return *this;}
|
||||
HashSet& operator &= (const HashSet& set) {and(set); return *this;}
|
||||
HashSet& operator -= (const HashSet& set) {difference(set); return *this;}
|
||||
|
||||
// Iterator to conform with the set API.
|
||||
typedef HashSetElement* iterator;
|
||||
// Return the iterator for the first element of this set.
|
||||
iterator begin() const;
|
||||
// Return the next iterator.
|
||||
iterator advance(iterator pos) const;
|
||||
// Return true if the iterator is at the end of the set.
|
||||
bool done(iterator pos) const {return pos == NULL;}
|
||||
};
|
||||
|
||||
|
||||
// Create an empty HashSet whose bucket array is allocated from pool.
// The universe size is not used by this implementation.
inline HashSet::HashSet(Pool& pool, Uint32 /*universeSize*/)
    : allocationPool(pool), free(NULL)
{
    bucket = new(pool) HashSetElement*[hashSize];
    // Clear every bucket head, not only the first one.
    memset(bucket, '\0', hashSize * sizeof(HashSetElement*));
}
|
||||
|
||||
#endif // _HASH_SET_H_
|
||||
213
mozilla/ef/Compiler/RegisterAllocator/IndexedPool.h
Normal file
213
mozilla/ef/Compiler/RegisterAllocator/IndexedPool.h
Normal file
@@ -0,0 +1,213 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#ifndef _INDEXED_POOL_H_
|
||||
#define _INDEXED_POOL_H_
|
||||
|
||||
#include "Fundamentals.h"
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// IndexedPool<IndexedObjectSubclass> is an indexed pool of objects. The
|
||||
// template parameter 'IndexedObjectSubclass' must be a subclass of the struct
|
||||
// IndexedObject.
|
||||
//
|
||||
// When the indexed pool is asked to allocate and initialize a new object (using
|
||||
// the operator new(anIndexedPool)) it will zero the memory used to store the
|
||||
// object and initialize the field 'index' of this object to its position in
|
||||
// the pool.
|
||||
//
|
||||
// An object allocated by the indexed pool can be freed by calling the method
|
||||
// IndexedPool::release(IndexedElement& objectIndex).
|
||||
//
|
||||
// example:
|
||||
//
|
||||
// IndexedPool<IndexedElement> elementPool;
|
||||
//
|
||||
// IndexedElement& element1 = *new(elementPool) IndexedElement();
|
||||
// IndexedElement& element2 = *new(elementPool) IndexedElement();
|
||||
//
|
||||
// elementPool.release(element1);
|
||||
// IndexedElement& element3 = *new(elementPool) IndexedElement();
|
||||
//
|
||||
// At this point element1 is no longer a valid object, element2 is at
|
||||
// index 2 and element3 is at index 1.
|
||||
//
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// IndexedObject -
|
||||
//
|
||||
|
||||
template<class Object>
|
||||
struct IndexedObject
|
||||
{
|
||||
Uint32 index; // Index in the pool.
|
||||
Object* next; // Used to link IndexedObject together.
|
||||
|
||||
Uint32 getIndex() {return index;}
|
||||
};
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// IndexedPool<IndexedObject> -
|
||||
//
|
||||
|
||||
template <class IndexedObject>
|
||||
class IndexedPool
|
||||
{
|
||||
private:
|
||||
|
||||
static const blockSize = 4; // Size of one block.
|
||||
|
||||
Uint32 nBlocks; // Number of blocks in the pool.
|
||||
IndexedObject** block; // Array of block pointers.
|
||||
IndexedObject* freeObjects; // Chained list of free IndexedObjects.
|
||||
Uint32 nextIndex; // Index of the next free object in the last block.
|
||||
|
||||
private:
|
||||
|
||||
void allocateAnotherBlock();
|
||||
IndexedObject& newObject();
|
||||
|
||||
public:
|
||||
|
||||
IndexedPool() : nBlocks(0), block(NULL), freeObjects(NULL), nextIndex(1) {}
|
||||
~IndexedPool();
|
||||
|
||||
IndexedObject& get(Uint32 index) const;
|
||||
void release(IndexedObject& object);
|
||||
|
||||
void setSize(Uint32 size) {assert(size < nextIndex); nextIndex = size;}
|
||||
|
||||
// Return the universe size.
|
||||
Uint32 getSize() {return nextIndex;}
|
||||
|
||||
friend void* operator new(size_t, IndexedPool<IndexedObject>& pool); // Needs to call newObject().
|
||||
};
|
||||
|
||||
// Free all the memory allocated for this object.
|
||||
//
|
||||
template <class IndexedObject>
|
||||
IndexedPool<IndexedObject>::~IndexedPool()
|
||||
{
|
||||
for (Uint32 n = 0; n < nBlocks; n++)
|
||||
free(&((IndexedObject **) &block[n][n*blockSize])[-(n + 1)]);
|
||||
}
|
||||
|
||||
// Release the given object. This object will be inserted in the chained
|
||||
// list of free IndexedObjects. To minimize the fragmentation the chained list
|
||||
// is ordered by ascending indexes.
|
||||
//
|
||||
template <class IndexedObject>
|
||||
void IndexedPool<IndexedObject>::release(IndexedObject& object)
|
||||
{
|
||||
Uint32 index = object.index;
|
||||
IndexedObject* list = freeObjects;
|
||||
|
||||
assert(&object == &get(index)); // Make sure that object is owned by this pool.
|
||||
|
||||
if (list == NULL) { // The list is empty.
|
||||
freeObjects = &object;
|
||||
object.next = NULL;
|
||||
} else { // The list contains at least 1 element.
|
||||
if (index < list->index) { // insert as first element.
|
||||
freeObjects = &object;
|
||||
object.next = list;
|
||||
} else { // Find this object's place.
|
||||
while ((list->next) != NULL && (list->next->index < index))
|
||||
list = list->next;
|
||||
|
||||
object.next = list->next;
|
||||
list->next = &object;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
// Sanity check to be sure that the list is correctly ordered.
|
||||
for (IndexedObject* obj = freeObjects; obj != NULL; obj = obj->next)
|
||||
if (obj->next != NULL)
|
||||
assert(obj->index < obj->next->index);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Create a new block of IndexedObjects. We will allocate the memory to
|
||||
// store IndexedPool::blockSize IndexedObject and the new Array of block
|
||||
// pointers.
|
||||
// The newly created IndexedObjects will not be initialized.
|
||||
//
|
||||
template <class IndexedObject>
|
||||
void IndexedPool<IndexedObject>::allocateAnotherBlock()
|
||||
{
|
||||
void* memory = (void *) malloc((nBlocks + 1) * sizeof(Uint32) + blockSize * sizeof(IndexedObject));
|
||||
|
||||
memcpy(memory, block, nBlocks * sizeof(Uint32));
|
||||
|
||||
block = (IndexedObject **) memory;
|
||||
IndexedObject* objects = (IndexedObject *) &block[nBlocks + 1];
|
||||
|
||||
block[nBlocks] = &objects[-(nBlocks * blockSize)];
|
||||
nBlocks++;
|
||||
}
|
||||
|
||||
// Return the IndexedObject at the position 'index' in the pool.
|
||||
//
|
||||
template <class IndexedObject>
|
||||
IndexedObject& IndexedPool<IndexedObject>::get(Uint32 index) const
|
||||
{
|
||||
Uint32 blockIndex = index / blockSize;
|
||||
assert(blockIndex < nBlocks);
|
||||
|
||||
return block[blockIndex][index];
|
||||
}
|
||||
|
||||
// Return the reference of an unused object in the pool.
|
||||
//
|
||||
template <class IndexedObject>
|
||||
IndexedObject& IndexedPool<IndexedObject>::newObject()
|
||||
{
|
||||
if (freeObjects != NULL) {
|
||||
IndexedObject& newObject = *freeObjects;
|
||||
freeObjects = newObject.next;
|
||||
return newObject;
|
||||
}
|
||||
|
||||
Uint32 nextIndex = this->nextIndex++;
|
||||
Uint32 blockIndex = nextIndex / blockSize;
|
||||
|
||||
while (blockIndex >= nBlocks)
|
||||
allocateAnotherBlock();
|
||||
|
||||
IndexedObject& newObject = block[blockIndex][nextIndex];
|
||||
newObject.index = nextIndex;
|
||||
|
||||
return newObject;
|
||||
}
|
||||
|
||||
// Return the address of the next unused object in the given
|
||||
// indexed pool. The field index of the newly allocated object
|
||||
// will be initialized to the corresponding index of this object
|
||||
// in the pool.
|
||||
//
|
||||
template <class IndexedObject>
|
||||
void* operator new(size_t size, IndexedPool<IndexedObject>& pool)
|
||||
{
|
||||
assert(size == sizeof(IndexedObject));
|
||||
return (void *) &pool.newObject();
|
||||
}
|
||||
|
||||
#endif // _INDEXED_POOL_H_
|
||||
258
mozilla/ef/Compiler/RegisterAllocator/InterferenceGraph.h
Normal file
258
mozilla/ef/Compiler/RegisterAllocator/InterferenceGraph.h
Normal file
@@ -0,0 +1,258 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#ifndef _INTERFERENCE_GRAPH_H_
|
||||
#define _INTERFERENCE_GRAPH_H_
|
||||
|
||||
#include "Fundamentals.h"
|
||||
#include "ControlGraph.h"
|
||||
#include "Primitives.h"
|
||||
#include "Instruction.h"
|
||||
#include "VirtualRegister.h"
|
||||
#include "RegisterPressure.h"
|
||||
#include "SparseSet.h"
|
||||
#include <string.h>
|
||||
|
||||
struct InterferenceVector
|
||||
{
|
||||
Uint32 count;
|
||||
InterferenceVector* next;
|
||||
RegisterName* neighbors;
|
||||
|
||||
InterferenceVector() : count(0), next(NULL) {}
|
||||
};
|
||||
|
||||
class RegisterAllocator;
|
||||
|
||||
template <class RegisterPressure>
|
||||
class InterferenceGraph
|
||||
{
|
||||
private:
|
||||
|
||||
RegisterAllocator& registerAllocator;
|
||||
|
||||
RegisterPressure::Set* interferences;
|
||||
InterferenceVector** vector;
|
||||
Uint32* offset;
|
||||
Uint32 rangeCount;
|
||||
|
||||
private:
|
||||
|
||||
// No copy constructor.
|
||||
InterferenceGraph(const InterferenceGraph&);
|
||||
// No copy operator.
|
||||
void operator = (const InterferenceGraph&);
|
||||
|
||||
// Check if reg is a member of the universe.
|
||||
void checkMember(RegisterName name) {assert(name < rangeCount);}
|
||||
// Return the edge index for the interference between name1 and name2.
|
||||
Uint32 getEdgeIndex(RegisterName name1, RegisterName name2);
|
||||
|
||||
public:
|
||||
InterferenceGraph(RegisterAllocator& registerAllocator) : registerAllocator(registerAllocator) {}
|
||||
|
||||
// Calculate the interferences.
|
||||
void build();
|
||||
// Return true if reg1 and reg2 interfere.
|
||||
bool interfere(RegisterName name1, RegisterName name2);
|
||||
// Return the interference vector for the given register or NULL if there is none.
|
||||
InterferenceVector* getInterferenceVector(RegisterName name) {return vector[name];}
|
||||
// Set the interference between name1 and name2.
|
||||
void setInterference(RegisterName name1, RegisterName name2);
|
||||
// Set the interference vector for the given register.
|
||||
void setInterferenceVector(RegisterName name, InterferenceVector* v) {vector[name] = v;}
|
||||
|
||||
|
||||
#ifdef DEBUG_LOG
|
||||
// Print the interferences.
|
||||
void printPretty(LogModuleObject log);
|
||||
#endif // DEBUG_LOG
|
||||
};
|
||||
|
||||
template <class RegisterPressure>
|
||||
void InterferenceGraph<RegisterPressure>::build()
|
||||
{
|
||||
Pool& pool = registerAllocator.pool;
|
||||
Uint32 rangeCount = registerAllocator.rangeCount;
|
||||
this->rangeCount = rangeCount;
|
||||
|
||||
// Initialize the structures.
|
||||
//
|
||||
offset = new(pool) Uint32[rangeCount + 1];
|
||||
vector = new(pool) InterferenceVector*[rangeCount];
|
||||
memset(vector, '\0', sizeof(InterferenceVector*) * rangeCount);
|
||||
|
||||
Uint32 o = 0;
|
||||
offset[0] = 0;
|
||||
for (Uint32 i = 1; i <= rangeCount; ++i) {
|
||||
offset[i] = o;
|
||||
o += i;
|
||||
}
|
||||
|
||||
interferences = new(pool) RegisterPressure::Set(pool, (rangeCount * rangeCount) / 2);
|
||||
|
||||
ControlGraph& controlGraph = registerAllocator.controlGraph;
|
||||
ControlNode** nodes = controlGraph.dfsList;
|
||||
Uint32 nNodes = controlGraph.nNodes;
|
||||
|
||||
RegisterName* name2range = registerAllocator.name2range;
|
||||
LivenessInfo<RegisterPressure> liveness = Liveness<RegisterPressure>::analysis(controlGraph, rangeCount, name2range);
|
||||
registerAllocator.liveness = liveness;
|
||||
SparseSet currentLive(pool, rangeCount);
|
||||
|
||||
for (Uint32 n = 0; n < nNodes; n++) {
|
||||
ControlNode& node = *nodes[n];
|
||||
currentLive = liveness.liveOut[n];
|
||||
|
||||
InstructionList& instructions = node.getInstructions();
|
||||
for (InstructionList::iterator i = instructions.end(); !instructions.done(i); i = instructions.retreat(i)) {
|
||||
Instruction& instruction = instructions.get(i);
|
||||
|
||||
InstructionUse* useBegin = instruction.getInstructionUseBegin();
|
||||
InstructionUse* useEnd = instruction.getInstructionUseEnd();
|
||||
InstructionUse* usePtr;
|
||||
InstructionDefine* defineBegin = instruction.getInstructionDefineBegin();
|
||||
InstructionDefine* defineEnd = instruction.getInstructionDefineEnd();
|
||||
InstructionDefine* definePtr;
|
||||
|
||||
// Handle the copy instruction to avoid unnecessary interference between the 2 registers.
|
||||
if ((instruction.getFlags() & ifCopy) != 0) {
|
||||
assert(useBegin != useEnd && useBegin[0].isRegister());
|
||||
currentLive.clear(name2range[useBegin[0].getRegisterName()]);
|
||||
}
|
||||
|
||||
// Create the interferences.
|
||||
for (definePtr = defineBegin; definePtr < defineEnd; definePtr++)
|
||||
if (definePtr->isRegister()) {
|
||||
RegisterName define = name2range[definePtr->getRegisterName()];
|
||||
|
||||
for (SparseSet::iterator e = currentLive.begin(); !currentLive.done(e); e = currentLive.advance(e)) {
|
||||
RegisterName live = RegisterName(currentLive.get(e));
|
||||
|
||||
if ((live != define) && !interfere(live, define) && registerAllocator.canInterfere(live, define)) {
|
||||
|
||||
if (vector[define] == NULL)
|
||||
vector[define] = new(pool) InterferenceVector();
|
||||
vector[define]->count++;
|
||||
|
||||
if (vector[live] == NULL)
|
||||
vector[live] = new(pool) InterferenceVector();
|
||||
vector[live]->count++;
|
||||
|
||||
setInterference(live, define);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Now update the liveness.
|
||||
//
|
||||
for (definePtr = defineBegin; definePtr < defineEnd; definePtr++)
|
||||
if (definePtr->isRegister())
|
||||
currentLive.clear(name2range[definePtr->getRegisterName()]);
|
||||
|
||||
for (usePtr = useBegin; usePtr < useEnd; usePtr++)
|
||||
if (usePtr->isRegister())
|
||||
currentLive.set(name2range[usePtr->getRegisterName()]);
|
||||
}
|
||||
}
|
||||
|
||||
// Allocate the memory to store the interferences.
|
||||
//
|
||||
for (Uint32 e = 0; e < rangeCount; e++)
|
||||
if (vector[e] != NULL) {
|
||||
InterferenceVector& v = *vector[e];
|
||||
v.neighbors = new(pool) RegisterName[v.count];
|
||||
v.count = 0;
|
||||
}
|
||||
|
||||
// Initialize the edges.
|
||||
//
|
||||
if (RegisterPressure::Set::isOrdered()) {
|
||||
RegisterName name1 = RegisterName(0);
|
||||
|
||||
for (RegisterPressure::Set::iterator i = interferences->begin(); !interferences->done(i); i = interferences->advance(i)) {
|
||||
Uint32 interferenceIndex = interferences->get(i);
|
||||
|
||||
while(interferenceIndex >= offset[name1 + 1])
|
||||
name1 = RegisterName(name1 + 1);
|
||||
|
||||
assert((interferenceIndex >= offset[name1]) && (interferenceIndex < offset[name1 + 1]));
|
||||
|
||||
RegisterName name2 = RegisterName(interferenceIndex - offset[name1]);
|
||||
|
||||
assert(interfere(name1, name2));
|
||||
|
||||
InterferenceVector& vector1 = *vector[name1];
|
||||
vector1.neighbors[vector1.count++] = name2;
|
||||
|
||||
InterferenceVector& vector2 = *vector[name2];
|
||||
vector2.neighbors[vector2.count++] = name1;
|
||||
}
|
||||
} else {
|
||||
trespass("not Implemented"); // FIX: need one more pass to initialize the vectors.
|
||||
}
|
||||
}
|
||||
|
||||
template <class RegisterPressure>
|
||||
Uint32 InterferenceGraph<RegisterPressure>::getEdgeIndex(RegisterName name1, RegisterName name2)
|
||||
{
|
||||
checkMember(name1); checkMember(name2);
|
||||
assert(name1 != name2); // This is not possible.
|
||||
return (name1 < name2) ? offset[name2] + name1 : offset[name1] + name2;
|
||||
}
|
||||
|
||||
template <class RegisterPressure>
|
||||
void InterferenceGraph<RegisterPressure>::setInterference(RegisterName name1, RegisterName name2)
|
||||
{
|
||||
interferences->set(getEdgeIndex(name1, name2));
|
||||
}
|
||||
|
||||
template <class RegisterPressure>
|
||||
bool InterferenceGraph<RegisterPressure>::interfere(RegisterName name1, RegisterName name2)
|
||||
{
|
||||
return interferences->test(getEdgeIndex(name1, name2));
|
||||
}
|
||||
|
||||
#ifdef DEBUG_LOG
|
||||
template <class RegisterPressure>
|
||||
void InterferenceGraph<RegisterPressure>::printPretty(LogModuleObject log)
|
||||
{
|
||||
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("Interference Vectors:\n"));
|
||||
for (Uint32 i = 1; i < rangeCount; i++) {
|
||||
if (vector[i] != NULL) {
|
||||
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("\tvr%d: (", i));
|
||||
for (InterferenceVector* v = vector[i]; v != NULL; v = v->next)
|
||||
for (Uint32 j = 0; j < v->count; j++) {
|
||||
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("%d", v->neighbors[j]));
|
||||
if (v->next != NULL || j != (v->count - 1))
|
||||
UT_OBJECTLOG(log, PR_LOG_ALWAYS, (","));
|
||||
}
|
||||
UT_OBJECTLOG(log, PR_LOG_ALWAYS, (")\n"));
|
||||
}
|
||||
}
|
||||
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("Interference Matrix:\n"));
|
||||
for (RegisterName name1 = RegisterName(1); name1 < rangeCount; name1 = RegisterName(name1 + 1)) {
|
||||
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("\t%d:\t", name1));
|
||||
for (RegisterName name2 = RegisterName(1); name2 < rangeCount; name2 = RegisterName(name2 + 1))
|
||||
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("%c", ((name1 != name2) && interfere(name1, name2)) ? '1' : '0'));
|
||||
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("\n"));
|
||||
}
|
||||
}
|
||||
#endif // DEBUG_LOG
|
||||
|
||||
#endif // _INTERFERENCE_GRAPH_H_
|
||||
87
mozilla/ef/Compiler/RegisterAllocator/LiveRange.h
Normal file
87
mozilla/ef/Compiler/RegisterAllocator/LiveRange.h
Normal file
@@ -0,0 +1,87 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#ifndef _LIVE_RANGE_H_
|
||||
#define _LIVE_RANGE_H_
|
||||
|
||||
#include "Fundamentals.h"
|
||||
#include "ControlGraph.h"
|
||||
#include "ControlNodes.h"
|
||||
#include "Primitives.h"
|
||||
#include "Instruction.h"
|
||||
#include "RegisterAllocator.h"
|
||||
#include "RegisterAllocatorTools.h"
|
||||
|
||||
// LiveRange - merges the register names connected by phi nodes into live ranges.
template <class RegisterPressure>
|
||||
struct LiveRange
|
||||
{
|
||||
// Walk the phi nodes of registerAllocator.controlGraph and store the number
// of resulting ranges in registerAllocator.rangeCount.
static void build(RegisterAllocator& registerAllocator);
|
||||
};
|
||||
|
||||
template <class RegisterPressure>
|
||||
void LiveRange<RegisterPressure>::build(RegisterAllocator& registerAllocator)
|
||||
{
|
||||
// Intialize the lookup table.
|
||||
//
|
||||
Uint32 nameCount = registerAllocator.nameCount;
|
||||
RegisterName* nameTable = new(registerAllocator.pool) RegisterName[2*nameCount];
|
||||
RegisterName* rangeName = &nameTable[nameCount];
|
||||
|
||||
init(rangeName, nameCount);
|
||||
|
||||
// Walk the graph.
|
||||
//
|
||||
ControlGraph& controlGraph = registerAllocator.controlGraph;
|
||||
ControlNode** nodes = controlGraph.dfsList;
|
||||
Uint32 nNodes = controlGraph.nNodes;
|
||||
|
||||
SparseSet destination(registerAllocator.pool, nameCount);
|
||||
|
||||
for (Uint32 n = 0; n < nNodes; n++) {
|
||||
InstructionList& phiNodes = nodes[n]->getPhiNodeInstructions();
|
||||
|
||||
destination.clear();
|
||||
for (InstructionList::iterator i = phiNodes.begin(); !phiNodes.done(i); i = phiNodes.advance(i)) {
|
||||
Instruction& phiNode = phiNodes.get(i);
|
||||
assert(phiNode.getInstructionDefineBegin() != phiNode.getInstructionDefineEnd() && phiNode.getInstructionDefineBegin()[0].isRegister());
|
||||
destination.set(findRoot(phiNode.getInstructionDefineBegin()[0].getRegisterName(), rangeName));
|
||||
}
|
||||
|
||||
for (InstructionList::iterator p = phiNodes.begin(); !phiNodes.done(p); p = phiNodes.advance(p)) {
|
||||
Instruction& phiNode = phiNodes.get(p);
|
||||
|
||||
assert(phiNode.getInstructionDefineBegin() != phiNode.getInstructionDefineEnd() && phiNode.getInstructionDefineBegin()[0].isRegister());
|
||||
RegisterName destinationName = phiNode.getInstructionDefineBegin()[0].getRegisterName();
|
||||
RegisterName destinationRoot = findRoot(destinationName, rangeName);
|
||||
|
||||
InstructionUse* useEnd = phiNode.getInstructionUseEnd();
|
||||
for (InstructionUse* usePtr = phiNode.getInstructionUseBegin(); usePtr < useEnd; usePtr++) {
|
||||
assert(usePtr->isRegister());
|
||||
RegisterName sourceName = usePtr->getRegisterName();
|
||||
RegisterName sourceRoot = findRoot(sourceName, rangeName);
|
||||
|
||||
if (sourceRoot != destinationRoot && !destination.test(sourceRoot))
|
||||
rangeName[sourceRoot] = destinationRoot;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
registerAllocator.rangeCount = compress(registerAllocator.name2range, rangeName, nameCount, nameCount);
|
||||
}
|
||||
|
||||
#endif // _LIVE_RANGE_H_
|
||||
163
mozilla/ef/Compiler/RegisterAllocator/LiveRangeGraph.h
Normal file
163
mozilla/ef/Compiler/RegisterAllocator/LiveRangeGraph.h
Normal file
@@ -0,0 +1,163 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#ifndef _LIVE_RANGE_GRAPH_
|
||||
#define _LIVE_RANGE_GRAPH_
|
||||
|
||||
#include "Fundamentals.h"
|
||||
#include "Pool.h"
|
||||
#include "ControlGraph.h"
|
||||
#include "ControlNodes.h"
|
||||
#include "Instruction.h"
|
||||
#include "RegisterTypes.h"
|
||||
|
||||
class RegisterAllocator;
|
||||
|
||||
template <class RegisterPressure>
|
||||
class LiveRangeGraph
|
||||
{
|
||||
private:
|
||||
|
||||
RegisterAllocator& registerAllocator;
|
||||
|
||||
RegisterPressure::Set* edges;
|
||||
Uint32 rangeCount;
|
||||
|
||||
public:
|
||||
//
|
||||
//
|
||||
LiveRangeGraph(RegisterAllocator& registerAllocator) : registerAllocator(registerAllocator) {}
|
||||
|
||||
//
|
||||
//
|
||||
void build();
|
||||
|
||||
//
|
||||
//
|
||||
void addEdge(RegisterName name1, RegisterName name2);
|
||||
|
||||
//
|
||||
//
|
||||
bool haveEdge(RegisterName name1, RegisterName name2);
|
||||
|
||||
#ifdef DEBUG_LOG
|
||||
//
|
||||
//
|
||||
void printPretty(LogModuleObject log);
|
||||
#endif // DEBUG_LOG
|
||||
};
|
||||
|
||||
template <class RegisterPressure>
|
||||
void LiveRangeGraph<RegisterPressure>::build()
|
||||
{
|
||||
Pool& pool = registerAllocator.pool;
|
||||
Uint32 rangeCount = registerAllocator.rangeCount;
|
||||
this->rangeCount = rangeCount;
|
||||
|
||||
edges = new(pool) RegisterPressure::Set(pool, rangeCount * rangeCount);
|
||||
|
||||
ControlGraph& controlGraph = registerAllocator.controlGraph;
|
||||
ControlNode** nodes = controlGraph.dfsList;
|
||||
Uint32 nNodes = controlGraph.nNodes;
|
||||
|
||||
RegisterName* name2range = registerAllocator.name2range;
|
||||
LivenessInfo<RegisterPressure>& liveness = registerAllocator.liveness;
|
||||
SparseSet currentLive(pool, rangeCount);
|
||||
|
||||
for (Uint32 n = 0; n < nNodes; n++) {
|
||||
ControlNode& node = *nodes[n];
|
||||
currentLive = liveness.liveOut[n];
|
||||
|
||||
InstructionList& instructions = node.getInstructions();
|
||||
for (InstructionList::iterator i = instructions.end(); !instructions.done(i); i = instructions.retreat(i)) {
|
||||
Instruction& instruction = instructions.get(i);
|
||||
|
||||
InstructionUse* useBegin = instruction.getInstructionUseBegin();
|
||||
InstructionUse* useEnd = instruction.getInstructionUseEnd();
|
||||
InstructionUse* usePtr;
|
||||
InstructionDefine* defineBegin = instruction.getInstructionDefineBegin();
|
||||
InstructionDefine* defineEnd = instruction.getInstructionDefineEnd();
|
||||
InstructionDefine* definePtr;
|
||||
|
||||
if ((instruction.getFlags() & ifCopy) != 0) {
|
||||
assert(useBegin != useEnd && useBegin[0].isRegister());
|
||||
currentLive.clear(name2range[useBegin[0].getRegisterName()]);
|
||||
}
|
||||
|
||||
for (definePtr = defineBegin; definePtr < defineEnd; definePtr++)
|
||||
if (definePtr->isRegister()) {
|
||||
RegisterName define = name2range[definePtr->getRegisterName()];
|
||||
|
||||
for (SparseSet::iterator l = currentLive.begin(); !currentLive.done(l); l = currentLive.advance(l)) {
|
||||
RegisterName live = RegisterName(currentLive.get(l));
|
||||
if (define != live && registerAllocator.canInterfere(define, live))
|
||||
addEdge(define, live);
|
||||
}
|
||||
}
|
||||
|
||||
for (definePtr = defineBegin; definePtr < defineEnd; definePtr++)
|
||||
if (definePtr->isRegister())
|
||||
currentLive.clear(name2range[definePtr->getRegisterName()]);
|
||||
|
||||
for (usePtr = useBegin; usePtr < useEnd; usePtr++)
|
||||
if (usePtr->isRegister())
|
||||
currentLive.set(name2range[usePtr->getRegisterName()]);
|
||||
|
||||
for (usePtr = useBegin; usePtr < useEnd; usePtr++)
|
||||
if (usePtr->isRegister()) {
|
||||
RegisterName use = name2range[usePtr->getRegisterName()];
|
||||
|
||||
for (SparseSet::iterator l = currentLive.begin(); !currentLive.done(l); l = currentLive.advance(l)) {
|
||||
RegisterName live = RegisterName(currentLive.get(l));
|
||||
if (use != live && registerAllocator.canInterfere(use, live))
|
||||
addEdge(use, live);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class RegisterPressure>
|
||||
void LiveRangeGraph<RegisterPressure>::addEdge(RegisterName name1, RegisterName name2)
|
||||
{
|
||||
assert(name1 != name2);
|
||||
edges->set(name1 * rangeCount + name2);
|
||||
}
|
||||
|
||||
template <class RegisterPressure>
|
||||
bool LiveRangeGraph<RegisterPressure>::haveEdge(RegisterName name1, RegisterName name2)
|
||||
{
|
||||
assert(name1 != name2);
|
||||
return edges->test(name1 * rangeCount + name2);
|
||||
}
|
||||
|
||||
#ifdef DEBUG_LOG
|
||||
template <class RegisterPressure>
|
||||
void LiveRangeGraph<RegisterPressure>::printPretty(LogModuleObject log)
|
||||
{
|
||||
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("Live ranges graph:\n"));
|
||||
for (RegisterName name1 = RegisterName(1); name1 < rangeCount; name1 = RegisterName(name1 + 1)) {
|
||||
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("\t%d:\t", name1));
|
||||
for (RegisterName name2 = RegisterName(1); name2 < rangeCount; name2 = RegisterName(name2 + 1))
|
||||
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("%c", ((name1 != name2) && haveEdge(name1, name2)) ? '1' : '0'));
|
||||
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("\n"));
|
||||
}
|
||||
}
|
||||
#endif // DEBUG_LOG
|
||||
|
||||
#endif // _LIVE_RANGE_GRAPH_
|
||||
21
mozilla/ef/Compiler/RegisterAllocator/Liveness.cpp
Normal file
21
mozilla/ef/Compiler/RegisterAllocator/Liveness.cpp
Normal file
@@ -0,0 +1,21 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#include "Fundamentals.h"
|
||||
#include "Liveness.h"
|
||||
|
||||
301
mozilla/ef/Compiler/RegisterAllocator/Liveness.h
Normal file
301
mozilla/ef/Compiler/RegisterAllocator/Liveness.h
Normal file
@@ -0,0 +1,301 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#ifndef _LIVENESS_H_
|
||||
#define _LIVENESS_H_
|
||||
|
||||
#include "Fundamentals.h"
|
||||
#include "ControlGraph.h"
|
||||
#include "ControlNodes.h"
|
||||
#include "Instruction.h"
|
||||
#include "RegisterTypes.h"
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// LivenessInfo -
|
||||
|
||||
template <class RegisterPressure>
|
||||
struct LivenessInfo
|
||||
{
|
||||
RegisterPressure::Set* liveIn;
|
||||
RegisterPressure::Set* liveOut;
|
||||
DEBUG_LOG_ONLY(Uint32 size);
|
||||
|
||||
#ifdef DEBUG_LOG
|
||||
void printPretty(LogModuleObject log);
|
||||
#endif // DEBUG_LOG
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Liveness
|
||||
//
|
||||
// The liveness is defined by the following data-flow equations:
|
||||
//
|
||||
// LiveIn(n) = LocalLive(n) U (LiveOut(n) - Killed(n)).
|
||||
// LiveOut(n) = U LiveIn(s) (s a successor of n).
|
||||
//
|
||||
// where LocalLive(n) is the set of used registers in the block n, Killed(n)
|
||||
// is the set of defined registers in the block n, LiveIn(n) is the set of
|
||||
// live registers at the beginning of the block n and LiveOut(n) is the set
|
||||
// of live registers at the end of the block n.
|
||||
//
|
||||
//
|
||||
// We will compute the liveness analysis in two stages:
|
||||
//
|
||||
// 1- Build LocalLive(n) (which is an approximation of LiveIn(n)) and Killed(n)
|
||||
// for each block n.
|
||||
// 2- Perform a backward data-flow analysis to propagate the liveness information
|
||||
// through the entire control-flow graph.
|
||||
//
|
||||
|
||||
// Entry points of the liveness analysis. Both return the liveIn and liveOut
// sets of every node of controlGraph.
template <class RegisterPressure>
|
||||
struct Liveness
|
||||
{
|
||||
// Liveness of the live ranges: each register name is first mapped to its
// range through name2range.
static LivenessInfo<RegisterPressure> analysis(ControlGraph& controlGraph, Uint32 rangeCount, const RegisterName* name2range);
|
||||
// Liveness of the register names themselves; this version also accounts for
// the phi nodes.
static LivenessInfo<RegisterPressure> analysis(ControlGraph& controlGraph, Uint32 nameCount);
|
||||
};
|
||||
|
||||
template <class RegisterPressure>
|
||||
LivenessInfo<RegisterPressure> Liveness<RegisterPressure>::analysis(ControlGraph& controlGraph, Uint32 rangeCount, const RegisterName* name2range)
|
||||
{
|
||||
Pool& pool = controlGraph.pool;
|
||||
ControlNode** nodes = controlGraph.dfsList;
|
||||
Uint32 nNodes = controlGraph.nNodes;
|
||||
|
||||
// Allocate the temporary sets.
|
||||
RegisterPressure::Set* killed = new(pool) RegisterPressure::Set[nNodes](pool, rangeCount);
|
||||
|
||||
// Allocate the globals sets.
|
||||
RegisterPressure::Set* liveIn = new(pool) RegisterPressure::Set[nNodes](pool, rangeCount);
|
||||
RegisterPressure::Set* liveOut = new(pool) RegisterPressure::Set[nNodes](pool, rangeCount);
|
||||
|
||||
// First stage of the liveness analysis: Compute the sets LocalLive(stored in LiveIn) and Killed.
|
||||
//
|
||||
for (Uint32 n = 0; n < (nNodes - 1); n++) {
|
||||
ControlNode& node = *nodes[n];
|
||||
|
||||
RegisterPressure::Set& currentLocalLive = liveIn[n];
|
||||
RegisterPressure::Set& currentKilled = killed[n];
|
||||
|
||||
// Find the instructions contributions to the sets LocalLive and Killed.
|
||||
//
|
||||
InstructionList& instructions = node.getInstructions();
|
||||
for (InstructionList::iterator i = instructions.begin(); !instructions.done(i); i = instructions.advance(i)) {
|
||||
Instruction& instruction = instructions.get(i);
|
||||
|
||||
// If a VirtualRegister is 'used' before being 'defined' then we add it to set LocalLive.
|
||||
|
||||
InstructionUse* useEnd = instruction.getInstructionUseEnd();
|
||||
for (InstructionUse* usePtr = instruction.getInstructionUseBegin(); usePtr < useEnd; usePtr++)
|
||||
if (usePtr->isRegister()) {
|
||||
Uint32 index = name2range[usePtr->getRegisterName()];
|
||||
|
||||
if (!currentKilled.test(index))
|
||||
currentLocalLive.set(index);
|
||||
}
|
||||
|
||||
// If a Virtualregister is 'defined' then we add it to the set Killed.
|
||||
InstructionDefine* defineEnd = instruction.getInstructionDefineEnd();
|
||||
for (InstructionDefine* definePtr = instruction.getInstructionDefineBegin(); definePtr < defineEnd; definePtr++)
|
||||
if (definePtr->isRegister())
|
||||
currentKilled.set(name2range[definePtr->getRegisterName()]);
|
||||
}
|
||||
}
|
||||
|
||||
// Second stage of the liveness analysis: We propagate the LiveIn & LiveOut through the entire
|
||||
// control-flow graph.
|
||||
//
|
||||
RegisterPressure::Set temp(pool, rangeCount);
|
||||
|
||||
bool changed;
|
||||
do {
|
||||
changed = false;
|
||||
|
||||
// For all nodes is this graph except the endNode.
|
||||
for (Int32 n = (nNodes - 2); n >= 0; n--) {
|
||||
ControlNode& node = *nodes[n];
|
||||
|
||||
RegisterPressure::Set& currentLiveIn = liveIn[n];
|
||||
RegisterPressure::Set& currentLiveOut = liveOut[n];
|
||||
|
||||
// Compute temp = Union of LiveIn(s) (s a successor of this node) | usedByPhiNodes(n).
|
||||
// temp will be the new LiveOut(n).
|
||||
Uint32 nSuccessors = node.nSuccessors();
|
||||
if (nSuccessors != 0) {
|
||||
temp = liveIn[node.nthSuccessor(0).getTarget().dfsNum];
|
||||
for (Uint32 s = 1; s < nSuccessors; s++)
|
||||
temp |= liveIn[node.nthSuccessor(s).getTarget().dfsNum];
|
||||
} else
|
||||
temp.clear();
|
||||
|
||||
// If temp and LiveOut(n) differ then set LiveOut(n) = temp and recalculate the
|
||||
// new LiveIn(n).
|
||||
if (currentLiveOut != temp) {
|
||||
currentLiveOut = temp;
|
||||
temp -= killed[n]; // FIX: could be optimized with one call to unionDiff !
|
||||
temp |= currentLiveIn;
|
||||
|
||||
if (currentLiveIn != temp) {
|
||||
currentLiveIn = temp;
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
} while(changed);
|
||||
|
||||
LivenessInfo<RegisterPressure> liveness;
|
||||
liveness.liveIn = liveIn;
|
||||
liveness.liveOut = liveOut;
|
||||
DEBUG_LOG_ONLY(liveness.size = nNodes);
|
||||
return liveness;
|
||||
}
|
||||
|
||||
template <class RegisterPressure>
|
||||
LivenessInfo<RegisterPressure> Liveness<RegisterPressure>::analysis(ControlGraph& controlGraph, Uint32 nameCount)
|
||||
{
|
||||
Pool& pool = controlGraph.pool;
|
||||
ControlNode** nodes = controlGraph.dfsList;
|
||||
Uint32 nNodes = controlGraph.nNodes;
|
||||
|
||||
// Allocate the temporary sets.
|
||||
RegisterPressure::Set* killed = new(pool) RegisterPressure::Set[nNodes](pool, nameCount);
|
||||
RegisterPressure::Set* usedByPhiNodes = NULL;
|
||||
|
||||
// Allocate the globals sets.
|
||||
RegisterPressure::Set* liveIn = new(pool) RegisterPressure::Set[nNodes](pool, nameCount);
|
||||
RegisterPressure::Set* liveOut = new(pool) RegisterPressure::Set[nNodes](pool, nameCount);
|
||||
|
||||
// First stage of the liveness analysis: Compute the sets LocalLive(stored in LiveIn) and Killed.
|
||||
//
|
||||
for (Uint32 n = 0; n < (nNodes - 1); n++) {
|
||||
ControlNode& node = *nodes[n];
|
||||
|
||||
RegisterPressure::Set& currentLocalLive = liveIn[n];
|
||||
RegisterPressure::Set& currentKilled = killed[n];
|
||||
|
||||
InstructionList& phiNodes = node.getPhiNodeInstructions();
|
||||
|
||||
if ((usedByPhiNodes == NULL) && !phiNodes.empty())
|
||||
usedByPhiNodes = new(pool) RegisterPressure::Set[nNodes](pool, nameCount);
|
||||
|
||||
for (InstructionList::iterator p = phiNodes.begin(); !phiNodes.done(p); p = phiNodes.advance(p)) {
|
||||
Instruction& phiNode = phiNodes.get(p);
|
||||
|
||||
InstructionDefine& define = phiNode.getInstructionDefineBegin()[0];
|
||||
currentKilled.set(define.getRegisterName());
|
||||
|
||||
typedef DoublyLinkedList<ControlEdge> ControlEdgeList;
|
||||
const ControlEdgeList& predecessors = node.getPredecessors();
|
||||
ControlEdgeList::iterator p = predecessors.begin();
|
||||
InstructionUse* useEnd = phiNode.getInstructionUseEnd();
|
||||
for (InstructionUse* usePtr = phiNode.getInstructionUseBegin(); usePtr < useEnd; usePtr++, p = predecessors.advance(p))
|
||||
if (usePtr->isRegister())
|
||||
usedByPhiNodes[predecessors.get(p).getSource().dfsNum].set(usePtr->getRegisterName());
|
||||
}
|
||||
|
||||
// Find the instructions contributions to the sets LocalLive and Killed.
|
||||
//
|
||||
InstructionList& instructions = node.getInstructions();
|
||||
for (InstructionList::iterator i = instructions.begin(); !instructions.done(i); i = instructions.advance(i)) {
|
||||
Instruction& instruction = instructions.get(i);
|
||||
|
||||
// If a VirtualRegister is 'used' before being 'defined' then we add it to set LocalLive.
|
||||
|
||||
InstructionUse* useEnd = instruction.getInstructionUseEnd();
|
||||
for (InstructionUse* usePtr = instruction.getInstructionUseBegin(); usePtr < useEnd; usePtr++)
|
||||
if (usePtr->isRegister()) {
|
||||
Uint32 index = usePtr->getRegisterName();
|
||||
|
||||
if (!currentKilled.test(index))
|
||||
currentLocalLive.set(index);
|
||||
}
|
||||
|
||||
// If a Virtualregister is 'defined' then we add it to the set Killed.
|
||||
InstructionDefine* defineEnd = instruction.getInstructionDefineEnd();
|
||||
for (InstructionDefine* definePtr = instruction.getInstructionDefineBegin(); definePtr < defineEnd; definePtr++)
|
||||
if (definePtr->isRegister())
|
||||
currentKilled.set(definePtr->getRegisterName());
|
||||
}
|
||||
}
|
||||
|
||||
// Second stage of the liveness analysis: We propagate the LiveIn & LiveOut through the entire
|
||||
// control-flow graph.
|
||||
//
|
||||
RegisterPressure::Set temp(pool, nameCount);
|
||||
|
||||
bool changed;
|
||||
do {
|
||||
changed = false;
|
||||
|
||||
// For all nodes is this graph except the endNode.
|
||||
for (Int32 n = (nNodes - 2); n >= 0; n--) {
|
||||
ControlNode& node = *nodes[n];
|
||||
|
||||
RegisterPressure::Set& currentLiveIn = liveIn[n];
|
||||
RegisterPressure::Set& currentLiveOut = liveOut[n];
|
||||
|
||||
// Compute temp = Union of LiveIn(s) (s a successor of this node) | usedByPhiNodes(n).
|
||||
// temp will be the new LiveOut(n).
|
||||
Uint32 nSuccessors = node.nSuccessors();
|
||||
if (nSuccessors != 0) {
|
||||
temp = liveIn[node.nthSuccessor(0).getTarget().dfsNum];
|
||||
for (Uint32 s = 1; s < nSuccessors; s++)
|
||||
temp |= liveIn[node.nthSuccessor(s).getTarget().dfsNum];
|
||||
} else
|
||||
temp.clear();
|
||||
|
||||
// Insert the phiNodes contribution.
|
||||
if (usedByPhiNodes != NULL)
|
||||
temp |= usedByPhiNodes[n];
|
||||
|
||||
// If temp and LiveOut(n) differ then set LiveOut(n) = temp and recalculate the
|
||||
// new LiveIn(n).
|
||||
if (currentLiveOut != temp) {
|
||||
currentLiveOut = temp;
|
||||
temp -= killed[n]; // FIX: could be optimized with one call to unionDiff !
|
||||
temp |= currentLiveIn;
|
||||
|
||||
if (currentLiveIn != temp) {
|
||||
currentLiveIn = temp;
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
} while(changed);
|
||||
|
||||
LivenessInfo<RegisterPressure> liveness;
|
||||
liveness.liveIn = liveIn;
|
||||
liveness.liveOut = liveOut;
|
||||
DEBUG_LOG_ONLY(liveness.size = nNodes);
|
||||
return liveness;
|
||||
}
|
||||
|
||||
#ifdef DEBUG_LOG
|
||||
template <class RegisterPressure>
|
||||
void LivenessInfo<RegisterPressure>::printPretty(LogModuleObject log)
|
||||
{
|
||||
for (Uint32 n = 0; n < size; n++) {
|
||||
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("Node N%d:\n\tliveIn = ", n));
|
||||
liveIn[n].printPretty(log);
|
||||
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("\tliveOut = "));
|
||||
liveOut[n].printPretty(log);
|
||||
UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("\n"));
|
||||
}
|
||||
}
|
||||
#endif // DEBUG_LOG
|
||||
|
||||
#endif // _LIVENESS_H_
|
||||
40
mozilla/ef/Compiler/RegisterAllocator/Makefile
Normal file
40
mozilla/ef/Compiler/RegisterAllocator/Makefile
Normal file
@@ -0,0 +1,40 @@
|
||||
#! gmake
|
||||
|
||||
# Path prefix used below to reach the config directory and the other
# source directories.
DEPTH = ../..
|
||||
|
||||
# Name of the module built from this directory.
MODULE_NAME = RegisterAllocator
|
||||
|
||||
# Shared configuration.
include $(DEPTH)/config/config.mk
|
||||
|
||||
# Directories added to the include search path; $(NULL) terminates the list.
INCLUDES += \
|
||||
-I$(DEPTH)/Utilities/General \
|
||||
-I$(DEPTH)/Utilities/zlib \
|
||||
-I$(DEPTH)/Runtime/ClassReader \
|
||||
-I$(DEPTH)/Runtime/NativeMethods \
|
||||
-I$(DEPTH)/Runtime/System \
|
||||
-I$(DEPTH)/Runtime/ClassInfo \
|
||||
-I$(DEPTH)/Runtime/FileReader \
|
||||
-I$(DEPTH)/Compiler/PrimitiveGraph \
|
||||
-I$(DEPTH)/Compiler/FrontEnd \
|
||||
-I$(DEPTH)/Compiler/Optimizer \
|
||||
-I$(DEPTH)/Compiler/CodeGenerator \
|
||||
-I$(DEPTH)/Compiler/CodeGenerator/md \
|
||||
-I$(DEPTH)/Compiler/CodeGenerator/md/$(CPU_ARCH) \
|
||||
-I$(DEPTH)/Compiler/RegisterAllocator \
|
||||
-I$(DEPTH)/Driver/StandAloneJava \
|
||||
-I$(DEPTH)/Debugger \
|
||||
$(NULL)
|
||||
|
||||
# C++ sources of this module; $(NULL) terminates the list.
CXXSRCS = \
|
||||
RegisterAllocator.cpp \
|
||||
RegisterAllocatorTools.cpp \
|
||||
DominatorGraph.cpp \
|
||||
VirtualRegister.cpp \
|
||||
BitSet.cpp \
|
||||
SparseSet.cpp \
|
||||
$(NULL)
|
||||
|
||||
|
||||
# Shared build rules.
include $(DEPTH)/config/rules.mk
|
||||
|
||||
# The libs target depends on $(MODULE).
libs:: $(MODULE)
|
||||
392
mozilla/ef/Compiler/RegisterAllocator/PhiNodeRemover.h
Normal file
392
mozilla/ef/Compiler/RegisterAllocator/PhiNodeRemover.h
Normal file
@@ -0,0 +1,392 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#ifndef _PHI_NODE_REMOVER_H_
|
||||
#define _PHI_NODE_REMOVER_H_
|
||||
|
||||
#include "Fundamentals.h"
|
||||
#include "Pool.h"
|
||||
#include "ControlGraph.h"
|
||||
#include "DominatorGraph.h"
|
||||
#include "VirtualRegister.h"
|
||||
#include "RegisterPressure.h"
|
||||
#include "Liveness.h"
|
||||
#include "Instruction.h"
|
||||
#include "InstructionEmitter.h"
|
||||
#include "SparseSet.h"
|
||||
#include <string.h>
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// RegisterNameNode -
|
||||
|
||||
// One entry of the per-register rename stack built by pushName(). The top of
// each register's stack is stored in CopyData::newName.
struct RegisterNameNode
|
||||
{
|
||||
RegisterNameNode* next; // Entry that was on top of this stack before this one was pushed.
|
||||
RegisterName newName; // Name to use in place of the register owning this stack.
|
||||
Uint32 nextPushed; // Next register name in the list of names pushed for the same node (0 ends the list).
|
||||
};
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// CopyData -
|
||||
|
||||
// Per-register bookkeeping used by PhiNodeRemover::replacePhiNodes(), which
// keeps one zero-initialized CopyData per register name.
struct CopyData
|
||||
{
|
||||
RegisterName source; // Register that has to be copied into this one.
|
||||
RegisterClassKind classKind; // Register class used when building the copy's register IDs.
|
||||
Uint32 useCount; // Number of pending copies reading this register; 0 means it can be overwritten.
|
||||
bool isLiveOut; // Whether this register was found in the live-out set of the node being processed.
|
||||
RegisterName sourceNameToUse; // Name to read from when this register is the source of a copy.
|
||||
RegisterName temporaryName; // Temporary register created for this one; zero when there is none.
|
||||
RegisterNameNode* newName; // Top of this register's rename stack (NULL when nothing was pushed).
|
||||
};
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// PhiNodeRemover<RegisterPressure> -
|
||||
|
||||
// Holder for the phi node replacement pass. RegisterPressure supplies the
// Set type used for the live-out sets inside replacePhiNodes().
template <class RegisterPressure>
|
||||
struct PhiNodeRemover
|
||||
{
|
||||
// Replace the phi nodes of controlGraph by copy instructions created with
// emitter. Temporary registers, when needed, are requested from vrManager.
|
||||
static void replacePhiNodes(ControlGraph& controlGraph, VirtualRegisterManager& vrManager, InstructionEmitter& emitter);
|
||||
};
|
||||
|
||||
// Split some of the critical edges and return true if there are still some
|
||||
// in the graph after that.
|
||||
//
|
||||
static bool splitCriticalEdges(ControlGraph& /*cg*/)
|
||||
{
|
||||
// FIX: not implemented.
|
||||
return true;
|
||||
}
|
||||
|
||||
// Record that oldName has to be replaced by newName.
//
// pool            - pool the new stack entry is allocated from.
// stack           - address of the top-of-stack pointer of oldName's rename stack.
// pushed          - set of the names that already received an entry; updated here.
// nodeListPointer - head of the list of pushed names; oldName is linked in
//                   front of it when a new entry is created.
// oldName         - register name being renamed.
// newName         - name to use instead of oldName.
//
// If oldName is already in the pushed set, the entry on top of its stack is
// simply overwritten. Otherwise a new entry is pushed and oldName is added to
// both the pushed set and the pushed list.
//
inline void pushName(Pool& pool, RegisterNameNode** stack, SparseSet& pushed, Uint32* nodeListPointer, RegisterName oldName, RegisterName newName)
{
    if (pushed.test(oldName)) {
        // Already pushed: reuse the entry on top of the stack.
        (*stack)->newName = newName;
        return;
    }

    // Allocate the entry only on this path, so that the overwrite case
    // above does not consume pool memory for a node that is never linked.
    RegisterNameNode& newNode = *new(pool) RegisterNameNode();

    newNode.newName = newName;
    newNode.nextPushed = *nodeListPointer;
    *nodeListPointer = oldName;
    newNode.next = *stack;
    *stack = &newNode;
    pushed.set(oldName);
}
|
||||
|
||||
template <class RegisterPressure>
|
||||
void PhiNodeRemover<RegisterPressure>::replacePhiNodes(ControlGraph& controlGraph, VirtualRegisterManager& vrManager, InstructionEmitter& emitter)
|
||||
{
|
||||
Pool& pool = controlGraph.pool;
|
||||
ControlNode** nodes = controlGraph.dfsList;
|
||||
Uint32 nNodes = controlGraph.nNodes;
|
||||
|
||||
// Initialize the local variables.
|
||||
//
|
||||
|
||||
// When we insert the copies we will also need to create new VirtualRegisters for
|
||||
// the insertion of temporaries. The maximum number of temporary register will not
|
||||
// exceed the number of phiNodes in the primitive graph.
|
||||
Uint32 nameCount = vrManager.getSize();
|
||||
Uint32 maxNameCount = nameCount;
|
||||
for (Uint32 n = 0; n < nNodes; n++)
|
||||
maxNameCount += nodes[n]->getPhiNodes().length();
|
||||
|
||||
// If the CFG contains some critical edges (backward edge which source has more than one
|
||||
// outgoing edge and destination has more than one incomimg edge) then we need the liveness
|
||||
// information to be able to insert temporary copies.
|
||||
RegisterPressure::Set* liveOut = NULL;
|
||||
if (splitCriticalEdges(controlGraph))
|
||||
liveOut = Liveness<LowRegisterPressure>::analysis(controlGraph, nameCount).liveOut;
|
||||
|
||||
DominatorGraph dGraph(controlGraph);
|
||||
|
||||
SparseSet pushed(pool, maxNameCount);
|
||||
SparseSet destinationList(pool, maxNameCount);
|
||||
SparseSet workList(pool, maxNameCount);
|
||||
|
||||
CopyData* copyStats = new(pool) CopyData[maxNameCount];
|
||||
memset(copyStats, '\0', maxNameCount*sizeof(CopyData));
|
||||
|
||||
struct NodeStack {
|
||||
Uint32* next;
|
||||
Uint32* limit;
|
||||
Uint32 pushedList;
|
||||
};
|
||||
|
||||
// Allocate the node stack and initialize the node stack pointer.
|
||||
NodeStack* nodeStack = new(pool) NodeStack[nNodes + 1];
|
||||
NodeStack* nodeStackPtr = nodeStack;
|
||||
|
||||
// We start by the begin node.
|
||||
Uint32 startNode = 0;
|
||||
Uint32* next = &startNode;
|
||||
Uint32* limit = &startNode + 1;
|
||||
|
||||
while (true) {
|
||||
|
||||
if (next == limit) {
|
||||
// If there are no more node in the sibling, we have to pop the current
|
||||
// frame from the stack and update the copyStats of the pushed nodes.
|
||||
//
|
||||
if (nodeStackPtr == nodeStack)
|
||||
// We are at the bottom of the stack and there are no more nodes
|
||||
// to look at. We are done !
|
||||
break;
|
||||
|
||||
--nodeStackPtr;
|
||||
// We are done with all the children of this node in the dominator tree.
|
||||
// We need to update the copy information of all the new names pushed
|
||||
// during the walk over this node.
|
||||
Uint32 pushedList = nodeStackPtr->pushedList;
|
||||
while (pushedList != 0) {
|
||||
Uint32 nextName = copyStats[pushedList].newName->nextPushed;
|
||||
copyStats[pushedList].newName = copyStats[pushedList].newName->next;
|
||||
pushedList = nextName;
|
||||
}
|
||||
|
||||
// restore the previous frame.
|
||||
next = nodeStackPtr->next;
|
||||
limit = nodeStackPtr->limit;
|
||||
} else {
|
||||
Uint32 currentNode = *next++;
|
||||
Uint32 pushedList = 0;
|
||||
|
||||
|
||||
// Initialize the sets.
|
||||
pushed.clear();
|
||||
destinationList.clear();
|
||||
|
||||
// STEP1:
|
||||
// Walk the instruction list and to replace all the instruction uses with their new name.
|
||||
// If the instruction is a phi node and its defined register is alive at the end of this
|
||||
// block then we push the defined register into the stack.
|
||||
//
|
||||
ControlNode& node = *nodes[currentNode];
|
||||
RegisterPressure::Set* currentLiveOut = (liveOut != NULL) ? &liveOut[currentNode] : (RegisterPressure::Set*) 0;
|
||||
|
||||
InstructionList& phiNodes = node.getPhiNodeInstructions();
|
||||
for (InstructionList::iterator p = phiNodes.begin(); !phiNodes.done(p); p = phiNodes.advance(p)) {
|
||||
Instruction& phiNode = phiNodes.get(p);
|
||||
|
||||
InstructionUse* useEnd = phiNode.getInstructionUseEnd();
|
||||
for (InstructionUse* usePtr = phiNode.getInstructionUseBegin(); usePtr < useEnd; usePtr++) {
|
||||
assert(usePtr->isRegister());
|
||||
RegisterName name = usePtr->getRegisterName();
|
||||
|
||||
if (copyStats[name].newName != NULL && copyStats[name].newName->newName != name)
|
||||
usePtr->setRegisterName(copyStats[name].newName->newName);
|
||||
}
|
||||
|
||||
if (currentLiveOut != NULL) {
|
||||
// This is a phi node and we have to push its defined name if it is live
|
||||
// at the end of the node. We only need to do this if the CFG has critical edges.
|
||||
assert(phiNode.getInstructionDefineBegin() != phiNode.getInstructionDefineEnd() && phiNode.getInstructionDefineBegin()[0].isRegister());
|
||||
RegisterName name = phiNode.getInstructionDefineBegin()[0].getRegisterName();
|
||||
|
||||
if (currentLiveOut->test(name))
|
||||
pushName(pool, &(copyStats[name].newName), pushed, &pushedList, name, name);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
InstructionList& instructions = node.getInstructions();
|
||||
for (InstructionList::iterator i = instructions.begin(); !instructions.done(i); i = instructions.advance(i)) {
|
||||
Instruction& instruction = instructions.get(i);
|
||||
|
||||
InstructionUse* useEnd = instruction.getInstructionUseEnd();
|
||||
for (InstructionUse* usePtr = instruction.getInstructionUseBegin(); usePtr < useEnd; usePtr++)
|
||||
if (usePtr->isRegister()) {
|
||||
RegisterName name = usePtr->getRegisterName();
|
||||
|
||||
if (copyStats[name].newName != NULL && copyStats[name].newName->newName != name)
|
||||
usePtr->setRegisterName(copyStats[name].newName->newName);
|
||||
}
|
||||
}
|
||||
|
||||
// STEP2:
|
||||
// Look at this node's successors' phiNodes. We keep track of the number of time
|
||||
// a VR will be used by another copy instruction and insert each definition into the
|
||||
// destinationList. This is the only pass over this node's successors as we will
|
||||
// get all the information we need in the CopyData structures.
|
||||
//
|
||||
ControlEdge* successorEdgeEnd = node.getSuccessorsEnd();
|
||||
for (ControlEdge* successorEdgePtr = node.getSuccessorsBegin(); successorEdgePtr < successorEdgeEnd; successorEdgePtr++) {
|
||||
Uint32 useIndex = successorEdgePtr->getIndex();
|
||||
ControlNode& successor = successorEdgePtr->getTarget();
|
||||
|
||||
// Look at its phi nodes. The phi nodes are at the top of the instruction list. We exit
|
||||
// as soon as we find an instruction which is not a phi node
|
||||
InstructionList& phiNodes = successor.getPhiNodeInstructions();
|
||||
for (InstructionList::iterator p = phiNodes.begin(); !phiNodes.done(p); p = phiNodes.advance(p)) {
|
||||
Instruction& phiNode = phiNodes.get(p);
|
||||
|
||||
assert((phiNode.getInstructionUseBegin() + useIndex) < phiNode.getInstructionUseEnd());
|
||||
assert(phiNode.getInstructionDefineBegin() != phiNode.getInstructionDefineEnd());
|
||||
|
||||
InstructionUse& source = phiNode.getInstructionUseBegin()[useIndex];
|
||||
InstructionDefine& destination = phiNode.getInstructionDefineBegin()[0];
|
||||
|
||||
assert(source.isRegister() && destination.isRegister());
|
||||
|
||||
RegisterName sourceName = source.getRegisterName();
|
||||
RegisterName destinationName = destination.getRegisterName();
|
||||
|
||||
// Get the correct name for the source.
|
||||
if (copyStats[sourceName].newName != NULL)
|
||||
sourceName = copyStats[sourceName].newName->newName;
|
||||
|
||||
// Update the CopyData structures.
|
||||
if ((sourceName != rnInvalid) && (sourceName != destinationName)) {
|
||||
copyStats[destinationName].source = sourceName;
|
||||
copyStats[destinationName].classKind = destination.getRegisterClass();
|
||||
copyStats[destinationName].isLiveOut = (currentLiveOut != NULL) ? currentLiveOut->test(destinationName) : false;
|
||||
copyStats[destinationName].sourceNameToUse = destinationName;
|
||||
copyStats[sourceName].sourceNameToUse = sourceName;
|
||||
copyStats[sourceName].useCount++;
|
||||
destinationList.set(destinationName);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// STEP3:
|
||||
// Insert into the worklist only the destination registers that will be not used in
|
||||
// another copy instruction in this block.
|
||||
//
|
||||
assert(workList.getSize() == 0);
|
||||
for (SparseSet::iterator d = destinationList.begin(); !destinationList.done(d); d = destinationList.advance(d)) {
|
||||
Uint32 dest = destinationList.get(d);
|
||||
if (copyStats[dest].useCount == 0)
|
||||
workList.set(dest);
|
||||
}
|
||||
|
||||
// STEP4:
|
||||
// Insert the copy instructions.
|
||||
//
|
||||
Uint32 destinationListSize = destinationList.getSize();
|
||||
InstructionList::iterator endOfTheNode = instructions.end();
|
||||
|
||||
// Find the right place to insert the copy instructions.
|
||||
if (destinationListSize != 0)
|
||||
while (instructions.get(endOfTheNode).getFlags() & ifControl)
|
||||
endOfTheNode = instructions.retreat(endOfTheNode);
|
||||
|
||||
while (destinationListSize != 0) {
|
||||
while(workList.getSize()) {
|
||||
RegisterName destinationName = RegisterName(workList.getOne());
|
||||
RegisterName sourceName = copyStats[destinationName].source;
|
||||
|
||||
workList.clear(destinationName);
|
||||
if (copyStats[destinationName].isLiveOut && !copyStats[destinationName].temporaryName) {
|
||||
// Lost copy problem.
|
||||
copyStats[destinationName].isLiveOut = false;
|
||||
|
||||
RegisterName sourceName = destinationName;
|
||||
RegisterClassKind classKind = copyStats[sourceName].classKind;
|
||||
RegisterName destinationName = getName(vrManager.newVirtualRegister(classKind));
|
||||
assert(destinationName < maxNameCount);
|
||||
|
||||
copyStats[destinationName].classKind = classKind;
|
||||
copyStats[sourceName].useCount = 0;
|
||||
|
||||
// We need to insert a copy to a temporary register to keep the
|
||||
// source register valid at the end of the node defining it.
|
||||
// This copy will be inserted right after the phi node defining it.
|
||||
RegisterName from = copyStats[sourceName].sourceNameToUse;
|
||||
Instruction* definingPhiNode = vrManager.getVirtualRegister(from).getDefiningInstruction();
|
||||
assert(definingPhiNode && (definingPhiNode->getFlags() & ifPhiNode) != 0);
|
||||
|
||||
RegisterID fromID = buildRegisterID(from, classKind);
|
||||
RegisterID toID = buildRegisterID(destinationName, classKind);
|
||||
Instruction& copy = emitter.newCopy(*definingPhiNode->getPrimitive(), fromID, toID);
|
||||
vrManager.getVirtualRegister(destinationName).setDefiningInstruction(copy);
|
||||
definingPhiNode->getPrimitive()->getContainer()->getInstructions().addFirst(copy);
|
||||
|
||||
copyStats[sourceName].temporaryName = destinationName;
|
||||
copyStats[sourceName].sourceNameToUse = destinationName;
|
||||
pushName(pool, &(copyStats[sourceName].newName), pushed, &pushedList, sourceName, destinationName);
|
||||
}
|
||||
|
||||
// Insert the copy instruction at the end of the current node.
|
||||
RegisterName from = copyStats[sourceName].sourceNameToUse;
|
||||
|
||||
RegisterClassKind classKind = copyStats[destinationName].classKind;
|
||||
RegisterID fromID = buildRegisterID(from, classKind);
|
||||
RegisterID toID = buildRegisterID(destinationName, classKind);
|
||||
Instruction& copy = emitter.newCopy(*vrManager.getVirtualRegister(from).getDefiningInstruction()->getPrimitive(), fromID, toID);
|
||||
instructions.insertAfter(copy, endOfTheNode);
|
||||
endOfTheNode = instructions.advance(endOfTheNode);
|
||||
|
||||
copyStats[sourceName].useCount = 0;
|
||||
if (destinationList.test(sourceName) && copyStats[sourceName].isLiveOut)
|
||||
pushName(pool, &(copyStats[sourceName].newName), pushed, &pushedList, sourceName, destinationName);
|
||||
copyStats[sourceName].isLiveOut = false;
|
||||
copyStats[sourceName].sourceNameToUse = destinationName;
|
||||
|
||||
if (destinationList.test(sourceName))
|
||||
workList.set(sourceName);
|
||||
destinationList.clear(destinationName);
|
||||
}
|
||||
|
||||
destinationListSize = destinationList.getSize();
|
||||
if (destinationListSize != 0) {
|
||||
RegisterName sourceName = RegisterName(destinationList.getOne());
|
||||
RegisterName destinationName;
|
||||
|
||||
if (!copyStats[sourceName].temporaryName) {
|
||||
// Cycle problem.
|
||||
RegisterClassKind classKind = copyStats[sourceName].classKind;
|
||||
destinationName = getName(vrManager.newVirtualRegister(classKind));
|
||||
assert(destinationName < maxNameCount);
|
||||
|
||||
copyStats[destinationName].classKind = classKind;
|
||||
copyStats[sourceName].temporaryName = destinationName;
|
||||
|
||||
// Insert the copy instruction at the end of the current node.
|
||||
RegisterName from = copyStats[sourceName].sourceNameToUse;
|
||||
|
||||
RegisterID fromID = buildRegisterID(from, classKind);
|
||||
RegisterID toID = buildRegisterID(destinationName, classKind);
|
||||
Instruction& copy = emitter.newCopy(*vrManager.getVirtualRegister(from).getDefiningInstruction()->getPrimitive(), fromID, toID);
|
||||
vrManager.getVirtualRegister(destinationName).setDefiningInstruction(copy);
|
||||
instructions.insertAfter(copy, endOfTheNode);
|
||||
endOfTheNode = instructions.advance(endOfTheNode);
|
||||
} else
|
||||
destinationName = copyStats[sourceName].temporaryName;
|
||||
|
||||
copyStats[sourceName].useCount = 0;
|
||||
copyStats[sourceName].isLiveOut = false;
|
||||
copyStats[sourceName].sourceNameToUse = destinationName;
|
||||
pushName(pool, &(copyStats[sourceName].newName), pushed, &pushedList, sourceName, destinationName);
|
||||
|
||||
workList.set(sourceName);
|
||||
}
|
||||
}
|
||||
|
||||
nodeStackPtr->pushedList = pushedList;
|
||||
nodeStackPtr->next = next;
|
||||
nodeStackPtr->limit = limit;
|
||||
++nodeStackPtr;
|
||||
next = dGraph.getSuccessorsBegin(currentNode);
|
||||
limit = dGraph.getSuccessorsEnd(currentNode);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif // _PHI_NODE_REMOVER_H_
|
||||
155
mozilla/ef/Compiler/RegisterAllocator/RegisterAllocator.cpp
Normal file
155
mozilla/ef/Compiler/RegisterAllocator/RegisterAllocator.cpp
Normal file
@@ -0,0 +1,155 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#include "Fundamentals.h"
|
||||
#include "LogModule.h"
|
||||
#include "RegisterAllocator.h"
|
||||
#include "RegisterPressure.h"
|
||||
#include "RegisterAllocatorTools.h"
|
||||
#include "PhiNodeRemover.h"
|
||||
#include "LiveRange.h"
|
||||
#include "Liveness.h"
|
||||
#include "InterferenceGraph.h"
|
||||
#include "LiveRangeGraph.h"
|
||||
#include "Coalescing.h"
|
||||
#include "Spilling.h"
|
||||
#include "Coloring.h"
|
||||
#include "Splits.h"
|
||||
|
||||
class Pool;
|
||||
class ControlGraph;
|
||||
class VirtualRegisterManager;
|
||||
class InstructionEmitter;
|
||||
|
||||
UT_DEFINE_LOG_MODULE(RegAlloc);
|
||||
|
||||
// Entry point of the register allocation: prepare the instruction graph of
// controlGraph, then run the graph coloring on a RegisterAllocator built from
// the given pool, virtual register manager and instruction emitter.
void RegisterAllocator::allocateRegisters(Pool& pool, ControlGraph& controlGraph, VirtualRegisterManager& vrManager, InstructionEmitter& emitter)
{
    // Phi nodes first become instructions, so that each instruction defines a
    // single register. Keeping a PhiNode as a DataNode would require special
    // code for the high word annotation when it is of DoubleWordKind.
    //
    RegisterAllocatorTools::insertPhiNodeInstructions(controlGraph, emitter);

    // Sanity checks on the instruction graph (DEBUG_ONLY).
    //
    DEBUG_ONLY(RegisterAllocatorTools::testTheInstructionGraph(controlGraph, vrManager));

    // The phi node instructions are then replaced by equivalent copies.
    //
    PhiNodeRemover<LowRegisterPressure>::replacePhiNodes(controlGraph, vrManager, emitter);

    // Finally, the allocation itself.
    //
    RegisterAllocator allocator(pool, controlGraph, vrManager, emitter);
    allocator.doGraphColoring();
}
|
||||
|
||||
void RegisterAllocator::doGraphColoring()
|
||||
{
|
||||
// Initialize the liverange map.
|
||||
//
|
||||
initLiveRanges();
|
||||
|
||||
// Build the live ranges. We do this to compress the number of RegisterNames
|
||||
// used in the insterference graph.
|
||||
//
|
||||
LiveRange<LowRegisterPressure>::build(*this);
|
||||
|
||||
// Remove unnecessary copies.
|
||||
//
|
||||
RegisterAllocatorTools::removeUnnecessaryCopies(*this);
|
||||
|
||||
for (Uint8 loop = 0; loop < 10; loop++) {
|
||||
|
||||
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("********* RegisterAllocator loop %d *********\n", loop));
|
||||
|
||||
while(true) {
|
||||
// Build the interference graph.
|
||||
//
|
||||
iGraph.build();
|
||||
|
||||
// Coalesce the copy instructions.
|
||||
//
|
||||
if (!Coalescing<LowRegisterPressure>::coalesce(*this))
|
||||
break;
|
||||
}
|
||||
|
||||
// Print the interference graph.
|
||||
//
|
||||
DEBUG_LOG_ONLY(iGraph.printPretty(UT_LOG_MODULE(RegAlloc)));
|
||||
|
||||
// Calculate the spill costs.
|
||||
//
|
||||
Spilling<LowRegisterPressure>::calculateSpillCosts(*this);
|
||||
DEBUG_LOG_ONLY(RegisterAllocatorTools::printSpillCosts(*this));
|
||||
|
||||
// Calculate the split costs.
|
||||
//
|
||||
Splits<LowRegisterPressure>::calculateSplitCosts(*this);
|
||||
DEBUG_LOG_ONLY(RegisterAllocatorTools::printSplitCosts(*this));
|
||||
|
||||
// Build the live range graph.
|
||||
//
|
||||
lGraph.build();
|
||||
DEBUG_LOG_ONLY(lGraph.printPretty(UT_LOG_MODULE(RegAlloc)));
|
||||
|
||||
// Color the graph. If it succeeds then we're done with the
|
||||
// register allocation.
|
||||
//
|
||||
if (Coloring<LowRegisterPressure>::color(*this)) {
|
||||
// Write the final colors in the instruction graph.
|
||||
//
|
||||
Coloring<LowRegisterPressure>::finalColoring(*this);
|
||||
|
||||
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("********** RegisterAllocator done **********\n"));
|
||||
DEBUG_LOG_ONLY(RegisterAllocatorTools::printInstructions(*this));
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// We need to spill some registers.
|
||||
//
|
||||
Spilling<LowRegisterPressure>::insertSpillCode(*this);
|
||||
|
||||
// Insert the split instructions.
|
||||
//
|
||||
Splits<LowRegisterPressure>::insertSplitCode(*this);
|
||||
|
||||
// Update the live ranges.
|
||||
//
|
||||
// FIX
|
||||
}
|
||||
|
||||
#ifdef DEBUG_LOG
|
||||
RegisterAllocatorTools::updateInstructionGraph(*this);
|
||||
RegisterAllocatorTools::printInstructions(*this);
|
||||
#endif
|
||||
fprintf(stderr, "!!! Coloring failed after 10 loops !!!\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
void RegisterAllocator::initLiveRanges()
|
||||
{
|
||||
Uint32 count = this->nameCount;
|
||||
RegisterName* name2range = new(pool) RegisterName[nameCount];
|
||||
for (RegisterName r = RegisterName(1); r < count; r = RegisterName(r + 1))
|
||||
name2range[r] = r;
|
||||
this->name2range = name2range;
|
||||
rangeCount = count;
|
||||
}
|
||||
88
mozilla/ef/Compiler/RegisterAllocator/RegisterAllocator.h
Normal file
88
mozilla/ef/Compiler/RegisterAllocator/RegisterAllocator.h
Normal file
@@ -0,0 +1,88 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#ifndef _REGISTER_ALLOCATOR_H_
|
||||
#define _REGISTER_ALLOCATOR_H_
|
||||
|
||||
class Pool;
|
||||
class ControlGraph;
|
||||
class InstructionEmitter;
|
||||
struct SpillCost;
|
||||
struct SplitCost;
|
||||
|
||||
#include "Liveness.h"
|
||||
#include "VirtualRegister.h"
|
||||
#include "RegisterPressure.h" // This should be included by Backend.cpp
|
||||
#include "InterferenceGraph.h"
|
||||
#include "LiveRangeGraph.h"
|
||||
|
||||
//template <class RegisterPressure>
|
||||
// State shared by the phases of the register allocation (interference graph,
// live range graph, spilling, splitting and coloring receive *this).
// The static allocateRegisters() is the public way to run the allocation.
// The constructor only initializes the four references, the two graphs and
// nameCount; the other members are left for the phases to set.
class RegisterAllocator
|
||||
{
|
||||
public:
|
||||
|
||||
Pool& pool; // Pool given to the constructor; initLiveRanges() allocates name2range from it.
|
||||
ControlGraph& controlGraph; // Control graph given to the constructor.
|
||||
VirtualRegisterManager& vrManager; // Virtual register manager given to the constructor; its size initializes nameCount.
|
||||
InstructionEmitter& emitter; // Instruction emitter given to the constructor.
|
||||
|
||||
RegisterName* name2range; // Map from register name to live range name; set to the identity by initLiveRanges().
|
||||
RegisterName* color; // Presumably the color chosen for each live range - TODO confirm (not set in this file's visible code).
|
||||
SpillCost* spillCost; // Presumably filled by the spill cost calculation - TODO confirm.
|
||||
SparseSet* willSpill; // Presumably the live ranges selected for spilling - TODO confirm.
|
||||
SplitCost* splitCost; // Presumably filled by the split cost calculation - TODO confirm.
|
||||
NameLinkedList** splitAround; // Purpose not visible here - TODO document.
|
||||
InterferenceGraph<LowRegisterPressure> iGraph; // Interference graph, constructed with *this and built in doGraphColoring().
|
||||
LiveRangeGraph<LowRegisterPressure> lGraph; // Live range graph, constructed with *this and built in doGraphColoring().
|
||||
LivenessInfo<LowRegisterPressure> liveness; // Liveness information; not mentioned in the constructor's initializer list.
|
||||
Uint32 nameCount; // Number of register names, taken from vrManager.getSize() at construction.
|
||||
Uint32 rangeCount; // Number of live ranges; set to nameCount by initLiveRanges().
|
||||
bool splitFound; // Purpose not visible here - TODO document.
|
||||
|
||||
private:
|
||||
|
||||
// Run the coloring attempts; called by allocateRegisters().
|
||||
//
|
||||
void doGraphColoring();
|
||||
|
||||
public:
|
||||
|
||||
// Store the references and initialize iGraph, lGraph and nameCount.
|
||||
//
|
||||
inline RegisterAllocator(Pool& pool, ControlGraph& controlGraph, VirtualRegisterManager& vrManager, InstructionEmitter& emitter);
|
||||
|
||||
// Always returns true: both names are ignored.
|
||||
//
|
||||
bool canInterfere(RegisterName /*name1*/, RegisterName /*name2*/) const {return true;}
|
||||
|
||||
// Allocate name2range as an identity map and set rangeCount.
|
||||
//
|
||||
void initLiveRanges();
|
||||
|
||||
// Prepare the instruction graph, then construct an allocator and run doGraphColoring().
|
||||
//
|
||||
static void allocateRegisters(Pool& pool, ControlGraph& controlGraph, VirtualRegisterManager& vrManager, InstructionEmitter& emitter);
|
||||
};
|
||||
|
||||
//
|
||||
//
|
||||
// Keep references to the pool, control graph, virtual register manager and
// emitter, construct both graphs with this allocator, and take the number of
// register names from the virtual register manager. Members that are not
// listed below are not given a value here.
inline RegisterAllocator::RegisterAllocator(Pool& pool, ControlGraph& controlGraph, VirtualRegisterManager& vrManager, InstructionEmitter& emitter)
    : pool(pool),
      controlGraph(controlGraph),
      vrManager(vrManager),
      emitter(emitter),
      iGraph(*this),
      lGraph(*this),
      nameCount(vrManager.getSize())
{
}
|
||||
|
||||
#endif // _REGISTER_ALLOCATOR_H_
|
||||
|
||||
355
mozilla/ef/Compiler/RegisterAllocator/RegisterAllocatorTools.cpp
Normal file
355
mozilla/ef/Compiler/RegisterAllocator/RegisterAllocatorTools.cpp
Normal file
@@ -0,0 +1,355 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#include "Fundamentals.h"
|
||||
#include "LogModule.h"
|
||||
#include "RegisterAllocatorTools.h"
|
||||
#include "Pool.h"
|
||||
#include "ControlGraph.h"
|
||||
#include "ControlNodes.h"
|
||||
#include "Primitives.h"
|
||||
#include "InstructionEmitter.h"
|
||||
#include "Instruction.h"
|
||||
#include "RegisterAllocator.h"
|
||||
#include "Spilling.h"
|
||||
#include "Splits.h"
|
||||
#include "BitSet.h"
|
||||
|
||||
UT_EXTERN_LOG_MODULE(RegAlloc);
|
||||
|
||||
#ifdef DEBUG
|
||||
void RegisterAllocatorTools::testTheInstructionGraph(ControlGraph& controlGraph, VirtualRegisterManager& vrManager)
|
||||
{
|
||||
// Test the declared VirtualRegisters. The register allocator tries to condense the register universe.
|
||||
// Any gap in the VirtualRegister names will be a loss of efficiency !!!!
|
||||
|
||||
Uint32 nameCount = vrManager.getSize();
|
||||
BitSet registerSeen(controlGraph.pool, nameCount);
|
||||
|
||||
ControlNode** nodes = controlGraph.dfsList;
|
||||
Uint32 nNodes = controlGraph.nNodes;
|
||||
|
||||
for (Uint32 n = 0; n < nNodes; n++) {
|
||||
|
||||
InstructionList& instructions = nodes[n]->getInstructions();
|
||||
for (InstructionList::iterator i = instructions.begin(); !instructions.done(i); i = instructions.advance(i)) {
|
||||
Instruction& instruction = instructions.get(i);
|
||||
|
||||
InstructionUse* useEnd = instruction.getInstructionUseEnd();
|
||||
for (InstructionUse* usePtr = instruction.getInstructionUseBegin(); usePtr < useEnd; usePtr++)
|
||||
if (usePtr->isRegister())
|
||||
registerSeen.set(usePtr->getRegisterName());
|
||||
|
||||
InstructionDefine* defineEnd = instruction.getInstructionDefineEnd();
|
||||
for (InstructionDefine* definePtr = instruction.getInstructionDefineBegin(); definePtr < defineEnd; definePtr++)
|
||||
if (definePtr->isRegister())
|
||||
registerSeen.set(definePtr->getRegisterName());
|
||||
}
|
||||
|
||||
InstructionList& phiNodes = nodes[n]->getPhiNodeInstructions();
|
||||
for (InstructionList::iterator p = phiNodes.begin(); !phiNodes.done(p); p = phiNodes.advance(p)) {
|
||||
Instruction& instruction = phiNodes.get(p);
|
||||
|
||||
InstructionUse* useEnd = instruction.getInstructionUseEnd();
|
||||
for (InstructionUse* usePtr = instruction.getInstructionUseBegin(); usePtr < useEnd; usePtr++)
|
||||
if (usePtr->isRegister())
|
||||
registerSeen.set(usePtr->getRegisterName());
|
||||
|
||||
InstructionDefine* defineEnd = instruction.getInstructionDefineEnd();
|
||||
for (InstructionDefine* definePtr = instruction.getInstructionDefineBegin(); definePtr < defineEnd; definePtr++)
|
||||
if (definePtr->isRegister())
|
||||
registerSeen.set(definePtr->getRegisterName());
|
||||
}
|
||||
}
|
||||
|
||||
bool renameRegisters = false;
|
||||
for (BitSet::iterator i = registerSeen.nextZero(0); !registerSeen.done(i); i = registerSeen.nextZero(i)) {
|
||||
renameRegisters = true;
|
||||
fprintf(stderr,
|
||||
"WARNING: The VirtualRegister vr%d has been allocated during CodeGeneration but\n"
|
||||
" is never used nor defined by any instruction in the instruction graph\n"
|
||||
" PLEASE FIX \n",
|
||||
i);
|
||||
}
|
||||
if (renameRegisters) {
|
||||
Instruction** definingInstruction = new Instruction*[nameCount];
|
||||
memset(definingInstruction, '\0', nameCount * sizeof(Instruction*));
|
||||
RegisterName* newName = new RegisterName[nameCount];
|
||||
memset(newName, '\0', nameCount * sizeof(RegisterName));
|
||||
RegisterName nextName = RegisterName(1);
|
||||
|
||||
for (Uint32 n = 0; n < nNodes; n++) {
|
||||
|
||||
InstructionList& instructions = nodes[n]->getInstructions();
|
||||
for (InstructionList::iterator i = instructions.begin(); !instructions.done(i); i = instructions.advance(i)) {
|
||||
Instruction& instruction = instructions.get(i);
|
||||
|
||||
InstructionUse* useEnd = instruction.getInstructionUseEnd();
|
||||
for (InstructionUse* usePtr = instruction.getInstructionUseBegin(); usePtr < useEnd; usePtr++)
|
||||
if (usePtr->isRegister()) {
|
||||
RegisterName name = usePtr->getRegisterName();
|
||||
if (newName[name] == rnInvalid) {
|
||||
newName[name] = nextName;
|
||||
definingInstruction[nextName] = vrManager.getVirtualRegister(name).getDefiningInstruction();
|
||||
nextName = RegisterName(nextName + 1);
|
||||
}
|
||||
usePtr->setRegisterName(newName[name]);
|
||||
}
|
||||
|
||||
InstructionDefine* defineEnd = instruction.getInstructionDefineEnd();
|
||||
for (InstructionDefine* definePtr = instruction.getInstructionDefineBegin(); definePtr < defineEnd; definePtr++)
|
||||
if (definePtr->isRegister()) {
|
||||
RegisterName name = definePtr->getRegisterName();
|
||||
if (newName[name] == rnInvalid) {
|
||||
newName[name] = nextName;
|
||||
definingInstruction[nextName] = vrManager.getVirtualRegister(name).getDefiningInstruction();
|
||||
nextName = RegisterName(nextName + 1);
|
||||
}
|
||||
definePtr->setRegisterName(newName[name]);
|
||||
}
|
||||
}
|
||||
|
||||
InstructionList& phiNodes = nodes[n]->getPhiNodeInstructions();
|
||||
for (InstructionList::iterator p = phiNodes.begin(); !phiNodes.done(p); p = phiNodes.advance(p)) {
|
||||
Instruction& instruction = phiNodes.get(p);
|
||||
|
||||
InstructionUse* useEnd = instruction.getInstructionUseEnd();
|
||||
for (InstructionUse* usePtr = instruction.getInstructionUseBegin(); usePtr < useEnd; usePtr++)
|
||||
if (usePtr->isRegister()) {
|
||||
RegisterName name = usePtr->getRegisterName();
|
||||
if (newName[name] == rnInvalid) {
|
||||
newName[name] = nextName;
|
||||
definingInstruction[nextName] = vrManager.getVirtualRegister(name).getDefiningInstruction();
|
||||
nextName = RegisterName(nextName + 1);
|
||||
}
|
||||
usePtr->setRegisterName(newName[name]);
|
||||
}
|
||||
|
||||
InstructionDefine* defineEnd = instruction.getInstructionDefineEnd();
|
||||
for (InstructionDefine* definePtr = instruction.getInstructionDefineBegin(); definePtr < defineEnd; definePtr++)
|
||||
if (definePtr->isRegister()) {
|
||||
RegisterName name = definePtr->getRegisterName();
|
||||
if (newName[name] == rnInvalid) {
|
||||
newName[name] = nextName;
|
||||
definingInstruction[nextName] = vrManager.getVirtualRegister(name).getDefiningInstruction();
|
||||
nextName = RegisterName(nextName + 1);
|
||||
}
|
||||
definePtr->setRegisterName(newName[name]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
vrManager.setSize(nextName);
|
||||
|
||||
for (RegisterName r = RegisterName(1); r < nextName; r = RegisterName(r + 1))
|
||||
vrManager.getVirtualRegister(r).definingInstruction = definingInstruction[r];
|
||||
|
||||
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("RegisterMap:\n"));
|
||||
for (Uint32 i = 1; i < nameCount; i++)
|
||||
if (newName[i] != 0)
|
||||
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("\tvr%d becomes vr%d.\n", i, newName[i]));
|
||||
else
|
||||
UT_OBJECTLOG(UT_LOG_MODULE(RegAlloc), PR_LOG_ALWAYS, ("\tvr%d is dead.\n", i));
|
||||
|
||||
|
||||
delete newName;
|
||||
delete definingInstruction;
|
||||
}
|
||||
|
||||
}
|
||||
#endif // DEBUG
|
||||
|
||||
// Remove every copy instruction whose source and destination registers map
// to the same entry of registerAllocator.name2range: such a copy moves a
// range onto itself.
//
// Only the regular instruction lists of the nodes in controlGraph.dfsList are
// visited.
void RegisterAllocatorTools::removeUnnecessaryCopies(RegisterAllocator& registerAllocator)
{
    ControlGraph& graph = registerAllocator.controlGraph;
    RegisterName* rangeOf = registerAllocator.name2range;
    ControlNode** nodeList = graph.dfsList;
    Uint32 nodeCount = graph.nNodes;

    for (Uint32 nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) {
        InstructionList& list = nodeList[nodeIndex]->getInstructions();
        InstructionList::iterator cursor = list.begin();

        while (!list.done(cursor)) {
            Instruction& current = list.get(cursor);
            // Step to the successor before a possible remove() of `current`
            // (presumably so that unlinking it cannot disturb the walk).
            cursor = list.advance(cursor);

            if (!(current.getFlags() & ifCopy))
                continue;

            // A copy must have a register as first use and as first define.
            assert(current.getInstructionUseBegin() != current.getInstructionUseEnd() && current.getInstructionUseBegin()[0].isRegister());
            assert(current.getInstructionDefineBegin() != current.getInstructionDefineEnd() && current.getInstructionDefineBegin()[0].isRegister());

            RegisterName fromRange = rangeOf[current.getInstructionUseBegin()[0].getRegisterName()];
            RegisterName toRange = rangeOf[current.getInstructionDefineBegin()[0].getRegisterName()];

            if (fromRange == toRange)
                current.remove();
        }
    }
}
|
||||
|
||||
|
||||
void RegisterAllocatorTools::updateInstructionGraph(RegisterAllocator& registerAllocator)
|
||||
{
|
||||
ControlGraph& controlGraph = registerAllocator.controlGraph;
|
||||
ControlNode** nodes = controlGraph.dfsList;
|
||||
Uint32 nNodes = controlGraph.nNodes;
|
||||
RegisterName* name2range = registerAllocator.name2range;
|
||||
|
||||
for (Uint32 n = 0; n < nNodes; n++) {
|
||||
InstructionList& instructions = nodes[n]->getInstructions();
|
||||
for (InstructionList::iterator i = instructions.begin(); !instructions.done(i); i = instructions.advance(i)) {
|
||||
Instruction& instruction = instructions.get(i);
|
||||
|
||||
InstructionUse* useEnd = instruction.getInstructionUseEnd();
|
||||
for (InstructionUse* usePtr = instruction.getInstructionUseBegin(); usePtr < useEnd; usePtr++)
|
||||
if (usePtr->isRegister())
|
||||
usePtr->setRegisterName(name2range[usePtr->getRegisterName()]);
|
||||
|
||||
InstructionDefine* defineEnd = instruction.getInstructionDefineEnd();
|
||||
for (InstructionDefine* definePtr = instruction.getInstructionDefineBegin(); definePtr < defineEnd; definePtr++)
|
||||
if (definePtr->isRegister())
|
||||
definePtr->setRegisterName(name2range[definePtr->getRegisterName()]);
|
||||
}
|
||||
|
||||
InstructionList& phiNodes = nodes[n]->getPhiNodeInstructions();
|
||||
for (InstructionList::iterator p = phiNodes.begin(); !phiNodes.done(p); p = phiNodes.advance(p)) {
|
||||
Instruction& instruction = phiNodes.get(p);
|
||||
|
||||
InstructionUse* useEnd = instruction.getInstructionUseEnd();
|
||||
for (InstructionUse* usePtr = instruction.getInstructionUseBegin(); usePtr < useEnd; usePtr++)
|
||||
if (usePtr->isRegister())
|
||||
usePtr->setRegisterName(name2range[usePtr->getRegisterName()]);
|
||||
|
||||
InstructionDefine* defineEnd = instruction.getInstructionDefineEnd();
|
||||
for (InstructionDefine* definePtr = instruction.getInstructionDefineBegin(); definePtr < defineEnd; definePtr++)
|
||||
if (definePtr->isRegister())
|
||||
definePtr->setRegisterName(name2range[definePtr->getRegisterName()]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Create the PhiNodeInstructions of the control graph.
//
// For each node of controlGraph.dfsList that has phi nodes:
//   1. the incoming edges are numbered 0, 1, 2, ... in predecessor-list order;
//   2. every phi node of a storable kind gets one PhiNodeInstruction (drLow)
//      and, for doubleword kinds, a second one (drHigh). Each instruction
//      defines the phi node's producer and uses one producer per input.
// The instructions are allocated from controlGraph.pool and appended to the
// node with addPhiNodeInstruction().
void RegisterAllocatorTools::insertPhiNodeInstructions(ControlGraph& controlGraph, InstructionEmitter& emitter)
{
    Pool& pool = controlGraph.pool;
    ControlNode** nodeList = controlGraph.dfsList;
    Uint32 nodeCount = controlGraph.nNodes;

    for (Uint32 nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) {
        ControlNode& node = *nodeList[nodeIndex];
        DoublyLinkedList<PhiNode>& phiNodes = node.getPhiNodes();

        if (phiNodes.empty())
            continue;

        // Set the index of the incoming edges.
        const DoublyLinkedList<ControlEdge>& predecessors = node.getPredecessors();
        Uint32 edgeIndex = 0;
        for (DoublyLinkedList<ControlEdge>::iterator e = predecessors.begin(); !predecessors.done(e); e = predecessors.advance(e)) {
            predecessors.get(e).setIndex(edgeIndex);
            edgeIndex++;
        }

        // Insert the phi node instructions in the instruction list.
        for (DoublyLinkedList<PhiNode>::iterator i = phiNodes.begin(); !phiNodes.done(i); i = phiNodes.advance(i)) {
            PhiNode& phiNode = phiNodes.get(i);
            ValueKind kind = phiNode.getKind();

            if (!isStorableKind(kind))
                continue;

            RegisterClassKind classKind = rckGeneral; // FIX: get class kind from phi node kind.
            Uint32 nInputs = phiNode.nInputs();
            Uint32 whichInput;

            // Low (or only) part of the value.
            PhiNodeInstruction& lowInstruction = *new(pool) PhiNodeInstruction(&phiNode, pool, nInputs);
            emitter.defineProducer(phiNode, lowInstruction, 0, classKind, drLow);
            for (whichInput = 0; whichInput < nInputs; whichInput++)
                emitter.useProducer(phiNode.nthInputVariable(whichInput), lowInstruction, whichInput, classKind, drLow);
            node.addPhiNodeInstruction(lowInstruction);

            if (!isDoublewordKind(kind))
                continue;

            // High part of a doubleword value.
            PhiNodeInstruction& highInstruction = *new(pool) PhiNodeInstruction(&phiNode, pool, nInputs);
            emitter.defineProducer(phiNode, highInstruction, 0, classKind, drHigh);
            for (whichInput = 0; whichInput < nInputs; whichInput++)
                emitter.useProducer(phiNode.nthInputVariable(whichInput), highInstruction, whichInput, classKind, drHigh);
            node.addPhiNodeInstruction(highInstruction);
        }
    }
}
|
||||
|
||||
#ifdef DEBUG_LOG
|
||||
|
||||
// Log the spill cost of every range in [1, rangeCount) to the RegAlloc log
// module: either "infinite" or the numeric cost.
void RegisterAllocatorTools::printSpillCosts(RegisterAllocator& registerAllocator)
{
    LogModuleObject log = UT_LOG_MODULE(RegAlloc);
    SpillCost* costs = registerAllocator.spillCost;
    Uint32 nRanges = registerAllocator.rangeCount;

    UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("Spill costs:\n"));

    // Index 0 is skipped.
    for (Uint32 range = 1; range < nRanges; range++) {
        SpillCost& entry = costs[range];

        UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("\trange %d : ", range));
        if (entry.infinite)
            UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("infinite\n"));
        else
            UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("%f\n", entry.cost));
    }
}
|
||||
|
||||
// Log the split cost (loads and stores) of every range in [1, rangeCount)
// to the RegAlloc log module.
void RegisterAllocatorTools::printSplitCosts(RegisterAllocator& registerAllocator)
{
    LogModuleObject log = UT_LOG_MODULE(RegAlloc);
    SplitCost* costs = registerAllocator.splitCost;
    Uint32 nRanges = registerAllocator.rangeCount;

    UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("Split costs:\n"));

    // Index 0 is skipped.
    Uint32 range = 1;
    while (range < nRanges) {
        UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("\trange %d : loads = %f stores = %f\n", range, costs[range].loads, costs[range].stores));
        range++;
    }
}
|
||||
|
||||
// Log, for every node of the control graph (in dfsList order), a "N<index>:"
// header followed by the pretty-printed phi node instructions and then the
// pretty-printed regular instructions.
//
// The " PhiNodes:" header is printed only when the node has phi node
// instructions; the " Instructions:" header only when it has both phi node
// instructions and regular instructions.
void RegisterAllocatorTools::printInstructions(RegisterAllocator& registerAllocator)
{
    LogModuleObject log = UT_LOG_MODULE(RegAlloc);
    ControlNode** nodes = registerAllocator.controlGraph.dfsList;
    Uint32 nNodes = registerAllocator.controlGraph.nNodes;

    for (Uint32 n = 0; n < nNodes; n++) {
        UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("N%d:\n", n));

        InstructionList& phiNodes = nodes[n]->getPhiNodeInstructions();
        InstructionList& instructions = nodes[n]->getInstructions();

        if (!phiNodes.empty()) {
            // The format string has no conversion, so no argument is passed
            // (the original handed over a stray `n`).
            UT_OBJECTLOG(log, PR_LOG_ALWAYS, (" PhiNodes:\n"));
            for (InstructionList::iterator p = phiNodes.begin(); !phiNodes.done(p); p = phiNodes.advance(p)) {
                phiNodes.get(p).printPretty(log);
                UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("\n"));
            }
            if (!instructions.empty()) {
                UT_OBJECTLOG(log, PR_LOG_ALWAYS, (" Instructions:\n"));
            }
        }

        for (InstructionList::iterator i = instructions.begin(); !instructions.done(i); i = instructions.advance(i)) {
            instructions.get(i).printPretty(log);
            UT_OBJECTLOG(log, PR_LOG_ALWAYS, ("\n"));
        }
    }
}
|
||||
#endif // DEBUG_LOG
|
||||
117
mozilla/ef/Compiler/RegisterAllocator/RegisterAllocatorTools.h
Normal file
117
mozilla/ef/Compiler/RegisterAllocator/RegisterAllocatorTools.h
Normal file
@@ -0,0 +1,117 @@
|
||||
// -*- mode:C++; tab-width:4; truncate-lines:t -*-
|
||||
//
|
||||
// CONFIDENTIAL AND PROPRIETARY SOURCE CODE OF
|
||||
// NETSCAPE COMMUNICATIONS CORPORATION
|
||||
// Copyright © 1996, 1997 Netscape Communications Corporation. All Rights
|
||||
// Reserved. Use of this Source Code is subject to the terms of the
|
||||
// applicable license agreement from Netscape Communications Corporation.
|
||||
// The copyright notice(s) in this Source Code does not indicate actual or
|
||||
// intended publication of this Source Code.
|
||||
//
|
||||
// $Id: RegisterAllocatorTools.h,v 1.1.2.1 1999-03-02 16:12:05 fur%netscape.com Exp $
|
||||
//
|
||||
|
||||
#ifndef _REGISTER_ALLOCATOR_TOOLS_H_
|
||||
#define _REGISTER_ALLOCATOR_TOOLS_H_
|
||||
|
||||
#include "LogModule.h"
|
||||
#include "RegisterTypes.h"
|
||||
#include <string.h>
|
||||
|
||||
class RegisterAllocator;
|
||||
class ControlGraph;
|
||||
class InstructionEmitter;
|
||||
class VirtualRegisterManager;
|
||||
|
||||
// Collection of static helper routines used around register allocation.
// The struct has no data members; it only groups the functions below.
struct RegisterAllocatorTools
|
||||
{
|
||||
// Create the PhiNodeInstructions for the storable phi nodes of every node
|
||||
// of the graph (a second one for doubleword kinds) and add them to their node.
|
||||
static void insertPhiNodeInstructions(ControlGraph& controlGraph, InstructionEmitter& emitter);
|
||||
|
||||
// Replace each register operand (uses and defines, regular and phi node
|
||||
// instructions) by its entry in registerAllocator.name2range.
|
||||
static void updateInstructionGraph(RegisterAllocator& registerAllocator);
|
||||
|
||||
// Remove the copy instructions whose source and destination map to the
|
||||
// same entry of registerAllocator.name2range.
|
||||
static void removeUnnecessaryCopies(RegisterAllocator& registerAllocator);
|
||||
|
||||
#ifdef DEBUG
|
||||
// Debug check of the declared VirtualRegisters: warns about names that no
|
||||
// instruction uses or defines and renames the registers to close the gaps.
|
||||
static void testTheInstructionGraph(ControlGraph& controlGraph, VirtualRegisterManager& vrManager);
|
||||
#endif // DEBUG
|
||||
|
||||
#ifdef DEBUG_LOG
|
||||
// Log the phi node instructions and the instructions of every node of
|
||||
// registerAllocator.controlGraph.
|
||||
static void printInstructions(RegisterAllocator& registerAllocator);
|
||||
|
||||
// Log registerAllocator.spillCost for the ranges 1 .. rangeCount - 1
|
||||
// ("infinite" or the numeric cost).
|
||||
static void printSpillCosts(RegisterAllocator& registerAllocator);
|
||||
|
||||
// Log registerAllocator.splitCost (loads and stores) for the ranges
|
||||
// 1 .. rangeCount - 1.
|
||||
static void printSplitCosts(RegisterAllocator& registerAllocator);
|
||||
#endif // DEBUG_LOG
|
||||
};
|
||||
|
||||
//
|
||||
// FIX: this should go in a class (LookupTable ?)
|
||||
//
|
||||
|
||||
// Find the representative (root) of `name` in the union-find style lookup
// table `table`, where table[x] is the parent of x and a root satisfies
// table[x] == x.
//
// As a side effect every entry visited on the way from `name` to the root is
// rewritten to point directly at the root, so later lookups are shorter.
//
// The previous implementation remembered the visited names on a stack that
// grew downwards from `table` itself (*--stackPtr with stackPtr == table),
// i.e. it wrote to memory located before the first table entry. Walking the
// chain twice gives the same final table contents without touching anything
// outside of the table.
//
//   name   any valid index of `table`.
//   table  the parent table; modified in place.
// Returns the root of `name`.
inline RegisterName findRoot(RegisterName name, RegisterName* table)
{
    // Pass 1: follow the parent links up to the root.
    RegisterName root = name;
    while (table[root] != root)
        root = table[root];

    // Pass 2: redirect every entry of the chain to the root.
    while (name != root) {
        RegisterName parent = table[name];
        table[name] = root;
        name = parent;
    }

    return root;
}
|
||||
|
||||
// Make every entry of `table` its own root: table[i] = i for each i in
// [0, nameCount).
inline void init(RegisterName* table, Uint32 nameCount)
{
    Uint32 index = 0;
    while (index < nameCount) {
        table[index] = RegisterName(index);
        index++;
    }
}
|
||||
|
||||
// Renumber the roots of the lookup table `table` densely and update
// `name2range` accordingly.
//
//   1. Every entry 1 .. tableSize - 1 of `table` is flattened with findRoot()
//      so that it points directly at its root.
//   2. Each root (table[s] == s) receives a new consecutive number, starting
//      at 1 (0 is left unused).
//   3. name2range[t], for t in 1 .. nameCount - 1, is replaced by the new
//      number of the root of its old value.
//
// Returns the next unused number, i.e. the number of roots found plus one.
//
// The temporary renumbering array is now released before returning; it used
// to be leaked on every call.
inline Uint32 compress(RegisterName* name2range, RegisterName* table, Uint32 nameCount, Uint32 tableSize)
{
    // liveRange[root] is the new number of `root`, 0 for non-root entries.
    RegisterName* liveRange = new RegisterName[tableSize];
    memset(liveRange, '\0', tableSize * sizeof(RegisterName));

    // Update the lookup table.
    for (RegisterName r = RegisterName(1); r < tableSize; r = RegisterName(r + 1))
        findRoot(r, table);

    // Count the liveranges.
    Uint32 liveRangeCount = 1;
    for (RegisterName s = RegisterName(1); s < tableSize; s = RegisterName(s + 1))
        if (table[s] == s)
            liveRange[s] = RegisterName(liveRangeCount++);

    for (RegisterName t = RegisterName(1); t < nameCount; t = RegisterName(t + 1))
        name2range[t] = liveRange[table[name2range[t]]];

    // Only values were copied out of liveRange, so it can be freed here.
    delete[] liveRange;

    return liveRangeCount;
}
|
||||
|
||||
// Return 10 raised to `power` (1.0 for power == 0), computed by repeated
// multiplication. Despite its name this function does not compute a
// logarithm.
inline double doLog10(Uint32 power)
{
    double result = 1.0;
    for (Uint32 remaining = power; remaining != 0; remaining--)
        result *= 10.0;
    return result;
}
|
||||
|
||||
#endif // _REGISTER_ALLOCATOR_TOOLS_H_
|
||||
38
mozilla/ef/Compiler/RegisterAllocator/RegisterAssigner.h
Normal file
38
mozilla/ef/Compiler/RegisterAllocator/RegisterAssigner.h
Normal file
@@ -0,0 +1,38 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#ifndef _REGISTER_ASSIGNER_H_
|
||||
#define _REGISTER_ASSIGNER_H_
|
||||
|
||||
#include "Fundamentals.h"
|
||||
#include "VirtualRegister.h"
|
||||
|
||||
class FastBitMatrix;
|
||||
|
||||
class RegisterAssigner
|
||||
{
|
||||
protected:
|
||||
VirtualRegisterManager& vRegManager;
|
||||
|
||||
public:
|
||||
RegisterAssigner(VirtualRegisterManager& vrMan) : vRegManager(vrMan) {}
|
||||
|
||||
virtual bool assignRegisters(FastBitMatrix& interferenceMatrix) = 0;
|
||||
};
|
||||
|
||||
#endif /* _REGISTER_ASSIGNER_H_ */
|
||||
25
mozilla/ef/Compiler/RegisterAllocator/RegisterClass.h
Normal file
25
mozilla/ef/Compiler/RegisterAllocator/RegisterClass.h
Normal file
@@ -0,0 +1,25 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#ifndef _REGISTER_CLASS_H_
|
||||
#define _REGISTER_CLASS_H_
|
||||
|
||||
#include "Fundamentals.h"
|
||||
#include "RegisterTypes.h"
|
||||
|
||||
#endif // _REGISTER_CLASS_H_
|
||||
37
mozilla/ef/Compiler/RegisterAllocator/RegisterPressure.h
Normal file
37
mozilla/ef/Compiler/RegisterAllocator/RegisterPressure.h
Normal file
@@ -0,0 +1,37 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#ifndef _REGISTER_PRESSURE_H_
|
||||
#define _REGISTER_PRESSURE_H_
|
||||
|
||||
#include "BitSet.h"
|
||||
#include "HashSet.h"
|
||||
|
||||
// Compile-time description of a set representation: the Set type is BitSet
// and setIsOrdered is true.
// NOTE(review): presumably passed as a traits/policy parameter by code that
// expects low register pressure - confirm against the users of this struct.
struct LowRegisterPressure
|
||||
{
|
||||
typedef BitSet Set;
|
||||
static const bool setIsOrdered = true;
|
||||
};
|
||||
|
||||
// Compile-time description of a set representation: the Set type is HashSet
// and setIsOrdered is false.
// NOTE(review): presumably passed as a traits/policy parameter by code that
// expects high register pressure - confirm against the users of this struct.
struct HighRegisterPressure
|
||||
{
|
||||
typedef HashSet Set;
|
||||
static const bool setIsOrdered = false;
|
||||
};
|
||||
|
||||
#endif // _REGISTER_PRESSURE_H_
|
||||
104
mozilla/ef/Compiler/RegisterAllocator/RegisterTypes.h
Normal file
104
mozilla/ef/Compiler/RegisterAllocator/RegisterTypes.h
Normal file
@@ -0,0 +1,104 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#ifndef _REGISTER_TYPES_H_
|
||||
#define _REGISTER_TYPES_H_
|
||||
|
||||
#include "Fundamentals.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// RegisterName - Numeric name of a register. Only the invalid name (0) is
|
||||
// enumerated; the other names are integers converted to RegisterName.
|
||||
|
||||
enum RegisterName {
|
||||
rnInvalid = 0,
|
||||
};
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// RegisterClassKind - Class a register belongs to. rckInvalid is 0 and
|
||||
// nRegisterClassKind, being last, is the number of enumerators before it.
|
||||
|
||||
enum RegisterClassKind {
|
||||
rckInvalid = 0,
|
||||
rckGeneral,
|
||||
rckStackSlot,
|
||||
|
||||
nRegisterClassKind
|
||||
};
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// RegisterID - Register name and register class packed in one value (see the
|
||||
// masks, shifts and accessors below). invalidID is 0.
|
||||
|
||||
enum RegisterID {
|
||||
invalidID = 0
|
||||
};
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// RegisterKind - Distinguishes caller-save (0) from callee-save (1)
|
||||
// registers.
|
||||
|
||||
enum RegisterKind {
|
||||
rkCallerSave = 0,
|
||||
rkCalleeSave,
|
||||
};
|
||||
|
||||
// Node of a singly linked list of register names.
struct NameLinkedList {
|
||||
// Register name stored in this node.
RegisterName name;
|
||||
// Following node of the list (end-of-list convention not shown here).
NameLinkedList* next;
|
||||
};
|
||||
|
||||
#ifdef DEBUG
|
||||
|
||||
const registerNameMask = 0x03ffffff;
|
||||
const coloredRegisterMask = 0x04000000;
|
||||
const machineRegisterMask = 0x08000000;
|
||||
const registerClassMask = 0xf0000000;
|
||||
|
||||
const registerNameShift = 0;
|
||||
const coloredRegisterShift = 26;
|
||||
const machineRegisterShift = 27;
|
||||
const registerClassShift = 28;
|
||||
|
||||
#else // DEBUG
|
||||
|
||||
const registerNameMask = 0x0fffffff;
|
||||
const registerClassMask = 0xf0000000;
|
||||
|
||||
const registerNameShift = 0;
|
||||
const registerClassShift = 28;
|
||||
|
||||
#endif // DEBUG
|
||||
|
||||
|
||||
// Extract the register class field of registerID.
inline RegisterClassKind getClass(RegisterID registerID)
{
    return RegisterClassKind((registerID & registerClassMask) >> registerClassShift);
}
|
||||
// Extract the register name field of registerID.
inline RegisterName getName(RegisterID registerID)
{
    return RegisterName((registerID & registerNameMask) >> registerNameShift);
}
|
||||
// Replace the register class field of registerID by classKind; the other
// bits are left untouched.
inline void setClass(RegisterID& registerID, RegisterClassKind classKind)
{
    registerID = RegisterID((registerID & ~registerClassMask) | ((classKind << registerClassShift) & registerClassMask));
}
|
||||
// Replace the register name field of registerID by name; the other bits are
// left untouched. The name must fit in the name field (asserted).
inline void setName(RegisterID& registerID, RegisterName name)
{
    assert((name & ~registerNameMask) == 0);
    registerID = RegisterID((registerID & ~registerNameMask) | ((name << registerNameShift) & registerNameMask));
}
|
||||
// Build a RegisterID from a register name and a register class. Bits of
// either argument that fall outside of their field are masked away.
inline RegisterID buildRegisterID(RegisterName name, RegisterClassKind classKind)
{
    return RegisterID(((classKind << registerClassShift) & registerClassMask) | ((name << registerNameShift) & registerNameMask));
}
|
||||
|
||||
#ifdef DEBUG
|
||||
|
||||
// Return true if the "machine register" flag of rid is set (DEBUG only).
inline bool isMachineRegister(RegisterID rid)
{
    return (rid & machineRegisterMask) != 0;
}
|
||||
// Set the "machine register" flag of rid (DEBUG only).
inline void setMachineRegister(RegisterID& rid)
{
    rid = RegisterID(rid | machineRegisterMask);
}
|
||||
// Return true if the "colored register" flag of rid is set (DEBUG only).
inline bool isColoredRegister(RegisterID rid)
{
    return (rid & coloredRegisterMask) != 0;
}
|
||||
// Set the "colored register" flag of rid (DEBUG only).
inline void setColoredRegister(RegisterID& rid)
{
    rid = RegisterID(rid | coloredRegisterMask);
}
|
||||
|
||||
#endif // DEBUG
|
||||
|
||||
#endif // _REGISTER_TYPES_H_
|
||||
32
mozilla/ef/Compiler/RegisterAllocator/SSATools.cpp
Normal file
32
mozilla/ef/Compiler/RegisterAllocator/SSATools.cpp
Normal file
@@ -0,0 +1,32 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#include "Fundamentals.h"
|
||||
#include "SSATools.h"
|
||||
#include "ControlGraph.h"
|
||||
#include "VirtualRegister.h"
|
||||
#include "Liveness.h"
|
||||
|
||||
// Entry point for the replacement of the phi nodes. In its current state the
// function only builds a liveness analysis of the graph, and only when the
// graph has back edges; the result is discarded when the function returns.
void replacePhiNodes(ControlGraph& controlGraph, VirtualRegisterManager& vrManager)
{
    if (controlGraph.hasBackEdges) {
        Liveness analysis(controlGraph.pool);
        analysis.buildLivenessAnalysis(controlGraph, vrManager);
    }
}
|
||||
29
mozilla/ef/Compiler/RegisterAllocator/SSATools.h
Normal file
29
mozilla/ef/Compiler/RegisterAllocator/SSATools.h
Normal file
@@ -0,0 +1,29 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SSA_TOOLS_H_
|
||||
#define _SSA_TOOLS_H_
|
||||
|
||||
#include "Fundamentals.h"
|
||||
|
||||
class ControlGraph;
|
||||
class VirtualRegisterManager;
|
||||
|
||||
extern void replacePhiNodes(ControlGraph& controlGraph, VirtualRegisterManager& vrManager);
|
||||
|
||||
#endif // _SSA_TOOLS_H_
|
||||
37
mozilla/ef/Compiler/RegisterAllocator/SparseSet.cpp
Normal file
37
mozilla/ef/Compiler/RegisterAllocator/SparseSet.cpp
Normal file
@@ -0,0 +1,37 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#include "Fundamentals.h"
|
||||
#include "SparseSet.h"
|
||||
#include "BitSet.h"
|
||||
#include "Pool.h"
|
||||
|
||||
#ifdef DEBUG_LOG
|
||||
// Print the set.
|
||||
//
|
||||
void SparseSet::printPretty(LogModuleObject log)
|
||||
{
|
||||
Pool pool;
|
||||
BitSet set(pool, universeSize);
|
||||
|
||||
for (Uint32 i = 0; i < count; i++)
|
||||
set.set(node[i].element);
|
||||
|
||||
set.printPretty(log);
|
||||
}
|
||||
#endif // DEBUG_LOG
|
||||
168
mozilla/ef/Compiler/RegisterAllocator/SparseSet.h
Normal file
168
mozilla/ef/Compiler/RegisterAllocator/SparseSet.h
Normal file
@@ -0,0 +1,168 @@
|
||||
// -*- mode:C++; tab-width:4; truncate-lines:t -*-
|
||||
//
|
||||
// CONFIDENTIAL AND PROPRIETARY SOURCE CODE OF
|
||||
// NETSCAPE COMMUNICATIONS CORPORATION
|
||||
// Copyright © 1996, 1997 Netscape Communications Corporation. All Rights
|
||||
// Reserved. Use of this Source Code is subject to the terms of the
|
||||
// applicable license agreement from Netscape Communications Corporation.
|
||||
// The copyright notice(s) in this Source Code does not indicate actual or
|
||||
// intended publication of this Source Code.
|
||||
//
|
||||
// $Id: SparseSet.h,v 1.1.2.1 1999-03-02 16:12:07 fur%netscape.com Exp $
|
||||
//
|
||||
|
||||
#ifndef _SPARSE_SET_H_
|
||||
#define _SPARSE_SET_H_
|
||||
|
||||
#include "Fundamentals.h"
|
||||
#include "Pool.h"
|
||||
#include "LogModule.h"
|
||||
#include "BitSet.h"
|
||||
|
||||
class SparseSet
|
||||
{
|
||||
private:
|
||||
|
||||
struct Node {
|
||||
Uint32 element;
|
||||
Uint32 stackIndex;
|
||||
};
|
||||
|
||||
Node* node;
|
||||
Uint32 count;
|
||||
Uint32 universeSize;
|
||||
|
||||
private:
|
||||
|
||||
// No copy constructor.
|
||||
SparseSet(const SparseSet&);
|
||||
|
||||
// Check if the given set's universe is of the same size than this universe.
|
||||
void checkUniverseCompatibility(const SparseSet& set) const {assert(set.universeSize == universeSize);}
|
||||
// Check if pos is valid for this set's universe.
|
||||
void checkMember(Int32 pos) const {assert(pos >=0 && Uint32(pos) < universeSize);}
|
||||
|
||||
public:
|
||||
|
||||
SparseSet(Pool& pool, Uint32 universeSize) : universeSize(universeSize) {node = new(pool) Node[universeSize]; clear();}
|
||||
|
||||
// Clear the sparse set.
|
||||
void clear() {count = 0;}
|
||||
// Clear the element at index.
|
||||
inline void clear(Uint32 index);
|
||||
// Set the element at index.
|
||||
inline void set(Uint32 index);
|
||||
// Return true if the element at index is set.
|
||||
inline bool test(Uint32 index) const;
|
||||
// Union with the given sparse set.
|
||||
inline void or(const SparseSet& set);
|
||||
// Intersection with the given sparse set.
|
||||
inline void and(const SparseSet& set);
|
||||
// Difference with the given sparse set.
|
||||
inline void difference(const SparseSet& set);
|
||||
// Copy set.
|
||||
inline SparseSet& operator = (const SparseSet& set);
|
||||
inline SparseSet& operator = (const BitSet& set);
|
||||
// Return true if the sparse sets are identical.
|
||||
friend bool operator == (const SparseSet& set1, const SparseSet& set2);
|
||||
// Return true if the sparse sets are different.
|
||||
friend bool operator != (const SparseSet& set1, const SparseSet& set2);
|
||||
|
||||
// Logical operators.
|
||||
SparseSet& operator |= (const SparseSet& set) {or(set); return *this;}
|
||||
SparseSet& operator &= (const SparseSet& set) {and(set); return *this;}
|
||||
SparseSet& operator -= (const SparseSet& set) {difference(set); return *this;}
|
||||
|
||||
// Iterator to conform with the set API.
|
||||
typedef Int32 iterator;
|
||||
// Return the iterator for the first element of this set.
|
||||
iterator begin() const {return count - 1;}
|
||||
// Return the next iterator.
|
||||
iterator advance(iterator pos) const {return --pos;}
|
||||
// Return true if the iterator is at the end of the set.
|
||||
bool done(iterator pos) const {return pos < 0;}
|
||||
// Return the element for the given iterator;
|
||||
Uint32 get(iterator pos) const {return node[pos].element;}
|
||||
// Return one element of this set.
|
||||
Uint32 getOne() const {assert(count > 0); return node[0].element;}
|
||||
// Return the size of this set.
|
||||
Uint32 getSize() const {return count;}
|
||||
|
||||
#ifdef DEBUG_LOG
|
||||
// Print the set.
|
||||
void printPretty(LogModuleObject log);
|
||||
#endif // DEBUG_LOG
|
||||
};
|
||||
|
||||
// Remove element from the set. Nothing happens when it is not a member.
// The stack slot it occupied is refilled with the element on top of the
// stack, so the member stack stays dense.
inline void SparseSet::clear(Uint32 element)
{
    checkMember(element);

    Node* const nodes = node;
    const Uint32 size = count;
    const Uint32 slot = nodes[element].stackIndex;

    // Not a member: the claimed slot is out of range or names someone else.
    if (slot >= size || nodes[slot].element != element)
        return;

    const Uint32 top = nodes[size - 1].element;
    nodes[slot].element = top;
    nodes[top].stackIndex = slot;
    count = size - 1;
}
|
||||
|
||||
// Add element to the set. Nothing happens when it is already a member;
// otherwise it is pushed on top of the member stack.
inline void SparseSet::set(Uint32 element)
{
    checkMember(element);

    Node* const nodes = node;
    const Uint32 size = count;
    const Uint32 slot = nodes[element].stackIndex;
    const bool present = (slot < size) && (nodes[slot].element == element);

    if (!present) {
        nodes[size].element = element;
        nodes[element].stackIndex = size;
        count = size + 1;
    }
}
|
||||
|
||||
// Tell whether element belongs to the set: its claimed stack position must
// lie inside the live part of the stack and that slot must name it back.
inline bool SparseSet::test(Uint32 element) const
{
    checkMember(element);

    const Uint32 slot = node[element].stackIndex;
    if (slot >= count)
        return false;
    return node[slot].element == element;
}
|
||||
|
||||
// Make this set a copy of the given set; both must share the same universe
// size. The source's member stack is copied wholesale and the stackIndex of
// every copied member is then rebuilt so that it points into our own stack.
inline SparseSet& SparseSet::operator = (const SparseSet& set)
{
    checkUniverseCompatibility(set);

    // Self-assignment would hand memcpy identical (overlapping) source and
    // destination ranges, which is undefined; there is nothing to copy anyway.
    if (this == &set)
        return *this;

    Uint32 sourceCount = set.getSize();
    Node* node = this->node;

    memcpy(node, set.node, sourceCount * sizeof(Node));

    // The copied stackIndex fields belong to whatever elements have the
    // indices 0 .. sourceCount-1, not to the members just copied: recompute
    // the stack position of each member.
    for (Uint32 i = 0; i < sourceCount; i++) {
        Uint32 element = node[i].element;
        node[element].stackIndex = i;
    }

    count = sourceCount;

    return *this;
}
|
||||
|
||||
|
||||
// Make this set hold exactly the bits that are on in the given BitSet,
// whose size must equal our universe size.
inline SparseSet& SparseSet::operator = (const BitSet& set)
{
    // FIX: there's room for optimization here.
    assert(universeSize == set.getSize());

    clear();

    // Walk the on bits; -1 marks the end of the walk. The parameter hides
    // the member function set(), hence the explicit this->.
    Int32 bit = set.firstOne();
    while (bit != -1) {
        this->set(bit);
        bit = set.nextOne(bit);
    }

    return *this;
}
|
||||
|
||||
#endif // _SPARSE_SET_H_
|
||||
270
mozilla/ef/Compiler/RegisterAllocator/Spilling.cpp
Normal file
270
mozilla/ef/Compiler/RegisterAllocator/Spilling.cpp
Normal file
@@ -0,0 +1,270 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#ifndef NEW_LAURENTM_CODE
|
||||
#define INCLUDE_EMITTER
|
||||
#include "CpuInfo.h"
|
||||
#include "Fundamentals.h"
|
||||
#include "ControlNodes.h"
|
||||
#include "Instruction.h"
|
||||
#include "InstructionEmitter.h"
|
||||
#include "Spilling.h"
|
||||
|
||||
|
||||
void Spilling::
|
||||
insertSpillCode(ControlNode** dfsList, Uint32 nNodes)
|
||||
{
|
||||
PRUint32 nVirtualRegisters = vRegManager.count();
|
||||
FastBitSet currentLive(vRegManager.pool, nVirtualRegisters);
|
||||
FastBitSet usedInThisInstruction(vRegManager.pool, nVirtualRegisters);
|
||||
RegisterFifo grNeedLoad(nVirtualRegisters);
|
||||
RegisterFifo fpNeedLoad(nVirtualRegisters);
|
||||
|
||||
for (PRInt32 n = nNodes - 1; n >= 0; n--)
|
||||
{
|
||||
PR_ASSERT(grNeedLoad.empty() & fpNeedLoad.empty());
|
||||
ControlNode& node = *dfsList[n];
|
||||
|
||||
currentLive = node.liveAtEnd;
|
||||
|
||||
PRUint32 nGeneralAlive = 0;
|
||||
PRUint32 nFloatingPointAlive = 0;
|
||||
|
||||
// Get the number of registers alive at the end of this node.
|
||||
for (PRInt32 j = currentLive.firstOne(); j != -1; j = currentLive.nextOne(j))
|
||||
{
|
||||
VirtualRegister& vReg = vRegManager.getVirtualRegister(j);
|
||||
if (vReg.spillInfo.willSpill)
|
||||
{
|
||||
currentLive.clear(j);
|
||||
}
|
||||
else
|
||||
{
|
||||
switch (vReg.getClass())
|
||||
{
|
||||
case vrcInteger:
|
||||
nGeneralAlive++;
|
||||
break;
|
||||
case vrcFloatingPoint:
|
||||
case vrcFixedPoint:
|
||||
nFloatingPointAlive++;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// if(node.dfsNum == 8) printf("\n________Begin Node %d________\n", node.dfsNum);
|
||||
|
||||
InstructionList& instructions = node.getInstructions();
|
||||
for (InstructionList::iterator i = instructions.end(); !instructions.done(i); i = instructions.retreat(i))
|
||||
{
|
||||
Instruction& instruction = instructions.get(i);
|
||||
InstructionUse* useBegin = instruction.getInstructionUseBegin();
|
||||
InstructionUse* useEnd = instruction.getInstructionUseEnd();
|
||||
InstructionUse* usePtr;
|
||||
InstructionDefine* defBegin = instruction.getInstructionDefineBegin();
|
||||
InstructionDefine* defEnd = instruction.getInstructionDefineEnd();
|
||||
InstructionDefine* defPtr;
|
||||
|
||||
// if(node.dfsNum == 8) { printf("\n");
|
||||
// instruction.printPretty(stdout);
|
||||
// printf("\n"); }
|
||||
|
||||
// Handle definitions
|
||||
for (defPtr = defBegin; defPtr < defEnd; defPtr++)
|
||||
if (defPtr->isVirtualRegister())
|
||||
{
|
||||
VirtualRegister& vReg = defPtr->getVirtualRegister();
|
||||
currentLive.clear(vReg.getRegisterIndex());
|
||||
switch (vReg.getClass())
|
||||
{
|
||||
case vrcInteger:
|
||||
nGeneralAlive--;
|
||||
break;
|
||||
case vrcFloatingPoint:
|
||||
case vrcFixedPoint:
|
||||
nFloatingPointAlive--;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Check for deaths
|
||||
for (usePtr = useBegin; usePtr < useEnd; usePtr++)
|
||||
if (usePtr->isVirtualRegister())
|
||||
{
|
||||
VirtualRegister& vReg = usePtr->getVirtualRegister();
|
||||
if (!currentLive.test(vReg.getRegisterIndex()))
|
||||
// This is the last use of this register.
|
||||
{
|
||||
currentLive.set(vReg.getRegisterIndex());
|
||||
switch (vReg.getClass())
|
||||
{
|
||||
case vrcInteger:
|
||||
nGeneralAlive++;
|
||||
while (/*(nGeneralAlive > NUMBER_OF_GREGISTERS) &&*/ !grNeedLoad.empty())
|
||||
{
|
||||
PRUint32 toLoad = grNeedLoad.get();
|
||||
currentLive.clear(toLoad);
|
||||
nGeneralAlive--;
|
||||
|
||||
VirtualRegister& nReg = vRegManager.getVirtualRegister(toLoad);
|
||||
Instruction& lastUsingInstruction = *nReg.spillInfo.lastUsingInstruction;
|
||||
emitter.emitLoadAfter(*lastUsingInstruction.getPrimitive(), lastUsingInstruction.getLinks().prev,
|
||||
nReg.getAlias(), *nReg.equivalentRegister[vrcStackSlot]);
|
||||
nReg.releaseSelf();
|
||||
}
|
||||
break;
|
||||
case vrcFloatingPoint:
|
||||
case vrcFixedPoint:
|
||||
nFloatingPointAlive++;
|
||||
while (/*(nFloatingPointAlive > NUMBER_OF_FPREGISTERS) &&*/ !fpNeedLoad.empty())
|
||||
{
|
||||
PRUint32 toLoad = fpNeedLoad.get();
|
||||
currentLive.clear(toLoad);
|
||||
nFloatingPointAlive--;
|
||||
|
||||
VirtualRegister& nReg = vRegManager.getVirtualRegister(toLoad);
|
||||
Instruction& lastUsingInstruction = *nReg.spillInfo.lastUsingInstruction;
|
||||
emitter.emitLoadAfter(*lastUsingInstruction.getPrimitive(), lastUsingInstruction.getLinks().prev,
|
||||
nReg.getAlias(), *nReg.equivalentRegister[vrcStackSlot]);
|
||||
nReg.releaseSelf();
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Handle uses
|
||||
for (usePtr = useBegin; usePtr < useEnd; usePtr++)
|
||||
if (usePtr->isVirtualRegister())
|
||||
{
|
||||
VirtualRegister& vReg = usePtr->getVirtualRegister();
|
||||
PRUint32 registerIndex = vReg.getRegisterIndex();
|
||||
|
||||
if (vReg.spillInfo.willSpill) {
|
||||
#if defined(GENERATE_FOR_X86)
|
||||
if (!instruction.switchUseToSpill((usePtr - useBegin), *vReg.equivalentRegister[vrcStackSlot]))
|
||||
#endif
|
||||
{
|
||||
switch (vReg.getClass())
|
||||
{
|
||||
case vrcInteger:
|
||||
if (!grNeedLoad.test(registerIndex))
|
||||
{
|
||||
grNeedLoad.put(registerIndex);
|
||||
VirtualRegister& alias = vRegManager.newVirtualRegister(vrcInteger);
|
||||
if (vReg.isPreColored())
|
||||
alias.preColorRegister(vReg.getPreColor());
|
||||
/* if (vReg.hasSpecialInterference) {
|
||||
alias.specialInterference.sizeTo(NUMBER_OF_REGISTERS);
|
||||
alias.specialInterference = vReg.specialInterference;
|
||||
alias.hasSpecialInterference = true;
|
||||
} */
|
||||
vReg.setAlias(alias);
|
||||
vReg.retainSelf();
|
||||
}
|
||||
break;
|
||||
case vrcFloatingPoint:
|
||||
case vrcFixedPoint:
|
||||
if (!fpNeedLoad.test(registerIndex))
|
||||
{
|
||||
fpNeedLoad.put(registerIndex);
|
||||
VirtualRegister& alias = vRegManager.newVirtualRegister(vReg.getClass());
|
||||
if (vReg.isPreColored())
|
||||
alias.preColorRegister(vReg.getPreColor());
|
||||
/*if (vReg.hasSpecialInterference) {
|
||||
alias.specialInterference.sizeTo(NUMBER_OF_REGISTERS);
|
||||
alias.specialInterference = vReg.specialInterference;
|
||||
alias.hasSpecialInterference = true;
|
||||
} */
|
||||
vReg.setAlias(alias);
|
||||
vReg.retainSelf();
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
usePtr->getVirtualRegisterPtr().initialize(vReg.getAlias());
|
||||
usedInThisInstruction.set(registerIndex);
|
||||
vReg.spillInfo.lastUsingInstruction = &instruction;
|
||||
}
|
||||
currentLive.clear(registerIndex);
|
||||
} else { // will not spill
|
||||
currentLive.set(registerIndex);
|
||||
}
|
||||
}
|
||||
|
||||
// Handle definitions
|
||||
for (defPtr = defBegin; defPtr < defEnd; defPtr++)
|
||||
if (defPtr->isVirtualRegister())
|
||||
{
|
||||
VirtualRegister& vReg = defPtr->getVirtualRegister();
|
||||
|
||||
if (vReg.spillInfo.willSpill)
|
||||
#if defined(GENERATE_FOR_X86)
|
||||
if (!instruction.switchDefineToSpill((defPtr - defBegin), *vReg.equivalentRegister[vrcStackSlot]))
|
||||
#endif
|
||||
{
|
||||
if (usedInThisInstruction.test(vReg.getRegisterIndex()))
|
||||
// this virtualRegister was used in this instruction and is also defined. We need to move
|
||||
// this virtual register to its alias first and then save it to memory.
|
||||
{
|
||||
emitter.emitStoreAfter(*instruction.getPrimitive(), &instruction.getLinks(),
|
||||
vReg.getAlias(), *vReg.equivalentRegister[vrcStackSlot]);
|
||||
defPtr->getVirtualRegisterPtr().initialize(vReg.getAlias());
|
||||
}
|
||||
else
|
||||
{
|
||||
emitter.emitStoreAfter(*instruction.getPrimitive(), &instruction.getLinks(),
|
||||
vReg, *vReg.equivalentRegister[vrcStackSlot]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
while (!grNeedLoad.empty())
|
||||
{
|
||||
PRUint32 nl = grNeedLoad.get();
|
||||
VirtualRegister& nlReg = vRegManager.getVirtualRegister(nl);
|
||||
Instruction& lastUse = *nlReg.spillInfo.lastUsingInstruction;
|
||||
|
||||
emitter.emitLoadAfter(*lastUse.getPrimitive(), lastUse.getLinks().prev,
|
||||
nlReg.getAlias(), *nlReg.equivalentRegister[vrcStackSlot]);
|
||||
nlReg.releaseSelf();
|
||||
}
|
||||
while (!fpNeedLoad.empty())
|
||||
{
|
||||
PRUint32 nl = fpNeedLoad.get();
|
||||
VirtualRegister& nlReg = vRegManager.getVirtualRegister(nl);
|
||||
Instruction& lastUse = *nlReg.spillInfo.lastUsingInstruction;
|
||||
|
||||
emitter.emitLoadAfter(*lastUse.getPrimitive(), lastUse.getLinks().prev,
|
||||
nlReg.getAlias(), *nlReg.equivalentRegister[vrcStackSlot]);
|
||||
nlReg.releaseSelf();
|
||||
}
|
||||
|
||||
// if(node.dfsNum == 8) printf("\n________End Node %d________\n", node.dfsNum);
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
||||
269
mozilla/ef/Compiler/RegisterAllocator/Spilling.h
Normal file
269
mozilla/ef/Compiler/RegisterAllocator/Spilling.h
Normal file
@@ -0,0 +1,269 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SPILLING_H_
|
||||
#define _SPILLING_H_
|
||||
|
||||
#include "Fundamentals.h"
|
||||
#include <string.h>
|
||||
#include "RegisterAllocator.h"
|
||||
#include "RegisterAllocatorTools.h"
|
||||
#include "ControlGraph.h"
|
||||
#include "ControlNodes.h"
|
||||
#include "Instruction.h"
|
||||
#include "SparseSet.h"
|
||||
|
||||
template <class RegisterPressure>
|
||||
class Spilling
|
||||
{
|
||||
private:
|
||||
static void insertStoreAfter(Instruction& instruction, RegisterName name);
|
||||
static void insertLoadBefore(Instruction& instruction, RegisterName name);
|
||||
|
||||
public:
|
||||
static void calculateSpillCosts(RegisterAllocator& registerAllocator);
|
||||
static void insertSpillCode(RegisterAllocator& registerAllocator);
|
||||
};
|
||||
|
||||
// Spill statistics of one live range, as filled in by
// Spilling::calculateSpillCosts().
struct SpillCost
{
    double loads;       // Weighted count of the loads spilling would add.
    double stores;      // Weighted count of the stores spilling would add.
    double copies;      // Weighted count of the copies the range takes part in.
    double cost;        // 2 * (loads + stores) - copies.
    bool   infinite;    // Set when the range is flagged as one not to spill.
};
|
||||
|
||||
template <class RegisterPressure>
|
||||
void Spilling<RegisterPressure>::insertSpillCode(RegisterAllocator& registerAllocator)
|
||||
{
|
||||
Uint32 rangeCount = registerAllocator.rangeCount;
|
||||
RegisterName* name2range = registerAllocator.name2range;
|
||||
|
||||
Pool& pool = registerAllocator.pool;
|
||||
SparseSet currentLive(pool, rangeCount);
|
||||
SparseSet needLoad(pool, rangeCount);
|
||||
SparseSet mustSpill(pool, rangeCount);
|
||||
SparseSet& willSpill = *registerAllocator.willSpill;
|
||||
|
||||
ControlGraph& controlGraph = registerAllocator.controlGraph;
|
||||
RegisterPressure::Set* liveOut = registerAllocator.liveness.liveOut;
|
||||
ControlNode** nodes = controlGraph.dfsList;
|
||||
Uint32 nNodes = controlGraph.nNodes;
|
||||
|
||||
for (Uint32 n = 0; n < nNodes; n++) {
|
||||
|
||||
needLoad.clear();
|
||||
currentLive = liveOut[n];
|
||||
mustSpill = currentLive;
|
||||
|
||||
InstructionList& instructions = nodes[n]->getInstructions();
|
||||
for (InstructionList::iterator i = instructions.end(); !instructions.done(i);) {
|
||||
Instruction& instruction = instructions.get(i);
|
||||
i = instructions.retreat(i);
|
||||
|
||||
InstructionUse* useBegin = instruction.getInstructionUseBegin();
|
||||
InstructionUse* useEnd = instruction.getInstructionUseEnd();
|
||||
InstructionUse* usePtr;
|
||||
InstructionDefine* defineBegin = instruction.getInstructionDefineBegin();
|
||||
InstructionDefine* defineEnd = instruction.getInstructionDefineEnd();
|
||||
InstructionDefine* definePtr;
|
||||
|
||||
bool foundLiveDefine = false;
|
||||
for (definePtr = defineBegin; definePtr < defineEnd; definePtr++)
|
||||
if (definePtr->isRegister()) {
|
||||
if (currentLive.test(name2range[definePtr->getRegisterName()])) {
|
||||
foundLiveDefine = true;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
foundLiveDefine = true;
|
||||
break;
|
||||
}
|
||||
if (defineBegin != defineEnd && !foundLiveDefine) {
|
||||
fprintf(stderr, "!!! Removed instruction because it was only defining unused registers !!!\n");
|
||||
instruction.remove();
|
||||
}
|
||||
|
||||
for (definePtr = defineBegin; definePtr < defineEnd; definePtr++)
|
||||
if (definePtr->isRegister()) {
|
||||
RegisterName range = name2range[definePtr->getRegisterName()];
|
||||
#ifdef DEBUG
|
||||
if (needLoad.test(range))
|
||||
if (!mustSpill.test(range) && registerAllocator.spillCost[range].infinite && willSpill.test(range)) {
|
||||
fprintf(stderr, "Tried to spill a register with infinite spill cost\n");
|
||||
abort();
|
||||
}
|
||||
#endif // DEBUG
|
||||
if (willSpill.test(range))
|
||||
insertStoreAfter(instruction, range);
|
||||
|
||||
needLoad.clear(range);
|
||||
}
|
||||
|
||||
if (instruction.getFlags() & ifCopy)
|
||||
for (usePtr = useBegin; usePtr < useEnd; usePtr++)
|
||||
if (usePtr->isRegister()) {
|
||||
RegisterName range = name2range[usePtr->getRegisterName()];
|
||||
if (!currentLive.test(range))
|
||||
for (SparseSet::iterator r = needLoad.begin(); !needLoad.done(r); r = needLoad.advance(r)) {
|
||||
RegisterName load = RegisterName(needLoad.get(r));
|
||||
if (willSpill.test(load))
|
||||
insertLoadBefore(instruction, load);
|
||||
mustSpill.set(load);
|
||||
}
|
||||
needLoad.clear();
|
||||
}
|
||||
|
||||
for (definePtr = defineBegin; definePtr < defineEnd; definePtr++)
|
||||
if (definePtr->isRegister())
|
||||
currentLive.clear(name2range[definePtr->getRegisterName()]);
|
||||
|
||||
|
||||
for (usePtr = useBegin; usePtr < useEnd; usePtr++)
|
||||
if (usePtr->isRegister()) {
|
||||
RegisterName range = name2range[usePtr->getRegisterName()];
|
||||
currentLive.set(range);
|
||||
needLoad.set(range);
|
||||
}
|
||||
}
|
||||
|
||||
for (SparseSet::iterator l = needLoad.begin(); !needLoad.done(l); l = needLoad.advance(l)) {
|
||||
RegisterName load = RegisterName(needLoad.get(l));
|
||||
if (willSpill.test(load))
|
||||
insertLoadBefore(instructions.first(), load);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class RegisterPressure>
|
||||
void Spilling<RegisterPressure>::insertLoadBefore(Instruction& /*instruction*/, RegisterName name)
|
||||
{
|
||||
fprintf(stdout, "will insert load for range %d\n", name);
|
||||
}
|
||||
|
||||
template <class RegisterPressure>
|
||||
void Spilling<RegisterPressure>::insertStoreAfter(Instruction& /*instruction*/, RegisterName name)
|
||||
{
|
||||
fprintf(stdout, "will insert store for range %d\n", name);
|
||||
}
|
||||
|
||||
template <class RegisterPressure>
|
||||
void Spilling<RegisterPressure>::calculateSpillCosts(RegisterAllocator& registerAllocator)
|
||||
{
|
||||
Uint32 rangeCount = registerAllocator.rangeCount;
|
||||
RegisterName* name2range = registerAllocator.name2range;
|
||||
|
||||
Pool& pool = registerAllocator.pool;
|
||||
SparseSet live(pool, rangeCount);
|
||||
SparseSet needLoad(pool, rangeCount);
|
||||
SparseSet mustSpill(pool, rangeCount);
|
||||
|
||||
SparseSet alreadyStored(pool, rangeCount); // FIX: should get this from previous spilling.
|
||||
|
||||
SpillCost* cost = new SpillCost[rangeCount];
|
||||
memset(cost, '\0', rangeCount * sizeof(SpillCost));
|
||||
|
||||
ControlGraph& controlGraph = registerAllocator.controlGraph;
|
||||
RegisterPressure::Set* liveOut = registerAllocator.liveness.liveOut;
|
||||
ControlNode** nodes = controlGraph.dfsList;
|
||||
Uint32 nNodes = controlGraph.nNodes;
|
||||
|
||||
for (Uint32 n = 0; n < nNodes; n++) {
|
||||
ControlNode& node = *nodes[n];
|
||||
|
||||
double weight = doLog10(node.loopDepth);
|
||||
|
||||
needLoad.clear();
|
||||
live = liveOut[n];
|
||||
mustSpill = live;
|
||||
|
||||
InstructionList& instructions = nodes[n]->getInstructions();
|
||||
for (InstructionList::iterator i = instructions.end(); !instructions.done(i); i = instructions.retreat(i)) {
|
||||
Instruction& instruction = instructions.get(i);
|
||||
|
||||
InstructionUse* useBegin = instruction.getInstructionUseBegin();
|
||||
InstructionUse* useEnd = instruction.getInstructionUseEnd();
|
||||
InstructionUse* usePtr;
|
||||
InstructionDefine* defineBegin = instruction.getInstructionDefineBegin();
|
||||
InstructionDefine* defineEnd = instruction.getInstructionDefineEnd();
|
||||
InstructionDefine* definePtr;
|
||||
|
||||
for (definePtr = defineBegin; definePtr < defineEnd; definePtr++)
|
||||
if (definePtr->isRegister()) {
|
||||
RegisterName range = name2range[definePtr->getRegisterName()];
|
||||
|
||||
if (needLoad.test(range))
|
||||
if (!mustSpill.test(range))
|
||||
cost[range].infinite = true;
|
||||
|
||||
if ((false /* !rematerializable(range) */ || !needLoad.test(range)) && !alreadyStored.test(range))
|
||||
cost[range].stores += weight;
|
||||
|
||||
needLoad.clear(range);
|
||||
}
|
||||
|
||||
if (instruction.getFlags() & ifCopy)
|
||||
for (usePtr = useBegin; usePtr < useEnd; usePtr++)
|
||||
if (usePtr->isRegister())
|
||||
if (!live.test(name2range[usePtr->getRegisterName()])) {
|
||||
for (SparseSet::iterator l = needLoad.begin(); !needLoad.done(l); l = needLoad.advance(l)) {
|
||||
Uint32 range = needLoad.get(l);
|
||||
cost[range].loads += weight;
|
||||
mustSpill.set(range);
|
||||
}
|
||||
needLoad.clear();
|
||||
}
|
||||
|
||||
for (definePtr = defineBegin; definePtr < defineEnd; definePtr++)
|
||||
if (definePtr->isRegister())
|
||||
live.clear(name2range[definePtr->getRegisterName()]);
|
||||
|
||||
for (usePtr = useBegin; usePtr < useEnd; usePtr++)
|
||||
if (usePtr->isRegister()) {
|
||||
RegisterName range = name2range[usePtr->getRegisterName()];
|
||||
|
||||
live.set(range);
|
||||
needLoad.set(range);
|
||||
}
|
||||
|
||||
if (instruction.getFlags() & ifCopy) {
|
||||
assert(useBegin != useEnd && useBegin[0].isRegister());
|
||||
assert(defineBegin != defineEnd && defineBegin[0].isRegister());
|
||||
|
||||
RegisterName source = name2range[useBegin[0].getRegisterName()];
|
||||
RegisterName destination = name2range[defineBegin[0].getRegisterName()];
|
||||
|
||||
cost[source].copies += weight;
|
||||
cost[destination].copies += weight;
|
||||
}
|
||||
}
|
||||
|
||||
for (SparseSet::iterator s = needLoad.begin(); !needLoad.done(s); s = needLoad.advance(s))
|
||||
cost[needLoad.get(s)].loads += weight;
|
||||
}
|
||||
|
||||
for (Uint32 r = 0; r < rangeCount; r++) {
|
||||
SpillCost& c = cost[r];
|
||||
c.cost = 2 * (c.loads + c.stores) - c.copies;
|
||||
}
|
||||
|
||||
registerAllocator.spillCost = cost;
|
||||
}
|
||||
|
||||
#endif // _SPILLING_H_
|
||||
239
mozilla/ef/Compiler/RegisterAllocator/Splits.h
Normal file
239
mozilla/ef/Compiler/RegisterAllocator/Splits.h
Normal file
@@ -0,0 +1,239 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SPLITS_H_
|
||||
#define _SPLITS_H_
|
||||
|
||||
#include "Fundamentals.h"
|
||||
#include <string.h>
|
||||
#include "Pool.h"
|
||||
#include "ControlGraph.h"
|
||||
#include "ControlNodes.h"
|
||||
#include "Instruction.h"
|
||||
#include "RegisterAllocator.h"
|
||||
#include "RegisterAllocatorTools.h"
|
||||
|
||||
UT_EXTERN_LOG_MODULE(RegAlloc);
|
||||
|
||||
template <class RegisterPressure>
|
||||
struct Splits
|
||||
{
|
||||
static void calculateSplitCosts(RegisterAllocator& registerAllocator);
|
||||
static bool findSplit(RegisterAllocator& registerAllocator, RegisterName* color, RegisterName range);
|
||||
static void insertSplitCode(RegisterAllocator& registerAllocator);
|
||||
};
|
||||
|
||||
// Split statistics of one live range, as accumulated by
// Splits::calculateSplitCosts().
struct SplitCost
{
    double loads;   // Weighted count of the loads a split would add.
    double stores;  // Weighted count of the stores a split would add.
};
|
||||
|
||||
template <class RegisterPressure>
|
||||
void Splits<RegisterPressure>::insertSplitCode(RegisterAllocator& /*registerAllocator*/)
|
||||
{
|
||||
// FIX
|
||||
}
|
||||
|
||||
template <class RegisterPressure>
|
||||
bool Splits<RegisterPressure>::findSplit(RegisterAllocator& registerAllocator, RegisterName* color, RegisterName range)
|
||||
{
|
||||
Pool& pool = registerAllocator.pool;
|
||||
NameLinkedList** neighborsWithColor = new(pool) NameLinkedList*[6]; // FIX
|
||||
memset(neighborsWithColor, '\0', 6 * sizeof(NameLinkedList*));
|
||||
|
||||
InterferenceGraph<RegisterPressure>& iGraph = registerAllocator.iGraph;
|
||||
|
||||
for (InterferenceVector* vector = iGraph.getInterferenceVector(range); vector != NULL; vector = vector->next)
|
||||
for (Int32 i = vector->count - 1; i >=0; --i) {
|
||||
RegisterName neighbor = vector->neighbors[i];
|
||||
RegisterName c = color[neighbor];
|
||||
|
||||
if (c < 6) { // FIX
|
||||
NameLinkedList* node = new(pool) NameLinkedList();
|
||||
node->name = neighbor;
|
||||
node->next = neighborsWithColor[c];
|
||||
neighborsWithColor[c] = node;
|
||||
}
|
||||
}
|
||||
|
||||
bool splitAroundName = true;
|
||||
|
||||
LiveRangeGraph<RegisterPressure>& lGraph = registerAllocator.lGraph;
|
||||
RegisterName bestColor = RegisterName(6); // FIX
|
||||
double bestCost = registerAllocator.spillCost[range].cost;
|
||||
SplitCost* splitCost = registerAllocator.splitCost;
|
||||
|
||||
for (RegisterName i = RegisterName(0); i < 6; i = RegisterName(i + 1)) { // FIX
|
||||
|
||||
double splitAroundNameCost = 0.0;
|
||||
bool canSplitAroundName = true;
|
||||
|
||||
SplitCost& sCost = splitCost[range];
|
||||
double addedCost = 2.0 * (sCost.stores + sCost.loads);
|
||||
|
||||
for (NameLinkedList* node = neighborsWithColor[i]; node != NULL; node = node->next) {
|
||||
RegisterName neighbor = node->name;
|
||||
if (lGraph.haveEdge(neighbor, range)) {
|
||||
canSplitAroundName = false;
|
||||
break;
|
||||
} else
|
||||
splitAroundNameCost += addedCost;
|
||||
}
|
||||
if (canSplitAroundName && splitAroundNameCost < bestCost) {
|
||||
bestCost = splitAroundNameCost;
|
||||
bestColor = i;
|
||||
splitAroundName = true;
|
||||
}
|
||||
|
||||
double splitAroundColorCost = 0.0;
|
||||
bool canSplitAroundColor = true;
|
||||
|
||||
for (NameLinkedList* node = neighborsWithColor[i]; node != NULL; node = node->next) {
|
||||
RegisterName neighbor = node->name;
|
||||
if (lGraph.haveEdge(range, neighbor)) {
|
||||
canSplitAroundColor = false;
|
||||
break;
|
||||
} else {
|
||||
SplitCost& sCost = splitCost[neighbor];
|
||||
double addedCost = 2.0 * (sCost.stores + sCost.loads);
|
||||
splitAroundColorCost += addedCost;
|
||||
}
|
||||
}
|
||||
if (canSplitAroundColor && splitAroundColorCost < bestCost) {
|
||||
bestCost = splitAroundColorCost;
|
||||
bestColor = i;
|
||||
splitAroundName = false;
|
||||
}
|
||||
}
|
||||
if (bestColor < RegisterName(6)) {
|
||||
color[range] = bestColor;
|
||||
registerAllocator.splitFound = true;
|
||||
|
||||
NameLinkedList** splitAround = registerAllocator.splitAround;
|
||||
|
||||
if (splitAroundName)
|
||||
for (NameLinkedList* node = neighborsWithColor[bestColor]; node != NULL; node = node->next) {
|
||||
NameLinkedList* newNode = new(pool) NameLinkedList();
|
||||
newNode->name = node->name;
|
||||
newNode->next = splitAround[range];
|
||||
splitAround[range] = newNode;
|
||||
}
|
||||
else
|
||||
for (NameLinkedList* node = neighborsWithColor[bestColor]; node != NULL; node = node->next) {
|
||||
NameLinkedList* newNode = new(pool) NameLinkedList();
|
||||
RegisterName neighbor = node->name;
|
||||
newNode->name = range;
|
||||
newNode->next = splitAround[neighbor];
|
||||
splitAround[neighbor] = newNode;
|
||||
}
|
||||
|
||||
trespass("Found a split");
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
template <class RegisterPressure>
|
||||
void Splits<RegisterPressure>::calculateSplitCosts(RegisterAllocator& registerAllocator)
|
||||
{
|
||||
Pool& pool = registerAllocator.pool;
|
||||
Uint32 rangeCount = registerAllocator.rangeCount;
|
||||
RegisterName* name2range = registerAllocator.name2range;
|
||||
|
||||
SplitCost* splitCost = new(pool) SplitCost[rangeCount];
|
||||
memset(splitCost, '\0', rangeCount * sizeof(SplitCost));
|
||||
|
||||
SparseSet live(pool, rangeCount);
|
||||
RegisterPressure::Set* liveIn = registerAllocator.liveness.liveIn;
|
||||
RegisterPressure::Set* liveOut = registerAllocator.liveness.liveOut;
|
||||
|
||||
ControlGraph& controlGraph = registerAllocator.controlGraph;
|
||||
ControlNode** nodes = controlGraph.dfsList;
|
||||
Uint32 nNodes = controlGraph.nNodes;
|
||||
|
||||
for (Uint32 n = 0; n < nNodes; n++) {
|
||||
ControlNode& node = *nodes[n];
|
||||
double weight = doLog10(node.loopDepth);
|
||||
|
||||
live = liveOut[n];
|
||||
|
||||
ControlEdge* successorsEnd = node.getSuccessorsEnd();
|
||||
for (ControlEdge* successorsPtr = node.getSuccessorsBegin(); successorsPtr < successorsEnd; successorsPtr++) {
|
||||
ControlNode& successor = successorsPtr->getTarget();
|
||||
|
||||
if (successor.getControlKind() != ckEnd) {
|
||||
RegisterPressure::Set& successorLiveIn = liveIn[successor.dfsNum];
|
||||
|
||||
for (SparseSet::iterator i = live.begin(); !live.done(i); i = live.advance(i)) {
|
||||
RegisterName name = RegisterName(live.get(i));
|
||||
if (!successorLiveIn.test(name))
|
||||
splitCost[name].loads += doLog10(successor.loopDepth);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
InstructionList& instructions = node.getInstructions();
|
||||
for (InstructionList::iterator i = instructions.end(); !instructions.done(i); i = instructions.retreat(i)) {
|
||||
Instruction& instruction = instructions.get(i);
|
||||
|
||||
InstructionUse* useBegin = instruction.getInstructionUseBegin();
|
||||
InstructionUse* useEnd = instruction.getInstructionUseEnd();
|
||||
InstructionUse* usePtr;
|
||||
InstructionDefine* defineBegin = instruction.getInstructionDefineBegin();
|
||||
InstructionDefine* defineEnd = instruction.getInstructionDefineEnd();
|
||||
InstructionDefine* definePtr;
|
||||
|
||||
for (definePtr = defineBegin; definePtr < defineEnd; definePtr++)
|
||||
if (definePtr->isRegister())
|
||||
splitCost[name2range[definePtr->getRegisterName()]].stores += weight;
|
||||
|
||||
for (usePtr = useBegin; usePtr < useEnd; usePtr++)
|
||||
if (usePtr->isRegister()) {
|
||||
RegisterName range = name2range[usePtr->getRegisterName()];
|
||||
if (!live.test(range)) {
|
||||
if (&instruction != &instructions.last())
|
||||
splitCost[range].loads += weight;
|
||||
else {
|
||||
ControlEdge* successorsEnd = node.getSuccessorsEnd();
|
||||
for (ControlEdge* successorsPtr = node.getSuccessorsBegin(); successorsPtr < successorsEnd; successorsPtr++)
|
||||
splitCost[range].loads += doLog10(successorsPtr->getTarget().loopDepth);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (definePtr = defineBegin; definePtr < defineEnd; definePtr++)
|
||||
if (definePtr->isRegister())
|
||||
live.clear(name2range[definePtr->getRegisterName()]);
|
||||
|
||||
for (usePtr = useBegin; usePtr < useEnd; usePtr++)
|
||||
if (usePtr->isRegister())
|
||||
live.set(name2range[usePtr->getRegisterName()]);
|
||||
}
|
||||
}
|
||||
|
||||
NameLinkedList** splitAround = new(pool) NameLinkedList*[rangeCount];
|
||||
memset(splitAround, '\0', rangeCount * sizeof(NameLinkedList*));
|
||||
registerAllocator.splitAround = splitAround;
|
||||
|
||||
registerAllocator.splitCost = splitCost;
|
||||
registerAllocator.splitFound = false;
|
||||
}
|
||||
|
||||
#endif // _SPLITS_H_
|
||||
186
mozilla/ef/Compiler/RegisterAllocator/Timer.cpp
Normal file
186
mozilla/ef/Compiler/RegisterAllocator/Timer.cpp
Normal file
@@ -0,0 +1,186 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#include "Fundamentals.h"
|
||||
#include "HashTable.h"
|
||||
#include "Timer.h"
|
||||
#include "Pool.h"
|
||||
|
||||
static Pool pool; // Pool for the Timer class.
|
||||
static HashTable<TimerEntry*> timerEntries(pool); // Timers hashtable.
|
||||
|
||||
const nTimersInABlock = 128; // Number of timers in a block.
|
||||
static PRTime *timers = new(pool) PRTime[nTimersInABlock]; // A block of timers.
|
||||
static Uint8 nextTimer = 0; // nextAvailableTimer.
|
||||
|
||||
//
|
||||
// Calibrate the call to PR_Now().
|
||||
//
|
||||
static PRTime calibrate()
|
||||
{
|
||||
PRTime t = PR_Now();
|
||||
PRTime& a = *new(pool) PRTime();
|
||||
|
||||
// Call 10 times the PR_Now() function.
|
||||
a = PR_Now(); a = PR_Now(); a = PR_Now(); a = PR_Now(); a = PR_Now(); a = PR_Now();
|
||||
a = PR_Now(); a = PR_Now(); a = PR_Now(); a = PR_Now(); a = PR_Now(); a = PR_Now();
|
||||
t = (PR_Now() - t + 9) / 10;
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
static PRTime adjust = calibrate();
|
||||
|
||||
//
|
||||
// Return the named timer..
|
||||
//
|
||||
TimerEntry& Timer::getTimerEntry(const char* name)
|
||||
{
|
||||
if (!timerEntries.exists(name)) {
|
||||
TimerEntry* newEntry = new(pool) TimerEntry();
|
||||
newEntry->accumulator = 0;
|
||||
newEntry->running = false;
|
||||
timerEntries.add(name, newEntry);
|
||||
}
|
||||
|
||||
return *timerEntries[name];
|
||||
}
|
||||
|
||||
//
|
||||
// Return a reference to a new timer.
|
||||
//
|
||||
PRTime& Timer::getNewTimer()
|
||||
{
|
||||
if (nextTimer >= nTimersInABlock) {
|
||||
timers = new(pool) PRTime[nTimersInABlock];
|
||||
nextTimer = 0;
|
||||
}
|
||||
return timers[nextTimer++];
|
||||
}
|
||||
|
||||
static Uint32 timersAreFrozen = 0;
|
||||
|
||||
//
|
||||
// Start the named timer.
|
||||
//
|
||||
void Timer::start(const char* name)
|
||||
{
|
||||
if (timersAreFrozen)
|
||||
return;
|
||||
|
||||
freezeTimers();
|
||||
|
||||
TimerEntry& timer = getTimerEntry(name);
|
||||
PR_ASSERT(!timer.running);
|
||||
|
||||
timer.accumulator = 0;
|
||||
timer.running = true;
|
||||
timer.done = false;
|
||||
|
||||
unfreezeTimers();
|
||||
}
|
||||
|
||||
//
|
||||
// Stop the named timer.
|
||||
//
|
||||
void Timer::stop(const char* name)
|
||||
{
|
||||
if (timersAreFrozen)
|
||||
return;
|
||||
|
||||
freezeTimers();
|
||||
|
||||
TimerEntry& timer = getTimerEntry(name);
|
||||
PR_ASSERT(timer.running);
|
||||
timer.running = false;
|
||||
timer.done = true;
|
||||
|
||||
unfreezeTimers();
|
||||
}
|
||||
|
||||
//
|
||||
// Freeze all the running timers.
|
||||
//
|
||||
void Timer::freezeTimers()
|
||||
{
|
||||
PRTime when = PR_Now() - adjust;
|
||||
|
||||
if (timersAreFrozen == 0) {
|
||||
Vector<TimerEntry*> entries = timerEntries;
|
||||
Uint32 count = entries.size();
|
||||
|
||||
for (Uint32 i = 0; i < count; i++) {
|
||||
TimerEntry& entry = *entries[i];
|
||||
if (entry.running) {
|
||||
entry.accumulator += (when - *entry.startTime);
|
||||
}
|
||||
}
|
||||
}
|
||||
timersAreFrozen++;
|
||||
}
|
||||
|
||||
//
|
||||
// Unfreeze all the running timers.
|
||||
//
|
||||
void Timer::unfreezeTimers()
|
||||
{
|
||||
PR_ASSERT(timersAreFrozen != 0);
|
||||
timersAreFrozen--;
|
||||
|
||||
if (timersAreFrozen == 0) {
|
||||
Vector<TimerEntry *> entries = timerEntries;
|
||||
Uint32 count = entries.size();
|
||||
|
||||
PRTime& newStart = getNewTimer();
|
||||
|
||||
for (Uint32 i = 0; i < count; i++) {
|
||||
TimerEntry& entry = *entries[i];
|
||||
if (entry.running) {
|
||||
entry.startTime = &newStart;
|
||||
}
|
||||
}
|
||||
|
||||
newStart = PR_Now();
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Print the named timer in the file f.
|
||||
//
|
||||
void Timer::print(FILE* f, const char *name)
|
||||
{
|
||||
if (timersAreFrozen)
|
||||
return;
|
||||
|
||||
freezeTimers();
|
||||
|
||||
TimerEntry& timer = getTimerEntry(name);
|
||||
|
||||
PR_ASSERT(timer.done);
|
||||
PRTime elapsed = timer.accumulator;
|
||||
|
||||
if (elapsed >> 32) {
|
||||
fprintf(f, "[timer %s out of range]\n", name);
|
||||
} else {
|
||||
fprintf(f, "[%dus in %s]\n", Uint32(elapsed), name);
|
||||
}
|
||||
fflush(f);
|
||||
|
||||
unfreezeTimers();
|
||||
}
|
||||
|
||||
80
mozilla/ef/Compiler/RegisterAllocator/Timer.h
Normal file
80
mozilla/ef/Compiler/RegisterAllocator/Timer.h
Normal file
@@ -0,0 +1,80 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#ifndef _TIMER_H_
|
||||
#define _TIMER_H_
|
||||
|
||||
#include "Fundamentals.h"
|
||||
#include "HashTable.h"
|
||||
#include "prtime.h"
|
||||
|
||||
//
|
||||
// Naming convention:
|
||||
// As the class Timer contains only static methods, the timer's name should start with the
|
||||
// module name. Otherwise starting 2 timers with the same name will assert.
|
||||
//
|
||||
|
||||
#ifndef NO_TIMER
|
||||
|
||||
// Bookkeeping record of one named timer.
struct TimerEntry
|
||||
{
|
||||
PRTime *startTime; // Points at the time stamp the elapsed time is measured from.
|
||||
PRTime accumulator; // Time accumulated by this timer so far.
|
||||
bool running; // True while the timer is running.
|
||||
bool done; // True once a running timer has been stopped.
|
||||
};
|
||||
|
||||
class Timer
|
||||
{
|
||||
private:
|
||||
|
||||
// Return the named timer.
|
||||
static TimerEntry& getTimerEntry(const char* name);
|
||||
// Return a reference to a new Timer.
|
||||
static PRTime& getNewTimer();
|
||||
|
||||
public:
|
||||
|
||||
// Start the timer.
|
||||
static void start(const char* name);
|
||||
// Stop the timer.
|
||||
static void stop(const char* name);
|
||||
// Freeze all the running timers.
|
||||
static void freezeTimers();
|
||||
// Unfreeze all the running timers.
|
||||
static void unfreezeTimers();
|
||||
// Print the timer.
|
||||
static void print(FILE* f, const char *name);
|
||||
};
|
||||
|
||||
// Start the named timer.
inline void startTimer(const char* name)
{
    Timer::start(name);
}

// Stop the named timer and print its result on stdout.
inline void stopTimer(const char* name)
{
    Timer::stop(name);
    Timer::print(stdout, name);
}

// Freeze the timers around a region (or a single statement x).
// Both bracket macros already end in a semicolon.
#define START_TIMER_SAFE Timer::freezeTimers();
#define END_TIMER_SAFE Timer::unfreezeTimers();
#define TIMER_SAFE(x) START_TIMER_SAFE x; END_TIMER_SAFE
|
||||
|
||||
#else /* NO_TIMER */
|
||||
|
||||
// Timers compiled out: starting and stopping do nothing.
inline void startTimer(const char*)
{
}

inline void stopTimer(const char*)
{
}

// The bracket macros expand to nothing; TIMER_SAFE just runs x.
#define START_TIMER_SAFE
#define END_TIMER_SAFE
#define TIMER_SAFE(x) x;
|
||||
|
||||
#endif /* NO_TIMER */
|
||||
#endif /* _TIMER_H_ */
|
||||
40
mozilla/ef/Compiler/RegisterAllocator/VirtualRegister.cpp
Normal file
40
mozilla/ef/Compiler/RegisterAllocator/VirtualRegister.cpp
Normal file
@@ -0,0 +1,40 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#include "Fundamentals.h"
|
||||
#include "VirtualRegister.h"
|
||||
#include "Instruction.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// VirtualRegister -
|
||||
|
||||
#ifdef MANUAL_TEMPLATES
|
||||
template class IndexedPool<VirtualRegister>;
|
||||
#endif
|
||||
|
||||
// Set the defining instruction.
|
||||
//
|
||||
void VirtualRegister::setDefiningInstruction(Instruction& instruction)
|
||||
{
|
||||
if (definingInstruction != NULL) {
|
||||
if ((instruction.getFlags() & ifCopy) && (definingInstruction->getFlags() & ifPhiNode))
|
||||
return;
|
||||
}
|
||||
definingInstruction = &instruction;
|
||||
}
|
||||
|
||||
116
mozilla/ef/Compiler/RegisterAllocator/VirtualRegister.h
Normal file
116
mozilla/ef/Compiler/RegisterAllocator/VirtualRegister.h
Normal file
@@ -0,0 +1,116 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#ifndef _VIRTUAL_REGISTER_H_
|
||||
#define _VIRTUAL_REGISTER_H_
|
||||
|
||||
#include "Fundamentals.h"
|
||||
#include "IndexedPool.h"
|
||||
#include <string.h>
|
||||
|
||||
#include "RegisterTypes.h"
|
||||
#include "RegisterClass.h"
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// VirtualRegister - 24b
|
||||
|
||||
class Instruction;
|
||||
|
||||
class VirtualRegister : public IndexedObject<VirtualRegister>
{
public:

    Instruction* definingInstruction;   // Instruction that defines this register; NULL until one is set.

    // Build a register with no defining instruction. The class kind is
    // accepted but not stored.
    VirtualRegister(RegisterClassKind) : definingInstruction(NULL) {}

    // Instruction currently recorded as defining this register.
    Instruction* getDefiningInstruction() const
    {
        return definingInstruction;
    }

    // Record the defining instruction (implemented in VirtualRegister.cpp).
    void setDefiningInstruction(Instruction& instruction);
};
|
||||
|
||||
// Return true if the VirtualRegisters are equals. The only way 2 VRs can be equal is if
|
||||
// they have the same index. If they have the same index then they are at the same
|
||||
// address in the indexed pool.
|
||||
//
|
||||
inline bool operator == (const VirtualRegister& regA, const VirtualRegister& regB) {return ®A == ®B;}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// VirtualRegisterManager -
|
||||
|
||||
// Entry of the machine register table kept by VirtualRegisterManager.
struct PreColoredRegister
|
||||
{
|
||||
RegisterID id; // Register ID created for the machine register; invalidID until one is created.
|
||||
RegisterName color; // Machine register name this entry was created for.
|
||||
};
|
||||
|
||||
class VirtualRegisterManager
|
||||
{
|
||||
private:
|
||||
|
||||
IndexedPool<VirtualRegister> registerPool;
|
||||
PreColoredRegister machineRegister[6];
|
||||
|
||||
public:
|
||||
VirtualRegisterManager()
|
||||
{
|
||||
for (Uint32 i = 0; i < 6; i++)
|
||||
machineRegister[i].id = invalidID;
|
||||
}
|
||||
|
||||
// Return the VirtualRegister at the given index.
|
||||
VirtualRegister& getVirtualRegister(RegisterName name) const {return registerPool.get(name);}
|
||||
|
||||
// Return a new VirtualRegister.
|
||||
RegisterID newVirtualRegister(RegisterClassKind classKind)
|
||||
{
|
||||
VirtualRegister& vReg = *new(registerPool) VirtualRegister(classKind);
|
||||
RegisterID rid;
|
||||
|
||||
setName(rid, RegisterName(vReg.getIndex()));
|
||||
setClass(rid, classKind);
|
||||
return rid;
|
||||
}
|
||||
|
||||
RegisterID newMachineRegister(RegisterName name, RegisterClassKind classKind)
|
||||
{
|
||||
RegisterID rid = machineRegister[name].id;
|
||||
|
||||
if (rid == invalidID) {
|
||||
rid = newVirtualRegister(classKind);
|
||||
DEBUG_ONLY(setMachineRegister(rid));
|
||||
machineRegister[name].id = rid;
|
||||
machineRegister[name].color = name;
|
||||
}
|
||||
|
||||
return rid;
|
||||
}
|
||||
|
||||
PreColoredRegister* getMachineRegistersBegin() const {return (PreColoredRegister*) machineRegister;} // FIX
|
||||
PreColoredRegister* getMachineRegistersEnd() const {return (PreColoredRegister*) &machineRegister[6];} // FIX
|
||||
|
||||
// Return the VirtualRegister universe size.
|
||||
Uint32 getSize() {return registerPool.getSize();}
|
||||
|
||||
void setSize(Uint32 size) {registerPool.setSize(size);}
|
||||
};
|
||||
|
||||
#endif // _VIRTUAL_REGISTER_H_
|
||||
@@ -1,152 +0,0 @@
|
||||
#
|
||||
# ***** BEGIN LICENSE BLOCK *****
|
||||
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
#
|
||||
# The contents of this file are subject to the Mozilla Public License Version
|
||||
# 1.1 (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
# http://www.mozilla.org/MPL/
|
||||
#
|
||||
# Software distributed under the License is distributed on an "AS IS" basis,
|
||||
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
# for the specific language governing rights and limitations under the
|
||||
# License.
|
||||
#
|
||||
# The Original Code is the Netscape security libraries.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1994-2000
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Dr Vipul Gupta <vipul.gupta@sun.com> and
|
||||
# Douglas Stebila <douglas@stebila.ca>, Sun Microsystems
|
||||
# Laboratories
|
||||
#
|
||||
# Alternatively, the contents of this file may be used under the terms of
|
||||
# either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
# in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
# of those above. If you wish to allow use of your version of this file only
|
||||
# under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
# use your version of this file under the terms of the MPL, indicate your
|
||||
# decision by deleting the provisions above and replace them with the notice
|
||||
# and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
# the provisions above, a recipient may use your version of this file under
|
||||
# the terms of any one of the MPL, the GPL or the LGPL.
|
||||
#
|
||||
# ***** END LICENSE BLOCK *****
|
||||
CORE_DEPTH = ../../..
|
||||
|
||||
MODULE = nss
|
||||
|
||||
ifndef FREEBL_RECURSIVE_BUILD
|
||||
LIBRARY_NAME = freebl
|
||||
else
|
||||
ifdef USE_PURE_32
|
||||
CORE_DEPTH = ../../../..
|
||||
LIBRARY_NAME = freebl_pure32
|
||||
else
|
||||
LIBRARY_NAME = freebl_hybrid
|
||||
endif
|
||||
endif
|
||||
|
||||
# same version as rest of freebl
|
||||
LIBRARY_VERSION = _3
|
||||
|
||||
DEFINES += -DSHLIB_SUFFIX=\"$(DLL_SUFFIX)\" -DSHLIB_PREFIX=\"$(DLL_PREFIX)\"
|
||||
|
||||
REQUIRES =
|
||||
|
||||
EXPORTS = \
|
||||
blapit.h \
|
||||
shsign.h \
|
||||
ecl-exp.h \
|
||||
$(NULL)
|
||||
|
||||
PRIVATE_EXPORTS = \
|
||||
blapi.h \
|
||||
secmpi.h \
|
||||
secrng.h \
|
||||
ec.h \
|
||||
ecl.h \
|
||||
ecl-curve.h \
|
||||
$(NULL)
|
||||
|
||||
MPI_HDRS = mpi-config.h mpi.h mpi-priv.h mplogic.h mpprime.h logtab.h mp_gf2m.h
|
||||
MPI_SRCS = mpprime.c mpmontg.c mplogic.c mpi.c mp_gf2m.c mpcpucache.c
|
||||
ECL_HDRS = ecl-exp.h ecl.h ec2.h ecp.h ecl-priv.h
|
||||
ifdef NSS_ENABLE_ECC
|
||||
ECL_SRCS = ecl.c ecl_curve.c ecl_mult.c ecl_gf.c \
|
||||
ec2_aff.c ec2_mont.c ec2_proj.c \
|
||||
ec2_163.c ec2_193.c ec2_233.c \
|
||||
ecp_aff.c ecp_jac.c ecp_mont.c \
|
||||
ecp_192.c ecp_224.c \
|
||||
ec_naf.c ecp_jm.c
|
||||
else
|
||||
ECL_SRCS = $(NULL)
|
||||
endif
|
||||
|
||||
CSRCS = \
|
||||
ldvector.c \
|
||||
prng_fips1861.c \
|
||||
sysrand.c \
|
||||
sha_fast.c \
|
||||
md2.c \
|
||||
md5.c \
|
||||
sha512.c \
|
||||
alg2268.c \
|
||||
arcfour.c \
|
||||
arcfive.c \
|
||||
desblapi.c \
|
||||
des.c \
|
||||
rijndael.c \
|
||||
aeskeywrap.c \
|
||||
dh.c \
|
||||
ec.c \
|
||||
pqg.c \
|
||||
dsa.c \
|
||||
rsa.c \
|
||||
shvfy.c \
|
||||
$(MPI_SRCS) \
|
||||
$(ECL_SRCS) \
|
||||
$(NULL)
|
||||
|
||||
ALL_CSRCS := $(CSRCS)
|
||||
|
||||
ALL_HDRS = \
|
||||
blapi.h \
|
||||
blapit.h \
|
||||
des.h \
|
||||
ec.h \
|
||||
loader.h \
|
||||
rijndael.h \
|
||||
secmpi.h \
|
||||
sha.h \
|
||||
sha_fast.h \
|
||||
shsign.h \
|
||||
vis_proto.h \
|
||||
$(NULL)
|
||||
|
||||
ifdef NSS_ENABLE_ECC
|
||||
DEFINES += -DNSS_ENABLE_ECC
|
||||
endif
|
||||
|
||||
ifdef AES_GEN_TBL
|
||||
DEFINES += -DRIJNDAEL_GENERATE_TABLES
|
||||
else
|
||||
ifdef AES_GEN_TBL_M
|
||||
DEFINES += -DRIJNDAEL_GENERATE_TABLES_MACRO
|
||||
else
|
||||
ifdef AES_GEN_VAL
|
||||
DEFINES += -DRIJNDAEL_GENERATE_VALUES
|
||||
else
|
||||
ifdef AES_GEN_VAL_M
|
||||
DEFINES += -DRIJNDAEL_GENERATE_VALUES_MACRO
|
||||
else
|
||||
DEFINES += -DRIJNDAEL_INCLUDE_TABLES
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
@@ -1,278 +0,0 @@
|
||||
#
|
||||
# Makefile for MPI library
|
||||
|
||||
# ***** BEGIN LICENSE BLOCK *****
|
||||
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
#
|
||||
# The contents of this file are subject to the Mozilla Public License Version
|
||||
# 1.1 (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
# http://www.mozilla.org/MPL/
|
||||
#
|
||||
# Software distributed under the License is distributed on an "AS IS" basis,
|
||||
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
# for the specific language governing rights and limitations under the
|
||||
# License.
|
||||
#
|
||||
# The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Michael J. Fromberger <sting@linguist.dartmouth.edu>.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Netscape Communications Corporation
|
||||
# Richard C. Swift (swift@netscape.com)
|
||||
# Douglas Stebila <douglas@stebila.ca>, Sun Microsystems Laboratories
|
||||
#
|
||||
# Alternatively, the contents of this file may be used under the terms of
|
||||
# either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
# in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
# of those above. If you wish to allow use of your version of this file only
|
||||
# under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
# use your version of this file under the terms of the MPL, indicate your
|
||||
# decision by deleting the provisions above and replace them with the notice
|
||||
# and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
# the provisions above, a recipient may use your version of this file under
|
||||
# the terms of any one of the MPL, the GPL or the LGPL.
|
||||
#
|
||||
# ***** END LICENSE BLOCK *****
|
||||
|
||||
#
|
||||
# $Id: Makefile,v 1.21 2005-02-02 22:28:22 gerv%gerv.net Exp $
|
||||
#
|
||||
|
||||
## Define CC to be the C compiler you wish to use. The GNU cc
|
||||
## compiler (gcc) should work, at the very least
|
||||
#CC=cc
|
||||
#CC=gcc
|
||||
|
||||
##
|
||||
## Define PERL to point to your local Perl interpreter. It
|
||||
## should be Perl 5.x, although it's conceivable that Perl 4
|
||||
## might work ... I haven't tested it.
|
||||
##
|
||||
#PERL=/usr/bin/perl
|
||||
PERL=perl
|
||||
|
||||
include target.mk
|
||||
|
||||
CFLAGS+= $(XCFLAGS)
|
||||
|
||||
##
|
||||
## Define LIBS to include any libraries you need to link against.
|
||||
## If NO_TABLE is defined, LIBS should include '-lm' or whatever is
|
||||
## necessary to bring in the math library. Otherwise, it can be
|
||||
## left alone, unless your system has other peculiar requirements.
|
||||
##
|
||||
LIBS=#-lmalloc#-lefence#-lm
|
||||
|
||||
##
|
||||
## Define RANLIB to be the library header randomizer; you might not
|
||||
## need this on some systems (just set it to 'echo' on these systems,
|
||||
## such as IRIX)
|
||||
##
|
||||
RANLIB=echo
|
||||
|
||||
##
|
||||
## This is the version string used for the documentation and
|
||||
## building the distribution tarball. Don't mess with it unless
|
||||
## you are releasing a new version
|
||||
VERS=1.7p6
|
||||
|
||||
## ----------------------------------------------------------------------
|
||||
## You probably don't need to change anything below this line...
|
||||
##
|
||||
|
||||
##
|
||||
## This is the list of source files that need to be packed into
|
||||
## the distribution file
|
||||
SRCS= mpi.c mpprime.c mplogic.c mp_gf2m.c mpmontg.c mpi-test.c primes.c tests/ \
|
||||
utils/gcd.c utils/invmod.c utils/lap.c \
|
||||
utils/ptab.pl utils/sieve.c utils/isprime.c\
|
||||
utils/dec2hex.c utils/hex2dec.c utils/bbs_rand.c \
|
||||
utils/bbsrand.c utils/prng.c utils/primegen.c \
|
||||
utils/basecvt.c utils/makeprime.c\
|
||||
utils/fact.c utils/exptmod.c utils/pi.c utils/metime.c \
|
||||
utils/mpi.h utils/mpprime.h mulsqr.c \
|
||||
make-test-arrays test-arrays.txt all-tests make-logtab \
|
||||
types.pl stats timetest multest
|
||||
|
||||
## These are the header files that go into the distribution file
|
||||
HDRS=mpi.h mpi-config.h utils/mpi.h utils/mpi-config.h mpprime.h mplogic.h mp_gf2m.h \
|
||||
mp_gf2m-priv.h utils/bbs_rand.h tests/mpi.h tests/mpprime.h
|
||||
|
||||
## These are the documentation files that go into the distribution file
|
||||
DOCS=README doc utils/README utils/PRIMES
|
||||
|
||||
## This is the list of tools built by 'make tools'
|
||||
TOOLS=gcd invmod isprime lap dec2hex hex2dec primegen prng \
|
||||
basecvt fact exptmod pi makeprime identest
|
||||
|
||||
LIBOBJS = mpprime.o mpmontg.o mplogic.o mp_gf2m.o mpi.o $(AS_OBJS)
|
||||
LIBHDRS = mpi-config.h mpi-priv.h mpi.h
|
||||
APPHDRS = mpi-config.h mpi.h mplogic.h mp_gf2m.h mpprime.h
|
||||
|
||||
help:
|
||||
@ echo ""
|
||||
@ echo "The following targets can be built with this Makefile:"
|
||||
@ echo ""
|
||||
@ echo "libmpi.a - arithmetic and prime testing library"
|
||||
@ echo "mpi-test - test driver (requires MP_IOFUNC)"
|
||||
@ echo "tools - command line tools"
|
||||
@ echo "doc - manual pages for tools"
|
||||
@ echo "clean - clean up objects and such"
|
||||
@ echo "distclean - get ready for distribution"
|
||||
@ echo "dist - distribution tarball"
|
||||
@ echo ""
|
||||
|
||||
.SUFFIXES: .c .o .i
|
||||
|
||||
.c.i:
|
||||
$(CC) $(CFLAGS) -E $< > $@
|
||||
|
||||
#.c.o: $*.h $*.c
|
||||
# $(CC) $(CFLAGS) -c $<
|
||||
|
||||
#---------------------------------------
|
||||
|
||||
$(LIBOBJS): $(LIBHDRS)
|
||||
|
||||
logtab.h: make-logtab
|
||||
$(PERL) make-logtab > logtab.h
|
||||
|
||||
mpi.o: mpi.c logtab.h $(LIBHDRS)
|
||||
|
||||
mplogic.o: mplogic.c mpi-priv.h mplogic.h $(LIBHDRS)
|
||||
|
||||
mp_gf2m.o: mp_gf2m.c mpi-priv.h mp_gf2m.h mp_gf2m-priv.h $(LIBHDRS)
|
||||
|
||||
mpmontg.o: mpmontg.c mpi-priv.h mplogic.h mpprime.h $(LIBHDRS)
|
||||
|
||||
mpprime.o: mpprime.c mpi-priv.h mpprime.h mplogic.h primes.c $(LIBHDRS)
|
||||
|
||||
mpi_mips.o: mpi_mips.s
|
||||
$(CC) -o $@ $(ASFLAGS) -c mpi_mips.s
|
||||
|
||||
mpi_sparc.o : montmulf.h
|
||||
|
||||
mpv_sparcv9.s: vis_64.il mpv_sparc.c
|
||||
$(CC) -o $@ $(SOLARIS_FPU_FLAGS) -S vis_64.il mpv_sparc.c
|
||||
|
||||
# The v8 build compiles against vis_32.il, so that is the file the target
# must depend on (the rule previously listed vis_64.il).
mpv_sparcv8.s: vis_32.il mpv_sparc.c
	$(CC) -o $@ $(SOLARIS_FPU_FLAGS) -S vis_32.il mpv_sparc.c
|
||||
|
||||
montmulfv8.o montmulfv9.o mpv_sparcv8.o mpv_sparcv9.o : %.o : %.s
|
||||
$(CC) -o $@ $(SOLARIS_ASM_FLAGS) -c $<
|
||||
|
||||
# This rule is used to build the .s sources, which are then hand optimized.
|
||||
#montmulfv8.s montmulfv9.s : montmulf%.s : montmulf%.il montmulf.c montmulf.h
|
||||
# $(CC) -o $@ $(SOLARIS_ASM_FLAGS) -S montmulf$*.il montmulf.c
|
||||
|
||||
|
||||
libmpi.a: $(LIBOBJS)
|
||||
ar -cvr libmpi.a $(LIBOBJS)
|
||||
$(RANLIB) libmpi.a
|
||||
|
||||
lib libs: libmpi.a
|
||||
|
||||
mpi.i: mpi.h
|
||||
|
||||
#---------------------------------------
|
||||
|
||||
MPTESTOBJS = mptest1.o mptest2.o mptest3.o mptest3a.o mptest4.o mptest4a.o \
|
||||
mptest4b.o mptest6.o mptest7.o mptest8.o mptest9.o mptestb.o
|
||||
MPTESTS = $(MPTESTOBJS:.o=)
|
||||
|
||||
$(MPTESTOBJS): mptest%.o: tests/mptest-%.c $(LIBHDRS)
|
||||
$(CC) $(CFLAGS) -o $@ -c $<
|
||||
|
||||
$(MPTESTS): mptest%: mptest%.o libmpi.a
|
||||
$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
|
||||
|
||||
tests: mptest1 mptest2 mptest3 mptest3a mptest4 mptest4a mptest4b mptest6 \
|
||||
mptestb bbsrand
|
||||
|
||||
utests: mptest7 mptest8 mptest9
|
||||
|
||||
#---------------------------------------
|
||||
|
||||
EXTRAOBJS = bbsrand.o bbs_rand.o prng.o
|
||||
UTILOBJS = primegen.o metime.o identest.o basecvt.o fact.o exptmod.o pi.o \
|
||||
makeprime.o gcd.o invmod.o lap.o isprime.o \
|
||||
dec2hex.o hex2dec.o
|
||||
UTILS = $(UTILOBJS:.o=)
|
||||
|
||||
$(UTILS): % : %.o libmpi.a
|
||||
$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
|
||||
|
||||
$(UTILOBJS) $(EXTRAOBJS): %.o : utils/%.c $(LIBHDRS)
|
||||
$(CC) $(CFLAGS) -o $@ -c $<
|
||||
|
||||
prng: prng.o bbs_rand.o libmpi.a
|
||||
$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
|
||||
|
||||
bbsrand: bbsrand.o bbs_rand.o libmpi.a
|
||||
$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
|
||||
|
||||
utils: $(UTILS) prng bbsrand
|
||||
|
||||
#---------------------------------------
|
||||
|
||||
test-info.c: test-arrays.txt
|
||||
$(PERL) make-test-arrays test-arrays.txt > test-info.c
|
||||
|
||||
mpi-test.o: mpi-test.c test-info.c $(LIBHDRS)
|
||||
$(CC) $(CFLAGS) -o $@ -c $<
|
||||
|
||||
mpi-test: mpi-test.o libmpi.a
|
||||
$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
|
||||
|
||||
mdxptest.o: mdxptest.c $(LIBHDRS) mpi-priv.h
|
||||
|
||||
mdxptest: mdxptest.o libmpi.a
|
||||
$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
|
||||
|
||||
mulsqr.o: mulsqr.c logtab.h mpi.h mpi-config.h mpprime.h
|
||||
$(CC) $(CFLAGS) -DMP_SQUARE=1 -o $@ -c mulsqr.c
|
||||
|
||||
mulsqr: mulsqr.o libmpi.a
|
||||
$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
|
||||
|
||||
#---------------------------------------
|
||||
|
||||
alltests: tests utests mpi-test
|
||||
|
||||
tools: $(TOOLS)
|
||||
|
||||
doc:
|
||||
(cd doc; ./build)
|
||||
|
||||
clean:
|
||||
rm -f *.o *.a *.i
|
||||
rm -f core
|
||||
rm -f *~ .*~
|
||||
rm -f utils/*.o
|
||||
rm -f utils/core
|
||||
rm -f utils/*~ utils/.*~
|
||||
|
||||
clobber: clean
|
||||
rm -f $(TOOLS) $(UTILS)
|
||||
|
||||
distclean: clean
|
||||
rm -f mptest? mpi-test metime mulsqr karatsuba
|
||||
rm -f mptest?a mptest?b
|
||||
rm -f utils/mptest?
|
||||
rm -f test-info.c logtab.h
|
||||
rm -f libmpi.a
|
||||
rm -f $(TOOLS)
|
||||
|
||||
dist: Makefile $(HDRS) $(SRCS) $(DOCS)
|
||||
tar -cvf mpi-$(VERS).tar Makefile $(HDRS) $(SRCS) $(DOCS)
|
||||
pgps -ab mpi-$(VERS).tar
|
||||
chmod +r mpi-$(VERS).tar.asc
|
||||
gzip -9 mpi-$(VERS).tar
|
||||
|
||||
# END
|
||||
@@ -1,280 +0,0 @@
|
||||
#
|
||||
# Makefile.os2 - gmake Makefile for building MPI with VACPP on OS/2
|
||||
#
|
||||
# ***** BEGIN LICENSE BLOCK *****
|
||||
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
#
|
||||
# The contents of this file are subject to the Mozilla Public License Version
|
||||
# 1.1 (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
# http://www.mozilla.org/MPL/
|
||||
#
|
||||
# Software distributed under the License is distributed on an "AS IS" basis,
|
||||
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
# for the specific language governing rights and limitations under the
|
||||
# License.
|
||||
#
|
||||
# The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Michael J. Fromberger <sting@linguist.dartmouth.edu>.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Netscape Communications Corporation
|
||||
#
|
||||
# Alternatively, the contents of this file may be used under the terms of
|
||||
# either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
# in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
# of those above. If you wish to allow use of your version of this file only
|
||||
# under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
# use your version of this file under the terms of the MPL, indicate your
|
||||
# decision by deleting the provisions above and replace them with the notice
|
||||
# and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
# the provisions above, a recipient may use your version of this file under
|
||||
# the terms of any one of the MPL, the GPL or the LGPL.
|
||||
#
|
||||
# ***** END LICENSE BLOCK *****
|
||||
|
||||
#
|
||||
# $Id: Makefile.os2,v 1.3 2005-02-02 22:28:22 gerv%gerv.net Exp $
|
||||
#
|
||||
|
||||
## Define CC to be the C compiler you wish to use. The GNU cc
|
||||
## compiler (gcc) should work, at the very least
|
||||
#CC=cc
|
||||
#CC=gcc
|
||||
CC=icc.exe
|
||||
AS=alp.exe
|
||||
|
||||
##
|
||||
## Define PERL to point to your local Perl interpreter. It
|
||||
## should be Perl 5.x, although it's conceivable that Perl 4
|
||||
## might work ... I haven't tested it.
|
||||
##
|
||||
#PERL=/usr/bin/perl
|
||||
PERL=perl
|
||||
|
||||
##
|
||||
## Define CFLAGS to contain any local options your compiler
|
||||
## setup requires.
|
||||
##
|
||||
## Conditional compilation options are no longer here; see
|
||||
## the file 'mpi-config.h' instead.
|
||||
##
|
||||
MPICMN = -I. -DMP_API_COMPATIBLE -DMP_IOFUNC -DMP_USE_UINT_DIGIT -DMP_NO_MP_WORD
|
||||
|
||||
#OS/2
|
||||
AS_SRCS = mpi_x86.asm
|
||||
MPICMN += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE -DMP_ASSEMBLY_DIV_2DX1D
|
||||
#CFLAGS= -Od -Z7 -MD -W3 -nologo -D_X86_ -DXP_PC \
|
||||
-DDEBUG -D_DEBUG -UNDEBUG -DWIN32 -D_WINDOWS -DWIN95 $(MPICMN)
|
||||
#CFLAGS = -O2 -MD -W3 -nologo -D_X86_ -DXP_PC -UDEBUG -U_DEBUG -DNDEBUG \
|
||||
-DWIN32 -D_WINDOWS -DWIN95 $(MPICMN)
|
||||
#CFLAGS = -Od -Z7 -MD -W3 -nologo -D_X86_ -DXP_PC -UDEBUG -U_DEBUG -DNDEBUG \
|
||||
-DWIN32 -D_WINDOWS -DWIN95 $(MPICMN)
|
||||
CFLAGS = /Ti+ -D_X86_ -DXP_PC -UDEBUG -U_DEBUG -DNDEBUG \
|
||||
$(MPICMN)
|
||||
ASFLAGS =
|
||||
|
||||
##
|
||||
## Define LIBS to include any libraries you need to link against.
|
||||
## If NO_TABLE is defined, LIBS should include '-lm' or whatever is
|
||||
## necessary to bring in the math library. Otherwise, it can be
|
||||
## left alone, unless your system has other peculiar requirements.
|
||||
##
|
||||
LIBS=#-lmalloc#-lefence#-lm
|
||||
|
||||
##
|
||||
## Define RANLIB to be the library header randomizer; you might not
|
||||
## need this on some systems (just set it to 'echo' on these systems,
|
||||
## such as IRIX)
|
||||
##
|
||||
RANLIB=echo
|
||||
|
||||
##
|
||||
## This is the version string used for the documentation and
|
||||
## building the distribution tarball. Don't mess with it unless
|
||||
## you are releasing a new version
|
||||
VERS=1.7p6
|
||||
|
||||
## ----------------------------------------------------------------------
|
||||
## You probably don't need to change anything below this line...
|
||||
##
|
||||
|
||||
##
|
||||
## This is the list of source files that need to be packed into
|
||||
## the distribution file
|
||||
SRCS= mpi.c mpprime.c mplogic.c mpmontg.c mpi-test.c primes.c tests/ \
|
||||
utils/gcd.c utils/invmod.c utils/lap.c \
|
||||
utils/ptab.pl utils/sieve.c utils/isprime.c\
|
||||
utils/dec2hex.c utils/hex2dec.c utils/bbs_rand.c \
|
||||
utils/bbsrand.c utils/prng.c utils/primegen.c \
|
||||
utils/basecvt.c utils/makeprime.c\
|
||||
utils/fact.c utils/exptmod.c utils/pi.c utils/metime.c \
|
||||
utils/mpi.h utils/mpprime.h mulsqr.c \
|
||||
make-test-arrays test-arrays.txt all-tests make-logtab \
|
||||
types.pl stats timetest multest
|
||||
|
||||
## These are the header files that go into the distribution file
|
||||
HDRS=mpi.h mpi-config.h utils/mpi.h utils/mpi-config.h mpprime.h mplogic.h \
|
||||
utils/bbs_rand.h tests/mpi.h tests/mpprime.h
|
||||
|
||||
## These are the documentation files that go into the distribution file
|
||||
DOCS=README doc utils/README utils/PRIMES
|
||||
|
||||
## This is the list of tools built by 'make tools'
|
||||
TOOLS=gcd.exe invmod.exe isprime.exe lap.exe dec2hex.exe hex2dec.exe \
|
||||
primegen.exe prng.exe basecvt.exe fact.exe exptmod.exe pi.exe makeprime.exe
|
||||
|
||||
AS_OBJS = $(AS_SRCS:.asm=.obj)
|
||||
LIBOBJS = mpprime.obj mpmontg.obj mplogic.obj mpi.obj $(AS_OBJS)
|
||||
LIBHDRS = mpi-config.h mpi-priv.h mpi.h
|
||||
APPHDRS = mpi-config.h mpi.h mplogic.h mpprime.h
|
||||
|
||||
|
||||
help:
|
||||
@ echo ""
|
||||
@ echo "The following targets can be built with this Makefile:"
|
||||
@ echo ""
|
||||
@ echo "mpi.lib - arithmetic and prime testing library"
|
||||
@ echo "mpi-test.exe - test driver (requires MP_IOFUNC)"
|
||||
@ echo "tools - command line tools"
|
||||
@ echo "doc - manual pages for tools"
|
||||
@ echo "clean - clean up objects and such"
|
||||
@ echo "distclean - get ready for distribution"
|
||||
@ echo "dist - distribution tarball"
|
||||
@ echo ""
|
||||
|
||||
.SUFFIXES: .c .obj .i .lib .exe .asm
|
||||
|
||||
.c.i:
|
||||
$(CC) $(CFLAGS) -E $< > $@
|
||||
|
||||
.c.obj:
|
||||
$(CC) $(CFLAGS) -c $<
|
||||
|
||||
.asm.obj:
|
||||
$(AS) $(ASFLAGS) $<
|
||||
|
||||
.obj.exe:
|
||||
$(CC) $(CFLAGS) -Fo$@ $<
|
||||
|
||||
#---------------------------------------
|
||||
|
||||
$(LIBOBJS): $(LIBHDRS)
|
||||
|
||||
logtab.h: make-logtab
|
||||
$(PERL) make-logtab > logtab.h
|
||||
|
||||
mpi.obj: mpi.c logtab.h $(LIBHDRS)
|
||||
|
||||
mplogic.obj: mplogic.c mpi-priv.h mplogic.h $(LIBHDRS)
|
||||
|
||||
mpmontg.obj: mpmontg.c mpi-priv.h mplogic.h mpprime.h $(LIBHDRS)
|
||||
|
||||
mpprime.obj: mpprime.c mpi-priv.h mpprime.h mplogic.h primes.c $(LIBHDRS)
|
||||
|
||||
mpi_mips.obj: mpi_mips.s
|
||||
$(CC) -Fo$@ $(ASFLAGS) -c mpi_mips.s
|
||||
|
||||
mpi.lib: $(LIBOBJS)
|
||||
ilib /out:mpi.lib $(LIBOBJS)
|
||||
$(RANLIB) mpi.lib
|
||||
|
||||
lib libs: mpi.lib
|
||||
|
||||
#---------------------------------------
|
||||
|
||||
MPTESTOBJS = mptest1.obj mptest2.obj mptest3.obj mptest3a.obj mptest4.obj \
|
||||
mptest4a.obj mptest4b.obj mptest6.obj mptest7.obj mptest8.obj mptest9.obj
|
||||
MPTESTS = $(MPTESTOBJS:.obj=.exe)
|
||||
|
||||
$(MPTESTOBJS): mptest%.obj: tests/mptest-%.c $(LIBHDRS)
|
||||
$(CC) $(CFLAGS) -Fo$@ -c $<
|
||||
|
||||
$(MPTESTS): mptest%.exe: mptest%.obj mpi.lib $(LIBS)
|
||||
$(CC) $(CFLAGS) -Fo$@ $^
|
||||
|
||||
tests: mptest1.exe mptest2.exe mptest3.exe mptest3a.exe mptest4.exe \
|
||||
mptest4a.exe mptest4b.exe mptest6.exe bbsrand.exe
|
||||
|
||||
utests: mptest7.exe mptest8.exe mptest9.exe
|
||||
|
||||
#---------------------------------------
|
||||
|
||||
EXTRAOBJS = bbsrand.obj bbs_rand.obj prng.obj
|
||||
UTILOBJS = primegen.obj metime.obj identest.obj basecvt.obj fact.obj \
|
||||
exptmod.obj pi.obj makeprime.obj karatsuba.obj gcd.obj invmod.obj lap.obj \
|
||||
isprime.obj dec2hex.obj hex2dec.obj
|
||||
UTILS = $(UTILOBJS:.obj=.exe)
|
||||
|
||||
$(UTILS): %.exe : %.obj mpi.lib $(LIBS)
|
||||
$(CC) $(CFLAGS) -Fo$@ $^
|
||||
|
||||
$(UTILOBJS) $(EXTRAOBJS): %.obj : utils/%.c $(LIBHDRS)
|
||||
$(CC) $(CFLAGS) -Fo$@ -c $<
|
||||
|
||||
prng.exe: prng.obj bbs_rand.obj mpi.lib $(LIBS)
|
||||
$(CC) $(CFLAGS) -Fo$@ $^
|
||||
|
||||
bbsrand.exe: bbsrand.obj bbs_rand.obj mpi.lib $(LIBS)
|
||||
$(CC) $(CFLAGS) -Fo$@ $^
|
||||
|
||||
utils: $(UTILS) prng.exe bbsrand.exe
|
||||
|
||||
#---------------------------------------
|
||||
|
||||
test-info.c: test-arrays.txt
|
||||
$(PERL) make-test-arrays test-arrays.txt > test-info.c
|
||||
|
||||
mpi-test.obj: mpi-test.c test-info.c $(LIBHDRS)
|
||||
$(CC) $(CFLAGS) -Fo$@ -c $<
|
||||
|
||||
mpi-test.exe: mpi-test.obj mpi.lib $(LIBS)
|
||||
$(CC) $(CFLAGS) -Fo$@ $^
|
||||
|
||||
mdxptest.obj: mdxptest.c $(LIBHDRS) mpi-priv.h
|
||||
|
||||
mdxptest.exe: mdxptest.obj mpi.lib $(LIBS)
|
||||
$(CC) $(CFLAGS) -Fo$@ $^
|
||||
|
||||
mulsqr.obj: mulsqr.c logtab.h mpi.h mpi-config.h mpprime.h
|
||||
$(CC) $(CFLAGS) -DMP_SQUARE=1 -Fo$@ -c mulsqr.c
|
||||
|
||||
mulsqr.exe: mulsqr.obj mpi.lib $(LIBS)
|
||||
$(CC) $(CFLAGS) -Fo$@ $^
|
||||
|
||||
#---------------------------------------
|
||||
|
||||
alltests: tests utests mpi-test.exe
|
||||
|
||||
tools: $(TOOLS)
|
||||
|
||||
doc:
|
||||
(cd doc; ./build)
|
||||
|
||||
clean:
|
||||
rm -f *.obj *.lib *.pdb *.ilk
|
||||
cd utils; rm -f *.obj *.lib *.pdb *.ilk
|
||||
|
||||
distclean: clean
|
||||
rm -f mptest? mpi-test metime mulsqr karatsuba
|
||||
rm -f mptest?a mptest?b
|
||||
rm -f utils/mptest?
|
||||
rm -f test-info.c logtab.h
|
||||
rm -f mpi.lib
|
||||
rm -f $(TOOLS)
|
||||
|
||||
dist: Makefile $(HDRS) $(SRCS) $(DOCS)
|
||||
tar -cvf mpi-$(VERS).tar Makefile $(HDRS) $(SRCS) $(DOCS)
|
||||
pgps -ab mpi-$(VERS).tar
|
||||
chmod +r mpi-$(VERS).tar.asc
|
||||
gzip -9 mpi-$(VERS).tar
|
||||
|
||||
|
||||
print:
|
||||
@echo LIBOBJS = $(LIBOBJS)
|
||||
# END
|
||||
@@ -1,280 +0,0 @@
|
||||
#
|
||||
# Makefile.win - gmake Makefile for building MPI with MSVC on NT
|
||||
|
||||
# ***** BEGIN LICENSE BLOCK *****
|
||||
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
#
|
||||
# The contents of this file are subject to the Mozilla Public License Version
|
||||
# 1.1 (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
# http://www.mozilla.org/MPL/
|
||||
#
|
||||
# Software distributed under the License is distributed on an "AS IS" basis,
|
||||
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
# for the specific language governing rights and limitations under the
|
||||
# License.
|
||||
#
|
||||
# The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Michael J. Fromberger <sting@linguist.dartmouth.edu>.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Netscape Communications Corporation
|
||||
#
|
||||
# Alternatively, the contents of this file may be used under the terms of
|
||||
# either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
# in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
# of those above. If you wish to allow use of your version of this file only
|
||||
# under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
# use your version of this file under the terms of the MPL, indicate your
|
||||
# decision by deleting the provisions above and replace them with the notice
|
||||
# and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
# the provisions above, a recipient may use your version of this file under
|
||||
# the terms of any one of the MPL, the GPL or the LGPL.
|
||||
#
|
||||
# ***** END LICENSE BLOCK *****
|
||||
|
||||
#
|
||||
# $Id: Makefile.win,v 1.3 2005-02-02 22:28:22 gerv%gerv.net Exp $
|
||||
#
|
||||
|
||||
## Define CC to be the C compiler you wish to use. The GNU cc
|
||||
## compiler (gcc) should work, at the very least
|
||||
#CC=cc
|
||||
#CC=gcc
|
||||
CC=cl.exe
|
||||
AS=ml.exe
|
||||
|
||||
##
|
||||
## Define PERL to point to your local Perl interpreter. It
|
||||
## should be Perl 5.x, although it's conceivable that Perl 4
|
||||
## might work ... I haven't tested it.
|
||||
##
|
||||
#PERL=/usr/bin/perl
|
||||
PERL=perl
|
||||
|
||||
##
|
||||
## Define CFLAGS to contain any local options your compiler
|
||||
## setup requires.
|
||||
##
|
||||
## Conditional compilation options are no longer here; see
|
||||
## the file 'mpi-config.h' instead.
|
||||
##
|
||||
MPICMN = -I. -DMP_API_COMPATIBLE -DMP_IOFUNC
|
||||
|
||||
#NT
|
||||
AS_SRCS = mpi_x86.asm
|
||||
MPICMN += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE -DMP_ASSEMBLY_DIV_2DX1D
|
||||
#CFLAGS= -Od -Z7 -MD -W3 -nologo -D_X86_ -DXP_PC \
|
||||
-DDEBUG -D_DEBUG -UNDEBUG -DWIN32 -D_WINDOWS -DWIN95 $(MPICMN)
|
||||
#CFLAGS = -O2 -MD -W3 -nologo -D_X86_ -DXP_PC -UDEBUG -U_DEBUG -DNDEBUG \
|
||||
-DWIN32 -D_WINDOWS -DWIN95 $(MPICMN)
|
||||
#CFLAGS = -Od -Z7 -MD -W3 -nologo -D_X86_ -DXP_PC -UDEBUG -U_DEBUG -DNDEBUG \
|
||||
-DWIN32 -D_WINDOWS -DWIN95 $(MPICMN)
|
||||
CFLAGS = -O2 -Z7 -MD -W3 -nologo -D_X86_ -DXP_PC -UDEBUG -U_DEBUG -DNDEBUG \
|
||||
-DWIN32 -D_WINDOWS -DWIN95 $(MPICMN)
|
||||
ASFLAGS = -Cp -Sn -Zi -coff -I.
|
||||
|
||||
##
|
||||
## Define LIBS to include any libraries you need to link against.
|
||||
## If NO_TABLE is defined, LIBS should include '-lm' or whatever is
|
||||
## necessary to bring in the math library. Otherwise, it can be
|
||||
## left alone, unless your system has other peculiar requirements.
|
||||
##
|
||||
LIBS=#-lmalloc#-lefence#-lm
|
||||
|
||||
##
|
||||
## Define RANLIB to be the library header randomizer; you might not
|
||||
## need this on some systems (just set it to 'echo' on these systems,
|
||||
## such as IRIX)
|
||||
##
|
||||
RANLIB=echo
|
||||
|
||||
##
|
||||
## This is the version string used for the documentation and
|
||||
## building the distribution tarball. Don't mess with it unless
|
||||
## you are releasing a new version
|
||||
VERS=1.7p6
|
||||
|
||||
## ----------------------------------------------------------------------
|
||||
## You probably don't need to change anything below this line...
|
||||
##
|
||||
|
||||
##
|
||||
## This is the list of source files that need to be packed into
|
||||
## the distribution file
|
||||
SRCS= mpi.c mpprime.c mplogic.c mpmontg.c mpi-test.c primes.c tests/ \
|
||||
utils/gcd.c utils/invmod.c utils/lap.c \
|
||||
utils/ptab.pl utils/sieve.c utils/isprime.c\
|
||||
utils/dec2hex.c utils/hex2dec.c utils/bbs_rand.c \
|
||||
utils/bbsrand.c utils/prng.c utils/primegen.c \
|
||||
utils/basecvt.c utils/makeprime.c\
|
||||
utils/fact.c utils/exptmod.c utils/pi.c utils/metime.c \
|
||||
utils/mpi.h utils/mpprime.h mulsqr.c \
|
||||
make-test-arrays test-arrays.txt all-tests make-logtab \
|
||||
types.pl stats timetest multest
|
||||
|
||||
## These are the header files that go into the distribution file
|
||||
HDRS=mpi.h mpi-config.h utils/mpi.h utils/mpi-config.h mpprime.h mplogic.h \
|
||||
utils/bbs_rand.h tests/mpi.h tests/mpprime.h
|
||||
|
||||
## These are the documentation files that go into the distribution file
|
||||
DOCS=README doc utils/README utils/PRIMES
|
||||
|
||||
## This is the list of tools built by 'make tools'
|
||||
TOOLS=gcd.exe invmod.exe isprime.exe lap.exe dec2hex.exe hex2dec.exe \
|
||||
primegen.exe prng.exe basecvt.exe fact.exe exptmod.exe pi.exe makeprime.exe
|
||||
|
||||
AS_OBJS = $(AS_SRCS:.asm=.obj)
|
||||
LIBOBJS = mpprime.obj mpmontg.obj mplogic.obj mpi.obj $(AS_OBJS)
|
||||
LIBHDRS = mpi-config.h mpi-priv.h mpi.h
|
||||
APPHDRS = mpi-config.h mpi.h mplogic.h mpprime.h
|
||||
|
||||
|
||||
help:
|
||||
@ echo ""
|
||||
@ echo "The following targets can be built with this Makefile:"
|
||||
@ echo ""
|
||||
@ echo "mpi.lib - arithmetic and prime testing library"
|
||||
@ echo "mpi-test - test driver (requires MP_IOFUNC)"
|
||||
@ echo "tools - command line tools"
|
||||
@ echo "doc - manual pages for tools"
|
||||
@ echo "clean - clean up objects and such"
|
||||
@ echo "distclean - get ready for distribution"
|
||||
@ echo "dist - distribution tarball"
|
||||
@ echo ""
|
||||
|
||||
.SUFFIXES: .c .obj .i .lib .exe .asm
|
||||
|
||||
.c.i:
|
||||
$(CC) $(CFLAGS) -E $< > $@
|
||||
|
||||
.c.obj:
|
||||
$(CC) $(CFLAGS) -c $<
|
||||
|
||||
.asm.obj:
|
||||
$(AS) $(ASFLAGS) -c $<
|
||||
|
||||
.obj.exe:
|
||||
$(CC) $(CFLAGS) -Fo$@ $<
|
||||
|
||||
#---------------------------------------
|
||||
|
||||
$(LIBOBJS): $(LIBHDRS)
|
||||
|
||||
logtab.h: make-logtab
|
||||
$(PERL) make-logtab > logtab.h
|
||||
|
||||
mpi.obj: mpi.c logtab.h $(LIBHDRS)
|
||||
|
||||
mplogic.obj: mplogic.c mpi-priv.h mplogic.h $(LIBHDRS)
|
||||
|
||||
mpmontg.obj: mpmontg.c mpi-priv.h mplogic.h mpprime.h $(LIBHDRS)
|
||||
|
||||
mpprime.obj: mpprime.c mpi-priv.h mpprime.h mplogic.h primes.c $(LIBHDRS)
|
||||
|
||||
mpi_mips.obj: mpi_mips.s
|
||||
$(CC) -Fo$@ $(ASFLAGS) -c mpi_mips.s
|
||||
|
||||
mpi.lib: $(LIBOBJS)
|
||||
ar -cvr mpi.lib $(LIBOBJS)
|
||||
$(RANLIB) mpi.lib
|
||||
|
||||
lib libs: mpi.lib
|
||||
|
||||
#---------------------------------------
|
||||
|
||||
MPTESTOBJS = mptest1.obj mptest2.obj mptest3.obj mptest3a.obj mptest4.obj \
|
||||
mptest4a.obj mptest4b.obj mptest6.obj mptest7.obj mptest8.obj mptest9.obj
|
||||
MPTESTS = $(MPTESTOBJS:.obj=.exe)
|
||||
|
||||
$(MPTESTOBJS): mptest%.obj: tests/mptest-%.c $(LIBHDRS)
|
||||
$(CC) $(CFLAGS) -Fo$@ -c $<
|
||||
|
||||
$(MPTESTS): mptest%.exe: mptest%.obj mpi.lib $(LIBS)
|
||||
$(CC) $(CFLAGS) -Fo$@ $^
|
||||
|
||||
tests: mptest1.exe mptest2.exe mptest3.exe mptest3a.exe mptest4.exe \
|
||||
mptest4a.exe mptest4b.exe mptest6.exe bbsrand.exe
|
||||
|
||||
utests: mptest7.exe mptest8.exe mptest9.exe
|
||||
|
||||
#---------------------------------------
|
||||
|
||||
EXTRAOBJS = bbsrand.obj bbs_rand.obj prng.obj
|
||||
UTILOBJS = primegen.obj metime.obj identest.obj basecvt.obj fact.obj \
|
||||
exptmod.obj pi.obj makeprime.obj karatsuba.obj gcd.obj invmod.obj lap.obj \
|
||||
isprime.obj dec2hex.obj hex2dec.obj
|
||||
UTILS = $(UTILOBJS:.obj=.exe)
|
||||
|
||||
$(UTILS): %.exe : %.obj mpi.lib $(LIBS)
|
||||
$(CC) $(CFLAGS) -Fo$@ $^
|
||||
|
||||
$(UTILOBJS) $(EXTRAOBJS): %.obj : utils/%.c $(LIBHDRS)
|
||||
$(CC) $(CFLAGS) -Fo$@ -c $<
|
||||
|
||||
prng.exe: prng.obj bbs_rand.obj mpi.lib $(LIBS)
|
||||
$(CC) $(CFLAGS) -Fo$@ $^
|
||||
|
||||
bbsrand.exe: bbsrand.obj bbs_rand.obj mpi.lib $(LIBS)
|
||||
$(CC) $(CFLAGS) -Fo$@ $^
|
||||
|
||||
utils: $(UTILS) prng.exe bbsrand.exe
|
||||
|
||||
#---------------------------------------
|
||||
|
||||
test-info.c: test-arrays.txt
|
||||
$(PERL) make-test-arrays test-arrays.txt > test-info.c
|
||||
|
||||
mpi-test.obj: mpi-test.c test-info.c $(LIBHDRS)
|
||||
$(CC) $(CFLAGS) -Fo$@ -c $<
|
||||
|
||||
mpi-test.exe: mpi-test.obj mpi.lib $(LIBS)
|
||||
$(CC) $(CFLAGS) -Fo$@ $^
|
||||
|
||||
mdxptest.obj: mdxptest.c $(LIBHDRS) mpi-priv.h
|
||||
|
||||
mdxptest.exe: mdxptest.obj mpi.lib $(LIBS)
|
||||
$(CC) $(CFLAGS) -Fo$@ $^
|
||||
|
||||
mulsqr.obj: mulsqr.c logtab.h mpi.h mpi-config.h mpprime.h
|
||||
$(CC) $(CFLAGS) -DMP_SQUARE=1 -Fo$@ -c mulsqr.c
|
||||
|
||||
mulsqr.exe: mulsqr.obj mpi.lib $(LIBS)
|
||||
$(CC) $(CFLAGS) -Fo$@ $^
|
||||
|
||||
#---------------------------------------
|
||||
|
||||
alltests: tests utests mpi-test.exe
|
||||
|
||||
tools: $(TOOLS)
|
||||
|
||||
doc:
|
||||
(cd doc; ./build)
|
||||
|
||||
clean:
|
||||
rm -f *.obj *.lib *.pdb *.ilk
|
||||
cd utils; rm -f *.obj *.lib *.pdb *.ilk
|
||||
|
||||
distclean: clean
|
||||
rm -f mptest? mpi-test metime mulsqr karatsuba
|
||||
rm -f mptest?a mptest?b
|
||||
rm -f utils/mptest?
|
||||
rm -f test-info.c logtab.h
|
||||
rm -f mpi.lib
|
||||
rm -f $(TOOLS)
|
||||
|
||||
dist: Makefile $(HDRS) $(SRCS) $(DOCS)
|
||||
tar -cvf mpi-$(VERS).tar Makefile $(HDRS) $(SRCS) $(DOCS)
|
||||
pgps -ab mpi-$(VERS).tar
|
||||
chmod +r mpi-$(VERS).tar.asc
|
||||
gzip -9 mpi-$(VERS).tar
|
||||
|
||||
|
||||
print:
|
||||
@echo LIBOBJS = $(LIBOBJS)
|
||||
# END
|
||||
@@ -1,799 +0,0 @@
|
||||
***** BEGIN LICENSE BLOCK *****
|
||||
Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
|
||||
The contents of this file are subject to the Mozilla Public License Version
|
||||
1.1 (the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
http://www.mozilla.org/MPL/
|
||||
|
||||
Software distributed under the License is distributed on an "AS IS" basis,
|
||||
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
for the specific language governing rights and limitations under the
|
||||
License.
|
||||
|
||||
The Original Code is the MPI Arbitrary Precision Integer Arithmetic
|
||||
library.
|
||||
|
||||
The Initial Developer of the Original Code is
|
||||
Michael J. Fromberger <sting@linguist.dartmouth.edu>
|
||||
Portions created by the Initial Developer are Copyright (C) 1997-2000
|
||||
the Initial Developer. All Rights Reserved.
|
||||
|
||||
Contributor(s):
|
||||
|
||||
Alternatively, the contents of this file may be used under the terms of
|
||||
either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
of those above. If you wish to allow use of your version of this file only
|
||||
under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
use your version of this file under the terms of the MPL, indicate your
|
||||
decision by deleting the provisions above and replace them with the notice
|
||||
and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
the provisions above, a recipient may use your version of this file under
|
||||
the terms of any one of the MPL, the GPL or the LGPL.
|
||||
|
||||
***** END LICENSE BLOCK *****
|
||||
|
||||
About the MPI Library
|
||||
---------------------
|
||||
|
||||
The files 'mpi.h' and 'mpi.c' define a simple, arbitrary precision
|
||||
signed integer arithmetic package. The implementation is not the most
|
||||
efficient possible, but the code is small and should be fairly easily
|
||||
portable to just about any machine that supports an ANSI C compiler,
|
||||
as long as it is capable of at least 16-bit arithmetic (but also see
|
||||
below for more on this).
|
||||
|
||||
This library was written with an eye to cryptographic applications;
|
||||
thus, some care is taken to make sure that temporary values are not
|
||||
left lying around in memory when they are no longer in use. This adds
|
||||
some overhead for zeroing buffers before they are released back into
|
||||
the free pool; however, it gives you the assurance that there is only
|
||||
one copy of your important values residing in your process's address
|
||||
space at a time. Obviously, it is difficult to guarantee anything, in
|
||||
a pre-emptive multitasking environment, but this at least helps you
|
||||
keep a lid on the more obvious ways your data can get spread around in
|
||||
memory.
|
||||
|
||||
|
||||
Using the Library
|
||||
-----------------
|
||||
|
||||
To use the MPI library in your program, you must include the header:
|
||||
|
||||
#include "mpi.h"
|
||||
|
||||
This header provides all the type and function declarations you'll
|
||||
need to use the library. Almost all the names defined by the library
|
||||
begin with the prefix 'mp_', so it should be easy to keep them from
|
||||
clashing with your program's namespace (he says, glibly, knowing full
|
||||
well there are always pathological cases).
|
||||
|
||||
There are a few things you may want to configure about the library.
|
||||
By default, the MPI library uses an unsigned short for its digit type,
|
||||
and an unsigned int for its word type. The word type must be big
|
||||
enough to contain at least two digits, for the primitive arithmetic to
|
||||
work out. On my machine, a short is 2 bytes and an int is 4 bytes --
|
||||
but if you have 64-bit ints, you might want to use a 4-byte digit and
|
||||
an 8-byte word. I have tested the library using 1-byte digits and
|
||||
2-byte words, as well. Whatever you choose to do, the things you need
|
||||
to change are:
|
||||
|
||||
(1) The type definitions for mp_digit and mp_word.
|
||||
|
||||
(2) The macro DIGIT_FMT which tells mp_print() how to display a
|
||||
single digit. This is just a printf() format string, so you
|
||||
can adjust it appropriately.
|
||||
|
||||
(3) The macros DIGIT_MAX and MP_WORD_MAX, which specify the
|
||||
largest value expressible in an mp_digit and an mp_word,
|
||||
respectively.
|
||||
|
||||
Both the mp_digit and mp_word should be UNSIGNED integer types. The
|
||||
code relies on having the full positive precision of the type used for
|
||||
digits and words.
|
||||
|
||||
The remaining type definitions should be left alone, for the most
|
||||
part. The code in the library does not make any significant
|
||||
assumptions about the sizes of things, but there is little if any
|
||||
reason to change the other parameters, so I would recommend you leave
|
||||
them as you found them.
|
||||
|
||||
The library comes with a Perl script, 'types.pl', which will scan your
|
||||
current Makefile settings, and attempt to find good definitions for
|
||||
these types. It relies on a Unix sort of build environment, so it
|
||||
probably won't work under MacOS or Windows, but it can be convenient
|
||||
if you're porting to a new flavour of Unix. Just run 'types.pl' at
|
||||
the command line, and it will spit out its results to the standard
|
||||
output.
|
||||
|
||||
|
||||
Conventions
|
||||
-----------
|
||||
|
||||
Most functions in the library return a value of type mp_err. This
|
||||
permits the library to communicate success or various kinds of failure
|
||||
to the calling program. The return values currently defined are:
|
||||
|
||||
MP_OKAY - okay, operation succeeded, all's well
|
||||
MP_YES - okay, the answer is yes (same as MP_OKAY)
|
||||
MP_NO - okay, but answer is no (not MP_OKAY)
|
||||
MP_MEM - operation ran out of memory
|
||||
MP_RANGE - input parameter was out of range
|
||||
MP_BADARG - an invalid input parameter was provided
|
||||
MP_UNDEF - no output value is defined for this input
|
||||
|
||||
The only function which currently uses MP_UNDEF is mp_invmod().
|
||||
Division by zero is undefined, but the division functions will return
|
||||
MP_RANGE for a zero divisor. MP_BADARG usually means you passed a
|
||||
bogus mp_int structure to the function. MP_YES and MP_NO are not used
|
||||
by the library itself; they're defined so you can use them in your own
|
||||
extensions.
|
||||
|
||||
If you need a readable interpretation of these error codes in your
|
||||
program, you may also use the mp_strerror() function. This function
|
||||
takes an mp_err as input, and returns a pointer to a human-readable
|
||||
string describing the meaning of the error. These strings are stored
|
||||
as constants within the library, so the caller should not attempt to
|
||||
modify or free the memory associated with these strings.
|
||||
|
||||
The library represents values in signed-magnitude format. Values
|
||||
strictly less than zero are negative, all others are considered
|
||||
positive (zero is positive by fiat). You can access the 'sign' member
|
||||
of the mp_int structure directly, but better is to use the mp_cmp_z()
|
||||
function, to find out which side of zero the value lies on.
|
||||
|
||||
Most arithmetic functions have a single-digit variant, as well as the
|
||||
full arbitrary-precision version. An mp_digit is an unsigned value between 0
|
||||
and DIGIT_MAX inclusive. The radix is available as RADIX. The number
|
||||
of bits in a given digit is given as DIGIT_BIT.
|
||||
|
||||
Generally, input parameters are given before output parameters.
|
||||
Unless otherwise specified, any input parameter can be re-used as an
|
||||
output parameter, without confusing anything.
|
||||
|
||||
The basic numeric type defined by the library is an mp_int. Virtually
|
||||
all the functions in the library take a pointer to an mp_int as one of
|
||||
their parameters. An explanation of how to create and use these
|
||||
structures follows. And so, without further ado...
|
||||
|
||||
|
||||
Initialization and Cleanup
|
||||
--------------------------
|
||||
|
||||
The basic numeric type defined by the library is an 'mp_int'.
|
||||
However, it is not sufficient to simply declare a variable of type
|
||||
mp_int in your program. These variables also need to be initialized
|
||||
before they can be used, to allocate the internal storage they require
|
||||
for computation.
|
||||
|
||||
This is done using one of the following functions:
|
||||
|
||||
mp_init(mp_int *mp);
|
||||
mp_init_copy(mp_int *mp, mp_int *from);
|
||||
mp_init_size(mp_int *mp, mp_size p);
|
||||
|
||||
Each of these requires a pointer to a structure of type mp_int. The
|
||||
basic mp_init() simply initializes the mp_int to a default size, and
|
||||
sets its value to zero. If you would like to initialize a copy of an
|
||||
existing mp_int, use mp_init_copy(), where the 'from' parameter is the
|
||||
mp_int you'd like to make a copy of. The third function,
|
||||
mp_init_size(), permits you to specify how many digits of precision
|
||||
should be preallocated for your mp_int. This can help the library
|
||||
avoid unnecessary re-allocations later on.
|
||||
|
||||
The default precision used by mp_init() can be retrieved using:
|
||||
|
||||
precision = mp_get_prec();
|
||||
|
||||
This returns the number of digits that will be allocated. You can
|
||||
change this value by using:
|
||||
|
||||
mp_set_prec(unsigned int prec);
|
||||
|
||||
Any positive value is acceptable -- if you pass zero, the default
|
||||
precision will be re-set to the compiled-in library default (this is
|
||||
specified in the header file 'mpi-config.h', and typically defaults to
|
||||
8 or 16).
|
||||
|
||||
Just as you must allocate an mp_int before you can use it, you must
|
||||
clean up the structure when you are done with it. This is performed
|
||||
using the mp_clear() function. Remember that any mp_int that you
|
||||
create as a local variable in a function must be mp_clear()'d before
|
||||
that function exits, or else the memory allocated to that mp_int will
|
||||
be orphaned and unrecoverable.
|
||||
|
||||
To set an mp_int to a given value, the following functions are given:
|
||||
|
||||
mp_set(mp_int *mp, mp_digit d);
|
||||
mp_set_int(mp_int *mp, long z);
|
||||
|
||||
The mp_set() function sets the mp_int to a single digit value, while
|
||||
mp_set_int() sets the mp_int to a signed long integer value.
|
||||
|
||||
To set an mp_int to zero, use:
|
||||
|
||||
mp_zero(mp_int *mp);
|
||||
|
||||
|
||||
Copying and Moving
|
||||
------------------
|
||||
|
||||
If you have two initialized mp_int's, and you want to copy the value
|
||||
of one into the other, use:
|
||||
|
||||
mp_copy(from, to)
|
||||
|
||||
This takes care of clearing the old value of 'to', and copies the new
|
||||
value into it. If 'to' is not yet initialized, use mp_init_copy()
|
||||
instead (see above).
|
||||
|
||||
Note: The library tries, whenever possible, to avoid allocating
|
||||
---- new memory. Thus, mp_copy() tries first to satisfy the needs
|
||||
of the copy by re-using the memory already allocated to 'to'.
|
||||
Only if this proves insufficient will mp_copy() actually
|
||||
allocate new memory.
|
||||
|
||||
For this reason, if you know a priori that 'to' has enough
|
||||
available space to hold 'from', you don't need to check the
|
||||
return value of mp_copy() for memory failure. The USED()
|
||||
macro tells you how many digits are used by an mp_int, and
|
||||
the ALLOC() macro tells you how many are allocated.
|
||||
|
||||
If you have two initialized mp_int's, and you want to exchange their
|
||||
values, use:
|
||||
|
||||
mp_exch(a, b)
|
||||
|
||||
This is better than using mp_copy() with a temporary, since it will
|
||||
not (ever) touch the memory allocator -- it just swaps the exact
|
||||
contents of the two structures. The mp_exch() function cannot fail;
|
||||
if you pass it an invalid structure, it just ignores it, and does
|
||||
nothing.
|
||||
|
||||
|
||||
Basic Arithmetic
|
||||
----------------
|
||||
|
||||
Once you have initialized your integers, you can operate on them. The
|
||||
basic arithmetic functions on full mp_int values are:
|
||||
|
||||
mp_add(a, b, c) - computes c = a + b
|
||||
mp_sub(a, b, c) - computes c = a - b
|
||||
mp_mul(a, b, c) - computes c = a * b
|
||||
mp_sqr(a, b) - computes b = a * a
|
||||
mp_div(a, b, q, r) - computes q, r such that a = bq + r
|
||||
mp_div_2d(a, d, q, r) - computes q = a / 2^d, r = a % 2^d
|
||||
mp_expt(a, b, c) - computes c = a ** b
|
||||
mp_2expt(a, k) - computes a = 2^k
|
||||
mp_sqrt(a, c) - computes c = floor(sqrt(a))
|
||||
|
||||
The mp_div_2d() function efficiently computes division by powers of
|
||||
two. Either the q or r parameter may be NULL, in which case that
|
||||
portion of the computation will be discarded.
|
||||
|
||||
The algorithms used for some of the computations here are described in
|
||||
the following files which are included with this distribution:
|
||||
|
||||
mul.txt Describes the multiplication algorithm
|
||||
div.txt Describes the division algorithm
|
||||
expt.txt Describes the exponentiation algorithm
|
||||
sqrt.txt Describes the square-root algorithm
|
||||
square.txt Describes the squaring algorithm
|
||||
|
||||
There are single-digit versions of most of these routines, as well.
|
||||
In the following prototypes, 'd' is a single mp_digit:
|
||||
|
||||
mp_add_d(a, d, c) - computes c = a + d
|
||||
mp_sub_d(a, d, c) - computes c = a - d
|
||||
mp_mul_d(a, d, c) - computes c = a * d
|
||||
mp_mul_2(a, c) - computes c = a * 2
|
||||
mp_div_d(a, d, q, r) - computes q, r such that a = dq + r
|
||||
mp_div_2(a, c) - computes c = a / 2
|
||||
mp_expt_d(a, d, c) - computes c = a ** d
|
||||
|
||||
The mp_mul_2() and mp_div_2() functions take advantage of the internal
|
||||
representation of an mp_int to do multiplication by two more quickly
|
||||
than mp_mul_d() would. Other basic functions of an arithmetic variety
|
||||
include:
|
||||
|
||||
mp_zero(a) - assign 0 to a
|
||||
mp_neg(a, c) - negate a: c = -a
|
||||
mp_abs(a, c) - absolute value: c = |a|
|
||||
|
||||
|
||||
Comparisons
|
||||
-----------
|
||||
|
||||
Several comparison functions are provided. Each of these, unless
|
||||
otherwise specified, returns zero if the comparands are equal, < 0 if
|
||||
the first is less than the second, and > 0 if the first is greater
|
||||
than the second:
|
||||
|
||||
mp_cmp_z(a) - compare a <=> 0
|
||||
mp_cmp_d(a, d) - compare a <=> d, d is a single digit
|
||||
mp_cmp(a, b) - compare a <=> b
|
||||
mp_cmp_mag(a, b) - compare |a| <=> |b|
|
||||
mp_cmp_int(a, z) - compare a <=> z, z is a signed long integer
|
||||
mp_isodd(a) - return nonzero if odd, zero otherwise
|
||||
mp_iseven(a) - return nonzero if even, zero otherwise
|
||||
|
||||
|
||||
Modular Arithmetic
|
||||
------------------
|
||||
|
||||
Modular variations of the basic arithmetic functions are also
|
||||
supported. These are available if the MP_MODARITH parameter in
|
||||
mpi-config.h is turned on (it is by default). The modular arithmetic
|
||||
functions are:
|
||||
|
||||
mp_mod(a, m, c) - compute c = a (mod m), 0 <= c < m
|
||||
mp_mod_d(a, d, c) - compute c = a (mod d), 0 <= c < d (see below)
|
||||
mp_addmod(a, b, m, c) - compute c = (a + b) mod m
|
||||
mp_submod(a, b, m, c) - compute c = (a - b) mod m
|
||||
mp_mulmod(a, b, m, c) - compute c = (a * b) mod m
|
||||
mp_sqrmod(a, m, c) - compute c = (a * a) mod m
|
||||
mp_exptmod(a, b, m, c) - compute c = (a ** b) mod m
|
||||
mp_exptmod_d(a, d, m, c)- compute c = (a ** d) mod m
|
||||
|
||||
The mp_sqr() function squares its input argument. A call to mp_sqr(a,
|
||||
c) is identical in meaning to mp_mul(a, a, c); however, if the
|
||||
MP_SQUARE variable is set true in mpi-config.h (see below), then it
|
||||
will be implemented with a different algorithm, that is supposed to
|
||||
take advantage of the redundant computation that takes place during
|
||||
squaring. Unfortunately, some compilers result in worse performance
|
||||
on this code, so you can change the behaviour at will. There is a
|
||||
utility program "mulsqr.c" that lets you test which does better on
|
||||
your system.
|
||||
|
||||
The mp_sqrmod() function is analogous to the mp_sqr() function; it
|
||||
uses the mp_sqr() function rather than mp_mul(), and then performs the
|
||||
modular reduction. This probably won't help much unless you are doing
|
||||
a lot of them.
|
||||
|
||||
See the file 'square.txt' for a synopsis of the algorithm used.
|
||||
|
||||
Note: The mp_mod_d() function computes a modular reduction around
|
||||
---- a single digit d. The result is a single digit c.
|
||||
|
||||
Because an inverse is defined for a (mod m) if and only if (a, m) = 1
|
||||
(that is, if a and m are relatively prime), mp_invmod() may not be
|
||||
able to compute an inverse for the arguments. In this case, it
|
||||
returns the value MP_UNDEF, and does not modify c. If an inverse is
|
||||
defined, however, it returns MP_OKAY, and sets c to the value of the
|
||||
inverse (mod m).
|
||||
|
||||
See the file 'redux.txt' for a description of the modular reduction
|
||||
algorithm used by mp_exptmod().
|
||||
|
||||
|
||||
Greatest Common Divisor
|
||||
-----------------------
|
||||
|
||||
The greatest common divisor of two values can be found using one of the
|
||||
following functions:
|
||||
|
||||
mp_gcd(a, b, c) - compute c = (a, b) using binary algorithm
|
||||
mp_lcm(a, b, c) - compute c = [a, b] = ab / (a, b)
|
||||
mp_xgcd(a, b, g, x, y) - compute g, x, y so that ax + by = g = (a, b)
|
||||
|
||||
Also provided is a function to compute modular inverses, if they
|
||||
exist:
|
||||
|
||||
mp_invmod(a, m, c) - compute c = a^-1 (mod m), if it exists
|
||||
|
||||
The function mp_xgcd() computes the greatest common divisor, and also
|
||||
returns values of x and y satisfying Bezout's identity. This is used
|
||||
by mp_invmod() to find modular inverses. However, if you do not need
|
||||
these values, you will find that mp_gcd() is MUCH more efficient,
|
||||
since it doesn't need all the intermediate values that mp_xgcd()
|
||||
requires in order to compute x and y.
|
||||
|
||||
The mp_gcd() (and mp_xgcd()) functions use the binary (extended) GCD
|
||||
algorithm due to Josef Stein.
|
||||
|
||||
|
||||
Input & Output Functions
|
||||
------------------------
|
||||
|
||||
The following basic I/O routines are provided. These are present at
|
||||
all times:
|
||||
|
||||
mp_read_radix(mp, str, r) - convert a string in radix r to an mp_int
|
||||
mp_read_raw(mp, s, len) - convert a string of bytes to an mp_int
|
||||
mp_radix_size(mp, r) - return length of buffer needed by mp_toradix()
|
||||
mp_raw_size(mp) - return length of buffer needed by mp_toraw()
|
||||
mp_toradix(mp, str, r) - convert an mp_int to a string of radix r
|
||||
digits
|
||||
mp_toraw(mp, str) - convert an mp_int to a string of bytes
|
||||
mp_tovalue(ch, r) - convert ch to its value when taken as
|
||||
a radix r digit, or -1 if invalid
|
||||
mp_strerror(err) - get a string describing mp_err value 'err'
|
||||
|
||||
If you compile the MPI library with MP_IOFUNC defined, you will also
|
||||
have access to the following additional I/O function:
|
||||
|
||||
mp_print(mp, ofp) - print an mp_int as text to output stream ofp
|
||||
|
||||
Note that mp_radix_size() returns a size in bytes guaranteed to be AT
|
||||
LEAST big enough for the digits output by mp_toradix(). Because it
|
||||
uses an approximation technique to figure out how many digits will be
|
||||
needed, it may return a figure which is larger than necessary. Thus,
|
||||
the caller should not rely on the value to determine how many bytes
|
||||
will actually be written by mp_toradix(). The string mp_toradix()
|
||||
creates will be NUL terminated, so the standard C library function
|
||||
strlen() should be able to ascertain this for you, if you need it.
|
||||
|
||||
The mp_read_radix() and mp_toradix() functions support bases from 2 to
|
||||
64 inclusive. If you require more general radix conversion facilities
|
||||
than this, you will need to write them yourself (that's why mp_div_d()
|
||||
is provided, after all).
|
||||
|
||||
Note: mp_read_radix() will accept as digits either capital or
|
||||
---- lower-case letters. However, the current implementation of
|
||||
mp_toradix() only outputs upper-case letters, when writing
|
||||
bases between 10 and 36. The underlying code supports using
|
||||
lower-case letters, but the interface stub does not have a
|
||||
selector for it. You can add one yourself if you think it
|
||||
is worthwhile -- I do not. Bases from 36 to 64 use lower-
|
||||
case letters as distinct from upper-case. Bases 63 and
|
||||
64 use the characters '+' and '/' as digits.
|
||||
|
||||
Note also that compiling with MP_IOFUNC defined will cause
|
||||
inclusion of <stdio.h>, so if you are trying to write code
|
||||
which does not depend on the standard C library, you will
|
||||
probably want to avoid this option. This is needed because
|
||||
the mp_print() function takes a standard library FILE * as
|
||||
one of its parameters, and uses the fprintf() function.
|
||||
|
||||
The mp_toraw() function converts the integer to a sequence of bytes,
|
||||
in big-endian ordering (most-significant byte first). Assuming your
|
||||
bytes are 8 bits wide, this corresponds to base 256. The sign is
|
||||
encoded as a single leading byte, whose value is 0 for zero or
|
||||
positive values, or 1 for negative values. The mp_read_raw() function
|
||||
reverses this process -- it takes a buffer of bytes, interprets the
|
||||
first as a sign indicator (0 = zero/positive, nonzero = negative), and
|
||||
the rest as a sequence of 1-byte digits in big-endian ordering.
|
||||
|
||||
The mp_raw_size() function returns the exact number of bytes required
|
||||
to store the given integer in "raw" format (as described in the
|
||||
previous paragraph). Zero is returned in case of error; a valid
|
||||
integer will require at least three bytes of storage.
|
||||
|
||||
In previous versions of the MPI library, an "external representation
|
||||
format" was supported. This was removed, however, because I found I
|
||||
was never using it, it was not as portable as I would have liked, and
|
||||
I decided it was a waste of space.
|
||||
|
||||
|
||||
Other Functions
|
||||
---------------
|
||||
|
||||
The files 'mpprime.h' and 'mpprime.c' define some routines which are
|
||||
useful for divisibility testing and probabilistic primality testing.
|
||||
The routines defined are:
|
||||
|
||||
mpp_divis(a, b) - is a divisible by b?
|
||||
mpp_divis_d(a, d) - is a divisible by digit d?
|
||||
mpp_random(a) - set a to random value at current precision
|
||||
mpp_random_size(a, prec) - set a to random value at given precision
|
||||
|
||||
Note: The mpp_random() and mpp_random_size() functions use the C
|
||||
---- library's rand() function to generate random values. It is
|
||||
up to the caller to seed this generator before it is called.
|
||||
These functions are not suitable for generating quantities
|
||||
requiring cryptographic-quality randomness; they are intended
|
||||
primarily for use in primality testing.
|
||||
|
||||
Note too that the MPI library does not call srand(), so your
|
||||
application should do this, if you ever want the sequence
|
||||
to change.
|
||||
|
||||
mpp_divis_vector(a, v, s, w) - is a divisible by any of the s digits
|
||||
in v? If so, let w be the index of
|
||||
that digit
|
||||
|
||||
mpp_divis_primes(a, np) - is a divisible by any of the first np
|
||||
primes? If so, set np to the prime
|
||||
which divided a.
|
||||
|
||||
mpp_fermat(a, w) - test if w^a = w (mod a). If so,
|
||||
returns MP_YES, otherwise MP_NO.
|
||||
|
||||
mpp_pprime(a, nt) - perform nt iterations of the Rabin-
|
||||
Miller probabilistic primality test
|
||||
on a. Returns MP_YES if all tests
|
||||
passed, or MP_NO if any test fails.
|
||||
|
||||
The mpp_fermat() function works based on Fermat's little theorem, a
|
||||
consequence of which is that if p is a prime, and (w, p) = 1, then:
|
||||
|
||||
w^p = w (mod p)
|
||||
|
||||
Put another way, if w^p != w (mod p), then p is not prime. The test
|
||||
is expensive to compute, but it helps to quickly eliminate an enormous
|
||||
class of composite numbers prior to Rabin-Miller testing.
|
||||
|
||||
Building the Library
|
||||
--------------------
|
||||
|
||||
The MPI library is designed to be as self-contained as possible. You
|
||||
should be able to compile it with your favourite ANSI C compiler, and
|
||||
link it into your program directly. If you are on a Unix system using
|
||||
the GNU C compiler (gcc), the following should work:
|
||||
|
||||
% gcc -ansi -pedantic -Wall -O2 -c mpi.c
|
||||
|
||||
The file 'mpi-config.h' defines several configurable parameters for
|
||||
the library, which you can adjust to suit your application. At the
|
||||
time of this writing, the available options are:
|
||||
|
||||
MP_IOFUNC - Define true to include the mp_print() function,
|
||||
which is moderately useful for debugging. This
|
||||
implicitly includes <stdio.h>.
|
||||
|
||||
MP_MODARITH - Define true to include the modular arithmetic
|
||||
functions. If you don't need modular arithmetic
|
||||
in your application, you can set this to zero to
|
||||
leave out all the modular routines.
|
||||
|
||||
MP_NUMTH - Define true to include number theoretic functions
|
||||
such as mp_gcd(), mp_lcm(), and mp_invmod().
|
||||
|
||||
MP_LOGTAB - If true, the file "logtab.h" is included, which
|
||||
is basically a static table of base 2 logarithms.
|
||||
These are used to compute how big the buffers for
|
||||
radix conversion need to be. If you set this false,
|
||||
the library includes <math.h> and uses log(). This
|
||||
typically forces you to link against math libraries.
|
||||
|
||||
MP_MEMSET - If true, use memset() to zero buffers. If you run
|
||||
into weird alignment related bugs, set this to zero
|
||||
and an explicit loop will be used.
|
||||
|
||||
MP_MEMCPY - If true, use memcpy() to copy buffers. If you run
|
||||
into weird alignment bugs, set this to zero and an
|
||||
explicit loop will be used.
|
||||
|
||||
MP_CRYPTO - If true, whenever arrays of digits are free'd, they
|
||||
are zeroed first. This is useful if you're using
|
||||
the library in a cryptographic environment; however,
|
||||
it does add overhead to each free operation. For
|
||||
performance, if you don't care about zeroing your
|
||||
buffers, set this to false.
|
||||
|
||||
MP_ARGCHK - Set to 0, 1, or 2. This defines how the argument
|
||||
checking macro, ARGCHK(), gets expanded. If this
|
||||
is set to zero, ARGCHK() expands to nothing; no
|
||||
argument checks are performed. If this is 1, the
|
||||
ARGCHK() macro expands to code that returns MP_BADARG
|
||||
or similar at runtime. If it is 2, ARGCHK() expands
|
||||
to an assert() call that aborts the program on a
|
||||
bad input.
|
||||
|
||||
MP_DEBUG - Turns on debugging output. This is probably not at
|
||||
all useful unless you are debugging the library. It
|
||||
tends to spit out a LOT of output.
|
||||
|
||||
MP_DEFPREC - The default precision of a newly-created mp_int, in
|
||||
digits. The precision can be changed at runtime by
|
||||
the mp_set_prec() function, but this is its initial
|
||||
value.
|
||||
|
||||
MP_SQUARE - If this is set to a nonzero value, the mp_sqr()
|
||||
function will use an alternate algorithm that takes
|
||||
advantage of the redundant inner product computation
|
||||
when both multiplicands are identical. Unfortunately,
|
||||
with some compilers this is actually SLOWER than just
|
||||
calling mp_mul() with the same argument twice. So
|
||||
if you set MP_SQUARE to zero, mp_sqr() will be expan-
|
||||
ded into a call to mp_mul(). This applies to all
|
||||
the uses of mp_sqr(), including mp_sqrmod() and the
|
||||
internal calls to s_mp_sqr() inside mpi.c
|
||||
|
||||
The program 'mulsqr' (mulsqr.c) can be used to test
|
||||
which works best for your configuration. Set up the
|
||||
CC and CFLAGS variables in the Makefile, then type:
|
||||
|
||||
make mulsqr
|
||||
|
||||
Invoke it with arguments similar to the following:
|
||||
|
||||
mulsqr 25000 1024
|
||||
|
||||
That is, 25000 products computed on 1024-bit values.
|
||||
The output will compare the two timings, and recommend
|
||||
a setting for MP_SQUARE. It is off by default.
|
||||
|
||||
If you would like to use the mp_print() function (see above), be sure
|
||||
to define MP_IOFUNC in mpi-config.h. Many of the test drivers in the
|
||||
'tests' subdirectory expect this to be defined (although the test
|
||||
driver 'mpi-test' doesn't need it)
|
||||
|
||||
The Makefile which comes with the library should take care of building
|
||||
the library for you, if you have set the CC and CFLAGS variables at
|
||||
the top of the file appropriately. By default, they are set up to
|
||||
use the GNU C compiler:
|
||||
|
||||
CC=gcc
|
||||
CFLAGS=-ansi -pedantic -Wall -O2
|
||||
|
||||
If all goes well, the library should compile without warnings using
|
||||
this combination. You should, of course, make whatever adjustments
|
||||
you find necessary.
|
||||
|
||||
The MPI library distribution comes with several additional programs
|
||||
which are intended to demonstrate the use of the library, and provide
|
||||
a framework for testing it. There are a handful of test driver
|
||||
programs, in the files named 'mptest-X.c', where X is a digit. Also,
|
||||
there are some simple command-line utilities (in the 'utils'
|
||||
directory) for manipulating large numbers. These include:
|
||||
|
||||
basecvt.c A radix-conversion program, supporting bases from
|
||||
2 to 64 inclusive.
|
||||
|
||||
bbsrand.c A BBS (quadratic residue) pseudo-random number
|
||||
generator. The file 'bbsrand.c' is just the driver
|
||||
for the program; the real code lives in the files
|
||||
'bbs_rand.h' and 'bbs_rand.c'
|
||||
|
||||
dec2hex.c Converts decimal to hexadecimal
|
||||
|
||||
gcd.c Computes the greatest common divisor of two values.
|
||||
If invoked as 'xgcd', also computes constants x and
|
||||
y such that (a, b) = ax + by, in accordance with
|
||||
Bezout's identity.
|
||||
|
||||
hex2dec.c Converts hexadecimal to decimal
|
||||
|
||||
invmod.c Computes modular inverses
|
||||
|
||||
isprime.c Performs the Rabin-Miller probabilistic primality
|
||||
test on a number. Values which fail this test are
|
||||
definitely composite, and those which pass are very
|
||||
likely to be prime (although there are no guarantees)
|
||||
|
||||
lap.c Computes the order (least annihilating power) of
|
||||
a value v modulo m. Very dumb algorithm.
|
||||
|
||||
primegen.c Generates large (probable) primes.
|
||||
|
||||
prng.c A pseudo-random number generator based on the
|
||||
BBS generator code in 'bbs_rand.c'
|
||||
|
||||
sieve.c Implements the Sieve of Eratosthenes, using a big
|
||||
bitmap, to generate a list of prime numbers.
|
||||
|
||||
fact.c Computes the factorial of an arbitrary precision
|
||||
integer (iterative).
|
||||
|
||||
exptmod.c Computes arbitrary precision modular exponentiation
|
||||
from the command line (exptmod a b m -> a^b (mod m))
|
||||
|
||||
Most of these can be built from the Makefile that comes with the
|
||||
library. Try 'make tools', if your environment supports it. (If you
|
||||
are compiling on a Macintosh, I'm afraid you'll have to build them by
|
||||
hand -- fortunately, this is not difficult -- the library itself
|
||||
should compile just fine under Metrowerks CodeWarrior).
|
||||
|
||||
|
||||
Testing the Library
|
||||
-------------------
|
||||
|
||||
Automatic test vectors are included, in the form of a program called
|
||||
'mpi-test'. To build this program and run all the tests, simply
|
||||
invoke the shell script 'all-tests'. If all the tests pass, you
|
||||
should see a message:
|
||||
|
||||
All tests passed
|
||||
|
||||
If something went wrong, you'll get:
|
||||
|
||||
One or more tests failed.
|
||||
|
||||
If this happens, scan back through the preceding lines, to see which
|
||||
test failed. Any failure indicates a bug in the library, which needs
|
||||
to be fixed before it will give accurate results. If you get any such
|
||||
thing, please let me know, and I'll try to fix it. Please let me know
|
||||
what platform and compiler you were using, as well as which test
|
||||
failed. If a reason for failure was given, please send me that text
|
||||
as well.
|
||||
|
||||
If you're on a system such as the Macintosh, where the standard Unix
|
||||
build tools don't work, you can build the 'mpi-test' program manually,
|
||||
and run it by hand. This is tedious and obnoxious, sorry.
|
||||
|
||||
Further manual testing can be performed by building the manual testing
|
||||
programs, whose source is found in the 'tests' subdirectory. Each
|
||||
test is in a source file called 'mptest-X.c'. The Makefile contains a
|
||||
target to build all of them at once:
|
||||
|
||||
make tests
|
||||
|
||||
Read the comments at the top of each source file to see what the
|
||||
driver is supposed to test. You probably don't need to do this; these
|
||||
programs were only written to help me as I was developing the library.
|
||||
|
||||
The relevant files are:
|
||||
|
||||
mpi-test.c The source for the test driver
|
||||
|
||||
make-test-arrays A Perl script to generate some of the internal
|
||||
data structures used by mpi-test.c
|
||||
|
||||
test-arrays.txt The source file for make-test-arrays
|
||||
|
||||
all-tests A Bourne shell script which runs all the
|
||||
tests in the mpi-test suite
|
||||
|
||||
Running 'make mpi-test' should build the mpi-test program. If you
|
||||
cannot use make, here is what needs to be done:
|
||||
|
||||
(1) Use 'make-test-arrays' to generate the file 'test-info.c' from
|
||||
the 'test-arrays.txt' file. Since Perl can be found everywhere,
|
||||
even on the Macintosh, this should be no trouble. Under Unix,
|
||||
this looks like:
|
||||
|
||||
make-test-arrays test-arrays.txt > test-info.c
|
||||
|
||||
(2) Build the MPI library:
|
||||
|
||||
gcc -ansi -pedantic -Wall -c mpi.c
|
||||
|
||||
(3) Build the mpi-test program:
|
||||
|
||||
gcc -ansi -pedantic -Wall -o mpi-test mpi.o mpi-test.c
|
||||
|
||||
When you've got mpi-test, you can use 'all-tests' to run all the tests
|
||||
made available by mpi-test. If any of them fail, there should be a
|
||||
diagnostic indicating what went wrong. These are fairly high-level
|
||||
diagnostics, and won't really help you debug the problem; they're
|
||||
simply intended to help you isolate which function caused the problem.
|
||||
If you encounter a problem of this sort, feel free to e-mail me, and I
|
||||
will certainly attempt to help you debug it.
|
||||
|
||||
Note: Several of the tests hard-wired into 'mpi-test' operate under
|
||||
---- the assumption that you are using at least a 16-bit mp_digit
|
||||
type. If that is not true, several tests might fail, because
|
||||
of range problems with the maximum digit value.
|
||||
|
||||
If you are using an 8-bit digit, you will also need to
|
||||
modify the code for mp_read_raw(), which assumes that
|
||||
multiplication by 256 can be done with mp_mul_d(), a
|
||||
fact that fails when DIGIT_MAX is 255. You can replace
|
||||
the call with s_mp_lshd(), which will give you the same
|
||||
effect, and without doing as much work. :)
|
||||
|
||||
Acknowledgements:
|
||||
----------------
|
||||
|
||||
The algorithms used in this library were drawn primarily from Volume
|
||||
2 of Donald Knuth's magnum opus, _The Art of Computer Programming_,
|
||||
"Seminumerical Algorithms". Barrett's algorithm for modular reduction
|
||||
came from Menezes, van Oorschot, and Vanstone's _Handbook of Applied
|
||||
Cryptography_, Chapter 14.
|
||||
|
||||
Thanks are due to Tom St. Denis, for finding an obnoxious sign-related
|
||||
bug in mp_read_raw() that made things break on platforms which use
|
||||
signed chars.
|
||||
|
||||
About the Author
|
||||
----------------
|
||||
|
||||
This software was written by Michael J. Fromberger. You can contact
|
||||
the author as follows:
|
||||
|
||||
E-mail: <sting@linguist.dartmouth.edu>
|
||||
|
||||
Postal: 8000 Cummings Hall, Thayer School of Engineering
|
||||
Dartmouth College, Hanover, New Hampshire, USA
|
||||
|
||||
PGP key: http://linguist.dartmouth.edu/~sting/keys/mjf.html
|
||||
9736 188B 5AFA 23D6 D6AA BE0D 5856 4525 289D 9907
|
||||
|
||||
Last updated: 16-Jan-2000
|
||||
@@ -1,115 +0,0 @@
|
||||
#!/bin/sh
|
||||
# ***** BEGIN LICENSE BLOCK *****
|
||||
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
#
|
||||
# The contents of this file are subject to the Mozilla Public License Version
|
||||
# 1.1 (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
# http://www.mozilla.org/MPL/
|
||||
#
|
||||
# Software distributed under the License is distributed on an "AS IS" basis,
|
||||
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
# for the specific language governing rights and limitations under the
|
||||
# License.
|
||||
#
|
||||
# The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Michael J. Fromberger <sting@linguist.dartmouth.edu>.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1997
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
#
|
||||
# Alternatively, the contents of this file may be used under the terms of
|
||||
# either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
# in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
# of those above. If you wish to allow use of your version of this file only
|
||||
# under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
# use your version of this file under the terms of the MPL, indicate your
|
||||
# decision by deleting the provisions above and replace them with the notice
|
||||
# and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
# the provisions above, a recipient may use your version of this file under
|
||||
# the terms of any one of the MPL, the GPL or the LGPL.
|
||||
#
|
||||
# ***** END LICENSE BLOCK *****
|
||||
|
||||
ECHO=/bin/echo
|
||||
MAKE=gmake
|
||||
|
||||
$ECHO "\n** Running unit tests for MPI library\n"
|
||||
|
||||
# Build the mpi-test program, which comprises all the unit tests for
|
||||
# the MPI library...
|
||||
|
||||
$ECHO "Bringing mpi-test up to date ... "
|
||||
if $MAKE mpi-test ; then
|
||||
:
|
||||
else
|
||||
$ECHO " "
|
||||
$ECHO "Make failed to build mpi-test."
|
||||
$ECHO " "
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -x mpi-test ] ; then
|
||||
$ECHO " "
|
||||
$ECHO "Cannot find 'mpi-test' program, testing cannot continue."
|
||||
$ECHO " "
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Get the list of available test suites...
|
||||
tests=`mpi-test list | awk '{print $1}'`
|
||||
errs=0
|
||||
|
||||
# Run each test suite and check the result code of mpi-test
|
||||
for test in $tests ; do
|
||||
$ECHO "$test ... \c"
|
||||
if mpi-test $test ; then
|
||||
$ECHO "passed"
|
||||
else
|
||||
$ECHO "FAILED"
|
||||
errs=1
|
||||
fi
|
||||
done
|
||||
|
||||
# If any tests failed, we'll stop at this point
|
||||
if [ "$errs" = "0" ] ; then
|
||||
$ECHO "All unit tests passed"
|
||||
else
|
||||
$ECHO "One or more tests failed"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Now try to build the 'pi' program, and see if it can compute the
|
||||
# first two thousand digits of pi correctly
|
||||
$ECHO "\n** Running other tests\n"
|
||||
|
||||
$ECHO "Bringing 'pi' up to date ... "
|
||||
if $MAKE pi ; then
|
||||
:
|
||||
else
|
||||
$ECHO "\nMake failed to build pi.\n"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -x pi ] ; then
|
||||
$ECHO "\nCannot find 'pi' program; testing cannot continue.\n"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
./pi 2000 > /tmp/pi.tmp.$$
|
||||
if cmp tests/pi2k.txt /tmp/pi.tmp.$$ ; then
|
||||
$ECHO "Okay! The pi test passes."
|
||||
else
|
||||
$ECHO "Oops! The pi test failed. :("
|
||||
exit 1
|
||||
fi
|
||||
|
||||
rm -f /tmp/pi.tmp.$$
|
||||
|
||||
exit 0
|
||||
|
||||
# Here there be dragons
|
||||
@@ -1,11 +0,0 @@
|
||||
Within this directory, each of the files listed below is licensed under
|
||||
the terms given in the file LICENSE-MPL, also in this directory.
|
||||
|
||||
basecvt.pod
|
||||
gcd.pod
|
||||
invmod.pod
|
||||
isprime.pod
|
||||
lap.pod
|
||||
mpi-test.pod
|
||||
prime.txt
|
||||
prng.pod
|
||||
@@ -1,32 +0,0 @@
|
||||
The contents of this file are subject to the Mozilla Public
|
||||
License Version 1.1 (the "License"); you may not use this file
|
||||
except in compliance with the License. You may obtain a copy of
|
||||
the License at http://www.mozilla.org/MPL/
|
||||
|
||||
Software distributed under the License is distributed on an "AS
|
||||
IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
||||
implied. See the License for the specific language governing
|
||||
rights and limitations under the License.
|
||||
|
||||
The Original Code is the Netscape security libraries.
|
||||
|
||||
The Initial Developer of the Original Code is Netscape
|
||||
Communications Corporation. Portions created by Netscape are
|
||||
Copyright (C) 1994-2000 Netscape Communications Corporation. All
|
||||
Rights Reserved.
|
||||
|
||||
Contributor(s):
|
||||
|
||||
Alternatively, the contents of this file may be used under the
|
||||
terms of the GNU General Public License Version 2 or later (the
|
||||
"GPL"), in which case the provisions of the GPL are applicable
|
||||
instead of those above. If you wish to allow use of your
|
||||
version of this file only under the terms of the GPL and not to
|
||||
allow others to use your version of this file under the MPL,
|
||||
indicate your decision by deleting the provisions above and
|
||||
replace them with the notice and other provisions required by
|
||||
the GPL. If you do not delete the provisions above, a recipient
|
||||
may use your version of this file under either the MPL or the
|
||||
GPL.
|
||||
|
||||
|
||||
@@ -1,63 +0,0 @@
|
||||
=head1 NAME
|
||||
|
||||
basecvt - radix conversion for arbitrary precision integers
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
basecvt <ibase> <obase> [values]
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
The B<basecvt> program is a command-line tool for converting integers
|
||||
of arbitrary precision from one radix to another. The current version
|
||||
supports radix values from 2 (binary) to 64, inclusive. The first two
|
||||
command line arguments specify the input and output radix, in base 10.
|
||||
Any further arguments are taken to be integers notated in the input
|
||||
radix, and these are converted to the output radix. The output is
|
||||
written, one integer per line, to standard output.
|
||||
|
||||
When reading integers, only digits considered "valid" for the input
|
||||
radix are considered. Processing of an integer terminates when an
|
||||
invalid input digit is encountered. So, for example, if you set the
|
||||
input radix to 10 and enter '10ACF', B<basecvt> would assume that you
|
||||
had entered '10' and ignore the rest of the string.
|
||||
|
||||
If no values are provided, no output is written, but the program
|
||||
simply terminates with a zero exit status. Error diagnostics are
|
||||
written to standard error in the event of out-of-range radix
|
||||
specifications. Regardless of the actual values of the input and
|
||||
output radix, the radix arguments are taken to be in base 10 (decimal)
|
||||
notation.
|
||||
|
||||
=head1 DIGITS
|
||||
|
||||
For radices from 2-10, standard ASCII decimal digits 0-9 are used for
|
||||
both input and output. For radices from 11-36, the ASCII letters A-Z
|
||||
are also included, following the convention used in hexadecimal. In
|
||||
this range, input is accepted in either upper or lower case, although
|
||||
on output only lower-case letters are used.
|
||||
|
||||
For radices from 37-62, the output includes both upper- and lower-case
|
||||
ASCII letters, and case matters. In this range, case is distinguished
|
||||
both for input and for output values.
|
||||
|
||||
For radices 63 and 64, the characters '+' (plus) and '/' (forward
|
||||
solidus) are also used. These are derived from the MIME base64
|
||||
encoding scheme. The overall encoding is not the same as base64,
|
||||
because the ASCII digits are used for the bottom of the range, and the
|
||||
letters are shifted upward; however, the output will consist of the
|
||||
same character set.
|
||||
|
||||
This input and output behaviour is inherited from the MPI library used
|
||||
by B<basecvt>, and so is not configurable at runtime.
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
dec2hex(1), hex2dec(1)
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Michael J. Fromberger <sting@linguist.dartmouth.edu>
|
||||
Thayer School of Engineering, Hanover, New Hampshire, USA
|
||||
|
||||
$Date: 2000-07-14 00:44:31 $
|
||||
@@ -1,66 +0,0 @@
|
||||
#!/bin/sh
|
||||
# ***** BEGIN LICENSE BLOCK *****
|
||||
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
#
|
||||
# The contents of this file are subject to the Mozilla Public License Version
|
||||
# 1.1 (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
# http://www.mozilla.org/MPL/
|
||||
#
|
||||
# Software distributed under the License is distributed on an "AS IS" basis,
|
||||
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
# for the specific language governing rights and limitations under the
|
||||
# License.
|
||||
#
|
||||
# The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Michael J. Fromberger <sting@linguist.dartmouth.edu>.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Netscape Communications Corporation
|
||||
#
|
||||
# Alternatively, the contents of this file may be used under the terms of
|
||||
# either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
# in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
# of those above. If you wish to allow use of your version of this file only
|
||||
# under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
# use your version of this file under the terms of the MPL, indicate your
|
||||
# decision by deleting the provisions above and replace them with the notice
|
||||
# and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
# the provisions above, a recipient may use your version of this file under
|
||||
# the terms of any one of the MPL, the GPL or the LGPL.
|
||||
#
|
||||
# ***** END LICENSE BLOCK *****
|
||||
|
||||
# $Id: build,v 1.3 2005-02-02 22:28:22 gerv%gerv.net Exp $
|
||||
#
|
||||
|
||||
VERS="1.7p6"
|
||||
SECT="1"
|
||||
NAME="MPI Tools"
|
||||
|
||||
echo "Building manual pages ..."
|
||||
case $# in
|
||||
0)
|
||||
files=`ls *.pod`
|
||||
;;
|
||||
*)
|
||||
files=$*
|
||||
;;
|
||||
esac
|
||||
|
||||
for name in $files
|
||||
do
|
||||
echo -n "$name ... "
|
||||
# sname=`noext $name`
|
||||
sname=`basename $name .pod`
|
||||
pod2man --section="$SECT" --center="$NAME" --release="$VERS" $name > $sname.$SECT
|
||||
echo "(done)"
|
||||
done
|
||||
|
||||
echo "Finished building."
|
||||
|
||||
@@ -1,101 +0,0 @@
|
||||
Division
|
||||
|
||||
This describes the division algorithm used by the MPI library.
|
||||
|
||||
Input: a, b; a > b
|
||||
Compute: Q, R; a = Qb + R
|
||||
|
||||
The input numbers are normalized so that the high-order digit of b is
|
||||
at least half the radix. This guarantees that we have a reasonable
|
||||
way to guess at the digits of the quotient (this method was taken from
|
||||
Knuth, vol. 2, with adaptations).
|
||||
|
||||
To normalize, test the high-order digit of b. If it is less than half
|
||||
the radix, multiply both a and b by d, where:
|
||||
|
||||
radix - 1
|
||||
d = -----------
|
||||
bmax + 1
|
||||
|
||||
...where bmax is the high-order digit of b. Otherwise, set d = 1.
|
||||
|
||||
Given normalized values for a and b, let the notation a[n] denote the
|
||||
nth digit of a. Let #a be the number of significant figures of a (not
|
||||
including any leading zeroes).
|
||||
|
||||
Let R = 0
|
||||
Let p = #a - 1
|
||||
|
||||
while(p >= 0)
|
||||
do
|
||||
R = (R * radix) + a[p]
|
||||
p = p - 1
|
||||
while(R < b and p >= 0)
|
||||
|
||||
if(R < b)
|
||||
break
|
||||
|
||||
q = (R[#R - 1] * radix) + R[#R - 2]
|
||||
q = q / b[#b - 1]
|
||||
|
||||
T = b * q
|
||||
|
||||
while(T > R)
|
||||
q = q - 1
|
||||
T = T - b
|
||||
endwhile
|
||||
|
||||
R = R - T
|
||||
|
||||
Q = (Q * radix) + q
|
||||
|
||||
endwhile
|
||||
|
||||
At this point, Q is the quotient, and R is the normalized remainder.
|
||||
To denormalize R, compute:
|
||||
|
||||
R = (R / d)
|
||||
|
||||
At this point, you are finished.
|
||||
|
||||
------------------------------------------------------------------
|
||||
***** BEGIN LICENSE BLOCK *****
|
||||
Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
|
||||
The contents of this file are subject to the Mozilla Public License Version
|
||||
1.1 (the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
http://www.mozilla.org/MPL/
|
||||
|
||||
Software distributed under the License is distributed on an "AS IS" basis,
|
||||
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
for the specific language governing rights and limitations under the
|
||||
License.
|
||||
|
||||
The Original Code is the MPI Arbitrary Precision Integer Arithmetic
|
||||
library.
|
||||
|
||||
The Initial Developer of the Original Code is
|
||||
Michael J. Fromberger <sting@linguist.dartmouth.edu>
|
||||
Portions created by the Initial Developer are Copyright (C) 1998, 2000
|
||||
the Initial Developer. All Rights Reserved.
|
||||
|
||||
Contributor(s):
|
||||
|
||||
Alternatively, the contents of this file may be used under the terms of
|
||||
either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
of those above. If you wish to allow use of your version of this file only
|
||||
under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
use your version of this file under the terms of the MPL, indicate your
|
||||
decision by deleting the provisions above and replace them with the notice
|
||||
and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
the provisions above, a recipient may use your version of this file under
|
||||
the terms of any one of the MPL, the GPL or the LGPL.
|
||||
|
||||
***** END LICENSE BLOCK *****
|
||||
|
||||
$Id: div.txt,v 1.2 2005-02-02 22:28:22 gerv%gerv.net Exp $
|
||||
|
||||
|
||||
@@ -1,132 +0,0 @@
|
||||
Exponentiation
|
||||
|
||||
For exponentiation, the MPI library uses a simple and fairly standard
|
||||
square-and-multiply method. The algorithm is this:
|
||||
|
||||
Input: a, b
|
||||
Output: a ** b
|
||||
|
||||
s = 1
|
||||
|
||||
while(b != 0)
|
||||
if(b is odd)
|
||||
s = s * a
|
||||
endif
|
||||
|
||||
b = b / 2
|
||||
|
||||
x = x * x
|
||||
endwhile
|
||||
|
||||
return s
|
||||
|
||||
The modular exponentiation is done the same way, except replacing:
|
||||
|
||||
s = s * a
|
||||
|
||||
with
|
||||
s = (s * a) mod m
|
||||
|
||||
and replacing
|
||||
|
||||
x = x * x
|
||||
|
||||
with
|
||||
|
||||
x = (x * x) mod m
|
||||
|
||||
Here is a sample exponentiation using the MPI library, as compared to
|
||||
the same problem solved by the Unix 'bc' program on my system:
|
||||
|
||||
Computation of 2,381,283 ** 235
|
||||
|
||||
'bc' says:
|
||||
|
||||
4385CA4A804D199FBEAD95FAD0796FAD0D0B51FC9C16743C45568C789666985DB719\
|
||||
4D90E393522F74C9601262C0514145A49F3B53D00983F95FDFCEA3D0043ECEF6227E\
|
||||
6FB59C924C3EE74447B359B5BF12A555D46CB819809EF423F004B55C587D6F0E8A55\
|
||||
4988036A42ACEF9F71459F97CEF6E574BD7373657111648626B1FF8EE15F663B2C0E\
|
||||
6BBE5082D4CDE8E14F263635AE8F35DB2C280819517BE388B5573B84C5A19C871685\
|
||||
FD408A6471F9D6AFAF5129A7548EAE926B40874B340285F44765BF5468CE20A13267\
|
||||
CD88CE6BC786ACED36EC7EA50F67FF27622575319068A332C3C0CB23E26FB55E26F4\
|
||||
5F732753A52B8E2FB4D4F42D894242613CA912A25486C3DEC9C66E5DB6182F6C1761\
|
||||
CF8CD0D255BE64B93836B27D452AE38F950EB98B517D4CF50D48F0165EF0CCCE1F5C\
|
||||
49BF18219FDBA0EEDD1A7E8B187B70C2BAED5EC5C6821EF27FAFB1CFF70111C52235\
|
||||
5E948B93A015AA1AE152B110BB5658CB14D3E45A48BFE7F082C1182672A455A695CD\
|
||||
A1855E8781E625F25B41B516E77F589FA420C3B058861EA138CF7A2C58DB3C7504FD\
|
||||
D29554D78237834CC5AE710D403CC4F6973D5012B7E117A8976B14A0B5AFA889BD47\
|
||||
92C461F0F96116F00A97AE9E83DC5203680CAF9A18A062566C145650AB86BE4F907F\
|
||||
A9F7AB4A700B29E1E5BACCD6DCBFA513E10832815F710807EED2E279081FEC61D619\
|
||||
AB270BEB3D3A1787B35A9DD41A8766CF21F3B5C693B3BAB1C2FA14A4ED202BC35743\
|
||||
E5CBE2391624D4F8C9BFBBC78D69764E7C6C5B11BF005677BFAD17D9278FFC1F158F\
|
||||
1B3683FF7960FA0608103792C4163DC0AF3E06287BB8624F8FE3A0FFBDF82ACECA2F\
|
||||
CFFF2E1AC93F3CA264A1B
|
||||
|
||||
MPI says:
|
||||
|
||||
4385CA4A804D199FBEAD95FAD0796FAD0D0B51FC9C16743C45568C789666985DB719\
|
||||
4D90E393522F74C9601262C0514145A49F3B53D00983F95FDFCEA3D0043ECEF6227E\
|
||||
6FB59C924C3EE74447B359B5BF12A555D46CB819809EF423F004B55C587D6F0E8A55\
|
||||
4988036A42ACEF9F71459F97CEF6E574BD7373657111648626B1FF8EE15F663B2C0E\
|
||||
6BBE5082D4CDE8E14F263635AE8F35DB2C280819517BE388B5573B84C5A19C871685\
|
||||
FD408A6471F9D6AFAF5129A7548EAE926B40874B340285F44765BF5468CE20A13267\
|
||||
CD88CE6BC786ACED36EC7EA50F67FF27622575319068A332C3C0CB23E26FB55E26F4\
|
||||
5F732753A52B8E2FB4D4F42D894242613CA912A25486C3DEC9C66E5DB6182F6C1761\
|
||||
CF8CD0D255BE64B93836B27D452AE38F950EB98B517D4CF50D48F0165EF0CCCE1F5C\
|
||||
49BF18219FDBA0EEDD1A7E8B187B70C2BAED5EC5C6821EF27FAFB1CFF70111C52235\
|
||||
5E948B93A015AA1AE152B110BB5658CB14D3E45A48BFE7F082C1182672A455A695CD\
|
||||
A1855E8781E625F25B41B516E77F589FA420C3B058861EA138CF7A2C58DB3C7504FD\
|
||||
D29554D78237834CC5AE710D403CC4F6973D5012B7E117A8976B14A0B5AFA889BD47\
|
||||
92C461F0F96116F00A97AE9E83DC5203680CAF9A18A062566C145650AB86BE4F907F\
|
||||
A9F7AB4A700B29E1E5BACCD6DCBFA513E10832815F710807EED2E279081FEC61D619\
|
||||
AB270BEB3D3A1787B35A9DD41A8766CF21F3B5C693B3BAB1C2FA14A4ED202BC35743\
|
||||
E5CBE2391624D4F8C9BFBBC78D69764E7C6C5B11BF005677BFAD17D9278FFC1F158F\
|
||||
1B3683FF7960FA0608103792C4163DC0AF3E06287BB8624F8FE3A0FFBDF82ACECA2F\
|
||||
CFFF2E1AC93F3CA264A1B
|
||||
|
||||
Diff says:
|
||||
% diff bc.txt mp.txt
|
||||
%
|
||||
|
||||
------------------------------------------------------------------
|
||||
***** BEGIN LICENSE BLOCK *****
|
||||
Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
|
||||
The contents of this file are subject to the Mozilla Public License Version
|
||||
1.1 (the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
http://www.mozilla.org/MPL/
|
||||
|
||||
Software distributed under the License is distributed on an "AS IS" basis,
|
||||
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
for the specific language governing rights and limitations under the
|
||||
License.
|
||||
|
||||
The Original Code is the MPI Arbitrary Precision Integer Arithmetic
|
||||
library.
|
||||
|
||||
The Initial Developer of the Original Code is
|
||||
Michael J. Fromberger <sting@linguist.dartmouth.edu>
|
||||
Portions created by the Initial Developer are Copyright (C) 1998, 2000
|
||||
the Initial Developer. All Rights Reserved.
|
||||
|
||||
Contributor(s):
|
||||
|
||||
Alternatively, the contents of this file may be used under the terms of
|
||||
either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
of those above. If you wish to allow use of your version of this file only
|
||||
under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
use your version of this file under the terms of the MPL, indicate your
|
||||
decision by deleting the provisions above and replace them with the notice
|
||||
and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
the provisions above, a recipient may use your version of this file under
|
||||
the terms of any one of the MPL, the GPL or the LGPL.
|
||||
|
||||
***** END LICENSE BLOCK *****
|
||||
|
||||
$Id: expt.txt,v 1.2 2005-02-02 22:28:22 gerv%gerv.net Exp $
|
||||
|
||||
|
||||
|
||||
@@ -1,27 +0,0 @@
|
||||
=head1 NAME
|
||||
|
||||
gcd - compute greatest common divisor of two integers
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
gcd <a> <b>
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
The B<gcd> program computes the greatest common divisor of two
|
||||
arbitrary-precision integers I<a> and I<b>. The result is written in
|
||||
standard decimal notation to the standard output.
|
||||
|
||||
If I<b> is zero, B<gcd> will print an error message and exit.
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
invmod(1), isprime(1), lap(1)
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Michael J. Fromberger <sting@linguist.dartmouth.edu>
|
||||
Thayer School of Engineering, Hanover, New Hampshire, USA
|
||||
|
||||
$Date: 2000-07-14 00:44:32 $
|
||||
|
||||
@@ -1,33 +0,0 @@
|
||||
=head1 NAME
|
||||
|
||||
invmod - compute modular inverse of an integer
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
invmod <a> <m>
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
The B<invmod> program computes the inverse of I<a>, modulo I<m>, if
|
||||
that inverse exists. Both I<a> and I<m> are arbitrary-precision
|
||||
integers in decimal notation. The result is written in standard
|
||||
decimal notation to the standard output.
|
||||
|
||||
If there is no inverse, the message:
|
||||
|
||||
No inverse
|
||||
|
||||
...will be printed to the standard output (an inverse exists if and
|
||||
only if the greatest common divisor of I<a> and I<m> is 1).
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
gcd(1), isprime(1), lap(1)
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Michael J. Fromberger <sting@linguist.dartmouth.edu>
|
||||
Thayer School of Engineering, Hanover, New Hampshire, USA
|
||||
|
||||
$Date: 2000-07-14 00:44:33 $
|
||||
|
||||
@@ -1,62 +0,0 @@
|
||||
=head1 NAME
|
||||
|
||||
isprime - probabilistic primality testing
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
isprime <a>
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
The B<isprime> program attempts to determine whether the arbitrary
|
||||
precision integer I<a> is prime. It first tests I<a> for divisibility
|
||||
by the first 170 or so small primes, and assuming I<a> is not
|
||||
divisible by any of these, applies 15 iterations of the Rabin-Miller
|
||||
probabilistic primality test.
|
||||
|
||||
If the program discovers that the number is composite, it will print:
|
||||
|
||||
Not prime (reason)
|
||||
|
||||
Where I<reason> is either:
|
||||
|
||||
divisible by small prime x
|
||||
|
||||
Or:
|
||||
|
||||
failed nth pseudoprime test
|
||||
|
||||
In the first case, I<x> indicates the first small prime factor that
|
||||
was found. In the second case, I<n> indicates which of the
|
||||
pseudoprime tests failed (numbered from 1).
|
||||
|
||||
If this happens, the number is definitely not prime. However, if the
|
||||
number succeeds, this message results:
|
||||
|
||||
Probably prime, 1 in 4^15 chance of false positive
|
||||
|
||||
If this happens, the number is prime with very high probability, but
|
||||
its primality has not been absolutely proven, only demonstrated to a
|
||||
very convincing degree.
|
||||
|
||||
The value I<a> can be input in standard decimal notation, or, if it is
|
||||
prefixed with I<0x>, it will be read as hexadecimal.
|
||||
|
||||
=head1 ENVIRONMENT
|
||||
|
||||
You can control how many iterations of Rabin-Miller are performed on
|
||||
the candidate number by setting the I<RM_TESTS> environment variable
|
||||
to an integer value before starting up B<isprime>. This will change
|
||||
the output slightly if the number passes all the tests.
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
gcd(1), invmod(1), lap(1)
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Michael J. Fromberger <sting@linguist.dartmouth.edu>
|
||||
Thayer School of Engineering, Hanover, New Hampshire, USA
|
||||
|
||||
$Date: 2000-07-14 00:44:33 $
|
||||
|
||||
@@ -1,35 +0,0 @@
|
||||
=head1 NAME
|
||||
|
||||
lap - compute least annihilating power of a number
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
lap <a> <m>
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
The B<lap> program computes the order of I<a> modulo I<m>, for
|
||||
arbitrary precision integers I<a> and I<m>. The B<order> of I<a>
|
||||
modulo I<m> is defined as the smallest positive value I<n> for which
|
||||
I<a> raised to the I<n>th power, modulo I<m>, is equal to 1. The
|
||||
order may not exist, if I<m> is composite.
|
||||
|
||||
=head1 RESTRICTIONS
|
||||
|
||||
This program is very slow, especially for large moduli. It is
|
||||
intended as a way to help find primitive elements in a modular field,
|
||||
but it does not do so in a particularly efficient manner. It was
|
||||
written simply to help verify that a particular candidate does not
|
||||
have an obviously short cycle mod I<m>.
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
gcd(1), invmod(1), isprime(1)
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Michael J. Fromberger <sting@linguist.dartmouth.edu>
|
||||
Thayer School of Engineering, Hanover, New Hampshire, USA
|
||||
|
||||
$Date: 2000-07-14 00:44:34 $
|
||||
|
||||
@@ -1,49 +0,0 @@
|
||||
=head1 NAME
|
||||
|
||||
mpi-test - automated test program for MPI library
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
mpi-test <suite-name> [quiet]
|
||||
mpi-test list
|
||||
mpi-test help
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
The B<mpi-test> program is a general unit test driver for the MPI
|
||||
library. It is used to verify that the library works as it is
|
||||
supposed to on your architecture. As with most such things, passing
|
||||
all the tests in B<mpi-test> does not guarantee the code is correct,
|
||||
but if any of them fail, there are certainly problems.
|
||||
|
||||
Each major function of the library can be tested individually. For a
|
||||
list of the test suites understood by B<mpi-test>, run it with the
|
||||
I<list> command line option:
|
||||
|
||||
mpi-test list
|
||||
|
||||
This will display a list of the available test suites and a brief
|
||||
synopsis of what each one does. For a brief overview of this
|
||||
document, run B<mpi-test> I<help>.
|
||||
|
||||
B<mpi-test> exits with a zero status if the selected test succeeds, or
|
||||
a nonzero status if it fails. If a I<suite-name> which is not
|
||||
understood by B<mpi-test> is given, a diagnostic is printed to the
|
||||
standard error, and the program exits with a result code of 2. If a
|
||||
test fails, the result code will be 1, and a diagnostic is ordinarily
|
||||
printed to the standard error. However, if the I<quiet> option is
|
||||
provided, these diagnostics will be suppressed.
|
||||
|
||||
=head1 RESTRICTIONS
|
||||
|
||||
Only a few canned test cases are provided. The solutions have been
|
||||
verified using the GNU bc(1) program, so bugs there may cause problems
|
||||
here; however, this is very unlikely, so if a test fails, it is almost
|
||||
certainly my fault, not bc(1)'s.
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Michael J. Fromberger <sting@linguist.dartmouth.edu>
|
||||
Thayer School of Engineering, Hanover, New Hampshire, USA
|
||||
|
||||
$Date: 2000-07-14 00:44:34 $
|
||||
@@ -1,114 +0,0 @@
|
||||
Multiplication
|
||||
|
||||
This describes the multiplication algorithm used by the MPI library.
|
||||
|
||||
This is basically a standard "schoolbook" algorithm. It is slow --
|
||||
O(mn) for m = #a, n = #b -- but easy to implement and verify.
|
||||
Basically, we run two nested loops, as illustrated here (R is the
|
||||
radix):
|
||||
|
||||
k = 0
|
||||
for j <- 0 to (#b - 1)
|
||||
for i <- 0 to (#a - 1)
|
||||
w = (a[j] * b[i]) + k + c[i+j]
|
||||
c[i+j] = w mod R
|
||||
k = w div R
|
||||
endfor
|
||||
c[i+j] = k;
|
||||
k = 0;
|
||||
endfor
|
||||
|
||||
It is necessary that 'w' have room for at least two radix R digits.
|
||||
The product of any two digits in radix R is at most:
|
||||
|
||||
(R - 1)(R - 1) = R^2 - 2R + 1
|
||||
|
||||
Since a two-digit radix-R number can hold any value up to R^2 - 1,
|
||||
this insures that the product will fit into the two-digit register.
|
||||
|
||||
To insure that two digits is enough for w, we must also show that
|
||||
there is room for the carry-in from the previous multiplication, and
|
||||
the current value of the product digit that is being recomputed.
|
||||
Assuming each of these may be as big as R - 1 (and no larger,
|
||||
certainly), two digits will be enough if and only if:
|
||||
|
||||
(R^2 - 2R + 1) + 2(R - 1) <= R^2 - 1
|
||||
|
||||
Solving this equation shows that, indeed, this is the case:
|
||||
|
||||
R^2 - 2R + 1 + 2R - 2 <= R^2 - 1
|
||||
|
||||
R^2 - 1 <= R^2 - 1
|
||||
|
||||
This suggests that a good radix would be one more than the largest
|
||||
value that can be held in half a machine word -- so, for example, as
|
||||
in this implementation, where we used a radix of 65536 on a machine
|
||||
with 4-byte words. Another advantage of a radix of this sort is that
|
||||
binary-level operations are easy on numbers in this representation.
|
||||
|
||||
Here's an example multiplication worked out longhand in radix-10,
|
||||
using the above algorithm:
|
||||
|
||||
a = 999
|
||||
b = x 999
|
||||
-------------
|
||||
p = 98001
|
||||
|
||||
w = (a[jx] * b[ix]) + kin + c[ix + jx]
|
||||
c[ix+jx] = w % RADIX
|
||||
k = w / RADIX
|
||||
product
|
||||
ix jx a[jx] b[ix] kin w c[i+j] kout 000000
|
||||
0 0 9 9 0 81+0+0 1 8 000001
|
||||
0 1 9 9 8 81+8+0 9 8 000091
|
||||
0 2 9 9 8 81+8+0 9 8 000991
|
||||
8 0 008991
|
||||
1 0 9 9 0 81+0+9 0 9 008901
|
||||
1 1 9 9 9 81+9+9 9 9 008901
|
||||
1 2 9 9 9 81+9+8 8 9 008901
|
||||
9 0 098901
|
||||
2 0 9 9 0 81+0+9 0 9 098001
|
||||
2 1 9 9 9 81+9+8 8 9 098001
|
||||
2 2 9 9 9 81+9+9 9 9 098001
|
||||
|
||||
------------------------------------------------------------------
|
||||
***** BEGIN LICENSE BLOCK *****
|
||||
Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
|
||||
The contents of this file are subject to the Mozilla Public License Version
|
||||
1.1 (the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
http://www.mozilla.org/MPL/
|
||||
|
||||
Software distributed under the License is distributed on an "AS IS" basis,
|
||||
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
for the specific language governing rights and limitations under the
|
||||
License.
|
||||
|
||||
The Original Code is the MPI Arbitrary Precision Integer Arithmetic
|
||||
library.
|
||||
|
||||
The Initial Developer of the Original Code is
|
||||
Michael J. Fromberger <sting@linguist.dartmouth.edu>
|
||||
Portions created by the Initial Developer are Copyright (C) 1998, 2000
|
||||
the Initial Developer. All Rights Reserved.
|
||||
|
||||
Contributor(s):
|
||||
|
||||
Alternatively, the contents of this file may be used under the terms of
|
||||
either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
of those above. If you wish to allow use of your version of this file only
|
||||
under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
use your version of this file under the terms of the MPL, indicate your
|
||||
decision by deleting the provisions above and replace them with the notice
|
||||
and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
the provisions above, a recipient may use your version of this file under
|
||||
the terms of any one of the MPL, the GPL or the LGPL.
|
||||
|
||||
***** END LICENSE BLOCK *****
|
||||
|
||||
$Id: mul.txt,v 1.2 2005-02-02 22:28:22 gerv%gerv.net Exp $
|
||||
|
||||
|
||||
@@ -1,90 +0,0 @@
|
||||
This file describes how pi is computed by the program in 'pi.c' (see
|
||||
the utils subdirectory).
|
||||
|
||||
Basically, we use Machin's formula, which is what everyone in the
|
||||
world uses as a simple method for computing approximations to pi.
|
||||
This works for up to a few thousand digits without too much effort.
|
||||
Beyond that, though, it gets too slow.
|
||||
|
||||
Machin's formula states:
|
||||
|
||||
pi := 16 * arctan(1/5) - 4 * arctan(1/239)
|
||||
|
||||
We compute this in integer arithmetic by first multiplying everything
|
||||
through by 10^d, where 'd' is the number of digits of pi we wanted to
|
||||
compute. It turns out, the last few digits will be wrong, but the
|
||||
number that are wrong is usually very small (ordinarily only 2-3).
|
||||
Having done this, we compute the arctan() function using the formula:
|
||||
|
||||
1 1 1 1 1
|
||||
arctan(1/x) := --- - ----- + ----- - ----- + ----- - ...
|
||||
x 3 x^3 5 x^5 7 x^7 9 x^9
|
||||
|
||||
This is done iteratively by computing the first term manually, and
|
||||
then iteratively dividing x^2 and k, where k = 3, 5, 7, ... out of the
|
||||
current figure. This is then added to (or subtracted from) a running
|
||||
sum, as appropriate. The iteration continues until we overflow our
|
||||
available precision and the current figure goes to zero under integer
|
||||
division. At that point, we're finished.
|
||||
|
||||
Actually, we get a couple extra bits of precision out of the fact that
|
||||
we know we're computing y * arctan(1/x), by setting up the multiplier
|
||||
as:
|
||||
|
||||
y * 10^d
|
||||
|
||||
... instead of just 10^d. There is also a bit of cleverness in how
|
||||
the loop is constructed, to avoid special-casing the first term.
|
||||
Check out the code for arctan() in 'pi.c', if you are interested in
|
||||
seeing how it is set up.
|
||||
|
||||
Thanks to Jason P. for this algorithm, which I assembled from notes
|
||||
and programs found on his cool "Pile of Pi Programs" page, at:
|
||||
|
||||
http://www.isr.umd.edu/~jasonp/pipage.html
|
||||
|
||||
Thanks also to Henrik Johansson <Henrik.Johansson@Nexus.Comm.SE>, from
|
||||
whose pi program I borrowed the clever idea of pre-multiplying by x in
|
||||
order to avoid a special case on the loop iteration.
|
||||
|
||||
------------------------------------------------------------------
|
||||
***** BEGIN LICENSE BLOCK *****
|
||||
Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
|
||||
The contents of this file are subject to the Mozilla Public License Version
|
||||
1.1 (the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
http://www.mozilla.org/MPL/
|
||||
|
||||
Software distributed under the License is distributed on an "AS IS" basis,
|
||||
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
for the specific language governing rights and limitations under the
|
||||
License.
|
||||
|
||||
The Original Code is the MPI Arbitrary Precision Integer Arithmetic
|
||||
library.
|
||||
|
||||
The Initial Developer of the Original Code is
|
||||
Michael J. Fromberger <sting@linguist.dartmouth.edu>
|
||||
Portions created by the Initial Developer are Copyright (C) 1998, 2000
|
||||
the Initial Developer. All Rights Reserved.
|
||||
|
||||
Contributor(s):
|
||||
|
||||
Alternatively, the contents of this file may be used under the terms of
|
||||
either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
of those above. If you wish to allow use of your version of this file only
|
||||
under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
use your version of this file under the terms of the MPL, indicate your
|
||||
decision by deleting the provisions above and replace them with the notice
|
||||
and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
the provisions above, a recipient may use your version of this file under
|
||||
the terms of any one of the MPL, the GPL or the LGPL.
|
||||
|
||||
***** END LICENSE BLOCK *****
|
||||
|
||||
$Id: pi.txt,v 1.2 2005-02-02 22:28:22 gerv%gerv.net Exp $
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,41 +0,0 @@
|
||||
=head1 NAME
|
||||
|
||||
prng - pseudo-random number generator
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
prng [count]
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
B<Prng> generates 32-bit pseudo-random integers using the
|
||||
Blum-Blum-Shub (BBS) quadratic residue generator. It is seeded using
|
||||
the standard C library's rand() function, which is itself seeded from the
|
||||
system clock and the process ID number. Thus, the values generated
|
||||
are not particularly useful for cryptographic applications, but they
|
||||
are in general much better than the typical output of the usual
|
||||
multiplicative congruential generator used by most runtime libraries.
|
||||
|
||||
You may optionally specify how many random values should be generated
|
||||
by giving a I<count> argument on the command line. If you do not
|
||||
specify a count, only one random value will be generated. The results
|
||||
are output to the standard output in decimal notation, one value per
|
||||
line.
|
||||
|
||||
=head1 RESTRICTIONS
|
||||
|
||||
As stated above, B<prng> uses the C library's rand() function to seed
|
||||
the generator, so it is not terribly suitable for cryptographic
|
||||
applications. Also note that each time you run the program, a new
|
||||
seed is generated, so it is better to run it once with a I<count>
|
||||
parameter than it is to run it multiple times to generate several
|
||||
values.
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Michael J. Fromberger <sting@linguist.dartmouth.edu>
|
||||
Copyright (C) 1998 Michael J. Fromberger, All Rights Reserved
|
||||
Thayer School of Engineering, Dartmouth College, Hanover, NH USA
|
||||
|
||||
$Date: 2000-07-14 00:44:36 $
|
||||
|
||||
@@ -1,121 +0,0 @@
|
||||
Modular Reduction
|
||||
|
||||
Usually, modular reduction is accomplished by long division, using the
|
||||
mp_div() or mp_mod() functions. However, when performing modular
|
||||
exponentiation, you spend a lot of time reducing by the same modulus
|
||||
again and again. For this purpose, doing a full division for each
|
||||
multiplication is quite inefficient.
|
||||
|
||||
For this reason, the mp_exptmod() function does not perform modular
|
||||
reductions in the usual way, but instead takes advantage of an
|
||||
algorithm due to Barrett, as described by Menezes, van Oorschot and
|
||||
Vanstone in their book _Handbook of Applied Cryptography_, published
|
||||
by the CRC Press (see Chapter 14 for details). This method reduces
|
||||
most of the computation of reduction to efficient shifting and masking
|
||||
operations, and avoids the multiple-precision division entirely.
|
||||
|
||||
Here is a brief synopsis of Barrett reduction, as it is implemented in
|
||||
this library.
|
||||
|
||||
Let b denote the radix of the computation (one more than the maximum
|
||||
value that can be denoted by an mp_digit). Let m be the modulus, and
|
||||
let k be the number of significant digits of m. Let x be the value to
|
||||
be reduced modulo m. By the Division Theorem, there exist unique
|
||||
integers Q and R such that:
|
||||
|
||||
x = Qm + R, 0 <= R < m
|
||||
|
||||
Barrett reduction takes advantage of the fact that you can easily
|
||||
approximate Q to within two, given a value M such that:
|
||||
|
||||
$$M = \left\lfloor \frac{b^{2k}}{m} \right\rfloor$$
|
||||
|
||||
Computation of M requires a full-precision division step, so if you
|
||||
are only doing a single reduction by m, you gain no advantage.
|
||||
However, when multiple reductions by the same m are required, this
|
||||
division need only be done once, beforehand. Using this, we can use
|
||||
the following equation to compute Q', an approximation of Q:
|
||||
|
||||
$$Q' = \left\lfloor \frac{\left\lfloor x / b^{k-1} \right\rfloor \cdot M}{b^{k+1}} \right\rfloor$$
|
||||
|
||||
The divisions by b^(k-1) and b^(k+1) and the floor() functions can be
|
||||
efficiently implemented with shifts and masks, leaving only a single
|
||||
multiplication to be performed to get this approximation. It can be
|
||||
shown that Q - 2 <= Q' <= Q, so in the worst case, we can get out with
|
||||
two additional subtractions to bring the value into line with the
|
||||
actual value of Q.
|
||||
|
||||
Once we've got Q', we basically multiply that by m and subtract from
|
||||
x, yielding:
|
||||
|
||||
x - Q'm = Qm + R - Q'm
|
||||
|
||||
Since we know the constraint on Q', this is one of:
|
||||
|
||||
R
|
||||
m + R
|
||||
2m + R
|
||||
|
||||
Since R < m by the Division Theorem, we can simply subtract off m
|
||||
until we get a value in the correct range, which will happen with no
|
||||
more than 2 subtractions:
|
||||
|
||||
v = x - Q'm
|
||||
|
||||
while(v >= m)
|
||||
v = v - m
|
||||
endwhile
|
||||
|
||||
|
||||
In random performance trials, modular exponentiation using this method
|
||||
of reduction gave around a 40% speedup over using the division for
|
||||
reduction.
|
||||
|
||||
------------------------------------------------------------------
|
||||
***** BEGIN LICENSE BLOCK *****
|
||||
Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
|
||||
The contents of this file are subject to the Mozilla Public License Version
|
||||
1.1 (the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
http://www.mozilla.org/MPL/
|
||||
|
||||
Software distributed under the License is distributed on an "AS IS" basis,
|
||||
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
for the specific language governing rights and limitations under the
|
||||
License.
|
||||
|
||||
The Original Code is the MPI Arbitrary Precision Integer Arithmetic
|
||||
library.
|
||||
|
||||
The Initial Developer of the Original Code is
|
||||
Michael J. Fromberger <sting@linguist.dartmouth.edu>
|
||||
Portions created by the Initial Developer are Copyright (C) 1998, 2000
|
||||
the Initial Developer. All Rights Reserved.
|
||||
|
||||
Contributor(s):
|
||||
|
||||
Alternatively, the contents of this file may be used under the terms of
|
||||
either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
of those above. If you wish to allow use of your version of this file only
|
||||
under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
use your version of this file under the terms of the MPL, indicate your
|
||||
decision by deleting the provisions above and replace them with the notice
|
||||
and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
the provisions above, a recipient may use your version of this file under
|
||||
the terms of any one of the MPL, the GPL or the LGPL.
|
||||
|
||||
***** END LICENSE BLOCK *****
|
||||
|
||||
$Id: redux.txt,v 1.2 2005-02-02 22:28:22 gerv%gerv.net Exp $
|
||||
@@ -1,87 +0,0 @@
|
||||
Square Root
|
||||
|
||||
A simple iterative algorithm is used to compute the greatest integer
|
||||
less than or equal to the square root. Essentially, this is Newton's
|
||||
linear approximation, computed by finding successive values of the
|
||||
equation:
|
||||
|
||||
$$x[k+1] = x[k] - \frac{x[k]^2 - V}{2\,x[k]}$$
|
||||
|
||||
...where V is the value for which the square root is being sought. In
|
||||
essence, what is happening here is that we guess a value for the
|
||||
square root, then figure out how far off we were by squaring our guess
|
||||
and subtracting the target. Using this value, we compute a linear
|
||||
approximation for the error, and adjust the "guess". We keep doing
|
||||
this until the precision gets low enough that the above equation
|
||||
yields a quotient of zero. At this point, our last guess is one
|
||||
greater than the square root we're seeking.
|
||||
|
||||
The initial guess is computed by dividing V by 4, which is a heuristic
|
||||
I have found to be fairly good on average. This also has the
|
||||
advantage of being very easy to compute efficiently, even for large
|
||||
values.
|
||||
|
||||
So, the resulting algorithm works as follows:
|
||||
|
||||
x = V / 4 /* compute initial guess */
|
||||
|
||||
loop
|
||||
t = (x * x) - V /* Compute absolute error */
|
||||
u = 2 * x /* Adjust by tangent slope */
|
||||
t = t / u
|
||||
|
||||
/* Loop is done if error is zero */
|
||||
if(t == 0)
|
||||
break
|
||||
|
||||
/* Adjust guess by error term */
|
||||
x = x - t
|
||||
end
|
||||
|
||||
x = x - 1
|
||||
|
||||
The result of the computation is the value of x.
|
||||
|
||||
------------------------------------------------------------------
|
||||
***** BEGIN LICENSE BLOCK *****
|
||||
Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
|
||||
The contents of this file are subject to the Mozilla Public License Version
|
||||
1.1 (the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
http://www.mozilla.org/MPL/
|
||||
|
||||
Software distributed under the License is distributed on an "AS IS" basis,
|
||||
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
for the specific language governing rights and limitations under the
|
||||
License.
|
||||
|
||||
The Original Code is the MPI Arbitrary Precision Integer Arithmetic
|
||||
library.
|
||||
|
||||
The Initial Developer of the Original Code is
|
||||
Michael J. Fromberger <sting@linguist.dartmouth.edu>
|
||||
Portions created by the Initial Developer are Copyright (C) 1998, 2000
|
||||
the Initial Developer. All Rights Reserved.
|
||||
|
||||
Contributor(s):
|
||||
|
||||
Alternatively, the contents of this file may be used under the terms of
|
||||
either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
of those above. If you wish to allow use of your version of this file only
|
||||
under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
use your version of this file under the terms of the MPL, indicate your
|
||||
decision by deleting the provisions above and replace them with the notice
|
||||
and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
the provisions above, a recipient may use your version of this file under
|
||||
the terms of any one of the MPL, the GPL or the LGPL.
|
||||
|
||||
***** END LICENSE BLOCK *****
|
||||
|
||||
$Id: sqrt.txt,v 1.2 2005-02-02 22:28:22 gerv%gerv.net Exp $
|
||||
|
||||
|
||||
@@ -1,109 +0,0 @@
|
||||
Squaring Algorithm
|
||||
|
||||
When you are squaring a value, you can take advantage of the fact that
|
||||
half the multiplications performed by the more general multiplication
|
||||
algorithm (see 'mul.txt' for a description) are redundant when the
|
||||
multiplicand equals the multiplier.
|
||||
|
||||
In particular, the modified algorithm is:
|
||||
|
||||
k = 0
|
||||
for j <- 0 to (#a - 1)
|
||||
w = c[2*j] + (a[j] ^ 2);
|
||||
k = w div R
|
||||
|
||||
for i <- j+1 to (#a - 1)
|
||||
w = (2 * a[j] * a[i]) + k + c[i+j]
|
||||
c[i+j] = w mod R
|
||||
k = w div R
|
||||
endfor
|
||||
c[i+j] = k;
|
||||
k = 0;
|
||||
endfor
|
||||
|
||||
On the surface, this looks identical to the multiplication algorithm;
|
||||
however, note the following differences:
|
||||
|
||||
- precomputation of the leading term in the outer loop
|
||||
|
||||
- i runs from j+1 instead of from zero
|
||||
|
||||
- doubling of a[i] * a[j] in the inner product
|
||||
|
||||
Unfortunately, the construction of the inner product is such that we
|
||||
need more than two digits to represent the inner product, in some
|
||||
cases. In a C implementation, this means that some gymnastics must be
|
||||
performed in order to handle overflow, for which C has no direct
|
||||
abstraction. We do this by observing the following:
|
||||
|
||||
If we have multiplied a[i] and a[j], and the product is more than half
|
||||
the maximum value expressible in two digits, then doubling this result
|
||||
will overflow into a third digit. If this occurs, we take note of the
|
||||
overflow, and double it anyway -- C integer arithmetic ignores
|
||||
overflow, so the two digits we get back should still be valid, modulo
|
||||
the overflow.
|
||||
|
||||
Having doubled this value, we now have to add in the remainders and
|
||||
the digits already computed by earlier steps. If we did not overflow
|
||||
in the previous step, we might still cause an overflow here. That
|
||||
will happen whenever the maximum value expressible in two digits, less
|
||||
the amount we have to add, is less than the result of the previous
|
||||
step. Thus, the overflow computation is:
|
||||
|
||||
|
||||
u = 0
|
||||
w = a[i] * a[j]
|
||||
|
||||
if(w > (R - 1)/ 2)
|
||||
u = 1;
|
||||
|
||||
w = w * 2
|
||||
v = c[i + j] + k
|
||||
|
||||
if(u == 0 && (R - 1 - v) < w)
|
||||
u = 1
|
||||
|
||||
If there is an overflow, u will be 1, otherwise u will be 0. The rest
|
||||
of the parameters are the same as they are in the above description.
|
||||
|
||||
------------------------------------------------------------------
|
||||
***** BEGIN LICENSE BLOCK *****
|
||||
Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
|
||||
The contents of this file are subject to the Mozilla Public License Version
|
||||
1.1 (the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
http://www.mozilla.org/MPL/
|
||||
|
||||
Software distributed under the License is distributed on an "AS IS" basis,
|
||||
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
for the specific language governing rights and limitations under the
|
||||
License.
|
||||
|
||||
The Original Code is the MPI Arbitrary Precision Integer Arithmetic
|
||||
library.
|
||||
|
||||
The Initial Developer of the Original Code is
|
||||
Michael J. Fromberger <sting@linguist.dartmouth.edu>
|
||||
Portions created by the Initial Developer are Copyright (C) 1998, 2000
|
||||
the Initial Developer. All Rights Reserved.
|
||||
|
||||
Contributor(s):
|
||||
|
||||
Alternatively, the contents of this file may be used under the terms of
|
||||
either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
of those above. If you wish to allow use of your version of this file only
|
||||
under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
use your version of this file under the terms of the MPL, indicate your
|
||||
decision by deleting the provisions above and replace them with the notice
|
||||
and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
the provisions above, a recipient may use your version of this file under
|
||||
the terms of any one of the MPL, the GPL or the LGPL.
|
||||
|
||||
***** END LICENSE BLOCK *****
|
||||
|
||||
$Id: square.txt,v 1.2 2005-02-02 22:28:22 gerv%gerv.net Exp $
|
||||
|
||||
|
||||
@@ -1,250 +0,0 @@
|
||||
MPI Library Timing Tests
|
||||
|
||||
Hardware/OS
|
||||
(A) SGI O2 1 x MIPS R10000 250MHz IRIX 6.5.3
|
||||
(B) IBM RS/6000 43P-240 1 x PowerPC 603e 223MHz AIX 4.3
|
||||
(C) Dell GX1/L+ 1 x Pentium III 550MHz Linux 2.2.12-20
|
||||
(D) PowerBook G3 1 x PowerPC 750 266MHz LinuxPPC 2.2.6-15apmac
|
||||
(E) PowerBook G3 1 x PowerPC 750 266MHz MacOS 8.5.1
|
||||
(F) PowerBook G3 1 x PowerPC 750 400MHz MacOS 9.0.2
|
||||
|
||||
Compiler
|
||||
(1) MIPSpro C 7.2.1 -O3 optimizations
|
||||
(2) GCC 2.95.1 -O3 optimizations
|
||||
(3) IBM AIX xlc -O3 optimizations (version unknown)
|
||||
(4) EGCS 2.91.66 -O3 optimizations
|
||||
(5) Metrowerks CodeWarrior 5.0 C, all optimizations
|
||||
(6) MIPSpro C 7.30 -O3 optimizations
|
||||
(7) same as (6), with optimized libmalloc.so
|
||||
|
||||
Timings are given in seconds, computed using the C library's clock()
|
||||
function. The first column gives the hardware and compiler
|
||||
configuration used for the test. The second column indicates the
|
||||
number of tests that were aggregated to get the statistics for that
|
||||
size. These were compiled using 16 bit digits.
|
||||
|
||||
Source data were generated randomly using a fixed seed, so they should
|
||||
be internally consistent, but may vary on different systems depending
|
||||
on the C library. Also, since the resolution of the timer accessed by
|
||||
clock() varies, there may be some variance in the precision of these
|
||||
measurements.
|
||||
|
||||
Prime Generation (primegen)
|
||||
|
||||
128 bits:
|
||||
A1 200 min=0.03, avg=0.19, max=0.72, sum=38.46
|
||||
A2 200 min=0.02, avg=0.16, max=0.62, sum=32.55
|
||||
B3 200 min=0.01, avg=0.07, max=0.22, sum=13.29
|
||||
C4 200 min=0.00, avg=0.03, max=0.20, sum=6.14
|
||||
D4 200 min=0.00, avg=0.05, max=0.33, sum=9.70
|
||||
A6 200 min=0.01, avg=0.09, max=0.36, sum=17.48
|
||||
A7 200 min=0.00, avg=0.05, max=0.24, sum=10.07
|
||||
|
||||
192 bits:
|
||||
A1 200 min=0.05, avg=0.45, max=3.13, sum=89.96
|
||||
A2 200 min=0.04, avg=0.39, max=2.61, sum=77.55
|
||||
B3 200 min=0.02, avg=0.18, max=1.25, sum=36.97
|
||||
C4 200 min=0.01, avg=0.09, max=0.33, sum=18.24
|
||||
D4 200 min=0.02, avg=0.15, max=0.54, sum=29.63
|
||||
A6 200 min=0.02, avg=0.24, max=1.70, sum=47.84
|
||||
A7 200 min=0.01, avg=0.15, max=1.05, sum=30.88
|
||||
|
||||
256 bits:
|
||||
A1 200 min=0.08, avg=0.92, max=6.13, sum=184.79
|
||||
A2 200 min=0.06, avg=0.76, max=5.03, sum=151.11
|
||||
B3 200 min=0.04, avg=0.41, max=2.68, sum=82.35
|
||||
C4 200 min=0.02, avg=0.19, max=0.69, sum=37.91
|
||||
D4 200 min=0.03, avg=0.31, max=1.15, sum=63.00
|
||||
A6 200 min=0.04, avg=0.48, max=3.13, sum=95.46
|
||||
A7 200 min=0.03, avg=0.37, max=2.36, sum=73.60
|
||||
|
||||
320 bits:
|
||||
A1 200 min=0.11, avg=1.59, max=6.14, sum=318.81
|
||||
A2 200 min=0.09, avg=1.27, max=4.93, sum=254.03
|
||||
B3 200 min=0.07, avg=0.82, max=3.13, sum=163.80
|
||||
C4 200 min=0.04, avg=0.44, max=1.91, sum=87.59
|
||||
D4 200 min=0.06, avg=0.73, max=3.22, sum=146.73
|
||||
A6 200 min=0.07, avg=0.93, max=3.50, sum=185.01
|
||||
A7 200 min=0.05, avg=0.76, max=2.94, sum=151.78
|
||||
|
||||
384 bits:
|
||||
A1 200 min=0.16, avg=2.69, max=11.41, sum=537.89
|
||||
A2 200 min=0.13, avg=2.15, max=9.03, sum=429.14
|
||||
B3 200 min=0.11, avg=1.54, max=6.49, sum=307.78
|
||||
C4 200 min=0.06, avg=0.81, max=4.84, sum=161.13
|
||||
D4 200 min=0.10, avg=1.38, max=8.31, sum=276.81
|
||||
A6 200 min=0.11, avg=1.73, max=7.36, sum=345.55
|
||||
A7 200 min=0.09, avg=1.46, max=6.12, sum=292.02
|
||||
|
||||
448 bits:
|
||||
A1 200 min=0.23, avg=3.36, max=15.92, sum=672.63
|
||||
A2 200 min=0.17, avg=2.61, max=12.25, sum=522.86
|
||||
B3 200 min=0.16, avg=2.10, max=9.83, sum=420.86
|
||||
C4 200 min=0.09, avg=1.44, max=7.64, sum=288.36
|
||||
D4 200 min=0.16, avg=2.50, max=13.29, sum=500.17
|
||||
A6 200 min=0.15, avg=2.31, max=10.81, sum=461.58
|
||||
A7 200 min=0.14, avg=2.03, max=9.53, sum=405.16
|
||||
|
||||
512 bits:
|
||||
A1 200 min=0.30, avg=6.12, max=22.18, sum=1223.35
|
||||
A2 200 min=0.25, avg=4.67, max=16.90, sum=933.18
|
||||
B3 200 min=0.23, avg=4.13, max=14.94, sum=825.45
|
||||
C4 200 min=0.13, avg=2.08, max=9.75, sum=415.22
|
||||
D4 200 min=0.24, avg=4.04, max=20.18, sum=808.11
|
||||
A6 200 min=0.22, avg=4.47, max=16.19, sum=893.83
|
||||
A7 200 min=0.20, avg=4.03, max=14.65, sum=806.02
|
||||
|
||||
Modular Exponentiation (metime)
|
||||
|
||||
The following results are aggregated from 200 pseudo-randomly
|
||||
generated tests, based on a fixed seed.
|
||||
|
||||
base, exponent, and modulus size (bits)
|
||||
P/C 128 192 256 320 384 448 512 640 768 896 1024
|
||||
------- -----------------------------------------------------------------
|
||||
A1 0.015 0.027 0.047 0.069 0.098 0.133 0.176 0.294 0.458 0.680 1.040
|
||||
A2 0.013 0.024 0.037 0.053 0.077 0.102 0.133 0.214 0.326 0.476 0.668
|
||||
B3 0.005 0.011 0.021 0.036 0.056 0.084 0.121 0.222 0.370 0.573 0.840
|
||||
C4 0.002 0.006 0.011 0.020 0.032 0.048 0.069 0.129 0.223 0.344 0.507
|
||||
D4 0.004 0.010 0.019 0.034 0.056 0.085 0.123 0.232 0.390 0.609 0.899
|
||||
E5 0.007 0.015 0.031 0.055 0.088 0.133 0.183 0.342 0.574 0.893 1.317
|
||||
A6 0.008 0.016 0.038 0.042 0.064 0.093 0.133 0.239 0.393 0.604 0.880
|
||||
A7 0.005 0.011 0.020 0.036 0.056 0.083 0.121 0.223 0.374 0.583 0.855
|
||||
|
||||
Multiplication and Squaring tests, (mulsqr)
|
||||
|
||||
The following results are aggregated from 500000 pseudo-randomly
|
||||
generated tests, based on a per-run wall-clock seed. Times are given
|
||||
in seconds, except where indicated in microseconds (us).
|
||||
|
||||
(A1)
|
||||
|
||||
bits multiply square ad percent time/mult time/square
|
||||
64 9.33 9.15 > 1.9 18.7us 18.3us
|
||||
128 10.88 10.44 > 4.0 21.8us 20.9us
|
||||
192 13.30 11.89 > 10.6 26.7us 23.8us
|
||||
256 14.88 12.64 > 15.1 29.8us 25.3us
|
||||
320 18.64 15.01 > 19.5 37.3us 30.0us
|
||||
384 23.11 17.70 > 23.4 46.2us 35.4us
|
||||
448 28.28 20.88 > 26.2 56.6us 41.8us
|
||||
512 34.09 24.51 > 28.1 68.2us 49.0us
|
||||
640 47.86 33.25 > 30.5 95.7us 66.5us
|
||||
768 64.91 43.54 > 32.9 129.8us 87.1us
|
||||
896 84.49 55.48 > 34.3 169.0us 111.0us
|
||||
1024 107.25 69.21 > 35.5 214.5us 138.4us
|
||||
1536 227.97 141.91 > 37.8 456.0us 283.8us
|
||||
2048 394.05 242.15 > 38.5 788.1us 484.3us
|
||||
|
||||
(A2)
|
||||
|
||||
bits multiply square ad percent time/mult time/square
|
||||
64 7.87 7.95 < 1.0 15.7us 15.9us
|
||||
128 9.40 9.19 > 2.2 18.8us 18.4us
|
||||
192 11.15 10.59 > 5.0 22.3us 21.2us
|
||||
256 12.02 11.16 > 7.2 24.0us 22.3us
|
||||
320 14.62 13.43 > 8.1 29.2us 26.9us
|
||||
384 17.72 15.80 > 10.8 35.4us 31.6us
|
||||
448 21.24 18.51 > 12.9 42.5us 37.0us
|
||||
512 25.36 21.78 > 14.1 50.7us 43.6us
|
||||
640 34.57 29.00 > 16.1 69.1us 58.0us
|
||||
768 46.10 37.60 > 18.4 92.2us 75.2us
|
||||
896 58.94 47.72 > 19.0 117.9us 95.4us
|
||||
1024 73.76 59.12 > 19.8 147.5us 118.2us
|
||||
1536 152.00 118.80 > 21.8 304.0us 237.6us
|
||||
2048 259.41 199.57 > 23.1 518.8us 399.1us
|
||||
|
||||
(B3)
|
||||
|
||||
bits multiply square ad percent time/mult time/square
|
||||
64 2.60 2.47 > 5.0 5.20us 4.94us
|
||||
128 4.43 4.06 > 8.4 8.86us 8.12us
|
||||
192 7.03 6.10 > 13.2 14.1us 12.2us
|
||||
256 10.44 8.59 > 17.7 20.9us 17.2us
|
||||
320 14.44 11.64 > 19.4 28.9us 23.3us
|
||||
384 19.12 15.08 > 21.1 38.2us 30.2us
|
||||
448 24.55 19.09 > 22.2 49.1us 38.2us
|
||||
512 31.03 23.53 > 24.2 62.1us 47.1us
|
||||
640 45.05 33.80 > 25.0 90.1us 67.6us
|
||||
768 63.02 46.05 > 26.9 126.0us 92.1us
|
||||
896 83.74 60.29 > 28.0 167.5us 120.6us
|
||||
1024 106.73 76.65 > 28.2 213.5us 153.3us
|
||||
1536 228.94 160.98 > 29.7 457.9us 322.0us
|
||||
2048 398.08 275.93 > 30.7 796.2us 551.9us
|
||||
|
||||
(C4)
|
||||
|
||||
bits multiply square ad percent time/mult time/square
|
||||
64 1.34 1.28 > 4.5 2.68us 2.56us
|
||||
128 2.76 2.59 > 6.2 5.52us 5.18us
|
||||
192 4.52 4.16 > 8.0 9.04us 8.32us
|
||||
256 6.64 5.99 > 9.8 13.3us 12.0us
|
||||
320 9.20 8.13 > 11.6 18.4us 16.3us
|
||||
384 12.01 10.58 > 11.9 24.0us 21.2us
|
||||
448 15.24 13.33 > 12.5 30.5us 26.7us
|
||||
512 19.02 16.46 > 13.5 38.0us 32.9us
|
||||
640 27.56 23.54 > 14.6 55.1us 47.1us
|
||||
768 37.89 31.78 > 16.1 75.8us 63.6us
|
||||
896 49.24 41.42 > 15.9 98.5us 82.8us
|
||||
1024 62.59 52.18 > 16.6 125.2us 104.3us
|
||||
1536 131.66 107.72 > 18.2 263.3us 215.4us
|
||||
2048 226.45 182.95 > 19.2 453.0us 365.9us
|
||||
|
||||
(A7)
|
||||
|
||||
bits multiply square ad percent time/mult time/square
|
||||
64 1.74 1.71 > 1.7 3.48us 3.42us
|
||||
128 3.48 2.96 > 14.9 6.96us 5.92us
|
||||
192 5.74 4.60 > 19.9 11.5us 9.20us
|
||||
256 8.75 6.61 > 24.5 17.5us 13.2us
|
||||
320 12.5 8.99 > 28.1 25.0us 18.0us
|
||||
384 16.9 11.9 > 29.6 33.8us 23.8us
|
||||
448 22.2 15.2 > 31.7 44.4us 30.4us
|
||||
512 28.3 19.0 > 32.7 56.6us 38.0us
|
||||
640 42.4 28.0 > 34.0 84.8us 56.0us
|
||||
768 59.4 38.5 > 35.2 118.8us 77.0us
|
||||
896 79.5 51.2 > 35.6 159.0us 102.4us
|
||||
1024 102.6 65.5 > 36.2 205.2us 131.0us
|
||||
1536 224.3 140.6 > 37.3 448.6us 281.2us
|
||||
2048 393.4 244.3 > 37.9 786.8us 488.6us
|
||||
|
||||
------------------------------------------------------------------
|
||||
***** BEGIN LICENSE BLOCK *****
|
||||
Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
|
||||
The contents of this file are subject to the Mozilla Public License Version
|
||||
1.1 (the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
http://www.mozilla.org/MPL/
|
||||
|
||||
Software distributed under the License is distributed on an "AS IS" basis,
|
||||
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
for the specific language governing rights and limitations under the
|
||||
License.
|
||||
|
||||
The Original Code is the MPI Arbitrary Precision Integer Arithmetic
|
||||
library.
|
||||
|
||||
The Initial Developer of the Original Code is
|
||||
Michael J. Fromberger <sting@linguist.dartmouth.edu>
|
||||
Portions created by the Initial Developer are Copyright (C) 1998, 2000
|
||||
the Initial Developer. All Rights Reserved.
|
||||
|
||||
Contributor(s):
|
||||
|
||||
Alternatively, the contents of this file may be used under the terms of
|
||||
either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
of those above. If you wish to allow use of your version of this file only
|
||||
under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
use your version of this file under the terms of the MPL, indicate your
|
||||
decision by deleting the provisions above and replace them with the notice
|
||||
and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
the provisions above, a recipient may use your version of this file under
|
||||
the terms of any one of the MPL, the GPL or the LGPL.
|
||||
|
||||
***** END LICENSE BLOCK *****
|
||||
|
||||
$Id: timing.txt,v 1.2 2005-02-02 22:28:22 gerv%gerv.net Exp $
|
||||
|
||||
|
||||
@@ -1,640 +0,0 @@
|
||||
/*
|
||||
* The contents of this file are subject to the Mozilla Public
|
||||
* License Version 1.1 (the "License"); you may not use this file
|
||||
* except in compliance with the License. You may obtain a copy of
|
||||
* the License at http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS
|
||||
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
||||
* implied. See the License for the specific language governing
|
||||
* rights and limitations under the License.
|
||||
*
|
||||
* The Original Code is multacc512 multiple-precision integer arithmetic.
|
||||
*
|
||||
* The Initial Developer of the Original Code is Hewlett-Packard Company.
|
||||
* Portions created by Hewlett-Packard Company are
|
||||
* Copyright (C) March 1999, Hewlett-Packard Company. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
* coded by: Bill Worley, Hewlett-Packard labs
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the
|
||||
* terms of the GNU General Public License Version 2 or later (the
|
||||
* "GPL"), in which case the provisions of the GPL are applicable
|
||||
* instead of those above. If you wish to allow use of your
|
||||
* version of this file only under the terms of the GPL and not to
|
||||
* allow others to use your version of this file under the MPL,
|
||||
* indicate your decision by deleting the provisions above and
|
||||
* replace them with the notice and other provisions required by
|
||||
* the GPL. If you do not delete the provisions above, a recipient
|
||||
* may use your version of this file under either the MPL or the
|
||||
* GPL.
|
||||
*
|
||||
* This PA-RISC 2.0 function computes the product of two unsigned integers,
|
||||
* and adds the result to a previously computed integer. The multiplicand
|
||||
* is a 512-bit (64-byte, eight doubleword) unsigned integer, stored in
|
||||
* memory in little-double-wordian order. The multiplier is an unsigned
|
||||
* 64-bit integer. The previously computed integer to which the product is
|
||||
* added is located in the result ("res") area, and is assumed to be a
|
||||
* 576-bit (72-byte, nine doubleword) unsigned integer, stored in memory
|
||||
* in little-double-wordian order. This value normally will be the result
|
||||
* of a previously computed nine doubleword result. It is not necessary
|
||||
* to pad the multiplicand with an additional 64-bit zero doubleword.
|
||||
*
|
||||
* Multiplicand, multiplier, and addend ideally should be aligned at
|
||||
* 16-byte boundaries for best performance. The code will function
|
||||
* correctly for alignment at eight-byte boundaries which are not 16-byte
|
||||
* boundaries, but the execution may be slightly slower due to even/odd
|
||||
* bank conflicts on PA-RISC 8000 processors.
|
||||
*
|
||||
* This function is designed to accept the same calling sequence as Bill
|
||||
* Ackerman's "maxpy_little" function. The carry from the ninth doubleword
|
||||
* of the result is written to the tenth word of the result, as is done by
|
||||
* Bill Ackerman's function. The final carry also is returned as an
|
||||
* integer, which may be ignored. The function prototype may be either
|
||||
* of the following:
|
||||
*
|
||||
* void multacc512( int l, chunk* m, const chunk* a, chunk* res );
|
||||
* or
|
||||
* int multacc512( int l, chunk* m, const chunk* a, chunk* res );
|
||||
*
|
||||
* where: "l" originally denoted vector lengths. This parameter is
|
||||
* ignored. This function always assumes a multiplicand length of
|
||||
* 512 bits (eight doublewords), and addend and result lengths of
|
||||
* 576 bits (nine doublewords).
|
||||
*
|
||||
* "m" is a pointer to the doubleword multiplier, ideally aligned
|
||||
* on a 16-byte boundary.
|
||||
*
|
||||
* "a" is a pointer to the eight-doubleword multiplicand, stored
|
||||
* in little-double-wordian order, and ideally aligned on a 16-byte
|
||||
* boundary.
|
||||
*
|
||||
* "res" is a pointer to the nine doubleword addend, and to the
|
||||
* nine-doubleword product computed by this function. The result
|
||||
* also is stored in little-double-wordian order, and ideally is
|
||||
* aligned on a 16-byte boundary. It is expected that the alignment
|
||||
* of the "res" area may alternate between even/odd doubleword
|
||||
* boundaries for successive calls for 512-bit x 512-bit
|
||||
* multiplications.
|
||||
*
|
||||
* The code for this function has been scheduled to use the parallelism
|
||||
* of the PA-RISC 8000 series microprocessors as well as the author was
|
||||
* able. Comments and/or suggestions for improvement are welcomed.
|
||||
*
|
||||
* The code is "64-bit safe". This means it may be called in either
|
||||
* the 32ILP context or the 64LP context. All 64 bits of each register are
|
||||
* saved and restored.
|
||||
*
|
||||
* This code is self-contained. It requires no other header files in order
|
||||
* to compile and to be linkable on a PA-RISC 2.0 machine. Symbolic
|
||||
* definitions for registers and stack offsets are included within this
|
||||
* one source file.
|
||||
*
|
||||
* This is a leaf routine. As such, minimal use is made of the stack area.
|
||||
* Of the 192 bytes allocated, 64 bytes are used for saving/restoring eight
|
||||
* general registers, and 128 bytes are used to move intermediate products
|
||||
* from the floating-point registers to the general registers. Stack
|
||||
* protocols assure proper alignment of these areas.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/* ====================================================================*/
|
||||
/* symbolic definitions for PA-RISC registers */
|
||||
/* in the MIPS style, avoids lots of case shifts */
|
||||
/* assignments (except t4) preserve register number parity */
|
||||
/* ====================================================================*/
|
||||
|
||||
#define zero %r0 /* permanent zero */
|
||||
#define t5 %r1 /* temp register, altered by addil */
|
||||
|
||||
#define rp %r2 /* return pointer */
|
||||
|
||||
#define s1 %r3 /* callee saves register*/
|
||||
#define s0 %r4 /* callee saves register*/
|
||||
#define s3 %r5 /* callee saves register*/
|
||||
#define s2 %r6 /* callee saves register*/
|
||||
#define s5 %r7 /* callee saves register*/
|
||||
#define s4 %r8 /* callee saves register*/
|
||||
#define s7 %r9 /* callee saves register*/
|
||||
#define s6 %r10 /* callee saves register*/
|
||||
|
||||
#define t1 %r19 /* caller saves register*/
|
||||
#define t0 %r20 /* caller saves register*/
|
||||
#define t3 %r21 /* caller saves register*/
|
||||
#define t2 %r22 /* caller saves register*/
|
||||
|
||||
#define a3 %r23 /* fourth argument register, high word */
|
||||
#define a2 %r24 /* third argument register, low word*/
|
||||
#define a1 %r25 /* second argument register, high word*/
|
||||
#define a0 %r26 /* first argument register, low word*/
|
||||
|
||||
#define v0 %r28 /* high order return value*/
|
||||
#define v1 %r29 /* low order return value*/
|
||||
|
||||
#define sp %r30 /* stack pointer*/
|
||||
#define t4 %r31 /* temporary register */
|
||||
|
||||
#define fa0 %fr4 /* first argument register*/
|
||||
#define fa1 %fr5 /* second argument register*/
|
||||
#define fa2 %fr6 /* third argument register*/
|
||||
#define fa3 %fr7 /* fourth argument register*/
|
||||
|
||||
#define fa0r %fr4R /* first argument register*/
|
||||
#define fa1r %fr5R /* second argument register*/
|
||||
#define fa2r %fr6R /* third argument register*/
|
||||
#define fa3r %fr7R /* fourth argument register*/
|
||||
|
||||
#define ft0 %fr8 /* caller saves register*/
|
||||
#define ft1 %fr9 /* caller saves register*/
|
||||
#define ft2 %fr10 /* caller saves register*/
|
||||
#define ft3 %fr11 /* caller saves register*/
|
||||
|
||||
#define ft0r %fr8R /* caller saves register*/
|
||||
#define ft1r %fr9R /* caller saves register*/
|
||||
#define ft2r %fr10R /* caller saves register*/
|
||||
#define ft3r %fr11R /* caller saves register*/
|
||||
|
||||
#define ft4 %fr22 /* caller saves register*/
|
||||
#define ft5 %fr23 /* caller saves register*/
|
||||
#define ft6 %fr24 /* caller saves register*/
|
||||
#define ft7 %fr25 /* caller saves register*/
|
||||
#define ft8 %fr26 /* caller saves register*/
|
||||
#define ft9 %fr27 /* caller saves register*/
|
||||
#define ft10 %fr28 /* caller saves register*/
|
||||
#define ft11 %fr29 /* caller saves register*/
|
||||
#define ft12 %fr30 /* caller saves register*/
|
||||
#define ft13 %fr31 /* caller saves register*/
|
||||
|
||||
#define ft4r %fr22R /* caller saves register*/
|
||||
#define ft5r %fr23R /* caller saves register*/
|
||||
#define ft6r %fr24R /* caller saves register*/
|
||||
#define ft7r %fr25R /* caller saves register*/
|
||||
#define ft8r %fr26R /* caller saves register*/
|
||||
#define ft9r %fr27R /* caller saves register*/
|
||||
#define ft10r %fr28R /* caller saves register*/
|
||||
#define ft11r %fr29R /* caller saves register*/
|
||||
#define ft12r %fr30R /* caller saves register*/
|
||||
#define ft13r %fr31R /* caller saves register*/
|
||||
|
||||
|
||||
|
||||
/* ================================================================== */
|
||||
/* functional definitions for PA-RISC registers */
|
||||
/* ================================================================== */
|
||||
|
||||
/* general registers */
|
||||
|
||||
#define T1 a0 /* temp, (length parameter ignored) */
|
||||
|
||||
#define pM a1 /* -> 64-bit multiplier */
|
||||
#define T2 a1 /* temp, (after fetching multiplier) */
|
||||
|
||||
#define pA a2 /* -> multiplicand vector (8 64-bit words) */
|
||||
#define T3 a2 /* temp, (after fetching multiplicand) */
|
||||
|
||||
#define pR a3 /* -> addend vector (9 64-bit doublewords, offsets 0..64), which is
|
||||
overwritten in place by the result vector (9 64-bit doublewords) */
|
||||
|
||||
#define S0 s0 /* callee saves summand registers */
|
||||
#define S1 s1
|
||||
#define S2 s2
|
||||
#define S3 s3
|
||||
#define S4 s4
|
||||
#define S5 s5
|
||||
#define S6 s6
|
||||
#define S7 s7
|
||||
|
||||
#define S8 v0 /* caller saves summand registers */
|
||||
#define S9 v1
|
||||
#define S10 t0
|
||||
#define S11 t1
|
||||
#define S12 t2
|
||||
#define S13 t3
|
||||
#define S14 t4
|
||||
#define S15 t5
|
||||
|
||||
|
||||
|
||||
/* floating-point registers */
|
||||
|
||||
#define M fa0 /* multiplier double word */
|
||||
#define MR fa0r /* low order half of multiplier double word */
|
||||
#define ML fa0 /* high order half of multiplier double word */
|
||||
|
||||
#define A0 fa2 /* multiplicand double word 0 */
|
||||
#define A0R fa2r /* low order half of multiplicand double word */
|
||||
#define A0L fa2 /* high order half of multiplicand double word */
|
||||
|
||||
#define A1 fa3 /* multiplicand double word 1 */
|
||||
#define A1R fa3r /* low order half of multiplicand double word */
|
||||
#define A1L fa3 /* high order half of multiplicand double word */
|
||||
|
||||
#define A2 ft0 /* multiplicand double word 2 */
|
||||
#define A2R ft0r /* low order half of multiplicand double word */
|
||||
#define A2L ft0 /* high order half of multiplicand double word */
|
||||
|
||||
#define A3 ft1 /* multiplicand double word 3 */
|
||||
#define A3R ft1r /* low order half of multiplicand double word */
|
||||
#define A3L ft1 /* high order half of multiplicand double word */
|
||||
|
||||
#define A4 ft2 /* multiplicand double word 4 */
|
||||
#define A4R ft2r /* low order half of multiplicand double word */
|
||||
#define A4L ft2 /* high order half of multiplicand double word */
|
||||
|
||||
#define A5 ft3 /* multiplicand double word 5 */
|
||||
#define A5R ft3r /* low order half of multiplicand double word */
|
||||
#define A5L ft3 /* high order half of multiplicand double word */
|
||||
|
||||
#define A6 ft4 /* multiplicand double word 6 */
|
||||
#define A6R ft4r /* low order half of multiplicand double word */
|
||||
#define A6L ft4 /* high order half of multiplicand double word */
|
||||
|
||||
#define A7 ft5 /* multiplicand double word 7 */
|
||||
#define A7R ft5r /* low order half of multiplicand double word */
|
||||
#define A7L ft5 /* high order half of multiplicand double word */
|
||||
|
||||
#define P0 ft6 /* xmpyu partial-product register 0 */
|
||||
#define P1 ft7 /* xmpyu partial-product register 1 */
|
||||
#define P2 ft8 /* xmpyu partial-product register 2 */
|
||||
#define P3 ft9 /* xmpyu partial-product register 3 */
|
||||
#define P4 ft10 /* xmpyu partial-product register 4 */
|
||||
#define P5 ft11 /* xmpyu partial-product register 5 */
|
||||
#define P6 ft12 /* xmpyu partial-product register 6 */
|
||||
#define P7 ft13 /* xmpyu partial-product register 7 */
|
||||
|
||||
|
||||
|
||||
|
||||
/* ====================================================================== */
|
||||
/* symbolic definitions for HP-UX stack offsets */
|
||||
/* symbolic definitions for memory NOPs */
|
||||
/* ====================================================================== */
|
||||
|
||||
#define ST_SZ 192 /* stack area total size */
|
||||
|
||||
#define SV0 -192(sp) /* general register save area */
|
||||
#define SV1 -184(sp)
|
||||
#define SV2 -176(sp)
|
||||
#define SV3 -168(sp)
|
||||
#define SV4 -160(sp)
|
||||
#define SV5 -152(sp)
|
||||
#define SV6 -144(sp)
|
||||
#define SV7 -136(sp)
|
||||
|
||||
#define XF0 -128(sp) /* data transfer area */
|
||||
#define XF1 -120(sp) /* for floating-pt to integer regs */
|
||||
#define XF2 -112(sp)
|
||||
#define XF3 -104(sp)
|
||||
#define XF4 -96(sp)
|
||||
#define XF5 -88(sp)
|
||||
#define XF6 -80(sp)
|
||||
#define XF7 -72(sp)
|
||||
#define XF8 -64(sp)
|
||||
#define XF9 -56(sp)
|
||||
#define XF10 -48(sp)
|
||||
#define XF11 -40(sp)
|
||||
#define XF12 -32(sp)
|
||||
#define XF13 -24(sp)
|
||||
#define XF14 -16(sp)
|
||||
#define XF15 -8(sp)
|
||||
|
||||
#define mnop proberi (sp),3,zero /* memory NOP */
|
||||
|
||||
|
||||
|
||||
|
||||
/* ====================================================================== */
|
||||
/* assembler formalities */
|
||||
/* ====================================================================== */
|
||||
|
||||
#ifdef __LP64__
|
||||
.level 2.0W
|
||||
#else
|
||||
.level 2.0
|
||||
#endif
|
||||
.space $TEXT$
|
||||
.subspa $CODE$
|
||||
.align 16
|
||||
|
||||
/* ====================================================================== */
|
||||
/* here to compute 64-bit x 512-bit product + 512-bit addend */
|
||||
/* ====================================================================== */
|
||||
|
||||
multacc512
|
||||
.PROC
|
||||
.CALLINFO
|
||||
.ENTER
|
||||
fldd 0(pM),M ; multiplier double word
|
||||
ldo ST_SZ(sp),sp ; push stack
|
||||
|
||||
fldd 0(pA),A0 ; multiplicand double word 0
|
||||
std S1,SV1 ; save s1
|
||||
|
||||
fldd 16(pA),A2 ; multiplicand double word 2
|
||||
std S3,SV3 ; save s3
|
||||
|
||||
fldd 32(pA),A4 ; multiplicand double word 4
|
||||
std S5,SV5 ; save s5
|
||||
|
||||
fldd 48(pA),A6 ; multiplicand double word 6
|
||||
std S7,SV7 ; save s7
|
||||
|
||||
|
||||
std S0,SV0 ; save s0
|
||||
fldd 8(pA),A1 ; multiplicand double word 1
|
||||
xmpyu MR,A0L,P0 ; A0 cross 32-bit word products
|
||||
xmpyu ML,A0R,P2
|
||||
|
||||
std S2,SV2 ; save s2
|
||||
fldd 24(pA),A3 ; multiplicand double word 3
|
||||
xmpyu MR,A2L,P4 ; A2 cross 32-bit word products
|
||||
xmpyu ML,A2R,P6
|
||||
|
||||
std S4,SV4 ; save s4
|
||||
fldd 40(pA),A5 ; multiplicand double word 5
|
||||
|
||||
std S6,SV6 ; save s6
|
||||
fldd 56(pA),A7 ; multiplicand double word 7
|
||||
|
||||
|
||||
fstd P0,XF0 ; MR * A0L
|
||||
xmpyu MR,A0R,P0 ; A0 right 32-bit word product
|
||||
xmpyu MR,A1L,P1 ; A1 cross 32-bit word product
|
||||
|
||||
fstd P2,XF2 ; ML * A0R
|
||||
xmpyu ML,A0L,P2 ; A0 left 32-bit word product
|
||||
xmpyu ML,A1R,P3 ; A1 cross 32-bit word product
|
||||
|
||||
fstd P4,XF4 ; MR * A2L
|
||||
xmpyu MR,A2R,P4 ; A2 right 32-bit word product
|
||||
xmpyu MR,A3L,P5 ; A3 cross 32-bit word product
|
||||
|
||||
fstd P6,XF6 ; ML * A2R
|
||||
xmpyu ML,A2L,P6 ; A2 parallel 32-bit word product
|
||||
xmpyu ML,A3R,P7 ; A3 cross 32-bit word product
|
||||
|
||||
|
||||
ldd XF0,S0 ; MR * A0L
|
||||
fstd P1,XF1 ; MR * A1L
|
||||
|
||||
ldd XF2,S2 ; ML * A0R
|
||||
fstd P3,XF3 ; ML * A1R
|
||||
|
||||
ldd XF4,S4 ; MR * A2L
|
||||
fstd P5,XF5 ; MR * A3L
|
||||
xmpyu MR,A1R,P1 ; A1 parallel 32-bit word products
|
||||
xmpyu ML,A1L,P3
|
||||
|
||||
ldd XF6,S6 ; ML * A2R
|
||||
fstd P7,XF7 ; ML * A3R
|
||||
xmpyu MR,A3R,P5 ; A3 parallel 32-bit word products
|
||||
xmpyu ML,A3L,P7
|
||||
|
||||
|
||||
fstd P0,XF0 ; MR * A0R
|
||||
ldd XF1,S1 ; MR * A1L
|
||||
nop
|
||||
add S0,S2,T1 ; A0 cross product sum
|
||||
|
||||
fstd P2,XF2 ; ML * A0L
|
||||
ldd XF3,S3 ; ML * A1R
|
||||
add,dc zero,zero,S0 ; A0 cross product sum carry
|
||||
depd,z T1,31,32,S2 ; A0 cross product sum << 32
|
||||
|
||||
fstd P4,XF4 ; MR * A2R
|
||||
ldd XF5,S5 ; MR * A3L
|
||||
shrpd S0,T1,32,S0 ; A0 carry | cross product sum >> 32
|
||||
add S4,S6,T3 ; A2 cross product sum
|
||||
|
||||
fstd P6,XF6 ; ML * A2L
|
||||
ldd XF7,S7 ; ML * A3R
|
||||
add,dc zero,zero,S4 ; A2 cross product sum carry
|
||||
depd,z T3,31,32,S6 ; A2 cross product sum << 32
|
||||
|
||||
|
||||
ldd XF0,S8 ; MR * A0R
|
||||
fstd P1,XF1 ; MR * A1R
|
||||
xmpyu MR,A4L,P0 ; A4 cross 32-bit word product
|
||||
xmpyu MR,A5L,P1 ; A5 cross 32-bit word product
|
||||
|
||||
ldd XF2,S10 ; ML * A0L
|
||||
fstd P3,XF3 ; ML * A1L
|
||||
xmpyu ML,A4R,P2 ; A4 cross 32-bit word product
|
||||
xmpyu ML,A5R,P3 ; A5 cross 32-bit word product
|
||||
|
||||
ldd XF4,S12 ; MR * A2R
|
||||
fstd P5,XF5 ; MR * A3R
|
||||
xmpyu MR,A6L,P4 ; A6 cross 32-bit word product
|
||||
xmpyu MR,A7L,P5 ; A7 cross 32-bit word product
|
||||
|
||||
ldd XF6,S14 ; ML * A2L
|
||||
fstd P7,XF7 ; ML * A3L
|
||||
xmpyu ML,A6R,P6 ; A6 cross 32-bit word product
|
||||
xmpyu ML,A7R,P7 ; A7 cross 32-bit word product
|
||||
|
||||
|
||||
fstd P0,XF0 ; MR * A4L
|
||||
ldd XF1,S9 ; MR * A1R
|
||||
shrpd S4,T3,32,S4 ; A2 carry | cross product sum >> 32
|
||||
add S1,S3,T1 ; A1 cross product sum
|
||||
|
||||
fstd P2,XF2 ; ML * A4R
|
||||
ldd XF3,S11 ; ML * A1L
|
||||
add,dc zero,zero,S1 ; A1 cross product sum carry
|
||||
depd,z T1,31,32,S3 ; A1 cross product sum << 32
|
||||
|
||||
fstd P4,XF4 ; MR * A6L
|
||||
ldd XF5,S13 ; MR * A3R
|
||||
shrpd S1,T1,32,S1 ; A1 carry | cross product sum >> 32
|
||||
add S5,S7,T3 ; A3 cross product sum
|
||||
|
||||
fstd P6,XF6 ; ML * A6R
|
||||
ldd XF7,S15 ; ML * A3L
|
||||
add,dc zero,zero,S5 ; A3 cross product sum carry
|
||||
depd,z T3,31,32,S7 ; A3 cross product sum << 32
|
||||
|
||||
|
||||
shrpd S5,T3,32,S5 ; A3 carry | cross product sum >> 32
|
||||
add S2,S8,S8 ; M * A0 right doubleword, P0 doubleword
|
||||
|
||||
add,dc S0,S10,S10 ; M * A0 left doubleword
|
||||
add S3,S9,S9 ; M * A1 right doubleword
|
||||
|
||||
add,dc S1,S11,S11 ; M * A1 left doubleword
|
||||
add S6,S12,S12 ; M * A2 right doubleword
|
||||
|
||||
|
||||
ldd 24(pR),S3 ; Addend word 3
|
||||
fstd P1,XF1 ; MR * A5L
|
||||
add,dc S4,S14,S14 ; M * A2 left doubleword
|
||||
xmpyu MR,A5R,P1 ; A5 right 32-bit word product
|
||||
|
||||
ldd 8(pR),S1 ; Addend word 1
|
||||
fstd P3,XF3 ; ML * A5R
|
||||
add S7,S13,S13 ; M * A3 right doubleword
|
||||
xmpyu ML,A5L,P3 ; A5 left 32-bit word product
|
||||
|
||||
ldd 0(pR),S7 ; Addend word 0
|
||||
fstd P5,XF5 ; MR * A7L
|
||||
add,dc S5,S15,S15 ; M * A3 left doubleword
|
||||
xmpyu MR,A7R,P5 ; A7 right 32-bit word product
|
||||
|
||||
ldd 16(pR),S5 ; Addend word 2
|
||||
fstd P7,XF7 ; ML * A7R
|
||||
add S10,S9,S9 ; P1 doubleword
|
||||
xmpyu ML,A7L,P7 ; A7 left 32-bit word products
|
||||
|
||||
|
||||
ldd XF0,S0 ; MR * A4L
|
||||
fstd P1,XF9 ; MR * A5R
|
||||
add,dc S11,S12,S12 ; P2 doubleword
|
||||
xmpyu MR,A4R,P0 ; A4 right 32-bit word product
|
||||
|
||||
ldd XF2,S2 ; ML * A4R
|
||||
fstd P3,XF11 ; ML * A5L
|
||||
add,dc S14,S13,S13 ; P3 doubleword
|
||||
xmpyu ML,A4L,P2 ; A4 left 32-bit word product
|
||||
|
||||
ldd XF6,S6 ; ML * A6R
|
||||
fstd P5,XF13 ; MR * A7R
|
||||
add,dc zero,S15,T2 ; P4 partial doubleword
|
||||
xmpyu MR,A6R,P4 ; A6 right 32-bit word product
|
||||
|
||||
ldd XF4,S4 ; MR * A6L
|
||||
fstd P7,XF15 ; ML * A7L
|
||||
add S7,S8,S8 ; R0 + P0, new R0 doubleword
|
||||
xmpyu ML,A6L,P6 ; A6 left 32-bit word product
|
||||
|
||||
|
||||
fstd P0,XF0 ; MR * A4R
|
||||
ldd XF7,S7 ; ML * A7R
|
||||
add,dc S1,S9,S9 ; c + R1 + P1, new R1 doubleword
|
||||
|
||||
fstd P2,XF2 ; ML * A4L
|
||||
ldd XF1,S1 ; MR * A5L
|
||||
add,dc S5,S12,S12 ; c + R2 + P2, new R2 doubleword
|
||||
|
||||
fstd P4,XF4 ; MR * A6R
|
||||
ldd XF5,S5 ; MR * A7L
|
||||
add,dc S3,S13,S13 ; c + R3 + P3, new R3 doubleword
|
||||
|
||||
fstd P6,XF6 ; ML * A6L
|
||||
ldd XF3,S3 ; ML * A5R
|
||||
add,dc zero,T2,T2 ; c + partial P4
|
||||
add S0,S2,T1 ; A4 cross product sum
|
||||
|
||||
|
||||
std S8,0(pR) ; save R0
|
||||
add,dc zero,zero,S0 ; A4 cross product sum carry
|
||||
depd,z T1,31,32,S2 ; A4 cross product sum << 32
|
||||
|
||||
std S9,8(pR) ; save R1
|
||||
shrpd S0,T1,32,S0 ; A4 carry | cross product sum >> 32
|
||||
add S4,S6,T3 ; A6 cross product sum
|
||||
|
||||
std S12,16(pR) ; save R2
|
||||
add,dc zero,zero,S4 ; A6 cross product sum carry
|
||||
depd,z T3,31,32,S6 ; A6 cross product sum << 32
|
||||
|
||||
|
||||
std S13,24(pR) ; save R3
|
||||
shrpd S4,T3,32,S4 ; A6 carry | cross product sum >> 32
|
||||
add S1,S3,T1 ; A5 cross product sum
|
||||
|
||||
ldd XF0,S8 ; MR * A4R
|
||||
add,dc zero,zero,S1 ; A5 cross product sum carry
|
||||
depd,z T1,31,32,S3 ; A5 cross product sum << 32
|
||||
|
||||
ldd XF2,S10 ; ML * A4L
|
||||
ldd XF9,S9 ; MR * A5R
|
||||
shrpd S1,T1,32,S1 ; A5 carry | cross product sum >> 32
|
||||
add S5,S7,T3 ; A7 cross product sum
|
||||
|
||||
ldd XF4,S12 ; MR * A6R
|
||||
ldd XF11,S11 ; ML * A5L
|
||||
add,dc zero,zero,S5 ; A7 cross product sum carry
|
||||
depd,z T3,31,32,S7 ; A7 cross product sum << 32
|
||||
|
||||
ldd XF6,S14 ; ML * A6L
|
||||
ldd XF13,S13 ; MR * A7R
|
||||
shrpd S5,T3,32,S5 ; A7 carry | cross product sum >> 32
|
||||
add S2,S8,S8 ; M * A4 right doubleword
|
||||
|
||||
|
||||
ldd XF15,S15 ; ML * A7L
|
||||
add,dc S0,S10,S10 ; M * A4 left doubleword
|
||||
add S3,S9,S9 ; M * A5 right doubleword
|
||||
|
||||
add,dc S1,S11,S11 ; M * A5 left doubleword
|
||||
add S6,S12,S12 ; M * A6 right doubleword
|
||||
|
||||
ldd 32(pR),S0 ; Addend word 4
|
||||
ldd 40(pR),S1 ; Addend word 5
|
||||
add,dc S4,S14,S14 ; M * A6 left doubleword
|
||||
add S7,S13,S13 ; M * A7 right doubleword
|
||||
|
||||
ldd 48(pR),S2 ; Addend word 6
|
||||
ldd 56(pR),S3 ; Addend word 7
|
||||
add,dc S5,S15,S15 ; M * A7 left doubleword
|
||||
add S8,T2,S8 ; P4 doubleword
|
||||
|
||||
ldd 64(pR),S4 ; Addend word 8
|
||||
ldd SV5,s5 ; restore s5
|
||||
add,dc S10,S9,S9 ; P5 doubleword
|
||||
add,dc S11,S12,S12 ; P6 doubleword
|
||||
|
||||
|
||||
ldd SV6,s6 ; restore s6
|
||||
ldd SV7,s7 ; restore s7
|
||||
add,dc S14,S13,S13 ; P7 doubleword
|
||||
add,dc zero,S15,S15 ; P8 doubleword
|
||||
|
||||
add S0,S8,S8 ; new R4 doubleword
|
||||
|
||||
ldd SV0,s0 ; restore s0
|
||||
std S8,32(pR) ; save R4
|
||||
add,dc S1,S9,S9 ; new R5 doubleword
|
||||
|
||||
ldd SV1,s1 ; restore s1
|
||||
std S9,40(pR) ; save R5
|
||||
add,dc S2,S12,S12 ; new R6 doubleword
|
||||
|
||||
ldd SV2,s2 ; restore s2
|
||||
std S12,48(pR) ; save R6
|
||||
add,dc S3,S13,S13 ; new R7 doubleword
|
||||
|
||||
ldd SV3,s3 ; restore s3
|
||||
std S13,56(pR) ; save R7
|
||||
add,dc S4,S15,S15 ; new R8 doubleword
|
||||
|
||||
ldd SV4,s4 ; restore s4
|
||||
std S15,64(pR) ; save result[8]
|
||||
add,dc zero,zero,v0 ; return carry from R8
|
||||
|
||||
CMPIB,*= 0,v0,$L0 ; if no overflow, exit
|
||||
LDO 8(pR),pR
|
||||
|
||||
$FINAL1 ; Final carry propagation
|
||||
LDD 64(pR),v0
|
||||
LDO 8(pR),pR
|
||||
ADDI 1,v0,v0
|
||||
CMPIB,*= 0,v0,$FINAL1 ; Keep looping if there is a carry.
|
||||
STD v0,56(pR)
|
||||
$L0
|
||||
bv zero(rp) ; -> caller
|
||||
ldo -ST_SZ(sp),sp ; pop stack
|
||||
|
||||
/* ====================================================================== */
|
||||
/* end of module */
|
||||
/* ====================================================================== */
|
||||
|
||||
.LEAVE
|
||||
|
||||
.PROCEND
|
||||
.SPACE $TEXT$
|
||||
.SUBSPA $CODE$
|
||||
.EXPORT multacc512,ENTRY
|
||||
|
||||
.end
|
||||
@@ -1,929 +0,0 @@
|
||||
; The contents of this file are subject to the Mozilla Public
|
||||
; License Version 1.1 (the "License"); you may not use this file
|
||||
; except in compliance with the License. You may obtain a copy of
|
||||
; the License at http://www.mozilla.org/MPL/
|
||||
;
|
||||
; Software distributed under the License is distributed on an "AS
|
||||
; IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
||||
; implied. See the License for the specific language governing
|
||||
; rights and limitations under the License.
|
||||
;
|
||||
; The Original Code is MAXPY multiple-precision integer arithmetic.
|
||||
;
|
||||
; The Initial Developer of the Original Code is the Hewlett-Packard Company.
|
||||
; Portions created by Hewlett-Packard Company are
|
||||
; Copyright (C) 1997 Hewlett-Packard Company. All Rights Reserved.
|
||||
;
|
||||
; Contributor(s):
|
||||
; coded by: William B. Ackerman
|
||||
;
|
||||
; Alternatively, the contents of this file may be used under the
|
||||
; terms of the GNU General Public License Version 2 or later (the
|
||||
; "GPL"), in which case the provisions of the GPL are applicable
|
||||
; instead of those above. If you wish to allow use of your
|
||||
; version of this file only under the terms of the GPL and not to
|
||||
; allow others to use your version of this file under the MPL,
|
||||
; indicate your decision by deleting the provisions above and
|
||||
; replace them with the notice and other provisions required by
|
||||
; the GPL. If you do not delete the provisions above, a recipient
|
||||
; may use your version of this file under either the MPL or the
|
||||
; GPL.
|
||||
|
||||
#ifdef __LP64__
|
||||
.LEVEL 2.0W
|
||||
#else
|
||||
; .LEVEL 1.1
|
||||
; .ALLOW 2.0N
|
||||
.LEVEL 2.0N
|
||||
#endif
|
||||
.SPACE $TEXT$,SORT=8
|
||||
.SUBSPA $CODE$,QUAD=0,ALIGN=4,ACCESS=0x2c,CODE_ONLY,SORT=24
|
||||
|
||||
; ***************************************************************
|
||||
;
|
||||
; maxpy_[little/big]
|
||||
;
|
||||
; ***************************************************************
|
||||
|
||||
; There is no default -- you must specify one or the other.
|
||||
#define LITTLE_WORDIAN 1
|
||||
|
||||
#ifdef LITTLE_WORDIAN
|
||||
#define EIGHT 8
|
||||
#define SIXTEEN 16
|
||||
#define THIRTY_TWO 32
|
||||
#define UN_EIGHT -8
|
||||
#define UN_SIXTEEN -16
|
||||
#define UN_TWENTY_FOUR -24
|
||||
#endif
|
||||
|
||||
#ifdef BIG_WORDIAN
|
||||
#define EIGHT -8
|
||||
#define SIXTEEN -16
|
||||
#define THIRTY_TWO -32
|
||||
#define UN_EIGHT 8
|
||||
#define UN_SIXTEEN 16
|
||||
#define UN_TWENTY_FOUR 24
|
||||
#endif
|
||||
|
||||
; This performs a multiple-precision integer version of "daxpy",
|
||||
; using the selected addressing direction. "Little-wordian" means that
|
||||
; the least significant word of a number is stored at the lowest address.
|
||||
; "Big-wordian" means that the most significant word is at the lowest
|
||||
; address. Either way, the incoming address of the vector is that
|
||||
; of the least significant word. That means that, for little-wordian
|
||||
; addressing, we move the address upward as we propagate carries
|
||||
; from the least significant word to the most significant. For
|
||||
; big-wordian we move the address downward.
|
||||
|
||||
; We use the following registers:
|
||||
;
|
||||
; r2 return PC, of course
|
||||
; r26 = arg1 = length
|
||||
; r25 = arg2 = address of scalar
|
||||
; r24 = arg3 = multiplicand vector
|
||||
; r23 = arg4 = result vector
|
||||
;
|
||||
; fr9 = scalar loaded once only from r25
|
||||
|
||||
; The cycle counts shown in the bodies below are simply the result of a
|
||||
; scheduling by hand. The actual PCX-U hardware does it differently.
|
||||
; The intention is that the overall speed is the same.
|
||||
|
||||
; The pipeline startup and shutdown code is constructed in the usual way,
|
||||
; by taking the loop bodies and removing unnecessary instructions.
|
||||
; We have left the comments describing cycle numbers in the code.
|
||||
; These are intended for reference when comparing with the main loop,
|
||||
; and have no particular relationship to actual cycle numbers.
|
||||
|
||||
#ifdef LITTLE_WORDIAN
|
||||
maxpy_little
|
||||
#else
|
||||
maxpy_big
|
||||
#endif
|
||||
.PROC
|
||||
.CALLINFO FRAME=120,ENTRY_GR=%r4
|
||||
.ENTER
|
||||
|
||||
; Of course, real men don't use the sissy "enter" and "leave" commands.
|
||||
; They write their own stack manipulation stuff. Unfortunately,
|
||||
; that doesn't generate complete unwind info, whereas "enter" and
|
||||
; "leave" (if the documentation is to be believed) do so. Therefore,
|
||||
; we use the sissy commands. We have verified (by real-man methods)
|
||||
; that the above command generates what we want:
|
||||
; STW,MA %r3,128(%sp)
|
||||
; STW %r4,-124(%sp)
|
||||
|
||||
ADDIB,< -1,%r26,$L0 ; If N = 0, exit immediately.
|
||||
FLDD 0(%r25),%fr9 ; fr9 = scalar
|
||||
|
||||
; First startup
|
||||
|
||||
FLDD 0(%r24),%fr24 ; Cycle 1
|
||||
XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3
|
||||
XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4
|
||||
XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5
|
||||
CMPIB,> 3,%r26,$N_IS_SMALL ; Pick out cases N = 1, 2, or 3
|
||||
XMPYU %fr9L,%fr24R,%fr24 ; Cycle 6
|
||||
FLDD EIGHT(%r24),%fr28 ; Cycle 8
|
||||
XMPYU %fr9L,%fr28R,%fr31 ; Cycle 10
|
||||
FSTD %fr24,-96(%sp)
|
||||
XMPYU %fr9R,%fr28L,%fr30 ; Cycle 11
|
||||
FSTD %fr25,-80(%sp)
|
||||
LDO SIXTEEN(%r24),%r24 ; Cycle 12
|
||||
FSTD %fr31,-64(%sp)
|
||||
XMPYU %fr9R,%fr28R,%fr29 ; Cycle 13
|
||||
FSTD %fr27,-48(%sp)
|
||||
|
||||
; Second startup
|
||||
|
||||
XMPYU %fr9L,%fr28L,%fr28 ; Cycle 1
|
||||
FSTD %fr30,-56(%sp)
|
||||
FLDD 0(%r24),%fr24
|
||||
|
||||
FSTD %fr26,-88(%sp) ; Cycle 2
|
||||
|
||||
XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3
|
||||
FSTD %fr28,-104(%sp)
|
||||
|
||||
XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4
|
||||
LDD -96(%sp),%r3
|
||||
FSTD %fr29,-72(%sp)
|
||||
|
||||
XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5
|
||||
LDD -64(%sp),%r19
|
||||
LDD -80(%sp),%r21
|
||||
|
||||
XMPYU %fr9L,%fr24R,%fr24 ; Cycle 6
|
||||
LDD -56(%sp),%r20
|
||||
ADD %r21,%r3,%r3
|
||||
|
||||
ADD,DC %r20,%r19,%r19 ; Cycle 7
|
||||
LDD -88(%sp),%r4
|
||||
SHRPD %r3,%r0,32,%r21
|
||||
LDD -48(%sp),%r1
|
||||
|
||||
FLDD EIGHT(%r24),%fr28 ; Cycle 8
|
||||
LDD -104(%sp),%r31
|
||||
ADD,DC %r0,%r0,%r20
|
||||
SHRPD %r19,%r3,32,%r3
|
||||
|
||||
LDD -72(%sp),%r29 ; Cycle 9
|
||||
SHRPD %r20,%r19,32,%r20
|
||||
ADD %r21,%r1,%r1
|
||||
|
||||
XMPYU %fr9L,%fr28R,%fr31 ; Cycle 10
|
||||
ADD,DC %r3,%r4,%r4
|
||||
FSTD %fr24,-96(%sp)
|
||||
|
||||
XMPYU %fr9R,%fr28L,%fr30 ; Cycle 11
|
||||
ADD,DC %r0,%r20,%r20
|
||||
LDD 0(%r23),%r3
|
||||
FSTD %fr25,-80(%sp)
|
||||
|
||||
LDO SIXTEEN(%r24),%r24 ; Cycle 12
|
||||
FSTD %fr31,-64(%sp)
|
||||
|
||||
XMPYU %fr9R,%fr28R,%fr29 ; Cycle 13
|
||||
ADD %r0,%r0,%r0 ; clear the carry bit
|
||||
ADDIB,<= -4,%r26,$ENDLOOP ; actually happens in cycle 12
|
||||
FSTD %fr27,-48(%sp)
|
||||
; MFCTL %cr16,%r21 ; for timing
|
||||
; STD %r21,-112(%sp)
|
||||
|
||||
; Here is the loop.
|
||||
|
||||
$LOOP XMPYU %fr9L,%fr28L,%fr28 ; Cycle 1
|
||||
ADD,DC %r29,%r4,%r4
|
||||
FSTD %fr30,-56(%sp)
|
||||
FLDD 0(%r24),%fr24
|
||||
|
||||
LDO SIXTEEN(%r23),%r23 ; Cycle 2
|
||||
ADD,DC %r0,%r20,%r20
|
||||
FSTD %fr26,-88(%sp)
|
||||
|
||||
XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3
|
||||
ADD %r3,%r1,%r1
|
||||
FSTD %fr28,-104(%sp)
|
||||
LDD UN_EIGHT(%r23),%r21
|
||||
|
||||
XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4
|
||||
ADD,DC %r21,%r4,%r28
|
||||
FSTD %fr29,-72(%sp)
|
||||
LDD -96(%sp),%r3
|
||||
|
||||
XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5
|
||||
ADD,DC %r20,%r31,%r22
|
||||
LDD -64(%sp),%r19
|
||||
LDD -80(%sp),%r21
|
||||
|
||||
XMPYU %fr9L,%fr24R,%fr24 ; Cycle 6
|
||||
ADD %r21,%r3,%r3
|
||||
LDD -56(%sp),%r20
|
||||
STD %r1,UN_SIXTEEN(%r23)
|
||||
|
||||
ADD,DC %r20,%r19,%r19 ; Cycle 7
|
||||
SHRPD %r3,%r0,32,%r21
|
||||
LDD -88(%sp),%r4
|
||||
LDD -48(%sp),%r1
|
||||
|
||||
ADD,DC %r0,%r0,%r20 ; Cycle 8
|
||||
SHRPD %r19,%r3,32,%r3
|
||||
FLDD EIGHT(%r24),%fr28
|
||||
LDD -104(%sp),%r31
|
||||
|
||||
SHRPD %r20,%r19,32,%r20 ; Cycle 9
|
||||
ADD %r21,%r1,%r1
|
||||
STD %r28,UN_EIGHT(%r23)
|
||||
LDD -72(%sp),%r29
|
||||
|
||||
XMPYU %fr9L,%fr28R,%fr31 ; Cycle 10
|
||||
ADD,DC %r3,%r4,%r4
|
||||
FSTD %fr24,-96(%sp)
|
||||
|
||||
XMPYU %fr9R,%fr28L,%fr30 ; Cycle 11
|
||||
ADD,DC %r0,%r20,%r20
|
||||
FSTD %fr25,-80(%sp)
|
||||
LDD 0(%r23),%r3
|
||||
|
||||
LDO SIXTEEN(%r24),%r24 ; Cycle 12
|
||||
FSTD %fr31,-64(%sp)
|
||||
|
||||
XMPYU %fr9R,%fr28R,%fr29 ; Cycle 13
|
||||
ADD %r22,%r1,%r1
|
||||
ADDIB,> -2,%r26,$LOOP ; actually happens in cycle 12
|
||||
FSTD %fr27,-48(%sp)
|
||||
|
||||
$ENDLOOP
|
||||
|
||||
; Shutdown code, first stage.
|
||||
|
||||
; MFCTL %cr16,%r21 ; for timing
|
||||
; STD %r21,UN_SIXTEEN(%r23)
|
||||
; LDD -112(%sp),%r21
|
||||
; STD %r21,UN_EIGHT(%r23)
|
||||
|
||||
XMPYU %fr9L,%fr28L,%fr28 ; Cycle 1
|
||||
ADD,DC %r29,%r4,%r4
|
||||
CMPIB,= 0,%r26,$ONEMORE
|
||||
FSTD %fr30,-56(%sp)
|
||||
|
||||
LDO SIXTEEN(%r23),%r23 ; Cycle 2
|
||||
ADD,DC %r0,%r20,%r20
|
||||
FSTD %fr26,-88(%sp)
|
||||
|
||||
ADD %r3,%r1,%r1 ; Cycle 3
|
||||
FSTD %fr28,-104(%sp)
|
||||
LDD UN_EIGHT(%r23),%r21
|
||||
|
||||
ADD,DC %r21,%r4,%r28 ; Cycle 4
|
||||
FSTD %fr29,-72(%sp)
|
||||
STD %r28,UN_EIGHT(%r23) ; moved up from cycle 9
|
||||
LDD -96(%sp),%r3
|
||||
|
||||
ADD,DC %r20,%r31,%r22 ; Cycle 5
|
||||
STD %r1,UN_SIXTEEN(%r23)
|
||||
$JOIN4
|
||||
LDD -64(%sp),%r19
|
||||
LDD -80(%sp),%r21
|
||||
|
||||
ADD %r21,%r3,%r3 ; Cycle 6
|
||||
LDD -56(%sp),%r20
|
||||
|
||||
ADD,DC %r20,%r19,%r19 ; Cycle 7
|
||||
SHRPD %r3,%r0,32,%r21
|
||||
LDD -88(%sp),%r4
|
||||
LDD -48(%sp),%r1
|
||||
|
||||
ADD,DC %r0,%r0,%r20 ; Cycle 8
|
||||
SHRPD %r19,%r3,32,%r3
|
||||
LDD -104(%sp),%r31
|
||||
|
||||
SHRPD %r20,%r19,32,%r20 ; Cycle 9
|
||||
ADD %r21,%r1,%r1
|
||||
LDD -72(%sp),%r29
|
||||
|
||||
ADD,DC %r3,%r4,%r4 ; Cycle 10
|
||||
|
||||
ADD,DC %r0,%r20,%r20 ; Cycle 11
|
||||
LDD 0(%r23),%r3
|
||||
|
||||
ADD %r22,%r1,%r1 ; Cycle 13
|
||||
|
||||
; Shutdown code, second stage.
|
||||
|
||||
ADD,DC %r29,%r4,%r4 ; Cycle 1
|
||||
|
||||
LDO SIXTEEN(%r23),%r23 ; Cycle 2
|
||||
ADD,DC %r0,%r20,%r20
|
||||
|
||||
LDD UN_EIGHT(%r23),%r21 ; Cycle 3
|
||||
ADD %r3,%r1,%r1
|
||||
|
||||
ADD,DC %r21,%r4,%r28 ; Cycle 4
|
||||
|
||||
ADD,DC %r20,%r31,%r22 ; Cycle 5
|
||||
|
||||
STD %r1,UN_SIXTEEN(%r23); Cycle 6
|
||||
|
||||
STD %r28,UN_EIGHT(%r23) ; Cycle 9
|
||||
|
||||
LDD 0(%r23),%r3 ; Cycle 11
|
||||
|
||||
; Shutdown code, third stage.
|
||||
|
||||
LDO SIXTEEN(%r23),%r23
|
||||
ADD %r3,%r22,%r1
|
||||
$JOIN1 ADD,DC %r0,%r0,%r21
|
||||
CMPIB,*= 0,%r21,$L0 ; if no overflow, exit
|
||||
STD %r1,UN_SIXTEEN(%r23)
|
||||
|
||||
; Final carry propagation
|
||||
|
||||
$FINAL1 LDO EIGHT(%r23),%r23
|
||||
LDD UN_SIXTEEN(%r23),%r21
|
||||
ADDI 1,%r21,%r21
|
||||
CMPIB,*= 0,%r21,$FINAL1 ; Keep looping if there is a carry.
|
||||
STD %r21,UN_SIXTEEN(%r23)
|
||||
B $L0
|
||||
NOP
|
||||
|
||||
; Here is the code that handles the difficult cases N=1, N=2, and N=3.
|
||||
; We do the usual trick -- branch out of the startup code at appropriate
|
||||
; points, and branch into the shutdown code.
|
||||
|
||||
$N_IS_SMALL
|
||||
CMPIB,= 0,%r26,$N_IS_ONE
|
||||
FSTD %fr24,-96(%sp) ; Cycle 10
|
||||
FLDD EIGHT(%r24),%fr28 ; Cycle 8
|
||||
XMPYU %fr9L,%fr28R,%fr31 ; Cycle 10
|
||||
XMPYU %fr9R,%fr28L,%fr30 ; Cycle 11
|
||||
FSTD %fr25,-80(%sp)
|
||||
FSTD %fr31,-64(%sp) ; Cycle 12
|
||||
XMPYU %fr9R,%fr28R,%fr29 ; Cycle 13
|
||||
FSTD %fr27,-48(%sp)
|
||||
XMPYU %fr9L,%fr28L,%fr28 ; Cycle 1
|
||||
CMPIB,= 2,%r26,$N_IS_THREE
|
||||
FSTD %fr30,-56(%sp)
|
||||
|
||||
; N = 2
|
||||
FSTD %fr26,-88(%sp) ; Cycle 2
|
||||
FSTD %fr28,-104(%sp) ; Cycle 3
|
||||
LDD -96(%sp),%r3 ; Cycle 4
|
||||
FSTD %fr29,-72(%sp)
|
||||
B $JOIN4
|
||||
ADD %r0,%r0,%r22
|
||||
|
||||
$N_IS_THREE
|
||||
FLDD SIXTEEN(%r24),%fr24
|
||||
FSTD %fr26,-88(%sp) ; Cycle 2
|
||||
XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3
|
||||
FSTD %fr28,-104(%sp)
|
||||
XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4
|
||||
LDD -96(%sp),%r3
|
||||
FSTD %fr29,-72(%sp)
|
||||
XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5
|
||||
LDD -64(%sp),%r19
|
||||
LDD -80(%sp),%r21
|
||||
B $JOIN3
|
||||
ADD %r0,%r0,%r22
|
||||
|
||||
$N_IS_ONE
|
||||
FSTD %fr25,-80(%sp)
|
||||
FSTD %fr27,-48(%sp)
|
||||
FSTD %fr26,-88(%sp) ; Cycle 2
|
||||
B $JOIN5
|
||||
ADD %r0,%r0,%r22
|
||||
|
||||
; We came out of the unrolled loop with wrong parity. Do one more
|
||||
; single cycle. This is quite tricky, because of the way the
|
||||
; carry chains and SHRPD chains have been chopped up.
|
||||
|
||||
$ONEMORE
|
||||
|
||||
FLDD 0(%r24),%fr24
|
||||
|
||||
LDO SIXTEEN(%r23),%r23 ; Cycle 2
|
||||
ADD,DC %r0,%r20,%r20
|
||||
FSTD %fr26,-88(%sp)
|
||||
|
||||
XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3
|
||||
FSTD %fr28,-104(%sp)
|
||||
LDD UN_EIGHT(%r23),%r21
|
||||
ADD %r3,%r1,%r1
|
||||
|
||||
XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4
|
||||
ADD,DC %r21,%r4,%r28
|
||||
STD %r28,UN_EIGHT(%r23) ; moved from cycle 9
|
||||
LDD -96(%sp),%r3
|
||||
FSTD %fr29,-72(%sp)
|
||||
|
||||
XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5
|
||||
ADD,DC %r20,%r31,%r22
|
||||
LDD -64(%sp),%r19
|
||||
LDD -80(%sp),%r21
|
||||
|
||||
STD %r1,UN_SIXTEEN(%r23); Cycle 6
|
||||
$JOIN3
|
||||
XMPYU %fr9L,%fr24R,%fr24
|
||||
LDD -56(%sp),%r20
|
||||
ADD %r21,%r3,%r3
|
||||
|
||||
ADD,DC %r20,%r19,%r19 ; Cycle 7
|
||||
LDD -88(%sp),%r4
|
||||
SHRPD %r3,%r0,32,%r21
|
||||
LDD -48(%sp),%r1
|
||||
|
||||
LDD -104(%sp),%r31 ; Cycle 8
|
||||
ADD,DC %r0,%r0,%r20
|
||||
SHRPD %r19,%r3,32,%r3
|
||||
|
||||
LDD -72(%sp),%r29 ; Cycle 9
|
||||
SHRPD %r20,%r19,32,%r20
|
||||
ADD %r21,%r1,%r1
|
||||
|
||||
ADD,DC %r3,%r4,%r4 ; Cycle 10
|
||||
FSTD %fr24,-96(%sp)
|
||||
|
||||
ADD,DC %r0,%r20,%r20 ; Cycle 11
|
||||
LDD 0(%r23),%r3
|
||||
FSTD %fr25,-80(%sp)
|
||||
|
||||
ADD %r22,%r1,%r1 ; Cycle 13
|
||||
FSTD %fr27,-48(%sp)
|
||||
|
||||
; Shutdown code, stage 1-1/2.
|
||||
|
||||
ADD,DC %r29,%r4,%r4 ; Cycle 1
|
||||
|
||||
LDO SIXTEEN(%r23),%r23 ; Cycle 2
|
||||
ADD,DC %r0,%r20,%r20
|
||||
FSTD %fr26,-88(%sp)
|
||||
|
||||
LDD UN_EIGHT(%r23),%r21 ; Cycle 3
|
||||
ADD %r3,%r1,%r1
|
||||
|
||||
ADD,DC %r21,%r4,%r28 ; Cycle 4
|
||||
STD %r28,UN_EIGHT(%r23) ; moved from cycle 9
|
||||
|
||||
ADD,DC %r20,%r31,%r22 ; Cycle 5
|
||||
STD %r1,UN_SIXTEEN(%r23)
|
||||
$JOIN5
|
||||
LDD -96(%sp),%r3 ; moved from cycle 4
|
||||
LDD -80(%sp),%r21
|
||||
ADD %r21,%r3,%r3 ; Cycle 6
|
||||
ADD,DC %r0,%r0,%r19 ; Cycle 7
|
||||
LDD -88(%sp),%r4
|
||||
SHRPD %r3,%r0,32,%r21
|
||||
LDD -48(%sp),%r1
|
||||
SHRPD %r19,%r3,32,%r3 ; Cycle 8
|
||||
ADD %r21,%r1,%r1 ; Cycle 9
|
||||
ADD,DC %r3,%r4,%r4 ; Cycle 10
|
||||
LDD 0(%r23),%r3 ; Cycle 11
|
||||
ADD %r22,%r1,%r1 ; Cycle 13
|
||||
|
||||
; Shutdown code, stage 2-1/2.
|
||||
|
||||
ADD,DC %r0,%r4,%r4 ; Cycle 1
|
||||
LDO SIXTEEN(%r23),%r23 ; Cycle 2
|
||||
LDD UN_EIGHT(%r23),%r21 ; Cycle 3
|
||||
ADD %r3,%r1,%r1
|
||||
STD %r1,UN_SIXTEEN(%r23)
|
||||
ADD,DC %r21,%r4,%r1
|
||||
B $JOIN1
|
||||
LDO EIGHT(%r23),%r23
|
||||
|
||||
; exit
|
||||
|
||||
$L0
|
||||
.LEAVE
|
||||
|
||||
; We have verified that the above command generates what we want:
|
||||
; LDW -124(%sp),%r4
|
||||
; BVE (%r2)
|
||||
; LDW,MB -128(%sp),%r3
|
||||
|
||||
.PROCEND
|
||||
|
||||
; ***************************************************************
|
||||
;
|
||||
; add_diag_[little/big]
|
||||
;
|
||||
; ***************************************************************
|
||||
|
||||
; The arguments are as follows:
|
||||
; r2 return PC, of course
|
||||
; r26 = arg1 = length
|
||||
; r25 = arg2 = vector to square
|
||||
; r24 = arg3 = result vector
|
||||
|
||||
#ifdef LITTLE_WORDIAN
|
||||
add_diag_little
|
||||
#else
|
||||
add_diag_big
|
||||
#endif
|
||||
.PROC
|
||||
.CALLINFO FRAME=120,ENTRY_GR=%r4
|
||||
.ENTER
|
||||
|
||||
ADDIB,< -1,%r26,$Z0 ; If N=0, exit immediately.
|
||||
NOP
|
||||
|
||||
; Startup code
|
||||
|
||||
FLDD 0(%r25),%fr7 ; Cycle 2 (alternate body)
|
||||
XMPYU %fr7R,%fr7R,%fr29 ; Cycle 4
|
||||
XMPYU %fr7L,%fr7R,%fr27 ; Cycle 5
|
||||
XMPYU %fr7L,%fr7L,%fr30
|
||||
LDO SIXTEEN(%r25),%r25 ; Cycle 6
|
||||
FSTD %fr29,-88(%sp)
|
||||
FSTD %fr27,-72(%sp) ; Cycle 7
|
||||
CMPIB,= 0,%r26,$DIAG_N_IS_ONE ; Cycle 1 (main body)
|
||||
FSTD %fr30,-96(%sp)
|
||||
FLDD UN_EIGHT(%r25),%fr7 ; Cycle 2
|
||||
LDD -88(%sp),%r22 ; Cycle 3
|
||||
LDD -72(%sp),%r31 ; Cycle 4
|
||||
XMPYU %fr7R,%fr7R,%fr28
|
||||
XMPYU %fr7L,%fr7R,%fr24 ; Cycle 5
|
||||
XMPYU %fr7L,%fr7L,%fr31
|
||||
LDD -96(%sp),%r20 ; Cycle 6
|
||||
FSTD %fr28,-80(%sp)
|
||||
ADD %r0,%r0,%r0 ; clear the carry bit
|
||||
ADDIB,<= -2,%r26,$ENDDIAGLOOP ; Cycle 7
|
||||
FSTD %fr24,-64(%sp)
|
||||
|
||||
; Here is the loop. It is unrolled twice, modelled after the "alternate body" and then the "main body".
|
||||
|
||||
$DIAGLOOP
|
||||
SHRPD %r31,%r0,31,%r3 ; Cycle 1 (alternate body)
|
||||
LDO SIXTEEN(%r25),%r25
|
||||
LDD 0(%r24),%r1
|
||||
FSTD %fr31,-104(%sp)
|
||||
SHRPD %r0,%r31,31,%r4 ; Cycle 2
|
||||
ADD,DC %r22,%r3,%r3
|
||||
FLDD UN_SIXTEEN(%r25),%fr7
|
||||
ADD,DC %r0,%r20,%r20 ; Cycle 3
|
||||
ADD %r1,%r3,%r3
|
||||
XMPYU %fr7R,%fr7R,%fr29 ; Cycle 4
|
||||
LDD -80(%sp),%r21
|
||||
STD %r3,0(%r24)
|
||||
XMPYU %fr7L,%fr7R,%fr27 ; Cycle 5
|
||||
XMPYU %fr7L,%fr7L,%fr30
|
||||
LDD -64(%sp),%r29
|
||||
LDD EIGHT(%r24),%r1
|
||||
ADD,DC %r4,%r20,%r20 ; Cycle 6
|
||||
LDD -104(%sp),%r19
|
||||
FSTD %fr29,-88(%sp)
|
||||
ADD %r20,%r1,%r1 ; Cycle 7
|
||||
FSTD %fr27,-72(%sp)
|
||||
SHRPD %r29,%r0,31,%r4 ; Cycle 1 (main body)
|
||||
LDO THIRTY_TWO(%r24),%r24
|
||||
LDD UN_SIXTEEN(%r24),%r28
|
||||
FSTD %fr30,-96(%sp)
|
||||
SHRPD %r0,%r29,31,%r3 ; Cycle 2
|
||||
ADD,DC %r21,%r4,%r4
|
||||
FLDD UN_EIGHT(%r25),%fr7
|
||||
STD %r1,UN_TWENTY_FOUR(%r24)
|
||||
ADD,DC %r0,%r19,%r19 ; Cycle 3
|
||||
ADD %r28,%r4,%r4
|
||||
XMPYU %fr7R,%fr7R,%fr28 ; Cycle 4
|
||||
LDD -88(%sp),%r22
|
||||
STD %r4,UN_SIXTEEN(%r24)
|
||||
XMPYU %fr7L,%fr7R,%fr24 ; Cycle 5
|
||||
XMPYU %fr7L,%fr7L,%fr31
|
||||
LDD -72(%sp),%r31
|
||||
LDD UN_EIGHT(%r24),%r28
|
||||
ADD,DC %r3,%r19,%r19 ; Cycle 6
|
||||
LDD -96(%sp),%r20
|
||||
FSTD %fr28,-80(%sp)
|
||||
ADD %r19,%r28,%r28 ; Cycle 7
|
||||
FSTD %fr24,-64(%sp)
|
||||
ADDIB,> -2,%r26,$DIAGLOOP ; Cycle 8
|
||||
STD %r28,UN_EIGHT(%r24)
|
||||
|
||||
$ENDDIAGLOOP
|
||||
|
||||
ADD,DC %r0,%r22,%r22
|
||||
CMPIB,= 0,%r26,$ONEMOREDIAG
|
||||
SHRPD %r31,%r0,31,%r3
|
||||
|
||||
; Shutdown code, first stage.
|
||||
|
||||
FSTD %fr31,-104(%sp) ; Cycle 1 (alternate body)
|
||||
LDD 0(%r24),%r28
|
||||
SHRPD %r0,%r31,31,%r4 ; Cycle 2
|
||||
ADD %r3,%r22,%r3
|
||||
ADD,DC %r0,%r20,%r20 ; Cycle 3
|
||||
LDD -80(%sp),%r21
|
||||
ADD %r3,%r28,%r3
|
||||
LDD -64(%sp),%r29 ; Cycle 4
|
||||
STD %r3,0(%r24)
|
||||
LDD EIGHT(%r24),%r1 ; Cycle 5
|
||||
LDO SIXTEEN(%r25),%r25 ; Cycle 6
|
||||
LDD -104(%sp),%r19
|
||||
ADD,DC %r4,%r20,%r20
|
||||
ADD %r20,%r1,%r1 ; Cycle 7
|
||||
ADD,DC %r0,%r21,%r21 ; Cycle 8
|
||||
STD %r1,EIGHT(%r24)
|
||||
|
||||
; Shutdown code, second stage.
|
||||
|
||||
SHRPD %r29,%r0,31,%r4 ; Cycle 1 (main body)
|
||||
LDO THIRTY_TWO(%r24),%r24
|
||||
LDD UN_SIXTEEN(%r24),%r1
|
||||
SHRPD %r0,%r29,31,%r3 ; Cycle 2
|
||||
ADD %r4,%r21,%r4
|
||||
ADD,DC %r0,%r19,%r19 ; Cycle 3
|
||||
ADD %r4,%r1,%r4
|
||||
STD %r4,UN_SIXTEEN(%r24); Cycle 4
|
||||
LDD UN_EIGHT(%r24),%r28 ; Cycle 5
|
||||
ADD,DC %r3,%r19,%r19 ; Cycle 6
|
||||
ADD %r19,%r28,%r28 ; Cycle 7
|
||||
ADD,DC %r0,%r0,%r22 ; Cycle 8
|
||||
CMPIB,*= 0,%r22,$Z0 ; if no overflow, exit
|
||||
STD %r28,UN_EIGHT(%r24)
|
||||
|
||||
; Final carry propagation
|
||||
|
||||
$FDIAG2
|
||||
LDO EIGHT(%r24),%r24
|
||||
LDD UN_EIGHT(%r24),%r26
|
||||
ADDI 1,%r26,%r26
|
||||
CMPIB,*= 0,%r26,$FDIAG2 ; Keep looping if there is a carry.
|
||||
STD %r26,UN_EIGHT(%r24)
|
||||
|
||||
B $Z0
|
||||
NOP
|
||||
|
||||
; Here is the code that handles the difficult case N=1.
|
||||
; We do the usual trick -- branch out of the startup code at appropriate
|
||||
; points, and branch into the shutdown code.
|
||||
|
||||
$DIAG_N_IS_ONE
|
||||
|
||||
LDD -88(%sp),%r22
|
||||
LDD -72(%sp),%r31
|
||||
B $JOINDIAG
|
||||
LDD -96(%sp),%r20
|
||||
|
||||
; We came out of the unrolled loop with wrong parity. Do one more
|
||||
; single cycle. This is the "alternate body". It will, of course,
|
||||
; give us opposite registers from the other case, so we need
|
||||
; completely different shutdown code.
|
||||
|
||||
$ONEMOREDIAG
|
||||
FSTD %fr31,-104(%sp) ; Cycle 1 (alternate body)
|
||||
LDD 0(%r24),%r28
|
||||
FLDD 0(%r25),%fr7 ; Cycle 2
|
||||
SHRPD %r0,%r31,31,%r4
|
||||
ADD %r3,%r22,%r3
|
||||
ADD,DC %r0,%r20,%r20 ; Cycle 3
|
||||
LDD -80(%sp),%r21
|
||||
ADD %r3,%r28,%r3
|
||||
LDD -64(%sp),%r29 ; Cycle 4
|
||||
STD %r3,0(%r24)
|
||||
XMPYU %fr7R,%fr7R,%fr29
|
||||
LDD EIGHT(%r24),%r1 ; Cycle 5
|
||||
XMPYU %fr7L,%fr7R,%fr27
|
||||
XMPYU %fr7L,%fr7L,%fr30
|
||||
LDD -104(%sp),%r19 ; Cycle 6
|
||||
FSTD %fr29,-88(%sp)
|
||||
ADD,DC %r4,%r20,%r20
|
||||
FSTD %fr27,-72(%sp) ; Cycle 7
|
||||
ADD %r20,%r1,%r1
|
||||
ADD,DC %r0,%r21,%r21 ; Cycle 8
|
||||
STD %r1,EIGHT(%r24)
|
||||
|
||||
; Shutdown code, first stage.
|
||||
|
||||
SHRPD %r29,%r0,31,%r4 ; Cycle 1 (main body)
|
||||
LDO THIRTY_TWO(%r24),%r24
|
||||
FSTD %fr30,-96(%sp)
|
||||
LDD UN_SIXTEEN(%r24),%r1
|
||||
SHRPD %r0,%r29,31,%r3 ; Cycle 2
|
||||
ADD %r4,%r21,%r4
|
||||
ADD,DC %r0,%r19,%r19 ; Cycle 3
|
||||
LDD -88(%sp),%r22
|
||||
ADD %r4,%r1,%r4
|
||||
LDD -72(%sp),%r31 ; Cycle 4
|
||||
STD %r4,UN_SIXTEEN(%r24)
|
||||
LDD UN_EIGHT(%r24),%r28 ; Cycle 5
|
||||
LDD -96(%sp),%r20 ; Cycle 6
|
||||
ADD,DC %r3,%r19,%r19
|
||||
ADD %r19,%r28,%r28 ; Cycle 7
|
||||
ADD,DC %r0,%r22,%r22 ; Cycle 8
|
||||
STD %r28,UN_EIGHT(%r24)
|
||||
|
||||
; Shutdown code, second stage.
|
||||
|
||||
$JOINDIAG
|
||||
SHRPD %r31,%r0,31,%r3 ; Cycle 1 (alternate body)
|
||||
LDD 0(%r24),%r28
|
||||
SHRPD %r0,%r31,31,%r4 ; Cycle 2
|
||||
ADD %r3,%r22,%r3
|
||||
ADD,DC %r0,%r20,%r20 ; Cycle 3
|
||||
ADD %r3,%r28,%r3
|
||||
STD %r3,0(%r24) ; Cycle 4
|
||||
LDD EIGHT(%r24),%r1 ; Cycle 5
|
||||
ADD,DC %r4,%r20,%r20
|
||||
ADD %r20,%r1,%r1 ; Cycle 7
|
||||
ADD,DC %r0,%r0,%r21 ; Cycle 8
|
||||
CMPIB,*= 0,%r21,$Z0 ; if no overflow, exit
|
||||
STD %r1,EIGHT(%r24)
|
||||
|
||||
; Final carry propagation
|
||||
|
||||
$FDIAG1
|
||||
LDO EIGHT(%r24),%r24
|
||||
LDD EIGHT(%r24),%r26
|
||||
ADDI 1,%r26,%r26
|
||||
CMPIB,*= 0,%r26,$FDIAG1 ; Keep looping if there is a carry.
|
||||
STD %r26,EIGHT(%r24)
|
||||
|
||||
$Z0
|
||||
.LEAVE
|
||||
.PROCEND
|
||||
; .ALLOW
|
||||
|
||||
.SPACE $TEXT$
|
||||
.SUBSPA $CODE$
|
||||
#ifdef LITTLE_WORDIAN
|
||||
.EXPORT maxpy_little,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,LONG_RETURN
|
||||
.EXPORT add_diag_little,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,LONG_RETURN
|
||||
#else
|
||||
.EXPORT maxpy_big,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,LONG_RETURN
|
||||
.EXPORT add_diag_big,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,LONG_RETURN
|
||||
#endif
|
||||
.END
|
||||
|
||||
|
||||
; How to use "maxpy_PA20_little" and "maxpy_PA20_big"
|
||||
;
|
||||
; The routine "maxpy_PA20_little" or "maxpy_PA20_big"
|
||||
; performs a 64-bit x any-size multiply, and adds the
|
||||
; result to an area of memory. That is, it performs
|
||||
; something like
|
||||
;
|
||||
; A B C D
|
||||
; * Z
|
||||
; __________
|
||||
; P Q R S T
|
||||
;
|
||||
; and then adds the "PQRST" vector into an area of memory,
|
||||
; handling all carries.
|
||||
;
|
||||
; Digression on nomenclature and endian-ness:
|
||||
;
|
||||
; Each of the capital letters in the above represents a 64-bit
|
||||
; quantity. That is, you could think of the discussion as
|
||||
; being in terms of radix-16-quintillion arithmetic. The data
|
||||
; type being manipulated is "unsigned long long int". This
|
||||
; requires the 64-bit extension of the HP-UX C compiler,
|
||||
; available at release 10. You need these compiler flags to
|
||||
; enable these extensions:
|
||||
;
|
||||
; -Aa +e +DA2.0 +DS2.0
|
||||
;
|
||||
; (The first specifies ANSI C, the second enables the
|
||||
; extensions, which are beyond ANSI C, and the third and
|
||||
; fourth tell the compiler to use whatever features of the
|
||||
; PA2.0 architecture it wishes, in order to make the code more
|
||||
; efficient. Since the presence of the assembly code will
|
||||
; make the program unable to run on anything less than PA2.0,
|
||||
; you might as well gain the performance enhancements in the C
|
||||
; code as well.)
|
||||
;
|
||||
; Questions of "endian-ness" often come up, usually in the
|
||||
; context of byte ordering in a word. These routines have a
|
||||
; similar issue, that could be called "wordian-ness".
|
||||
; Independent of byte ordering (PA is always big-endian), one
|
||||
; can make two choices when representing extremely large
|
||||
; numbers as arrays of 64-bit doublewords in memory.
|
||||
;
|
||||
; "Little-wordian" layout means that the least significant
|
||||
; word of a number is stored at the lowest address.
|
||||
;
|
||||
; MSW LSW
|
||||
; | |
|
||||
; V V
|
||||
;
|
||||
; A B C D E
|
||||
;
|
||||
; ^ ^ ^
|
||||
; | | |____ address 0
|
||||
; | |
|
||||
; | |_______address 8
|
||||
; |
|
||||
; address 32
|
||||
;
|
||||
; "Big-wordian" means that the most significant word is at the
|
||||
; lowest address.
|
||||
;
|
||||
; MSW LSW
|
||||
; | |
|
||||
; V V
|
||||
;
|
||||
; A B C D E
|
||||
;
|
||||
; ^ ^ ^
|
||||
; | | |____ address 32
|
||||
; | |
|
||||
; | |_______address 24
|
||||
; |
|
||||
; address 0
|
||||
;
|
||||
; When you compile the file, you must specify one or the other, with
|
||||
; a switch "-DLITTLE_WORDIAN" or "-DBIG_WORDIAN".
|
||||
;
|
||||
; Incidentally, you assemble this file as part of your
|
||||
; project with the same C compiler as the rest of the program.
|
||||
; My "makefile" for a superprecision arithmetic package has
|
||||
; the following stuff:
|
||||
;
|
||||
; # definitions:
|
||||
; CC = cc -Aa +e -z +DA2.0 +DS2.0 +w1
|
||||
; CFLAGS = +O3
|
||||
; LDFLAGS = -L /usr/lib -Wl,-aarchive
|
||||
;
|
||||
; # general build rule for ".s" files:
|
||||
; .s.o:
|
||||
; $(CC) $(CFLAGS) -c $< -DBIG_WORDIAN
|
||||
;
|
||||
; # Now any bind step that calls for pa20.o will assemble pa20.s
|
||||
;
|
||||
; End of digression, back to arithmetic:
|
||||
;
|
||||
; The way we multiply two huge numbers is, of course, to multiply
|
||||
; the "ABCD" vector by each of the "WXYZ" doublewords, adding
|
||||
; the result vectors with increasing offsets, the way we learned
|
||||
; in school, back before we all used calculators:
|
||||
;
|
||||
; A B C D
|
||||
; * W X Y Z
|
||||
; __________
|
||||
; P Q R S T
|
||||
; E F G H I
|
||||
; M N O P Q
|
||||
; + R S T U V
|
||||
; _______________
|
||||
; F I N A L S U M
|
||||
;
|
||||
; So we call maxpy_PA20_big (in my case; my package is
|
||||
; big-wordian) repeatedly, giving the W, X, Y, and Z arguments
|
||||
; in turn as the "scalar", and giving the "ABCD" vector each
|
||||
; time. We direct it to add its result into an area of memory
|
||||
; that we have cleared at the start. We skew the exact
|
||||
; location into that area with each call.
|
||||
;
|
||||
; The prototype for the function is
|
||||
;
|
||||
; extern void maxpy_PA20_big(
|
||||
; int length, /* Number of doublewords in the multiplicand vector. */
|
||||
; const long long int *scalaraddr, /* Address to fetch the scalar. */
|
||||
; const long long int *multiplicand, /* The multiplicand vector. */
|
||||
; long long int *result); /* Where to accumulate the result. */
|
||||
;
|
||||
; (You should place a copy of this prototype in an include file
|
||||
; or in your C file.)
|
||||
;
|
||||
; Now, IN ALL CASES, the given address for the multiplicand or
|
||||
; the result is that of the LEAST SIGNIFICANT DOUBLEWORD.
|
||||
; That word is, of course, the word at which the routine
|
||||
; starts processing. "maxpy_PA20_little" then increases the
|
||||
; addresses as it computes. "maxpy_PA20_big" decreases them.
|
||||
;
|
||||
; In our example above, "length" would be 4 in each case.
|
||||
; "multiplicand" would be the "ABCD" vector. Specifically,
|
||||
; the address of the element "D". "scalaraddr" would be the
|
||||
; address of "W", "X", "Y", or "Z" on the four calls that we
|
||||
; would make. (The order doesn't matter, of course.)
|
||||
; "result" would be the appropriate address in the result
|
||||
; area. When multiplying by "Z", that would be the least
|
||||
; significant word. When multiplying by "Y", it would be the
|
||||
; next higher word (8 bytes higher if little-wordian; 8 bytes
|
||||
; lower if big-wordian), and so on. The size of the result
|
||||
; area must be the sum of the sizes of the multiplicand
|
||||
; and multiplier vectors, and must be initialized to zero
|
||||
; before we start.
|
||||
;
|
||||
; Whenever the routine adds its partial product into the result
|
||||
; vector, it follows carry chains as far as they need to go.
|
||||
;
|
||||
; Here is the super-precision multiply routine that I use for
|
||||
; my package. The package is big-wordian. I have taken out
|
||||
; handling of exponents (it's a floating point package):
|
||||
;
|
||||
; static void mul_PA20(
|
||||
; int size,
|
||||
; const long long int *arg1,
|
||||
; const long long int *arg2,
|
||||
; long long int *result)
|
||||
; {
|
||||
; int i;
|
||||
;
|
||||
; for (i=0 ; i<2*size ; i++) result[i] = 0ULL;
|
||||
;
|
||||
; for (i=0 ; i<size ; i++) {
|
||||
; maxpy_PA20_big(size, &arg2[i], &arg1[size-1], &result[size+i]);
|
||||
; }
|
||||
; }
|
||||
@@ -1,54 +0,0 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# ***** BEGIN LICENSE BLOCK *****
|
||||
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
#
|
||||
# The contents of this file are subject to the Mozilla Public License Version
|
||||
# 1.1 (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
# http://www.mozilla.org/MPL/
|
||||
#
|
||||
# Software distributed under the License is distributed on an "AS IS" basis,
|
||||
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
# for the specific language governing rights and limitations under the
|
||||
# License.
|
||||
#
|
||||
# The Original Code is script to change the system id in an object file from PA-RISC 2.0 to 1.1.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Hewlett-Packard Company.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1999
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# wrapped by Dennis Handly on Tue Mar 23 15:23:43 1999
|
||||
#
|
||||
# Alternatively, the contents of this file may be used under the terms of
|
||||
# either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
# in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
# of those above. If you wish to allow use of your version of this file only
|
||||
# under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
# use your version of this file under the terms of the MPL, indicate your
|
||||
# decision by deleting the provisions above and replace them with the notice
|
||||
# and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
# the provisions above, a recipient may use your version of this file under
|
||||
# the terms of any one of the MPL, the GPL or the LGPL.
|
||||
#
|
||||
# ***** END LICENSE BLOCK *****
|
||||
|
||||
# script to change the system id in an object file from PA-RISC 2.0 to 1.1
|
||||
|
||||
# Run adb in write mode on the object file named by the first argument and
# feed it the patch commands below.
#
# The here-document delimiter is quoted so the shell hands every line to adb
# verbatim.  With an unquoted delimiter the shell performs parameter expansion
# on the body and would replace the final "$q" line (the command sent to adb
# to quit) with an empty string.  "$1" is quoted so that a path containing
# spaces or glob characters is passed as a single argument.
adb -w "$1" << 'EOF'
?m 0 -1 0
0x0?X
0x0?W (@0x0&~0x40000)|(~@0x0&0x40000)

0?"change checksum"
0x7c?X
0x7c?W (@0x7c&~0x40000)|(~@0x7c&0x40000)
$q
EOF

exit 0
|
||||
|
||||
@@ -1,62 +0,0 @@
|
||||
/*
|
||||
* logtab.h
|
||||
*
|
||||
* Arbitrary precision integer arithmetic library
|
||||
*
|
||||
* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* Michael J. Fromberger.
|
||||
* Portions created by the Initial Developer are Copyright (C) 1998
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
/* $Id: logtab.h,v 1.5 2004-04-27 23:04:36 gerv%gerv.net Exp $ */
|
||||
|
||||
const float s_logv_2[] = {
|
||||
0.000000000f, 0.000000000f, 1.000000000f, 0.630929754f, /* 0 1 2 3 */
|
||||
0.500000000f, 0.430676558f, 0.386852807f, 0.356207187f, /* 4 5 6 7 */
|
||||
0.333333333f, 0.315464877f, 0.301029996f, 0.289064826f, /* 8 9 10 11 */
|
||||
0.278942946f, 0.270238154f, 0.262649535f, 0.255958025f, /* 12 13 14 15 */
|
||||
0.250000000f, 0.244650542f, 0.239812467f, 0.235408913f, /* 16 17 18 19 */
|
||||
0.231378213f, 0.227670249f, 0.224243824f, 0.221064729f, /* 20 21 22 23 */
|
||||
0.218104292f, 0.215338279f, 0.212746054f, 0.210309918f, /* 24 25 26 27 */
|
||||
0.208014598f, 0.205846832f, 0.203795047f, 0.201849087f, /* 28 29 30 31 */
|
||||
0.200000000f, 0.198239863f, 0.196561632f, 0.194959022f, /* 32 33 34 35 */
|
||||
0.193426404f, 0.191958720f, 0.190551412f, 0.189200360f, /* 36 37 38 39 */
|
||||
0.187901825f, 0.186652411f, 0.185449023f, 0.184288833f, /* 40 41 42 43 */
|
||||
0.183169251f, 0.182087900f, 0.181042597f, 0.180031327f, /* 44 45 46 47 */
|
||||
0.179052232f, 0.178103594f, 0.177183820f, 0.176291434f, /* 48 49 50 51 */
|
||||
0.175425064f, 0.174583430f, 0.173765343f, 0.172969690f, /* 52 53 54 55 */
|
||||
0.172195434f, 0.171441601f, 0.170707280f, 0.169991616f, /* 56 57 58 59 */
|
||||
0.169293808f, 0.168613099f, 0.167948779f, 0.167300179f, /* 60 61 62 63 */
|
||||
0.166666667f
|
||||
};
|
||||
|
||||
@@ -1,64 +0,0 @@
|
||||
#!/usr/linguist/bin/perl
|
||||
|
||||
#
|
||||
# make-logtab
|
||||
#
|
||||
# Generate a table of logarithms of 2 in various bases, for use in
|
||||
# estimating the output sizes of various bases.
|
||||
|
||||
# ***** BEGIN LICENSE BLOCK *****
|
||||
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
#
|
||||
# The contents of this file are subject to the Mozilla Public License Version
|
||||
# 1.1 (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
# http://www.mozilla.org/MPL/
|
||||
#
|
||||
# Software distributed under the License is distributed on an "AS IS" basis,
|
||||
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
# for the specific language governing rights and limitations under the
|
||||
# License.
|
||||
#
|
||||
# The Original Code is the MPI Arbitrary Precision Integer Arithmetic
|
||||
# library.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Michael J. Fromberger <sting@linguist.dartmouth.edu>
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998, 2000
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
#
|
||||
# Alternatively, the contents of this file may be used under the terms of
|
||||
# either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
# in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
# of those above. If you wish to allow use of your version of this file only
|
||||
# under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
# use your version of this file under the terms of the MPL, indicate your
|
||||
# decision by deleting the provisions above and replace them with the notice
|
||||
# and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
# the provisions above, a recipient may use your version of this file under
|
||||
# the terms of any one of the MPL, the GPL or the LGPL.
|
||||
#
|
||||
# ***** END LICENSE BLOCK *****
|
||||
|
||||
# $Id: make-logtab,v 1.4 2005-02-02 22:28:22 gerv%gerv.net Exp $
|
||||
|
||||
$ARRAYNAME = $ENV{'ARRAYNAME'} || "s_logv_2";
|
||||
$ARRAYTYPE = $ENV{'ARRAYTYPE'} || "float";
|
||||
|
||||
printf("const %s %s[] = {\n %0.9ff, %0.9ff, ",
|
||||
$ARRAYTYPE, $ARRAYNAME, 0, 0);
|
||||
$brk = 2;
|
||||
for($ix = 2; $ix < 64; $ix++) {
|
||||
printf("%0.9ff, ", (log(2)/log($ix)));
|
||||
$brk = ($brk + 1) & 3;
|
||||
if(!$brk) {
|
||||
printf(" /* %2d %2d %2d %2d */\n ",
|
||||
$ix - 3, $ix - 2, $ix - 1, $ix);
|
||||
}
|
||||
}
|
||||
printf("%0.9ff\n};\n\n", (log(2)/log($ix)));
|
||||
|
||||
exit 0;
|
||||
@@ -1,133 +0,0 @@
|
||||
#!/usr/linguist/bin/perl
|
||||
|
||||
#
|
||||
# make-test-arrays
|
||||
#
|
||||
# Given a test-arrays file, which specifies the test suite names, the
|
||||
# names of the functions which perform those test suites, and
|
||||
# descriptive comments, this script generates C structures for the
|
||||
# mpi-test program. The input consists of lines of the form:
|
||||
#
|
||||
# suite-name:function-name:comment
|
||||
#
|
||||
# The output is written to the standard output. Blank lines are
|
||||
# ignored, and comments beginning with '#' are stripped.
|
||||
|
||||
# ***** BEGIN LICENSE BLOCK *****
|
||||
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
#
|
||||
# The contents of this file are subject to the Mozilla Public License Version
|
||||
# 1.1 (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
# http://www.mozilla.org/MPL/
|
||||
#
|
||||
# Software distributed under the License is distributed on an "AS IS" basis,
|
||||
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
# for the specific language governing rights and limitations under the
|
||||
# License.
|
||||
#
|
||||
# The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Michael J. Fromberger <sting@linguist.dartmouth.edu>.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
#
|
||||
# Alternatively, the contents of this file may be used under the terms of
|
||||
# either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
# in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
# of those above. If you wish to allow use of your version of this file only
|
||||
# under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
# use your version of this file under the terms of the MPL, indicate your
|
||||
# decision by deleting the provisions above and replace them with the notice
|
||||
# and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
# the provisions above, a recipient may use your version of this file under
|
||||
# the terms of any one of the MPL, the GPL or the LGPL.
|
||||
#
|
||||
# ***** END LICENSE BLOCK *****
|
||||
|
||||
# $Id: make-test-arrays,v 1.2 2005-02-02 22:28:22 gerv%gerv.net Exp $
|
||||
#
|
||||
|
||||
# Read parameters from the environment, if available
|
||||
$NAMEVAR = $ENV{'NAMEVAR'} || "g_names";
|
||||
$COUNTVAR = $ENV{'COUNTVAR'} || "g_count";
|
||||
$FUNCVAR = $ENV{'FUNCVAR'} || "g_tests";
|
||||
$DESCVAR = $ENV{'DESCVAR'} || "g_descs";
|
||||
$FUNCLEN = 13;
|
||||
$NAMELEN = 18;
|
||||
$DESCLEN = 45;
|
||||
|
||||
#------------------------------------------------------------------------
|
||||
# Suck in input from the files on the command line, or standard input
|
||||
while(<>) {
|
||||
chomp;
|
||||
s/\#.*$//;
|
||||
next if /^\s*$/;
|
||||
|
||||
($suite, $func, $desc) = split(/:/, $_);
|
||||
|
||||
$tmp = { "suite" => $suite,
|
||||
"func" => $func,
|
||||
"desc" => $desc };
|
||||
|
||||
push(@item, $tmp);
|
||||
}
|
||||
$count = scalar(@item);
|
||||
$last = pop(@item);
|
||||
|
||||
#------------------------------------------------------------------------
|
||||
# Output the table of names
|
||||
print "/* Table mapping test suite names to index numbers */\n";
|
||||
printf("const int %s = %d;\n", $COUNTVAR, $count);
|
||||
printf("const char *%s[] = {\n", $NAMEVAR);
|
||||
|
||||
foreach $elt (@item) {
|
||||
printf(" \"%s\",%s/* %s%s */\n", $elt->{"suite"},
|
||||
" " x ($NAMELEN - length($elt->{"suite"})),
|
||||
$elt->{"desc"},
|
||||
" " x ($DESCLEN - length($elt->{"desc"})));
|
||||
}
|
||||
printf(" \"%s\" %s/* %s%s */\n", $last->{"suite"},
|
||||
" " x ($NAMELEN - length($last->{"suite"})),
|
||||
$last->{"desc"},
|
||||
" " x ($DESCLEN - length($last->{"desc"})));
|
||||
print "};\n\n";
|
||||
|
||||
#------------------------------------------------------------------------
|
||||
# Output the driver function prototypes
|
||||
print "/* Test function prototypes */\n";
|
||||
foreach $elt (@item, $last) {
|
||||
printf("int %s(void);\n", $elt->{"func"});
|
||||
}
|
||||
print "\n";
|
||||
|
||||
#------------------------------------------------------------------------
|
||||
# Output the table of functions
|
||||
print "/* Table mapping index numbers to functions */\n";
|
||||
printf("int (*%s[])(void) = {\n ", $FUNCVAR);
|
||||
$brk = 0;
|
||||
|
||||
foreach $elt (@item) {
|
||||
print($elt->{"func"}, ", ",
|
||||
" " x ($FUNCLEN - length($elt->{"func"})));
|
||||
$brk = ($brk + 1) & 3;
|
||||
print "\n " unless($brk);
|
||||
}
|
||||
print $last->{"func"}, "\n};\n\n";
|
||||
|
||||
#------------------------------------------------------------------------
|
||||
# Output the table of descriptions
|
||||
print "/* Table mapping index numbers to descriptions */\n";
|
||||
printf("const char *%s[] = {\n", $DESCVAR);
|
||||
|
||||
foreach $elt (@item) {
|
||||
printf(" \"%s\",\n", $elt->{"desc"});
|
||||
}
|
||||
printf(" \"%s\"\n};\n\n", $last->{"desc"});
|
||||
|
||||
exit 0;
|
||||
|
||||
@@ -1,342 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is the Netscape security libraries.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* Netscape Communications Corporation.
|
||||
* Portions created by the Initial Developer are Copyright (C) 2000
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <malloc.h>
|
||||
#include <time.h>
|
||||
#include "mpi.h"
|
||||
#include "mpi-priv.h"
|
||||
|
||||
/* #define OLD_WAY 1 */
|
||||
|
||||
/* This key is the 1024-bit test key used for speed testing of RSA private
|
||||
** key ops.
|
||||
*/
|
||||
|
||||
#define CONST const
|
||||
|
||||
static CONST unsigned char default_n[128] = {
|
||||
0xc2,0xae,0x96,0x89,0xaf,0xce,0xd0,0x7b,0x3b,0x35,0xfd,0x0f,0xb1,0xf4,0x7a,0xd1,
|
||||
0x3c,0x7d,0xb5,0x86,0xf2,0x68,0x36,0xc9,0x97,0xe6,0x82,0x94,0x86,0xaa,0x05,0x39,
|
||||
0xec,0x11,0x51,0xcc,0x5c,0xa1,0x59,0xba,0x29,0x18,0xf3,0x28,0xf1,0x9d,0xe3,0xae,
|
||||
0x96,0x5d,0x6d,0x87,0x73,0xf6,0xf6,0x1f,0xd0,0x2d,0xfb,0x2f,0x7a,0x13,0x7f,0xc8,
|
||||
0x0c,0x7a,0xe9,0x85,0xfb,0xce,0x74,0x86,0xf8,0xef,0x2f,0x85,0x37,0x73,0x0f,0x62,
|
||||
0x4e,0x93,0x17,0xb7,0x7e,0x84,0x9a,0x94,0x11,0x05,0xca,0x0d,0x31,0x4b,0x2a,0xc8,
|
||||
0xdf,0xfe,0xe9,0x0c,0x13,0xc7,0xf2,0xad,0x19,0x64,0x28,0x3c,0xb5,0x6a,0xc8,0x4b,
|
||||
0x79,0xea,0x7c,0xce,0x75,0x92,0x45,0x3e,0xa3,0x9d,0x64,0x6f,0x04,0x69,0x19,0x17
|
||||
};
|
||||
|
||||
static CONST unsigned char default_d[128] = {
|
||||
0x13,0xcb,0xbc,0xf2,0xf3,0x35,0x8c,0x6d,0x7b,0x6f,0xd9,0xf3,0xa6,0x9c,0xbd,0x80,
|
||||
0x59,0x2e,0x4f,0x2f,0x11,0xa7,0x17,0x2b,0x18,0x8f,0x0f,0xe8,0x1a,0x69,0x5f,0x6e,
|
||||
0xac,0x5a,0x76,0x7e,0xd9,0x4c,0x6e,0xdb,0x47,0x22,0x8a,0x57,0x37,0x7a,0x5e,0x94,
|
||||
0x7a,0x25,0xb5,0xe5,0x78,0x1d,0x3c,0x99,0xaf,0x89,0x7d,0x69,0x2e,0x78,0x9d,0x1d,
|
||||
0x84,0xc8,0xc1,0xd7,0x1a,0xb2,0x6d,0x2d,0x8a,0xd9,0xab,0x6b,0xce,0xae,0xb0,0xa0,
|
||||
0x58,0x55,0xad,0x5c,0x40,0x8a,0xd6,0x96,0x08,0x8a,0xe8,0x63,0xe6,0x3d,0x6c,0x20,
|
||||
0x49,0xc7,0xaf,0x0f,0x25,0x73,0xd3,0x69,0x43,0x3b,0xf2,0x32,0xf8,0x3d,0x5e,0xee,
|
||||
0x7a,0xca,0xd6,0x94,0x55,0xe5,0xbd,0x25,0x34,0x8d,0x63,0x40,0xb5,0x8a,0xc3,0x01
|
||||
};
|
||||
|
||||
|
||||
#define DEFAULT_ITERS 50
|
||||
|
||||
/* Timer abstraction used by the TimingContext helpers.  All values are
** processor-time stamps as returned by clock().
**
** Every macro argument and every macro body is parenthesized so that the
** macros expand correctly when handed an expression (e.g. sec(a + b)) or
** when used inside a larger expression.
*/
typedef clock_t timetype;

/* Store the current clock() value through the pointer x. */
#define gettime(x) (*(x) = clock())

/* a -= b, for two timetype lvalues/values. */
#define subtime(a, b) ((a) -= (b))

/* Convert a clock() interval to whole milliseconds (truncating). */
#define msec(x) ((clock_t)((double)(x) * 1000.0 / CLOCKS_PER_SEC))

/* Convert a clock() interval to whole seconds (integer division). */
#define sec(x) ((x) / CLOCKS_PER_SEC)
|
||||
|
||||
struct TimingContextStr {
|
||||
timetype start;
|
||||
timetype end;
|
||||
timetype interval;
|
||||
|
||||
int minutes;
|
||||
int seconds;
|
||||
int millisecs;
|
||||
};
|
||||
|
||||
typedef struct TimingContextStr TimingContext;
|
||||
|
||||
TimingContext *CreateTimingContext(void)
|
||||
{
|
||||
return (TimingContext *)malloc(sizeof(TimingContext));
|
||||
}
|
||||
|
||||
/* Release a context obtained from CreateTimingContext().
** A NULL ctx is accepted and ignored.
*/
void DestroyTimingContext(TimingContext *ctx)
{
    if (ctx != NULL)
        free(ctx);
}
|
||||
|
||||
/* Record the starting time stamp of the interval in ctx->start. */
void TimingBegin(TimingContext *ctx)
{
    timetype *stamp = &ctx->start;

    gettime(stamp);
}
|
||||
|
||||
/* Break ctx->interval down into minutes / seconds / milliseconds.
**
** millisecs is the millisecond count modulo 1000; the whole-second count is
** split into minutes and a 0..59 remainder.
*/
static void timingUpdate(TimingContext *ctx)
{
    const timetype span = ctx->interval;
    int total_seconds = sec(span);

    ctx->millisecs = msec(span) % 1000;
    ctx->minutes   = total_seconds / 60;
    ctx->seconds   = total_seconds % 60;
}
|
||||
|
||||
/* Close the interval opened by TimingBegin(): record the end stamp, store
** end - start in ctx->interval and refresh the minutes/seconds/millisecs
** breakdown via timingUpdate().
*/
void TimingEnd(TimingContext *ctx)
{
    timetype elapsed;

    gettime(&ctx->end);
    elapsed = ctx->end;
    subtime(elapsed, ctx->start);
    ctx->interval = elapsed;

    timingUpdate(ctx);
}
|
||||
|
||||
/* Format the interval held in ctx as "M minutes, S.mmm seconds".
**
** The text is written into a single static buffer, so the returned pointer
** is overwritten by the next call and must not be freed.
*/
char *TimingGenerateString(TimingContext *ctx)
{
    static char text[4096];
    const int mins  = ctx->minutes;
    const int secs  = ctx->seconds;
    const int msecs = ctx->millisecs;

    sprintf(text, "%d minutes, %d.%03d seconds", mins, secs, msecs);
    return text;
}
|
||||
|
||||
/* Print l bytes of b to stdout as a hex dump.
**
** Each row starts with a tab and holds up to 16 bytes, each printed as
** " %02x".  A partial last row is terminated with a newline, and one extra
** blank line follows the dump.  Nothing at all is printed when l <= 0.
*/
static void
dumpBytes( unsigned char * b, int l)
{
    int pos = 0;

    if (l <= 0)
        return;

    while (pos < l) {
        const int column = pos % 16;

        if (column == 0)
            printf("\t");
        printf(" %02x", b[pos]);
        if (column == 15)
            printf("\n");
        ++pos;
    }
    /* finish a row that did not reach 16 bytes */
    if ((pos % 16) != 0)
        printf("\n");
    printf("\n");
}
|
||||
|
||||
static mp_err
|
||||
testNewFuncs(const unsigned char * modulusBytes, int modulus_len)
|
||||
{
|
||||
mp_err mperr = MP_OKAY;
|
||||
mp_int modulus;
|
||||
unsigned char buf[512];
|
||||
|
||||
mperr = mp_init(&modulus);
|
||||
mperr = mp_read_unsigned_octets(&modulus, modulusBytes, modulus_len );
|
||||
mperr = mp_to_fixlen_octets(&modulus, buf, modulus_len);
|
||||
mperr = mp_to_fixlen_octets(&modulus, buf, modulus_len+1);
|
||||
mperr = mp_to_fixlen_octets(&modulus, buf, modulus_len+4);
|
||||
mperr = mp_to_unsigned_octets(&modulus, buf, modulus_len);
|
||||
mperr = mp_to_signed_octets(&modulus, buf, modulus_len + 1);
|
||||
mp_clear(&modulus);
|
||||
return mperr;
|
||||
}
|
||||
|
||||
int
|
||||
testModExp( const unsigned char * modulusBytes,
|
||||
const unsigned int expo,
|
||||
const unsigned char * input,
|
||||
unsigned char * output,
|
||||
int modulus_len)
|
||||
{
|
||||
mp_err mperr = MP_OKAY;
|
||||
mp_int modulus;
|
||||
mp_int base;
|
||||
mp_int exponent;
|
||||
mp_int result;
|
||||
|
||||
mperr = mp_init(&modulus);
|
||||
mperr += mp_init(&base);
|
||||
mperr += mp_init(&exponent);
|
||||
mperr += mp_init(&result);
|
||||
/* we initialize all mp_ints unconditionally, even if some fail.
|
||||
** This guarantees that the DIGITS pointer is valid (even if null).
|
||||
** So, mp_clear will do the right thing below.
|
||||
*/
|
||||
if (mperr == MP_OKAY) {
|
||||
mperr = mp_read_unsigned_octets(&modulus,
|
||||
modulusBytes + (sizeof default_n - modulus_len), modulus_len );
|
||||
mperr += mp_read_unsigned_octets(&base, input, modulus_len );
|
||||
mp_set(&exponent, expo);
|
||||
if (mperr == MP_OKAY) {
|
||||
#if OLD_WAY
|
||||
mperr = s_mp_exptmod(&base, &exponent, &modulus, &result);
|
||||
#else
|
||||
mperr = mp_exptmod(&base, &exponent, &modulus, &result);
|
||||
#endif
|
||||
if (mperr == MP_OKAY) {
|
||||
mperr = mp_to_fixlen_octets(&result, output, modulus_len);
|
||||
}
|
||||
}
|
||||
}
|
||||
mp_clear(&base);
|
||||
mp_clear(&result);
|
||||
|
||||
mp_clear(&modulus);
|
||||
mp_clear(&exponent);
|
||||
|
||||
return (int)mperr;
|
||||
}
|
||||
|
||||
int
|
||||
doModExp( const unsigned char * modulusBytes,
|
||||
const unsigned char * exponentBytes,
|
||||
const unsigned char * input,
|
||||
unsigned char * output,
|
||||
int modulus_len)
|
||||
{
|
||||
mp_err mperr = MP_OKAY;
|
||||
mp_int modulus;
|
||||
mp_int base;
|
||||
mp_int exponent;
|
||||
mp_int result;
|
||||
|
||||
mperr = mp_init(&modulus);
|
||||
mperr += mp_init(&base);
|
||||
mperr += mp_init(&exponent);
|
||||
mperr += mp_init(&result);
|
||||
/* we initialize all mp_ints unconditionally, even if some fail.
|
||||
** This guarantees that the DIGITS pointer is valid (even if null).
|
||||
** So, mp_clear will do the right thing below.
|
||||
*/
|
||||
if (mperr == MP_OKAY) {
|
||||
mperr = mp_read_unsigned_octets(&modulus,
|
||||
modulusBytes + (sizeof default_n - modulus_len), modulus_len );
|
||||
mperr += mp_read_unsigned_octets(&exponent, exponentBytes, modulus_len );
|
||||
mperr += mp_read_unsigned_octets(&base, input, modulus_len );
|
||||
if (mperr == MP_OKAY) {
|
||||
#if OLD_WAY
|
||||
mperr = s_mp_exptmod(&base, &exponent, &modulus, &result);
|
||||
#else
|
||||
mperr = mp_exptmod(&base, &exponent, &modulus, &result);
|
||||
#endif
|
||||
if (mperr == MP_OKAY) {
|
||||
mperr = mp_to_fixlen_octets(&result, output, modulus_len);
|
||||
}
|
||||
}
|
||||
}
|
||||
mp_clear(&base);
|
||||
mp_clear(&result);
|
||||
|
||||
mp_clear(&modulus);
|
||||
mp_clear(&exponent);
|
||||
|
||||
return (int)mperr;
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
TimingContext * timeCtx;
|
||||
char * progName;
|
||||
long iters = DEFAULT_ITERS;
|
||||
unsigned int modulus_len;
|
||||
int i;
|
||||
int rv;
|
||||
unsigned char buf [1024];
|
||||
unsigned char buf2[1024];
|
||||
|
||||
progName = strrchr(argv[0], '/');
|
||||
if (!progName)
|
||||
progName = strrchr(argv[0], '\\');
|
||||
progName = progName ? progName+1 : argv[0];
|
||||
|
||||
if (argc >= 2) {
|
||||
iters = atol(argv[1]);
|
||||
}
|
||||
|
||||
if (argc >= 3) {
|
||||
modulus_len = atol(argv[2]);
|
||||
} else
|
||||
modulus_len = sizeof default_n;
|
||||
|
||||
/* no library init function !? */
|
||||
|
||||
memset(buf, 0x41, sizeof buf);
|
||||
|
||||
if (iters < 2) {
|
||||
testNewFuncs( default_n, modulus_len);
|
||||
testNewFuncs( default_n+1, modulus_len - 1);
|
||||
testNewFuncs( default_n+2, modulus_len - 2);
|
||||
testNewFuncs( default_n+3, modulus_len - 3);
|
||||
|
||||
printf("%lu allocations, %lu frees, %lu copies\n", mp_allocs, mp_frees, mp_copies);
|
||||
rv = testModExp(default_n, 0, buf, buf2, modulus_len);
|
||||
dumpBytes((unsigned char *)buf2, modulus_len);
|
||||
|
||||
printf("%lu allocations, %lu frees, %lu copies\n", mp_allocs, mp_frees, mp_copies);
|
||||
rv = testModExp(default_n, 1, buf, buf2, modulus_len);
|
||||
dumpBytes((unsigned char *)buf2, modulus_len);
|
||||
|
||||
printf("%lu allocations, %lu frees, %lu copies\n", mp_allocs, mp_frees, mp_copies);
|
||||
rv = testModExp(default_n, 2, buf, buf2, modulus_len);
|
||||
dumpBytes((unsigned char *)buf2, modulus_len);
|
||||
|
||||
printf("%lu allocations, %lu frees, %lu copies\n", mp_allocs, mp_frees, mp_copies);
|
||||
rv = testModExp(default_n, 3, buf, buf2, modulus_len);
|
||||
dumpBytes((unsigned char *)buf2, modulus_len);
|
||||
}
|
||||
printf("%lu allocations, %lu frees, %lu copies\n", mp_allocs, mp_frees, mp_copies);
|
||||
rv = doModExp(default_n, default_d, buf, buf2, modulus_len);
|
||||
if (rv != 0) {
|
||||
fprintf(stderr, "Error in modexp operation:\n");
|
||||
exit(1);
|
||||
}
|
||||
dumpBytes((unsigned char *)buf2, modulus_len);
|
||||
printf("%lu allocations, %lu frees, %lu copies\n", mp_allocs, mp_frees, mp_copies);
|
||||
|
||||
timeCtx = CreateTimingContext();
|
||||
TimingBegin(timeCtx);
|
||||
i = iters;
|
||||
while (i--) {
|
||||
rv = doModExp(default_n, default_d, buf, buf2, modulus_len);
|
||||
if (rv != 0) {
|
||||
fprintf(stderr, "Error in modexp operation\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
TimingEnd(timeCtx);
|
||||
printf("%ld iterations in %s\n", iters, TimingGenerateString(timeCtx));
|
||||
printf("%lu allocations, %lu frees, %lu copies\n", mp_allocs, mp_frees, mp_copies);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1,329 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is SPARC optimized Montgomery multiply functions.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* Sun Microsystems Inc.
|
||||
* Portions created by the Initial Developer are Copyright (C) 1999-2000
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
* Netscape Communications Corporation
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
/* $Id: montmulf.c,v 1.7 2004-04-27 23:04:36 gerv%gerv.net Exp $ */
|
||||
|
||||
#ifdef SOLARIS
|
||||
#define RF_INLINE_MACROS 1
|
||||
#endif
|
||||
|
||||
static const double TwoTo16=65536.0;
|
||||
static const double TwoToMinus16=1.0/65536.0;
|
||||
static const double Zero=0.0;
|
||||
static const double TwoTo32=65536.0*65536.0;
|
||||
static const double TwoToMinus32=1.0/(65536.0*65536.0);
|
||||
|
||||
#ifdef RF_INLINE_MACROS
|
||||
|
||||
double upper32(double);
|
||||
double lower32(double, double);
|
||||
double mod(double, double, double);
|
||||
|
||||
void i16_to_d16_and_d32x4(const double * /*1/(2^16)*/,
|
||||
const double * /* 2^16*/,
|
||||
const double * /* 0 */,
|
||||
double * /*result16*/,
|
||||
double * /* result32 */,
|
||||
float * /*source - should be unsigned int*
|
||||
converted to float* */);
|
||||
|
||||
#else
|
||||
#ifdef MP_USE_FLOOR
|
||||
#include <math.h>
|
||||
#else
|
||||
#define floor(d) ((double)((unsigned long long)(d)))
|
||||
#endif
|
||||
|
||||
static double upper32(double x)
|
||||
{
|
||||
return floor(x*TwoToMinus32);
|
||||
}
|
||||
|
||||
static double lower32(double x, double y)
|
||||
{
|
||||
return x-TwoTo32*floor(x*TwoToMinus32);
|
||||
}
|
||||
|
||||
/* Portable fallback: return x - m * floor(x * oneoverm).  Callers in this
** file pass oneoverm = 1/m, which makes this x reduced modulo m.
*/
static double mod(double x, double oneoverm, double m)
{
    const double quotient = floor(x * oneoverm);

    return x - m * quotient;
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
static void cleanup(double *dt, int from, int tlen)
|
||||
{
|
||||
int i;
|
||||
double tmp,tmp1,x,x1;
|
||||
|
||||
tmp=tmp1=Zero;
|
||||
/* original code **
|
||||
for(i=2*from;i<2*tlen-2;i++)
|
||||
{
|
||||
x=dt[i];
|
||||
dt[i]=lower32(x,Zero)+tmp1;
|
||||
tmp1=tmp;
|
||||
tmp=upper32(x);
|
||||
}
|
||||
dt[tlen-2]+=tmp1;
|
||||
dt[tlen-1]+=tmp;
|
||||
**end original code ***/
|
||||
/* new code ***/
|
||||
for(i=2*from;i<2*tlen;i+=2)
|
||||
{
|
||||
x=dt[i];
|
||||
x1=dt[i+1];
|
||||
dt[i]=lower32(x,Zero)+tmp;
|
||||
dt[i+1]=lower32(x1,Zero)+tmp1;
|
||||
tmp=upper32(x);
|
||||
tmp1=upper32(x1);
|
||||
}
|
||||
/** end new code **/
|
||||
}
|
||||
|
||||
|
||||
void conv_d16_to_i32(unsigned int *i32, double *d16, long long *tmp, int ilen)
|
||||
{
|
||||
int i;
|
||||
long long t, t1, a, b, c, d;
|
||||
|
||||
t1=0;
|
||||
a=(long long)d16[0];
|
||||
b=(long long)d16[1];
|
||||
for(i=0; i<ilen-1; i++)
|
||||
{
|
||||
c=(long long)d16[2*i+2];
|
||||
t1+=(unsigned int)a;
|
||||
t=(a>>32);
|
||||
d=(long long)d16[2*i+3];
|
||||
t1+=(b&0xffff)<<16;
|
||||
t+=(b>>16)+(t1>>32);
|
||||
i32[i]=(unsigned int)t1;
|
||||
t1=t;
|
||||
a=c;
|
||||
b=d;
|
||||
}
|
||||
t1+=(unsigned int)a;
|
||||
t=(a>>32);
|
||||
t1+=(b&0xffff)<<16;
|
||||
i32[i]=(unsigned int)t1;
|
||||
}
|
||||
|
||||
/* Convert len unsigned 32-bit words to doubles, one double per word:
** d32[k] = (double)i32[k].  Nothing is written when len <= 0.
*/
void conv_i32_to_d32(double *d32, unsigned int *i32, int len)
{
    int k;

#pragma pipeloop(0)
    for (k = 0; k < len; k++)
    {
        const unsigned int word = i32[k];

        d32[k] = (double)word;
    }
}
|
||||
|
||||
|
||||
/* Split each of len unsigned 32-bit words into two doubles:
** d16[2*k] gets the low 16 bits and d16[2*k+1] the high 16 bits of i32[k].
** Nothing is written when len <= 0.
*/
void conv_i32_to_d16(double *d16, unsigned int *i32, int len)
{
    int k;

#pragma pipeloop(0)
    for (k = 0; k < len; k++)
    {
        const unsigned int word = i32[k];
        double *pair = &d16[2*k];

        pair[0] = (double)(word & 0xffff);
        pair[1] = (double)(word >> 16);
    }
}
|
||||
|
||||
|
||||
/* Produce both double representations of len unsigned 32-bit words:
**   d32[k]     = (double)i32[k]
**   d16[2*k]   = low 16 bits of i32[k]
**   d16[2*k+1] = high 16 bits of i32[k]
**
** When RF_INLINE_MACROS is defined, groups of four words are handed to
** i16_to_d16_and_d32x4() and only the leftover words go through the scalar
** loop; otherwise the scalar loop handles every word.
*/
void conv_i32_to_d32_and_d16(double *d32, double *d16,
                             unsigned int *i32, int len)
{
    int k = 0;
    unsigned int word;

#pragma pipeloop(0)
#ifdef RF_INLINE_MACROS
    for (; k < len - 3; k += 4)
    {
        i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero,
                             &(d16[2*k]), &(d32[k]), (float *)(&(i32[k])));
    }
#endif
    /* scalar path: all words, or the tail left by the 4-way loop */
    for (; k < len; k++)
    {
        word = i32[k];
        d32[k]       = (double)(i32[k]);
        d16[2*k]     = (double)(word & 0xffff);
        d16[2*k + 1] = (double)(word >> 16);
    }
}
|
||||
|
||||
|
||||
/* Final conditional subtraction: if the (len+1)-word value in i32 is
** greater than or equal to the len-word value in nint, subtract nint from
** the low len words of i32 in place.
**
** The subtraction happens when the extra top word i32[len] is non-zero, or
** when the low len words of i32 compare >= nint (scanning from the most
** significant word).  i32[len] itself is never modified.
*/
void adjust_montf_result(unsigned int *i32, unsigned int *nint, int len)
{
    long long borrow;
    int k;
    int subtract;

    if (i32[len] > 0)
    {
        subtract = 1;
    }
    else
    {
        /* find the most significant word in which the two values differ */
        k = len - 1;
        while (k >= 0 && i32[k] == nint[k])
            k--;
        /* k < 0: the values are equal */
        subtract = (k < 0) || (i32[k] > nint[k]);
    }

    if (!subtract)
        return;

    borrow = 0;
    for (k = 0; k < len; k++)
    {
        borrow = borrow + (unsigned long long)(i32[k]) - (unsigned long long)(nint[k]);
        i32[k] = (unsigned int)borrow;
        borrow = borrow >> 32;   /* 0, or -1 when a borrow is pending */
    }
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
** the lengths of the input arrays should be at least the following:
|
||||
** result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen]
|
||||
** all of them should be different from one another
|
||||
**
|
||||
*/
|
||||
void mont_mulf_noconv(unsigned int *result,
|
||||
double *dm1, double *dm2, double *dt,
|
||||
double *dn, unsigned int *nint,
|
||||
int nlen, double dn0)
|
||||
{
|
||||
int i, j, jj;
|
||||
int tmp;
|
||||
double digit, m2j, nextm2j, a, b;
|
||||
double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0;
|
||||
|
||||
pdm1=&(dm1[0]);
|
||||
pdm2=&(dm2[0]);
|
||||
pdn=&(dn[0]);
|
||||
pdm2[2*nlen]=Zero;
|
||||
|
||||
if (nlen!=16)
|
||||
{
|
||||
for(i=0;i<4*nlen+2;i++) dt[i]=Zero;
|
||||
|
||||
a=dt[0]=pdm1[0]*pdm2[0];
|
||||
digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16);
|
||||
|
||||
pdtj=&(dt[0]);
|
||||
for(j=jj=0;j<2*nlen;j++,jj++,pdtj++)
|
||||
{
|
||||
m2j=pdm2[j];
|
||||
a=pdtj[0]+pdn[0]*digit;
|
||||
b=pdtj[1]+pdm1[0]*pdm2[j+1]+a*TwoToMinus16;
|
||||
pdtj[1]=b;
|
||||
|
||||
#pragma pipeloop(0)
|
||||
for(i=1;i<nlen;i++)
|
||||
{
|
||||
pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
|
||||
}
|
||||
if((jj==30)) {cleanup(dt,j/2+1,2*nlen+1); jj=0;}
|
||||
|
||||
digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
a=dt[0]=pdm1[0]*pdm2[0];
|
||||
|
||||
dt[65]= dt[64]= dt[63]= dt[62]= dt[61]= dt[60]=
|
||||
dt[59]= dt[58]= dt[57]= dt[56]= dt[55]= dt[54]=
|
||||
dt[53]= dt[52]= dt[51]= dt[50]= dt[49]= dt[48]=
|
||||
dt[47]= dt[46]= dt[45]= dt[44]= dt[43]= dt[42]=
|
||||
dt[41]= dt[40]= dt[39]= dt[38]= dt[37]= dt[36]=
|
||||
dt[35]= dt[34]= dt[33]= dt[32]= dt[31]= dt[30]=
|
||||
dt[29]= dt[28]= dt[27]= dt[26]= dt[25]= dt[24]=
|
||||
dt[23]= dt[22]= dt[21]= dt[20]= dt[19]= dt[18]=
|
||||
dt[17]= dt[16]= dt[15]= dt[14]= dt[13]= dt[12]=
|
||||
dt[11]= dt[10]= dt[ 9]= dt[ 8]= dt[ 7]= dt[ 6]=
|
||||
dt[ 5]= dt[ 4]= dt[ 3]= dt[ 2]= dt[ 1]=Zero;
|
||||
|
||||
pdn_0=pdn[0];
|
||||
pdm1_0=pdm1[0];
|
||||
|
||||
digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16);
|
||||
pdtj=&(dt[0]);
|
||||
|
||||
for(j=0;j<32;j++,pdtj++)
|
||||
{
|
||||
|
||||
m2j=pdm2[j];
|
||||
a=pdtj[0]+pdn_0*digit;
|
||||
b=pdtj[1]+pdm1_0*pdm2[j+1]+a*TwoToMinus16;
|
||||
pdtj[1]=b;
|
||||
|
||||
/**** this loop will be fully unrolled:
|
||||
for(i=1;i<16;i++)
|
||||
{
|
||||
pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
|
||||
}
|
||||
*************************************/
|
||||
pdtj[2]+=pdm1[1]*m2j+pdn[1]*digit;
|
||||
pdtj[4]+=pdm1[2]*m2j+pdn[2]*digit;
|
||||
pdtj[6]+=pdm1[3]*m2j+pdn[3]*digit;
|
||||
pdtj[8]+=pdm1[4]*m2j+pdn[4]*digit;
|
||||
pdtj[10]+=pdm1[5]*m2j+pdn[5]*digit;
|
||||
pdtj[12]+=pdm1[6]*m2j+pdn[6]*digit;
|
||||
pdtj[14]+=pdm1[7]*m2j+pdn[7]*digit;
|
||||
pdtj[16]+=pdm1[8]*m2j+pdn[8]*digit;
|
||||
pdtj[18]+=pdm1[9]*m2j+pdn[9]*digit;
|
||||
pdtj[20]+=pdm1[10]*m2j+pdn[10]*digit;
|
||||
pdtj[22]+=pdm1[11]*m2j+pdn[11]*digit;
|
||||
pdtj[24]+=pdm1[12]*m2j+pdn[12]*digit;
|
||||
pdtj[26]+=pdm1[13]*m2j+pdn[13]*digit;
|
||||
pdtj[28]+=pdm1[14]*m2j+pdn[14]*digit;
|
||||
pdtj[30]+=pdm1[15]*m2j+pdn[15]*digit;
|
||||
/* no need for cleenup, cannot overflow */
|
||||
digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16);
|
||||
}
|
||||
}
|
||||
|
||||
conv_d16_to_i32(result,dt+2*nlen,(long long *)dt,nlen+1);
|
||||
|
||||
adjust_montf_result(result,nint,nlen);
|
||||
|
||||
}
|
||||
|
||||
@@ -1,103 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is interface file for SPARC Montgomery multiply functions.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* Sun Microsystems Inc.
|
||||
* Portions created by the Initial Developer are Copyright (C) 1999-2000
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
* Netscape Communications Corporation
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
/* $Id: montmulf.h,v 1.4 2004-04-27 23:04:36 gerv%gerv.net Exp $ */
|
||||
|
||||
/* The functions that are to be called from outside of the .s file have the
|
||||
* following interfaces and array size requirements:
|
||||
*/
|
||||
|
||||
|
||||
void conv_i32_to_d32(double *d32, unsigned int *i32, int len);
|
||||
|
||||
/* Converts an array of int's to an array of doubles, so that each double
|
||||
* corresponds to an int. len is the number of items converted.
|
||||
* Does not allocate the output array.
|
||||
* The pointers d32 and i32 should point to arrays of size at least len
|
||||
* (doubles and unsigned ints, respectively)
|
||||
*/
|
||||
|
||||
|
||||
void conv_i32_to_d16(double *d16, unsigned int *i32, int len);
|
||||
|
||||
/* Converts an array of int's to an array of doubles so that each element
|
||||
* of the int array is converted to a pair of doubles, the first one
|
||||
* corresponding to the lower (least significant) 16 bits of the int and
|
||||
* the second one corresponding to the upper (most significant) 16 bits of
|
||||
* the 32-bit int. len is the number of ints converted.
|
||||
* Does not allocate the output array.
|
||||
* The pointer d16 should point to an array of doubles of size at least
|
||||
* 2*len and i32 should point to an array of ints of size at least len
|
||||
*/
|
||||
|
||||
|
||||
void conv_i32_to_d32_and_d16(double *d32, double *d16,
|
||||
unsigned int *i32, int len);
|
||||
|
||||
/* Does the above two conversions together, it is much faster than doing
|
||||
* both of those in succession
|
||||
*/
|
||||
|
||||
|
||||
void mont_mulf_noconv(unsigned int *result,
|
||||
double *dm1, double *dm2, double *dt,
|
||||
double *dn, unsigned int *nint,
|
||||
int nlen, double dn0);
|
||||
|
||||
/* Does the Montgomery multiplication of the numbers stored in the arrays
|
||||
* pointed to by dm1 and dm2, writing the result to the array pointed to by
|
||||
* result. It uses the array pointed to by dt as a temporary work area.
|
||||
* nint should point to the modulus in the array-of-integers representation,
|
||||
* dn should point to its array-of-doubles as obtained as a result of the
|
||||
* function call conv_i32_to_d32(dn, nint, nlen);
|
||||
* nlen is the length of the array containing the modulus.
|
||||
* The representation used for dm1 is the one that is a result of the function
|
||||
* call conv_i32_to_d32(dm1, m1, nlen), the representation for dm2 is the
|
||||
* result of the function call conv_i32_to_d16(dm2, m2, nlen).
|
||||
* Note that m1 and m2 should both be of length nlen, so they should be
|
||||
* padded with 0's if necessary before the conversion. The result comes in
|
||||
* this form (int representation, padded with 0's).
|
||||
* dn0 is the value of the 16 least significant bits of n0'.
|
||||
* The function does not allocate memory for any of the arrays, so the
|
||||
* pointers should point to arrays with the following minimal sizes:
|
||||
* result - nlen+1
|
||||
* dm1 - nlen
|
||||
* dm2 - 2*nlen+1 ( the +1 is necessary for technical reasons )
|
||||
* dt - 4*nlen+2
|
||||
* dn - nlen
|
||||
* nint - nlen
|
||||
* No two arrays should point to overlapping areas of memory.
|
||||
*/
|
||||
@@ -1,141 +0,0 @@
|
||||
!
|
||||
! ***** BEGIN LICENSE BLOCK *****
|
||||
! Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
!
|
||||
! The contents of this file are subject to the Mozilla Public License Version
|
||||
! 1.1 (the "License"); you may not use this file except in compliance with
|
||||
! the License. You may obtain a copy of the License at
|
||||
! http://www.mozilla.org/MPL/
|
||||
!
|
||||
! Software distributed under the License is distributed on an "AS IS" basis,
|
||||
! WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
! for the specific language governing rights and limitations under the
|
||||
! License.
|
||||
!
|
||||
! The Original Code is inline macros for SPARC Montgomery multiply functions.
|
||||
!
|
||||
! The Initial Developer of the Original Code is
|
||||
! Sun Microsystems Inc.
|
||||
! Portions created by the Initial Developer are Copyright (C) 1999-2000
|
||||
! the Initial Developer. All Rights Reserved.
|
||||
!
|
||||
! Contributor(s):
|
||||
!
|
||||
! Alternatively, the contents of this file may be used under the terms of
|
||||
! either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
! the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
! in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
! of those above. If you wish to allow use of your version of this file only
|
||||
! under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
! use your version of this file under the terms of the MPL, indicate your
|
||||
! decision by deleting the provisions above and replace them with the notice
|
||||
! and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
! the provisions above, a recipient may use your version of this file under
|
||||
! the terms of any one of the MPL, the GPL or the LGPL.
|
||||
!
|
||||
! ***** END LICENSE BLOCK *****
|
||||
! $Id: montmulf.il,v 1.4 2004-04-27 23:04:36 gerv%gerv.net Exp $
|
||||
|
||||
!
|
||||
! double upper32(double /*frs1*/);
|
||||
!
|
||||
! Inline template for upper32(): convert the double argument to a 64-bit
! integer and return the upper 32 bits of that integer as a double in %f0.
.inline upper32,8
|
||||
! move the argument from %o0/%o1 to %f10/%f11 through a stack slot
std %o0,[%sp+0x48]
|
||||
ldd [%sp+0x48],%f10
|
||||
|
||||
! double -> 64-bit integer, held in the pair %f10/%f11
fdtox %f10,%f10
|
||||
! convert only the 32-bit word in %f10 (the high half of the pair) to double
fitod %f10,%f0
|
||||
.end
|
||||
|
||||
!
|
||||
! double lower32(double /*frs1*/, double /* Zero */);
|
||||
!
|
||||
! Inline template for lower32(): convert the first argument to a 64-bit
! integer, replace the upper word of that integer with the upper word of the
! second argument (callers pass Zero), and return the result as a double
! in %f0.
.inline lower32,8
|
||||
! first argument: %o0/%o1 -> %f10/%f11 through a stack slot
std %o0,[%sp+0x48]
|
||||
ldd [%sp+0x48],%f10
|
||||
! second argument: %o2/%o3 -> %f12/%f13 through the same slot
std %o2,[%sp+0x48]
|
||||
ldd [%sp+0x48],%f12
|
||||
|
||||
! double -> 64-bit integer in %f10/%f11
fdtox %f10,%f10
|
||||
! overwrite the high word (%f10) with the high word of the second argument
fmovs %f12,%f10
|
||||
! 64-bit integer -> double
fxtod %f10,%f0
|
||||
.end
|
||||
|
||||
!
|
||||
! double mod(double /*x*/, double /*1/m*/, double /*m*/);
|
||||
!
|
||||
! Inline template for mod(x, 1/m, m): returns x - m * trunc(x * (1/m))
! in %f0, where trunc is the double -> integer -> double round trip below.
.inline mod,12
|
||||
! x: %o0/%o1 -> %f2
std %o0,[%sp+0x48]
|
||||
ldd [%sp+0x48],%f2
|
||||
! 1/m: %o2/%o3 -> %f4
std %o2,[%sp+0x48]
|
||||
ldd [%sp+0x48],%f4
|
||||
! m: %o4/%o5 -> %f6
std %o4,[%sp+0x48]
|
||||
ldd [%sp+0x48],%f6
|
||||
|
||||
! %f4 = x * (1/m)
fmuld %f2,%f4,%f4
|
||||
! drop the fraction: double -> 64-bit integer -> double
fdtox %f4,%f4
|
||||
fxtod %f4,%f4
|
||||
! %f4 = quotient * m
fmuld %f4,%f6,%f4
|
||||
! result = x - quotient * m
fsubd %f2,%f4,%f0
|
||||
.end
|
||||
|
||||
|
||||
!
|
||||
! void i16_to_d16_and_d32x4(double * /*1/(2^16)*/, double * /* 2^16*/,
|
||||
! double * /* 0 */,
|
||||
! double * /*result16*/, double * /* result32 */
|
||||
! float * /*source - should be unsigned int*
|
||||
! converted to float* */);
|
||||
!
|
||||
! Inline template: convert four consecutive 32-bit words at [%o5] into
!   - four doubles holding the full word values, stored at [%o4], and
!   - eight doubles holding their 16-bit halves, stored at [%o3] as
!     low/high pairs.
! %o0 -> 1/(2^16), %o1 -> 2^16, %o2 -> 0.0 (the constants are passed by
! address).
.inline i16_to_d16_and_d32x4,24
|
||||
ldd [%o0],%f2 ! 1/(2^16)
|
||||
ldd [%o1],%f4 ! 2^16
|
||||
! %f22 = the zero constant
ldd [%o2],%f22
|
||||
|
||||
! Build four 64-bit integers: clear each register pair with the zero
! constant, then load one source word into the low (odd) register.
fmovd %f22,%f6
|
||||
ld [%o5],%f7
|
||||
fmovd %f22,%f10
|
||||
ld [%o5+4],%f11
|
||||
fmovd %f22,%f14
|
||||
ld [%o5+8],%f15
|
||||
fmovd %f22,%f18
|
||||
ld [%o5+12],%f19
|
||||
! 64-bit integer -> double, and store the four full-word doubles
fxtod %f6,%f6
|
||||
std %f6,[%o4]
|
||||
fxtod %f10,%f10
|
||||
std %f10,[%o4+8]
|
||||
fxtod %f14,%f14
|
||||
std %f14,[%o4+16]
|
||||
fxtod %f18,%f18
|
||||
std %f18,[%o4+24]
|
||||
! scale each value by 1/(2^16) ...
fmuld %f2,%f6,%f8
|
||||
fmuld %f2,%f10,%f12
|
||||
fmuld %f2,%f14,%f16
|
||||
fmuld %f2,%f18,%f20
|
||||
! ... and drop the fraction, leaving the high 16 bits as an integer
fdtox %f8,%f8
|
||||
fdtox %f12,%f12
|
||||
fdtox %f16,%f16
|
||||
fdtox %f20,%f20
|
||||
! high halves back to double, stored in the second slot of each pair
fxtod %f8,%f8
|
||||
std %f8,[%o3+8]
|
||||
fxtod %f12,%f12
|
||||
std %f12,[%o3+24]
|
||||
fxtod %f16,%f16
|
||||
std %f16,[%o3+40]
|
||||
fxtod %f20,%f20
|
||||
std %f20,[%o3+56]
|
||||
! high * 2^16
fmuld %f8,%f4,%f8
|
||||
fmuld %f12,%f4,%f12
|
||||
fmuld %f16,%f4,%f16
|
||||
fmuld %f20,%f4,%f20
|
||||
! low half = value - high * 2^16, stored in the first slot of each pair
fsubd %f6,%f8,%f8
|
||||
std %f8,[%o3]
|
||||
fsubd %f10,%f12,%f12
|
||||
std %f12,[%o3+16]
|
||||
fsubd %f14,%f16,%f16
|
||||
std %f16,[%o3+32]
|
||||
fsubd %f18,%f20,%f20
|
||||
std %f20,[%o3+48]
|
||||
.end
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,141 +0,0 @@
|
||||
!
|
||||
! ***** BEGIN LICENSE BLOCK *****
|
||||
! Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
!
|
||||
! The contents of this file are subject to the Mozilla Public License Version
|
||||
! 1.1 (the "License"); you may not use this file except in compliance with
|
||||
! the License. You may obtain a copy of the License at
|
||||
! http://www.mozilla.org/MPL/
|
||||
!
|
||||
! Software distributed under the License is distributed on an "AS IS" basis,
|
||||
! WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
! for the specific language governing rights and limitations under the
|
||||
! License.
|
||||
!
|
||||
! The Original Code is inline macros for SPARC Montgomery multiply functions.
|
||||
!
|
||||
! The Initial Developer of the Original Code is
|
||||
! Sun Microsystems Inc.
|
||||
! Portions created by the Initial Developer are Copyright (C) 1999-2000
|
||||
! the Initial Developer. All Rights Reserved.
|
||||
!
|
||||
! Contributor(s):
|
||||
!
|
||||
! Alternatively, the contents of this file may be used under the terms of
|
||||
! either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
! the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
! in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
! of those above. If you wish to allow use of your version of this file only
|
||||
! under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
! use your version of this file under the terms of the MPL, indicate your
|
||||
! decision by deleting the provisions above and replace them with the notice
|
||||
! and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
! the provisions above, a recipient may use your version of this file under
|
||||
! the terms of any one of the MPL, the GPL or the LGPL.
|
||||
!
|
||||
! ***** END LICENSE BLOCK *****
|
||||
! $Id: montmulfv8.il,v 1.3 2004-04-27 23:04:36 gerv%gerv.net Exp $
|
||||
|
||||
!
|
||||
! double upper32(double /*frs1*/);
|
||||
!
|
||||
! Inline template for upper32(): convert the double argument to a 64-bit
! integer and return the upper 32 bits of that integer as a double in %f0.
.inline upper32,8
|
||||
! move the argument from %o0/%o1 to %f10/%f11 through a stack slot
std %o0,[%sp+0x48]
|
||||
ldd [%sp+0x48],%f10
|
||||
|
||||
! double -> 64-bit integer, held in the pair %f10/%f11
fdtox %f10,%f10
|
||||
! convert only the 32-bit word in %f10 (the high half of the pair) to double
fitod %f10,%f0
|
||||
.end
|
||||
|
||||
!
|
||||
! double lower32(double /*frs1*/, double /* Zero */);
|
||||
!
|
||||
! Inline template for lower32(): convert the first argument to a 64-bit
! integer, replace the upper word of that integer with the upper word of the
! second argument (callers pass Zero), and return the result as a double
! in %f0.
.inline lower32,8
|
||||
! first argument: %o0/%o1 -> %f10/%f11 through a stack slot
std %o0,[%sp+0x48]
|
||||
ldd [%sp+0x48],%f10
|
||||
! second argument: %o2/%o3 -> %f12/%f13 through the same slot
std %o2,[%sp+0x48]
|
||||
ldd [%sp+0x48],%f12
|
||||
|
||||
! double -> 64-bit integer in %f10/%f11
fdtox %f10,%f10
|
||||
! overwrite the high word (%f10) with the high word of the second argument
fmovs %f12,%f10
|
||||
! 64-bit integer -> double
fxtod %f10,%f0
|
||||
.end
|
||||
|
||||
!
|
||||
! double mod(double /*x*/, double /*1/m*/, double /*m*/);
|
||||
!
|
||||
! Inline template for mod(x, 1/m, m): returns x - m * trunc(x * (1/m))
! in %f0, where trunc is the double -> integer -> double round trip below.
.inline mod,12
|
||||
! x: %o0/%o1 -> %f2
std %o0,[%sp+0x48]
|
||||
ldd [%sp+0x48],%f2
|
||||
! 1/m: %o2/%o3 -> %f4
std %o2,[%sp+0x48]
|
||||
ldd [%sp+0x48],%f4
|
||||
! m: %o4/%o5 -> %f6
std %o4,[%sp+0x48]
|
||||
ldd [%sp+0x48],%f6
|
||||
|
||||
! %f4 = x * (1/m)
fmuld %f2,%f4,%f4
|
||||
! drop the fraction: double -> 64-bit integer -> double
fdtox %f4,%f4
|
||||
fxtod %f4,%f4
|
||||
! %f4 = quotient * m
fmuld %f4,%f6,%f4
|
||||
! result = x - quotient * m
fsubd %f2,%f4,%f0
|
||||
.end
|
||||
|
||||
|
||||
!
|
||||
! void i16_to_d16_and_d32x4(double * /*1/(2^16)*/, double * /* 2^16*/,
|
||||
! double * /* 0 */,
|
||||
! double * /*result16*/, double * /* result32 */
|
||||
! float * /*source - should be unsigned int*
|
||||
! converted to float* */);
|
||||
!
|
||||
! Inline template: convert four consecutive 32-bit words at [%o5] into
!   - four doubles holding the full word values, stored at [%o4], and
!   - eight doubles holding their 16-bit halves, stored at [%o3] as
!     low/high pairs.
! %o0 -> 1/(2^16), %o1 -> 2^16, %o2 -> 0.0 (the constants are passed by
! address).
.inline i16_to_d16_and_d32x4,24
|
||||
ldd [%o0],%f2 ! 1/(2^16)
|
||||
ldd [%o1],%f4 ! 2^16
|
||||
! %f22 = the zero constant
ldd [%o2],%f22
|
||||
|
||||
! Build four 64-bit integers: clear each register pair with the zero
! constant, then load one source word into the low (odd) register.
fmovd %f22,%f6
|
||||
ld [%o5],%f7
|
||||
fmovd %f22,%f10
|
||||
ld [%o5+4],%f11
|
||||
fmovd %f22,%f14
|
||||
ld [%o5+8],%f15
|
||||
fmovd %f22,%f18
|
||||
ld [%o5+12],%f19
|
||||
! 64-bit integer -> double, and store the four full-word doubles
fxtod %f6,%f6
|
||||
std %f6,[%o4]
|
||||
fxtod %f10,%f10
|
||||
std %f10,[%o4+8]
|
||||
fxtod %f14,%f14
|
||||
std %f14,[%o4+16]
|
||||
fxtod %f18,%f18
|
||||
std %f18,[%o4+24]
|
||||
! scale each value by 1/(2^16) ...
fmuld %f2,%f6,%f8
|
||||
fmuld %f2,%f10,%f12
|
||||
fmuld %f2,%f14,%f16
|
||||
fmuld %f2,%f18,%f20
|
||||
! ... and drop the fraction, leaving the high 16 bits as an integer
fdtox %f8,%f8
|
||||
fdtox %f12,%f12
|
||||
fdtox %f16,%f16
|
||||
fdtox %f20,%f20
|
||||
! high halves back to double, stored in the second slot of each pair
fxtod %f8,%f8
|
||||
std %f8,[%o3+8]
|
||||
fxtod %f12,%f12
|
||||
std %f12,[%o3+24]
|
||||
fxtod %f16,%f16
|
||||
std %f16,[%o3+40]
|
||||
fxtod %f20,%f20
|
||||
std %f20,[%o3+56]
|
||||
! high * 2^16
fmuld %f8,%f4,%f8
|
||||
fmuld %f12,%f4,%f12
|
||||
fmuld %f16,%f4,%f16
|
||||
fmuld %f20,%f4,%f20
|
||||
! low half = value - high * 2^16, stored in the first slot of each pair
fsubd %f6,%f8,%f8
|
||||
std %f8,[%o3]
|
||||
fsubd %f10,%f12,%f12
|
||||
std %f12,[%o3+16]
|
||||
fsubd %f14,%f16,%f16
|
||||
std %f16,[%o3+32]
|
||||
fsubd %f18,%f20,%f20
|
||||
std %f20,[%o3+48]
|
||||
.end
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,126 +0,0 @@
|
||||
!
|
||||
! ***** BEGIN LICENSE BLOCK *****
|
||||
! Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
!
|
||||
! The contents of this file are subject to the Mozilla Public License Version
|
||||
! 1.1 (the "License"); you may not use this file except in compliance with
|
||||
! the License. You may obtain a copy of the License at
|
||||
! http://www.mozilla.org/MPL/
|
||||
!
|
||||
! Software distributed under the License is distributed on an "AS IS" basis,
|
||||
! WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
! for the specific language governing rights and limitations under the
|
||||
! License.
|
||||
!
|
||||
! The Original Code is inline macros for SPARC Montgomery multiply functions.
|
||||
!
|
||||
! The Initial Developer of the Original Code is
|
||||
! Sun Microsystems Inc.
|
||||
! Portions created by the Initial Developer are Copyright (C) 1999-2000
|
||||
! the Initial Developer. All Rights Reserved.
|
||||
!
|
||||
! Contributor(s):
|
||||
!
|
||||
! Alternatively, the contents of this file may be used under the terms of
|
||||
! either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
! the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
! in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
! of those above. If you wish to allow use of your version of this file only
|
||||
! under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
! use your version of this file under the terms of the MPL, indicate your
|
||||
! decision by deleting the provisions above and replace them with the notice
|
||||
! and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
! the provisions above, a recipient may use your version of this file under
|
||||
! the terms of any one of the MPL, the GPL or the LGPL.
|
||||
!
|
||||
! ***** END LICENSE BLOCK *****
|
||||
! $Id: montmulfv9.il,v 1.3 2004-04-27 23:04:36 gerv%gerv.net Exp $
|
||||
|
||||
! Inline template: converts the argument to a 64-bit integer and returns the upper 32 bits of that integer as a double.
|
||||
! double upper32(double /*frs1*/);
|
||||
!
|
||||
.inline upper32,8
|
||||
fdtox %f0,%f10 ! %f10/%f11 = 64-bit integer value of the argument
|
||||
fitod %f10,%f0 ! convert the upper word (%f10, read as a 32-bit integer) to double
|
||||
.end
|
||||
|
||||
! Inline template: converts the first argument to a 64-bit integer and returns the lower 32 bits of that integer as a double.
|
||||
! double lower32(double /*frs1*/, double /* Zero */);
|
||||
! The second argument supplies the word that overwrites the upper half of the integer.
|
||||
.inline lower32,8
|
||||
fdtox %f0,%f10 ! %f10/%f11 = 64-bit integer value of the argument
|
||||
fmovs %f2,%f10 ! replace the upper word with the upper word of Zero
|
||||
fxtod %f10,%f0 ! convert the remaining 64-bit integer back to double
|
||||
.end
|
||||
|
||||
! Inline template: returns x - trunc(x * (1/m)) * m in %f0.
|
||||
! double mod(double /*x*/, double /*1/m*/, double /*m*/);
|
||||
! The arguments are read from %f0 (x), %f2 (1/m) and %f4 (m).
|
||||
.inline mod,12
|
||||
fmuld %f0,%f2,%f2 ! %f2 = x * (1/m)
|
||||
fdtox %f2,%f2 ! convert the quotient estimate to a 64-bit integer
|
||||
fxtod %f2,%f2 ! ... and back to double, leaving an integral value q
|
||||
fmuld %f2,%f4,%f2 ! %f2 = q * m
|
||||
fsubd %f0,%f2,%f0 ! result: %f0 = x - q * m
|
||||
.end
|
||||
|
||||
|
||||
! Inline template: converts four 32-bit source words to doubles (result32[0..3]) and splits each into its low and high 16-bit halves, also as doubles (result16[2*i] = low half, result16[2*i+1] = high half).
|
||||
! void i16_to_d16_and_d32x4(double * /*1/(2^16)*/, double * /* 2^16*/,
|
||||
! double * /* 0 */,
|
||||
! double * /*result16*/, double * /* result32 */,
|
||||
! float * /*source - should be unsigned int*
|
||||
! converted to float* */);
|
||||
! Arguments are read from %o0..%o5 in the order listed above.
|
||||
.inline i16_to_d16_and_d32x4,24
|
||||
ldd [%o0],%f2 ! 1/(2^16)
|
||||
ldd [%o1],%f4 ! 2^16
|
||||
ldd [%o2],%f22 ! 0 (used to clear the upper word of each 64-bit integer)
|
||||
|
||||
fmovd %f22,%f6 ! %f6/%f7 = 0
|
||||
ld [%o5],%f7 ! low word = source[0]
|
||||
fmovd %f22,%f10 ! %f10/%f11 = 0
|
||||
ld [%o5+4],%f11 ! low word = source[1]
|
||||
fmovd %f22,%f14 ! %f14/%f15 = 0
|
||||
ld [%o5+8],%f15 ! low word = source[2]
|
||||
fmovd %f22,%f18 ! %f18/%f19 = 0
|
||||
ld [%o5+12],%f19 ! low word = source[3]
|
||||
fxtod %f6,%f6 ! 64-bit integer -> double
|
||||
std %f6,[%o4] ! result32[0]
|
||||
fxtod %f10,%f10
|
||||
std %f10,[%o4+8] ! result32[1]
|
||||
fxtod %f14,%f14
|
||||
std %f14,[%o4+16] ! result32[2]
|
||||
fxtod %f18,%f18
|
||||
std %f18,[%o4+24] ! result32[3]
|
||||
fmuld %f2,%f6,%f8 ! value * 2^-16 for each of the four words
|
||||
fmuld %f2,%f10,%f12
|
||||
fmuld %f2,%f14,%f16
|
||||
fmuld %f2,%f18,%f20
|
||||
fdtox %f8,%f8 ! to 64-bit integer: keeps only the high 16-bit half
|
||||
fdtox %f12,%f12
|
||||
fdtox %f16,%f16
|
||||
fdtox %f20,%f20
|
||||
fxtod %f8,%f8 ! high half back to double
|
||||
std %f8,[%o3+8] ! result16[1] = high half of source[0]
|
||||
fxtod %f12,%f12
|
||||
std %f12,[%o3+24] ! result16[3] = high half of source[1]
|
||||
fxtod %f16,%f16
|
||||
std %f16,[%o3+40] ! result16[5] = high half of source[2]
|
||||
fxtod %f20,%f20
|
||||
std %f20,[%o3+56] ! result16[7] = high half of source[3]
|
||||
fmuld %f8,%f4,%f8 ! high half * 2^16
|
||||
fmuld %f12,%f4,%f12
|
||||
fmuld %f16,%f4,%f16
|
||||
fmuld %f20,%f4,%f20
|
||||
fsubd %f6,%f8,%f8 ! low half = value - high half * 2^16
|
||||
std %f8,[%o3] ! result16[0] = low half of source[0]
|
||||
fsubd %f10,%f12,%f12
|
||||
std %f12,[%o3+16] ! result16[2] = low half of source[1]
|
||||
fsubd %f14,%f16,%f16
|
||||
std %f16,[%o3+32] ! result16[4] = low half of source[2]
|
||||
fsubd %f18,%f20,%f20
|
||||
std %f20,[%o3+48] ! result16[6] = low half of source[3]
|
||||
.end
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,102 +0,0 @@
|
||||
/*
|
||||
* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is the Multi-precision Binary Polynomial Arithmetic Library.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* Sun Microsystems, Inc.
|
||||
* Portions created by the Initial Developer are Copyright (C) 2003
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
* Sheueling Chang Shantz <sheueling.chang@sun.com> and
|
||||
* Douglas Stebila <douglas@stebila.ca> of Sun Laboratories.
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef _MP_GF2M_PRIV_H_
#define _MP_GF2M_PRIV_H_

#include "mpi-priv.h"

/* Squaring lookup table: entry i is the 4-bit value i with its bits spread
 * out to the even bit positions of a byte, i.e. the GF(2) square of i.
 */
extern const mp_digit mp_gf2m_sqr_tb[16];

/* Number of bits in one mp_digit as assumed by the routines below. */
#if defined(MP_USE_UINT_DIGIT)
#define MP_DIGIT_BITS 32
#else
#define MP_DIGIT_BITS 64
#endif

/* Platform-specific macros for fast binary polynomial squaring.
 *
 * gf2m_SQR0(w) yields the square of the low half of digit w and
 * gf2m_SQR1(w) the square of the high half; each result fills one digit.
 * The expansions are fully parenthesised so that they can be embedded in
 * larger expressions.  Note that w is evaluated several times, so it must
 * not have side effects.
 */
#if MP_DIGIT_BITS == 32
#define gf2m_SQR1(w) \
    (mp_gf2m_sqr_tb[(w) >> 28 & 0xF] << 24 | mp_gf2m_sqr_tb[(w) >> 24 & 0xF] << 16 | \
     mp_gf2m_sqr_tb[(w) >> 20 & 0xF] << 8 | mp_gf2m_sqr_tb[(w) >> 16 & 0xF])
#define gf2m_SQR0(w) \
    (mp_gf2m_sqr_tb[(w) >> 12 & 0xF] << 24 | mp_gf2m_sqr_tb[(w) >> 8 & 0xF] << 16 | \
     mp_gf2m_sqr_tb[(w) >> 4 & 0xF] << 8 | mp_gf2m_sqr_tb[(w) & 0xF])
#else
#define gf2m_SQR1(w) \
    (mp_gf2m_sqr_tb[(w) >> 60 & 0xF] << 56 | mp_gf2m_sqr_tb[(w) >> 56 & 0xF] << 48 | \
     mp_gf2m_sqr_tb[(w) >> 52 & 0xF] << 40 | mp_gf2m_sqr_tb[(w) >> 48 & 0xF] << 32 | \
     mp_gf2m_sqr_tb[(w) >> 44 & 0xF] << 24 | mp_gf2m_sqr_tb[(w) >> 40 & 0xF] << 16 | \
     mp_gf2m_sqr_tb[(w) >> 36 & 0xF] << 8 | mp_gf2m_sqr_tb[(w) >> 32 & 0xF])
#define gf2m_SQR0(w) \
    (mp_gf2m_sqr_tb[(w) >> 28 & 0xF] << 56 | mp_gf2m_sqr_tb[(w) >> 24 & 0xF] << 48 | \
     mp_gf2m_sqr_tb[(w) >> 20 & 0xF] << 40 | mp_gf2m_sqr_tb[(w) >> 16 & 0xF] << 32 | \
     mp_gf2m_sqr_tb[(w) >> 12 & 0xF] << 24 | mp_gf2m_sqr_tb[(w) >> 8 & 0xF] << 16 | \
     mp_gf2m_sqr_tb[(w) >> 4 & 0xF] << 8 | mp_gf2m_sqr_tb[(w) & 0xF])
#endif

/* Multiply two binary polynomials mp_digits a, b.
 * Result is a polynomial with degree < 2 * MP_DIGIT_BITS - 1.
 * Output in two mp_digits rh, rl.
 */
void s_bmul_1x1(mp_digit *rh, mp_digit *rl, const mp_digit a, const mp_digit b);

/* Compute xor-multiply of two binary polynomials (a1, a0) x (b1, b0)
 * result is a binary polynomial in 4 mp_digits r[4].
 * The caller MUST ensure that r has the right amount of space allocated.
 */
void s_bmul_2x2(mp_digit *r, const mp_digit a1, const mp_digit a0, const mp_digit b1,
                const mp_digit b0);

/* Compute xor-multiply of two binary polynomials (a2, a1, a0) x (b2, b1, b0)
 * result is a binary polynomial in 6 mp_digits r[6].
 * The caller MUST ensure that r has the right amount of space allocated.
 */
void s_bmul_3x3(mp_digit *r, const mp_digit a2, const mp_digit a1, const mp_digit a0,
                const mp_digit b2, const mp_digit b1, const mp_digit b0);

/* Compute xor-multiply of two binary polynomials (a3, a2, a1, a0) x (b3, b2, b1, b0)
 * result is a binary polynomial in 8 mp_digits r[8].
 * The caller MUST ensure that r has the right amount of space allocated.
 */
void s_bmul_4x4(mp_digit *r, const mp_digit a3, const mp_digit a2, const mp_digit a1,
                const mp_digit a0, const mp_digit b3, const mp_digit b2, const mp_digit b1,
                const mp_digit b0);

#endif /* _MP_GF2M_PRIV_H_ */
|
||||
@@ -1,600 +0,0 @@
|
||||
/*
|
||||
* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is the Multi-precision Binary Polynomial Arithmetic Library.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* Sun Microsystems, Inc.
|
||||
* Portions created by the Initial Developer are Copyright (C) 2003
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
* Sheueling Chang Shantz <sheueling.chang@sun.com> and
|
||||
* Douglas Stebila <douglas@stebila.ca> of Sun Laboratories.
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include "mp_gf2m.h"
|
||||
#include "mp_gf2m-priv.h"
|
||||
#include "mplogic.h"
|
||||
#include "mpi-priv.h"
|
||||
|
||||
/* Squares of the sixteen 4-bit binary polynomials: entry i is i with a zero
 * bit inserted above each of its bits (bit k of i moves to bit 2k).
 */
const mp_digit mp_gf2m_sqr_tb[16] = {
    0x00, 0x01, 0x04, 0x05,
    0x10, 0x11, 0x14, 0x15,
    0x40, 0x41, 0x44, 0x45,
    0x50, 0x51, 0x54, 0x55
};
|
||||
|
||||
/* Multiply two binary polynomials mp_digits a, b.
|
||||
* Result is a polynomial with degree < 2 * MP_DIGIT_BITS - 1.
|
||||
* Output in two mp_digits rh, rl.
|
||||
*/
|
||||
#if MP_DIGIT_BITS == 32
|
||||
void
|
||||
s_bmul_1x1(mp_digit *rh, mp_digit *rl, const mp_digit a, const mp_digit b)
|
||||
{
|
||||
register mp_digit h, l, s;
|
||||
mp_digit tab[8], top2b = a >> 30;
|
||||
register mp_digit a1, a2, a4;
|
||||
|
||||
a1 = a & (0x3FFFFFFF); a2 = a1 << 1; a4 = a2 << 1;
|
||||
|
||||
tab[0] = 0; tab[1] = a1; tab[2] = a2; tab[3] = a1^a2;
|
||||
tab[4] = a4; tab[5] = a1^a4; tab[6] = a2^a4; tab[7] = a1^a2^a4;
|
||||
|
||||
s = tab[b & 0x7]; l = s;
|
||||
s = tab[b >> 3 & 0x7]; l ^= s << 3; h = s >> 29;
|
||||
s = tab[b >> 6 & 0x7]; l ^= s << 6; h ^= s >> 26;
|
||||
s = tab[b >> 9 & 0x7]; l ^= s << 9; h ^= s >> 23;
|
||||
s = tab[b >> 12 & 0x7]; l ^= s << 12; h ^= s >> 20;
|
||||
s = tab[b >> 15 & 0x7]; l ^= s << 15; h ^= s >> 17;
|
||||
s = tab[b >> 18 & 0x7]; l ^= s << 18; h ^= s >> 14;
|
||||
s = tab[b >> 21 & 0x7]; l ^= s << 21; h ^= s >> 11;
|
||||
s = tab[b >> 24 & 0x7]; l ^= s << 24; h ^= s >> 8;
|
||||
s = tab[b >> 27 & 0x7]; l ^= s << 27; h ^= s >> 5;
|
||||
s = tab[b >> 30 ]; l ^= s << 30; h ^= s >> 2;
|
||||
|
||||
/* compensate for the top two bits of a */
|
||||
|
||||
if (top2b & 01) { l ^= b << 30; h ^= b >> 2; }
|
||||
if (top2b & 02) { l ^= b << 31; h ^= b >> 1; }
|
||||
|
||||
*rh = h; *rl = l;
|
||||
}
|
||||
#else
|
||||
void
|
||||
s_bmul_1x1(mp_digit *rh, mp_digit *rl, const mp_digit a, const mp_digit b)
|
||||
{
|
||||
register mp_digit h, l, s;
|
||||
mp_digit tab[16], top3b = a >> 61;
|
||||
register mp_digit a1, a2, a4, a8;
|
||||
|
||||
a1 = a & (0x1FFFFFFFFFFFFFFF); a2 = a1 << 1;
|
||||
a4 = a2 << 1; a8 = a4 << 1;
|
||||
tab[ 0] = 0; tab[ 1] = a1; tab[ 2] = a2; tab[ 3] = a1^a2;
|
||||
tab[ 4] = a4; tab[ 5] = a1^a4; tab[ 6] = a2^a4; tab[ 7] = a1^a2^a4;
|
||||
tab[ 8] = a8; tab[ 9] = a1^a8; tab[10] = a2^a8; tab[11] = a1^a2^a8;
|
||||
tab[12] = a4^a8; tab[13] = a1^a4^a8; tab[14] = a2^a4^a8; tab[15] = a1^a2^a4^a8;
|
||||
|
||||
s = tab[b & 0xF]; l = s;
|
||||
s = tab[b >> 4 & 0xF]; l ^= s << 4; h = s >> 60;
|
||||
s = tab[b >> 8 & 0xF]; l ^= s << 8; h ^= s >> 56;
|
||||
s = tab[b >> 12 & 0xF]; l ^= s << 12; h ^= s >> 52;
|
||||
s = tab[b >> 16 & 0xF]; l ^= s << 16; h ^= s >> 48;
|
||||
s = tab[b >> 20 & 0xF]; l ^= s << 20; h ^= s >> 44;
|
||||
s = tab[b >> 24 & 0xF]; l ^= s << 24; h ^= s >> 40;
|
||||
s = tab[b >> 28 & 0xF]; l ^= s << 28; h ^= s >> 36;
|
||||
s = tab[b >> 32 & 0xF]; l ^= s << 32; h ^= s >> 32;
|
||||
s = tab[b >> 36 & 0xF]; l ^= s << 36; h ^= s >> 28;
|
||||
s = tab[b >> 40 & 0xF]; l ^= s << 40; h ^= s >> 24;
|
||||
s = tab[b >> 44 & 0xF]; l ^= s << 44; h ^= s >> 20;
|
||||
s = tab[b >> 48 & 0xF]; l ^= s << 48; h ^= s >> 16;
|
||||
s = tab[b >> 52 & 0xF]; l ^= s << 52; h ^= s >> 12;
|
||||
s = tab[b >> 56 & 0xF]; l ^= s << 56; h ^= s >> 8;
|
||||
s = tab[b >> 60 ]; l ^= s << 60; h ^= s >> 4;
|
||||
|
||||
/* compensate for the top three bits of a */
|
||||
|
||||
if (top3b & 01) { l ^= b << 61; h ^= b >> 3; }
|
||||
if (top3b & 02) { l ^= b << 62; h ^= b >> 2; }
|
||||
if (top3b & 04) { l ^= b << 63; h ^= b >> 1; }
|
||||
|
||||
*rh = h; *rl = l;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Compute xor-multiply of two binary polynomials (a1, a0) x (b1, b0)
|
||||
* result is a binary polynomial in 4 mp_digits r[4].
|
||||
* The caller MUST ensure that r has the right amount of space allocated.
|
||||
*/
|
||||
void
|
||||
s_bmul_2x2(mp_digit *r, const mp_digit a1, const mp_digit a0, const mp_digit b1,
|
||||
const mp_digit b0)
|
||||
{
|
||||
mp_digit m1, m0;
|
||||
/* r[3] = h1, r[2] = h0; r[1] = l1; r[0] = l0 */
|
||||
s_bmul_1x1(r+3, r+2, a1, b1);
|
||||
s_bmul_1x1(r+1, r, a0, b0);
|
||||
s_bmul_1x1(&m1, &m0, a0 ^ a1, b0 ^ b1);
|
||||
/* Correction on m1 ^= l1 ^ h1; m0 ^= l0 ^ h0; */
|
||||
r[2] ^= m1 ^ r[1] ^ r[3]; /* h0 ^= m1 ^ l1 ^ h1; */
|
||||
r[1] = r[3] ^ r[2] ^ r[0] ^ m1 ^ m0; /* l1 ^= l0 ^ h0 ^ m0; */
|
||||
}
|
||||
|
||||
/* Compute xor-multiply of two binary polynomials (a2, a1, a0) x (b2, b1, b0)
|
||||
* result is a binary polynomial in 6 mp_digits r[6].
|
||||
* The caller MUST ensure that r has the right amount of space allocated.
|
||||
*/
|
||||
void
|
||||
s_bmul_3x3(mp_digit *r, const mp_digit a2, const mp_digit a1, const mp_digit a0,
|
||||
const mp_digit b2, const mp_digit b1, const mp_digit b0)
|
||||
{
|
||||
mp_digit zm[4];
|
||||
|
||||
s_bmul_1x1(r+5, r+4, a2, b2); /* fill top 2 words */
|
||||
s_bmul_2x2(zm, a1, a2^a0, b1, b2^b0); /* fill middle 4 words */
|
||||
s_bmul_2x2(r, a1, a0, b1, b0); /* fill bottom 4 words */
|
||||
|
||||
zm[3] ^= r[3];
|
||||
zm[2] ^= r[2];
|
||||
zm[1] ^= r[1] ^ r[5];
|
||||
zm[0] ^= r[0] ^ r[4];
|
||||
|
||||
r[5] ^= zm[3];
|
||||
r[4] ^= zm[2];
|
||||
r[3] ^= zm[1];
|
||||
r[2] ^= zm[0];
|
||||
}
|
||||
|
||||
/* Compute xor-multiply of two binary polynomials (a3, a2, a1, a0) x (b3, b2, b1, b0)
|
||||
* result is a binary polynomial in 8 mp_digits r[8].
|
||||
* The caller MUST ensure that r has the right amount of space allocated.
|
||||
*/
|
||||
void s_bmul_4x4(mp_digit *r, const mp_digit a3, const mp_digit a2, const mp_digit a1,
|
||||
const mp_digit a0, const mp_digit b3, const mp_digit b2, const mp_digit b1,
|
||||
const mp_digit b0)
|
||||
{
|
||||
mp_digit zm[4];
|
||||
|
||||
s_bmul_2x2(r+4, a3, a2, b3, b2); /* fill top 4 words */
|
||||
s_bmul_2x2(zm, a3^a1, a2^a0, b3^b1, b2^b0); /* fill middle 4 words */
|
||||
s_bmul_2x2(r, a1, a0, b1, b0); /* fill bottom 4 words */
|
||||
|
||||
zm[3] ^= r[3] ^ r[7];
|
||||
zm[2] ^= r[2] ^ r[6];
|
||||
zm[1] ^= r[1] ^ r[5];
|
||||
zm[0] ^= r[0] ^ r[4];
|
||||
|
||||
r[5] ^= zm[3];
|
||||
r[4] ^= zm[2];
|
||||
r[3] ^= zm[1];
|
||||
r[2] ^= zm[0];
|
||||
}
|
||||
|
||||
/* Compute addition of two binary polynomials a and b,
|
||||
* store result in c; c could be a or b, a and b could be equal;
|
||||
* c is the bitwise XOR of a and b.
|
||||
*/
|
||||
mp_err
|
||||
mp_badd(const mp_int *a, const mp_int *b, mp_int *c)
|
||||
{
|
||||
mp_digit *pa, *pb, *pc;
|
||||
mp_size ix;
|
||||
mp_size used_pa, used_pb;
|
||||
mp_err res = MP_OKAY;
|
||||
|
||||
/* Add all digits up to the precision of b. If b had more
|
||||
* precision than a initially, swap a, b first
|
||||
*/
|
||||
if (MP_USED(a) >= MP_USED(b)) {
|
||||
pa = MP_DIGITS(a);
|
||||
pb = MP_DIGITS(b);
|
||||
used_pa = MP_USED(a);
|
||||
used_pb = MP_USED(b);
|
||||
} else {
|
||||
pa = MP_DIGITS(b);
|
||||
pb = MP_DIGITS(a);
|
||||
used_pa = MP_USED(b);
|
||||
used_pb = MP_USED(a);
|
||||
}
|
||||
|
||||
/* Make sure c has enough precision for the output value */
|
||||
MP_CHECKOK( s_mp_pad(c, used_pa) );
|
||||
|
||||
/* Do word-by-word xor */
|
||||
pc = MP_DIGITS(c);
|
||||
for (ix = 0; ix < used_pb; ix++) {
|
||||
(*pc++) = (*pa++) ^ (*pb++);
|
||||
}
|
||||
|
||||
/* Finish the rest of digits until we're actually done */
|
||||
for (; ix < used_pa; ++ix) {
|
||||
*pc++ = *pa++;
|
||||
}
|
||||
|
||||
MP_USED(c) = used_pa;
|
||||
MP_SIGN(c) = ZPOS;
|
||||
s_mp_clamp(c);
|
||||
|
||||
CLEANUP:
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Shift a right by one bit in place; on failure jumps to the enclosing
 * function's CLEANUP label via MP_CHECKOK.  No trailing semicolon here:
 * the call site supplies it.
 */
#define s_mp_div2(a) MP_CHECKOK( mpl_rsh((a), (a), 1) )
|
||||
|
||||
/* Compute binary polynomial multiply d = a * b */
|
||||
static void
|
||||
s_bmul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *d)
|
||||
{
|
||||
mp_digit a_i, a0b0, a1b1, carry = 0;
|
||||
while (a_len--) {
|
||||
a_i = *a++;
|
||||
s_bmul_1x1(&a1b1, &a0b0, a_i, b);
|
||||
*d++ = a0b0 ^ carry;
|
||||
carry = a1b1;
|
||||
}
|
||||
*d = carry;
|
||||
}
|
||||
|
||||
/* Compute binary polynomial xor multiply accumulate d ^= a * b */
|
||||
static void
|
||||
s_bmul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *d)
|
||||
{
|
||||
mp_digit a_i, a0b0, a1b1, carry = 0;
|
||||
while (a_len--) {
|
||||
a_i = *a++;
|
||||
s_bmul_1x1(&a1b1, &a0b0, a_i, b);
|
||||
*d++ ^= a0b0 ^ carry;
|
||||
carry = a1b1;
|
||||
}
|
||||
*d ^= carry;
|
||||
}
|
||||
|
||||
/* Compute binary polynomial xor multiply c = a * b.
|
||||
* All parameters may be identical.
|
||||
*/
|
||||
mp_err
|
||||
mp_bmul(const mp_int *a, const mp_int *b, mp_int *c)
|
||||
{
|
||||
mp_digit *pb, b_i;
|
||||
mp_int tmp;
|
||||
mp_size ib, a_used, b_used;
|
||||
mp_err res = MP_OKAY;
|
||||
|
||||
MP_DIGITS(&tmp) = 0;
|
||||
|
||||
ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
|
||||
|
||||
if (a == c) {
|
||||
MP_CHECKOK( mp_init_copy(&tmp, a) );
|
||||
if (a == b)
|
||||
b = &tmp;
|
||||
a = &tmp;
|
||||
} else if (b == c) {
|
||||
MP_CHECKOK( mp_init_copy(&tmp, b) );
|
||||
b = &tmp;
|
||||
}
|
||||
|
||||
if (MP_USED(a) < MP_USED(b)) {
|
||||
const mp_int *xch = b; /* switch a and b if b longer */
|
||||
b = a;
|
||||
a = xch;
|
||||
}
|
||||
|
||||
MP_USED(c) = 1; MP_DIGIT(c, 0) = 0;
|
||||
MP_CHECKOK( s_mp_pad(c, USED(a) + USED(b)) );
|
||||
|
||||
pb = MP_DIGITS(b);
|
||||
s_bmul_d(MP_DIGITS(a), MP_USED(a), *pb++, MP_DIGITS(c));
|
||||
|
||||
/* Outer loop: Digits of b */
|
||||
a_used = MP_USED(a);
|
||||
b_used = MP_USED(b);
|
||||
MP_USED(c) = a_used + b_used;
|
||||
for (ib = 1; ib < b_used; ib++) {
|
||||
b_i = *pb++;
|
||||
|
||||
/* Inner product: Digits of a */
|
||||
if (b_i)
|
||||
s_bmul_d_add(MP_DIGITS(a), a_used, b_i, MP_DIGITS(c) + ib);
|
||||
else
|
||||
MP_DIGIT(c, ib + a_used) = b_i;
|
||||
}
|
||||
|
||||
s_mp_clamp(c);
|
||||
|
||||
SIGN(c) = ZPOS;
|
||||
|
||||
CLEANUP:
|
||||
mp_clear(&tmp);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
/* Compute modular reduction of a and store result in r.
|
||||
* r could be a.
|
||||
* For modular arithmetic, the irreducible polynomial f(t) is represented
|
||||
* as an array of int[], where f(t) is of the form:
|
||||
* f(t) = t^p[0] + t^p[1] + ... + t^p[k]
|
||||
* where m = p[0] > p[1] > ... > p[k] = 0.
|
||||
*/
|
||||
mp_err
|
||||
mp_bmod(const mp_int *a, const unsigned int p[], mp_int *r)
|
||||
{
|
||||
int j, k;
|
||||
int n, dN, d0, d1;
|
||||
mp_digit zz, *z, tmp;
|
||||
mp_size used;
|
||||
mp_err res = MP_OKAY;
|
||||
|
||||
/* The algorithm does the reduction in place in r,
|
||||
* if a != r, copy a into r first so reduction can be done in r
|
||||
*/
|
||||
if (a != r) {
|
||||
MP_CHECKOK( mp_copy(a, r) );
|
||||
}
|
||||
z = MP_DIGITS(r);
|
||||
|
||||
/* start reduction */
|
||||
dN = p[0] / MP_DIGIT_BITS;
|
||||
used = MP_USED(r);
|
||||
|
||||
for (j = used - 1; j > dN;) {
|
||||
|
||||
zz = z[j];
|
||||
if (zz == 0) {
|
||||
j--; continue;
|
||||
}
|
||||
z[j] = 0;
|
||||
|
||||
for (k = 1; p[k] > 0; k++) {
|
||||
/* reducing component t^p[k] */
|
||||
n = p[0] - p[k];
|
||||
d0 = n % MP_DIGIT_BITS;
|
||||
d1 = MP_DIGIT_BITS - d0;
|
||||
n /= MP_DIGIT_BITS;
|
||||
z[j-n] ^= (zz>>d0);
|
||||
if (d0)
|
||||
z[j-n-1] ^= (zz<<d1);
|
||||
}
|
||||
|
||||
/* reducing component t^0 */
|
||||
n = dN;
|
||||
d0 = p[0] % MP_DIGIT_BITS;
|
||||
d1 = MP_DIGIT_BITS - d0;
|
||||
z[j-n] ^= (zz >> d0);
|
||||
if (d0)
|
||||
z[j-n-1] ^= (zz << d1);
|
||||
|
||||
}
|
||||
|
||||
/* final round of reduction */
|
||||
while (j == dN) {
|
||||
|
||||
d0 = p[0] % MP_DIGIT_BITS;
|
||||
zz = z[dN] >> d0;
|
||||
if (zz == 0) break;
|
||||
d1 = MP_DIGIT_BITS - d0;
|
||||
|
||||
/* clear up the top d1 bits */
|
||||
if (d0) z[dN] = (z[dN] << d1) >> d1;
|
||||
*z ^= zz; /* reduction t^0 component */
|
||||
|
||||
for (k = 1; p[k] > 0; k++) {
|
||||
/* reducing component t^p[k]*/
|
||||
n = p[k] / MP_DIGIT_BITS;
|
||||
d0 = p[k] % MP_DIGIT_BITS;
|
||||
d1 = MP_DIGIT_BITS - d0;
|
||||
z[n] ^= (zz << d0);
|
||||
tmp = zz >> d1;
|
||||
if (d0 && tmp)
|
||||
z[n+1] ^= tmp;
|
||||
}
|
||||
}
|
||||
|
||||
s_mp_clamp(r);
|
||||
CLEANUP:
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Compute the product of two polynomials a and b, reduce modulo p,
|
||||
* Store the result in r. r could be a or b; a could be b.
|
||||
*/
|
||||
mp_err
|
||||
mp_bmulmod(const mp_int *a, const mp_int *b, const unsigned int p[], mp_int *r)
|
||||
{
|
||||
mp_err res;
|
||||
|
||||
if (a == b) return mp_bsqrmod(a, p, r);
|
||||
if ((res = mp_bmul(a, b, r) ) != MP_OKAY)
|
||||
return res;
|
||||
return mp_bmod(r, p, r);
|
||||
}
|
||||
|
||||
/* Compute binary polynomial squaring c = a*a mod p .
|
||||
* Parameter r and a can be identical.
|
||||
*/
|
||||
|
||||
mp_err
|
||||
mp_bsqrmod(const mp_int *a, const unsigned int p[], mp_int *r)
|
||||
{
|
||||
mp_digit *pa, *pr, a_i;
|
||||
mp_int tmp;
|
||||
mp_size ia, a_used;
|
||||
mp_err res;
|
||||
|
||||
ARGCHK(a != NULL && r != NULL, MP_BADARG);
|
||||
MP_DIGITS(&tmp) = 0;
|
||||
|
||||
if (a == r) {
|
||||
MP_CHECKOK( mp_init_copy(&tmp, a) );
|
||||
a = &tmp;
|
||||
}
|
||||
|
||||
MP_USED(r) = 1; MP_DIGIT(r, 0) = 0;
|
||||
MP_CHECKOK( s_mp_pad(r, 2*USED(a)) );
|
||||
|
||||
pa = MP_DIGITS(a);
|
||||
pr = MP_DIGITS(r);
|
||||
a_used = MP_USED(a);
|
||||
MP_USED(r) = 2 * a_used;
|
||||
|
||||
for (ia = 0; ia < a_used; ia++) {
|
||||
a_i = *pa++;
|
||||
*pr++ = gf2m_SQR0(a_i);
|
||||
*pr++ = gf2m_SQR1(a_i);
|
||||
}
|
||||
|
||||
MP_CHECKOK( mp_bmod(r, p, r) );
|
||||
s_mp_clamp(r);
|
||||
SIGN(r) = ZPOS;
|
||||
|
||||
CLEANUP:
|
||||
mp_clear(&tmp);
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Compute binary polynomial y/x mod p, y divided by x, reduce modulo p.
|
||||
* Store the result in r. r could be x or y, and x could equal y.
|
||||
* Uses algorithm Modular_Division_GF(2^m) from
|
||||
* Chang-Shantz, S. "From Euclid's GCD to Montgomery Multiplication to
|
||||
* the Great Divide".
|
||||
*/
|
||||
int
|
||||
mp_bdivmod(const mp_int *y, const mp_int *x, const mp_int *pp,
|
||||
const unsigned int p[], mp_int *r)
|
||||
{
|
||||
mp_int aa, bb, uu;
|
||||
mp_int *a, *b, *u, *v;
|
||||
mp_err res = MP_OKAY;
|
||||
|
||||
MP_DIGITS(&aa) = 0;
|
||||
MP_DIGITS(&bb) = 0;
|
||||
MP_DIGITS(&uu) = 0;
|
||||
|
||||
MP_CHECKOK( mp_init_copy(&aa, x) );
|
||||
MP_CHECKOK( mp_init_copy(&uu, y) );
|
||||
MP_CHECKOK( mp_init_copy(&bb, pp) );
|
||||
MP_CHECKOK( s_mp_pad(r, USED(pp)) );
|
||||
MP_USED(r) = 1; MP_DIGIT(r, 0) = 0;
|
||||
|
||||
a = &aa; b= &bb; u=&uu; v=r;
|
||||
/* reduce x and y mod p */
|
||||
MP_CHECKOK( mp_bmod(a, p, a) );
|
||||
MP_CHECKOK( mp_bmod(u, p, u) );
|
||||
|
||||
while (!mp_isodd(a)) {
|
||||
s_mp_div2(a);
|
||||
if (mp_isodd(u)) {
|
||||
MP_CHECKOK( mp_badd(u, pp, u) );
|
||||
}
|
||||
s_mp_div2(u);
|
||||
}
|
||||
|
||||
do {
|
||||
if (mp_cmp_mag(b, a) > 0) {
|
||||
MP_CHECKOK( mp_badd(b, a, b) );
|
||||
MP_CHECKOK( mp_badd(v, u, v) );
|
||||
do {
|
||||
s_mp_div2(b);
|
||||
if (mp_isodd(v)) {
|
||||
MP_CHECKOK( mp_badd(v, pp, v) );
|
||||
}
|
||||
s_mp_div2(v);
|
||||
} while (!mp_isodd(b));
|
||||
}
|
||||
else if ((MP_DIGIT(a,0) == 1) && (MP_USED(a) == 1))
|
||||
break;
|
||||
else {
|
||||
MP_CHECKOK( mp_badd(a, b, a) );
|
||||
MP_CHECKOK( mp_badd(u, v, u) );
|
||||
do {
|
||||
s_mp_div2(a);
|
||||
if (mp_isodd(u)) {
|
||||
MP_CHECKOK( mp_badd(u, pp, u) );
|
||||
}
|
||||
s_mp_div2(u);
|
||||
} while (!mp_isodd(a));
|
||||
}
|
||||
} while (1);
|
||||
|
||||
MP_CHECKOK( mp_copy(u, r) );
|
||||
|
||||
CLEANUP:
|
||||
return res;
|
||||
|
||||
}
|
||||
|
||||
/* Convert the bit-string representation of a polynomial a into an array
|
||||
* of integers corresponding to the bits with non-zero coefficient.
|
||||
* Up to max elements of the array will be filled. Return value is total
|
||||
* number of coefficients that would be extracted if array was large enough.
|
||||
*/
|
||||
int
|
||||
mp_bpoly2arr(const mp_int *a, unsigned int p[], int max)
|
||||
{
|
||||
int i, j, k;
|
||||
mp_digit top_bit, mask;
|
||||
|
||||
top_bit = 1;
|
||||
top_bit <<= MP_DIGIT_BIT - 1;
|
||||
|
||||
for (k = 0; k < max; k++) p[k] = 0;
|
||||
k = 0;
|
||||
|
||||
for (i = MP_USED(a) - 1; i >= 0; i--) {
|
||||
mask = top_bit;
|
||||
for (j = MP_DIGIT_BIT - 1; j >= 0; j--) {
|
||||
if (MP_DIGITS(a)[i] & mask) {
|
||||
if (k < max) p[k] = MP_DIGIT_BIT * i + j;
|
||||
k++;
|
||||
}
|
||||
mask >>= 1;
|
||||
}
|
||||
}
|
||||
|
||||
return k;
|
||||
}
|
||||
|
||||
/* Convert the coefficient array representation of a polynomial to a
|
||||
* bit-string. The array must be terminated by 0.
|
||||
*/
|
||||
mp_err
|
||||
mp_barr2poly(const unsigned int p[], mp_int *a)
|
||||
{
|
||||
|
||||
mp_err res = MP_OKAY;
|
||||
int i;
|
||||
|
||||
mp_zero(a);
|
||||
for (i = 0; p[i] > 0; i++) {
|
||||
MP_CHECKOK( mpl_set_bit(a, p[i], 1) );
|
||||
}
|
||||
MP_CHECKOK( mpl_set_bit(a, 0, 1) );
|
||||
|
||||
CLEANUP:
|
||||
return res;
|
||||
}
|
||||
@@ -1,63 +0,0 @@
|
||||
/*
|
||||
* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is the Multi-precision Binary Polynomial Arithmetic Library.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* Sun Microsystems, Inc.
|
||||
* Portions created by the Initial Developer are Copyright (C) 2003
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
* Sheueling Chang Shantz <sheueling.chang@sun.com> and
|
||||
* Douglas Stebila <douglas@stebila.ca> of Sun Laboratories.
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef _MP_GF2M_H_
#define _MP_GF2M_H_

#include "mpi.h"

/* c = a + b for binary polynomials, i.e. the bitwise XOR of a and b. */
mp_err mp_badd(const mp_int *a, const mp_int *b, mp_int *c);

/* c = a * b for binary polynomials, without modular reduction. */
mp_err mp_bmul(const mp_int *a, const mp_int *b, mp_int *c);

/* For modular arithmetic, the irreducible polynomial f(t) is represented
 * as an array of int[], where f(t) is of the form:
 *     f(t) = t^p[0] + t^p[1] + ... + t^p[k]
 * where m = p[0] > p[1] > ... > p[k] = 0.
 */

/* r = a mod p. */
mp_err mp_bmod(const mp_int *a, const unsigned int p[], mp_int *r);

/* r = a * b mod p. */
mp_err mp_bmulmod(const mp_int *a, const mp_int *b, const unsigned int p[],
                  mp_int *r);

/* r = a * a mod p. */
mp_err mp_bsqrmod(const mp_int *a, const unsigned int p[], mp_int *r);

/* r = y / x mod p; pp is the modulus as a polynomial, p the same modulus
 * in exponent-array form.
 */
mp_err mp_bdivmod(const mp_int *y, const mp_int *x, const mp_int *pp,
                  const unsigned int p[], mp_int *r);

/* Conversions between the bit-string and exponent-array representations. */
int mp_bpoly2arr(const mp_int *a, unsigned int p[], int max);
mp_err mp_barr2poly(const unsigned int p[], mp_int *a);

#endif /* _MP_GF2M_H_ */
|
||||
@@ -1,760 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is the Netscape security libraries.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* Red Hat, Inc
|
||||
* Portions created by the Initial Developer are Copyright (C) 2005
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
* Robert Relyea <rrelyea@redhat.com>
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include "mpi.h"
|
||||
|
||||
/*
|
||||
* This file implements a single function: mpi_getProcessorLineSize();
|
||||
* mpi_getProcessorLineSize() returns the size in bytes of the cache line
|
||||
* if a cache exists, or zero if there is no cache. If more than one
|
||||
* cache line exists, it should return the smallest line size (which is
|
||||
* usually the L1 cache).
|
||||
*
|
||||
* mp_modexp uses this information to make sure that private key information
|
||||
* isn't being leaked through the cache.
|
||||
*
|
||||
* Currently the file returns good data for most modern x86 processors, and
|
||||
* reasonable data on 64-bit ppc processors. All other processors are assumed
|
||||
* to have a cache line size of 32 bytes unless modified by target.mk.
|
||||
*
|
||||
*/
|
||||
|
||||
#if defined(i386) || defined(__i386) || defined(__X86__) || defined (_M_IX86)
|
||||
/* X86 processors have special instructions that tell us about the cache */
|
||||
#include "string.h"
|
||||
|
||||
/* Generic CPUID function */
|
||||
#ifndef _WIN32
|
||||
/*
 * Run the CPUID instruction with 'op' loaded into EAX (the "0" input
 * constraint ties 'op' to the "=a" output operand) and hand the four result
 * registers back through eax, ebx, ecx and edx.
 */
static void cpuid(unsigned long op, unsigned long *eax,
|
||||
unsigned long *ebx, unsigned long *ecx,
|
||||
unsigned long *edx)
|
||||
{
|
||||
/* sigh GCC isn't smart enough to save the ebx PIC register on its own
 * in this case, so do it by hand. */
|
||||
/* ebx is pushed before and popped after CPUID; the value CPUID left in ebx
 * is first copied into operand %1, a register chosen by the compiler ("=r").
 * NOTE(review): ebx is not listed as clobbered, so nothing visible here
 * prevents the compiler from picking ebx itself for %1 -- confirm. */
__asm__("pushl %%ebx\n\t"
|
||||
"cpuid\n\t"
|
||||
"mov %%ebx,%1\n\t"
|
||||
"popl %%ebx\n\t"
|
||||
: "=a" (*eax),
|
||||
"=r" (*ebx),
|
||||
"=c" (*ecx),
|
||||
"=d" (*edx)
|
||||
: "0" (op));
|
||||
}
|
||||
|
||||
/*
 * Try flipping a processor flag to determine CPU type.
 *
 * Reads EFLAGS, toggles the bit(s) selected by 'flag', writes the result
 * back, reads EFLAGS again and finally restores the original value.
 *
 * Returns the set of bits that really changed: non-zero when the processor
 * let the flag be toggled, zero when it did not.
 */
static unsigned long changeFlag(unsigned long flag)
{
    unsigned long changedFlags, originalFlags;

    /*
     * AT&T operand order is "op source,destination": the toggled value has
     * to land in %0, because %0 is what gets pushed back into EFLAGS.
     * Both outputs are written before the input %2 is read, so they are
     * marked early-clobber ("=&r") to keep them out of flag's register.
     */
    __asm__("pushfl\n\t"            /* get the flags */
            "popl %0\n\t"
            "movl %0,%1\n\t"        /* save the original flags */
            "xorl %2,%0\n\t"        /* flip the bit */
            "pushl %0\n\t"          /* set the flags */
            "popfl\n\t"
            "pushfl\n\t"            /* get the flags again (for return) */
            "popl %0\n\t"
            "pushl %1\n\t"          /* restore the original flags */
            "popfl\n\t"
            : "=&r" (changedFlags),
              "=&r" (originalFlags)
            : "r" (flag)
            : "cc");
    return changedFlags ^ originalFlags;
}
|
||||
|
||||
#else
|
||||
|
||||
/*
|
||||
* windows versions of the above assembler
|
||||
*/
|
||||
/* Emits the two raw opcode bytes 0Fh A2h in place of a cpuid mnemonic. */
#define wcpuid __asm __emit 0fh __asm __emit 0a2h
|
||||
/*
 * Load 'op' into eax, execute wcpuid, and return the resulting eax, ebx,
 * ecx and edx values through Reax, Rebx, Recx and Redx.
 */
static void cpuid(unsigned long op, unsigned long *Reax, unsigned long *Rebx,
|
||||
unsigned long *Recx, unsigned long *Redx)
|
||||
{
|
||||
/* The results are staged in these locals inside the asm block and copied
 * out through the pointers only after popad has restored the registers. */
unsigned long Leax, Lebx, Lecx, Ledx;
|
||||
__asm {
|
||||
pushad
|
||||
mov eax,op
|
||||
wcpuid
|
||||
mov Leax,eax
|
||||
mov Lebx,ebx
|
||||
mov Lecx,ecx
|
||||
mov Ledx,edx
|
||||
popad
|
||||
}
|
||||
*Reax = Leax;
|
||||
*Rebx = Lebx;
|
||||
*Recx = Lecx;
|
||||
*Redx = Ledx;
|
||||
}
|
||||
|
||||
/*
 * Try to toggle the EFLAGS bit(s) selected by 'flag'.
 *
 * The flags are read, XORed with 'flag', written back, read again and then
 * restored to their original value. The return value is the set of bits
 * that differ between the second read and the original flags, i.e. zero
 * when the processor did not let the flag change.
 */
static unsigned long changeFlag(unsigned long flag)
|
||||
{
|
||||
unsigned long changedFlags, originalFlags;
|
||||
__asm {
|
||||
pushad
|
||||
pushfd /* get the flags */
|
||||
pop eax
|
||||
mov ecx,eax /* keep a copy of the original flags in ecx */
|
||||
mov originalFlags,ecx /* save the original flags */
|
||||
mov ebx,flag
|
||||
xor eax,ebx /* flip the bit */
|
||||
push eax /* set the flags */
|
||||
popfd
|
||||
pushfd /* get the flags again (for return) */
|
||||
pop eax
|
||||
push ecx /* restore the original flags */
|
||||
popfd
|
||||
mov changedFlags,eax
|
||||
popad
|
||||
}
|
||||
return changedFlags ^ originalFlags;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* EFLAGS bits probed by is386() and is486(). */
#define AC_FLAG 0x40000
#define ID_FLAG 0x200000

/*
 * 386 processors can't flip the AC_FLAG (Intel application note AP-485).
 * Returns non-zero when changeFlag() reports that AC_FLAG did not change.
 */
static int is386()
{
    if (changeFlag(AC_FLAG) != 0) {
        return 0;
    }
    return 1;
}
|
||||
|
||||
/* 486 processors can't flip the ID_FLAG, intel AP Note AP-485 */
|
||||
static int is486()
|
||||
{
|
||||
return changeFlag(ID_FLAG) == 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* table for Intel Cache.
|
||||
* See Intel Application Note AP-485 for more information
|
||||
*/
|
||||
/* Kind of structure a cache descriptor refers to; values run from 0 to 6. */
typedef enum {
    Cache_NONE,     /* 0 */
    Cache_UNKNOWN,  /* 1 */
    Cache_TLB,      /* 2 */
    Cache_Trace,    /* 3 */
    Cache_L1,       /* 4 */
    Cache_L2,       /* 5 */
    Cache_L3        /* 6 */
} CacheType;
|
||||
|
||||
/* What a cache entry holds; DATA_BOTH is DATA_INSTR and DATA_DATA combined. */
#define DATA_NONE  0
#define DATA_INSTR 1
#define DATA_DATA  2
#define DATA_BOTH  (DATA_INSTR | DATA_DATA)
#define DATA_TRACE 4

/* Page-size masks used in the size field of TLB entries. */
#define TLB_4k  0x01
#define TLB_2M  0x08
#define TLB_4M  0x10
#define TLB_4Mk (TLB_4k | TLB_4M)
#define TLB_ALL (TLB_4k | TLB_2M | TLB_4M)

/* Postfix size multipliers, written after a number: "8 k", "2 M". */
#define k * (1 << 10)
#define M * (1 << 20)
#define G * (1 << 30)
|
||||
|
||||
struct _cache {
|
||||
CacheType type;
|
||||
unsigned long data;
|
||||
#define pageSize size
|
||||
#define trcuops size
|
||||
unsigned long size;
|
||||
unsigned long association;
|
||||
#define tlbEntries lineSize
|
||||
unsigned long lineSize;
|
||||
} CacheMap[] = {
|
||||
/* 00 */ {Cache_NONE, DATA_NONE, 0, 0, 0 },
|
||||
/* 01 */ {Cache_TLB, DATA_INSTR, TLB_4k, 4, 32 },
|
||||
/* 02 */ {Cache_TLB, DATA_INSTR, TLB_4M, 0, 2 },
|
||||
/* 03 */ {Cache_TLB, DATA_DATA, TLB_4k, 4, 64 },
|
||||
/* 04 */ {Cache_TLB, DATA_DATA, TLB_4M, 4, 8 },
|
||||
/* 05 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 06 */ {Cache_L1, DATA_INSTR, 8 k, 4, 32 },
|
||||
/* 07 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 08 */ {Cache_L1, DATA_INSTR, 16 k, 4, 32 },
|
||||
/* 09 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 0a */ {Cache_L1, DATA_DATA, 8 k, 4, 32 },
|
||||
/* 0b */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 0c */ {Cache_L1, DATA_DATA, 16 k, 4, 32 },
|
||||
/* 0d */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 0e */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 0f */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 10 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 11 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 12 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 13 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 14 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 15 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 16 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 17 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 18 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 19 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 1a */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 1b */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 1c */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 1d */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 1e */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 1f */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 20 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 21 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 22 */ {Cache_L3, DATA_BOTH, 512 k, 8, 64 },
|
||||
/* 23 */ {Cache_L3, DATA_BOTH, 1 M, 8, 64 },
|
||||
/* 24 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 25 */ {Cache_L3, DATA_BOTH, 2 M, 8, 64 },
|
||||
/* 26 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 27 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 28 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 29 */ {Cache_L3, DATA_BOTH, 4 M, 8, 64 },
|
||||
/* 2a */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 2b */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 2c */ {Cache_L1, DATA_DATA, 32 k, 8, 64 },
|
||||
/* 2d */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 2e */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 2f */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 30 */ {Cache_L1, DATA_INSTR, 32 k, 8, 64 },
|
||||
/* 31 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 32 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 33 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 34 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 35 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 36 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 37 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 38 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 39 */ {Cache_L2, DATA_BOTH, 128 k, 4, 64 },
|
||||
/* 3a */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 3b */ {Cache_L2, DATA_BOTH, 128 k, 2, 64 },
|
||||
/* 3c */ {Cache_L2, DATA_BOTH, 256 k, 4, 64 },
|
||||
/* 3d */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 3e */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 3f */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 40 */ {Cache_L2, DATA_NONE, 0, 0, 0 },
|
||||
/* 41 */ {Cache_L2, DATA_BOTH, 128 k, 4, 32 },
|
||||
/* 42 */ {Cache_L2, DATA_BOTH, 256 k, 4, 32 },
|
||||
/* 43 */ {Cache_L2, DATA_BOTH, 512 k, 4, 32 },
|
||||
/* 44 */ {Cache_L2, DATA_BOTH, 1 M, 4, 32 },
|
||||
/* 45 */ {Cache_L2, DATA_BOTH, 2 M, 4, 32 },
|
||||
/* 46 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 47 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 48 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 49 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 4a */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 4b */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 4c */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 4d */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 4e */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 4f */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 50 */ {Cache_TLB, DATA_INSTR, TLB_ALL, 0, 64 },
|
||||
/* 51 */ {Cache_TLB, DATA_INSTR, TLB_ALL, 0, 128 },
|
||||
/* 52 */ {Cache_TLB, DATA_INSTR, TLB_ALL, 0, 256 },
|
||||
/* 53 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 54 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 55 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 56 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 57 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 58 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 59 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 5a */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 5b */ {Cache_TLB, DATA_DATA, TLB_4Mk, 0, 64 },
|
||||
/* 5c */ {Cache_TLB, DATA_DATA, TLB_4Mk, 0, 128 },
|
||||
/* 5d */ {Cache_TLB, DATA_DATA, TLB_4Mk, 0, 256 },
|
||||
/* 5e */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 5f */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 60 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 61 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 62 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 63 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 64 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 65 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 66 */ {Cache_L1, DATA_DATA, 8 k, 4, 64 },
|
||||
/* 67 */ {Cache_L1, DATA_DATA, 16 k, 4, 64 },
|
||||
/* 68 */ {Cache_L1, DATA_DATA, 32 k, 4, 64 },
|
||||
/* 69 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 6a */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 6b */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 6c */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 6d */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 6e */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 6f */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 70 */ {Cache_Trace, DATA_TRACE, 12 k, 8, 1 },
|
||||
/* 71 */ {Cache_Trace, DATA_TRACE, 16 k, 8, 1 },
|
||||
/* 72 */ {Cache_Trace, DATA_TRACE, 32 k, 8, 1 },
|
||||
/* 73 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 74 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 75 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 76 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 77 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 78 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 79 */ {Cache_L2, DATA_BOTH, 128 k, 8, 64 },
|
||||
/* 7a */ {Cache_L2, DATA_BOTH, 256 k, 8, 64 },
|
||||
/* 7b */ {Cache_L2, DATA_BOTH, 512 k, 8, 64 },
|
||||
/* 7c */ {Cache_L2, DATA_BOTH, 1 M, 8, 64 },
|
||||
/* 7d */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 7e */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 7f */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 80 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 81 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 82 */ {Cache_L2, DATA_BOTH, 256 k, 8, 32 },
|
||||
/* 83 */ {Cache_L2, DATA_BOTH, 512 k, 8, 32 },
|
||||
/* 84 */ {Cache_L2, DATA_BOTH, 1 M, 8, 32 },
|
||||
/* 85 */ {Cache_L2, DATA_BOTH, 2 M, 8, 32 },
|
||||
/* 86 */ {Cache_L2, DATA_BOTH, 512 k, 4, 64 },
|
||||
/* 87 */ {Cache_L2, DATA_BOTH, 1 M, 8, 64 },
|
||||
/* 88 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 89 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 8a */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 8b */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 8c */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 8d */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 8e */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 8f */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 90 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 91 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 92 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 93 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 94 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 95 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 96 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 97 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 98 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 99 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 9a */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 9b */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 9c */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 9d */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 9e */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* 9f */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* a0 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* a1 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* a2 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* a3 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* a4 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* a5 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* a6 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* a7 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* a8 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* a9 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* aa */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* ab */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* ac */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* ad */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* ae */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* af */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* b0 */ {Cache_TLB, DATA_INSTR, TLB_4k, 4, 128 },
|
||||
/* b1 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* b2 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* b3 */ {Cache_TLB, DATA_DATA, TLB_4k, 4, 128 },
|
||||
/* b4 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* b5 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* b6 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* b7 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* b8 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* b9 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* ba */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* bb */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* bc */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* bd */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* be */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* bf */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* c0 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* c1 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* c2 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* c3 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* c4 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* c5 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* c6 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* c7 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* c8 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* c9 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* ca */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* cb */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* cc */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* cd */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* ce */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* cf */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* d0 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* d1 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* d2 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* d3 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* d4 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* d5 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* d6 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* d7 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* d8 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* d9 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* da */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* db */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* dc */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* dd */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* de */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* df */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* e0 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* e1 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* e2 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* e3 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* e4 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* e5 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* e6 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* e7 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* e8 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* e9 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* ea */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* eb */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* ec */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* ed */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* ee */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* ef */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* f0 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* f1 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* f2 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* f3 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* f4 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* f5 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* f6 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* f7 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* f8 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* f9 */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* fa */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* fb */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* fc */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* fd */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* fe */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 },
|
||||
/* ff */ {Cache_UNKNOWN, DATA_NONE, 0, 0, 0 }
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* use the above table to determine the CacheEntryLineSize.
|
||||
*/
|
||||
static void
|
||||
getIntelCacheEntryLineSize(unsigned long val, int *level,
|
||||
unsigned long *lineSize)
|
||||
{
|
||||
CacheType type;
|
||||
|
||||
type = CacheMap[val].type;
|
||||
/* only interested in data caches */
|
||||
/* NOTE val = 0x40 is a special value that means no L2 or L3 cache.
|
||||
* this data check has the side effect of rejecting that entry. If
|
||||
* that wasn't the case, we could have to reject it explicitly */
|
||||
if ((CacheMap[val].data & DATA_DATA) != DATA_DATA) {
|
||||
return;
|
||||
}
|
||||
/* look at the caches, skip types we aren't interested in.
|
||||
* if we already have a value for a lower level cache, skip the
|
||||
* current entry */
|
||||
if (type == Cache_L1) {
|
||||
*level = 1;
|
||||
*lineSize = CacheMap[val].lineSize;
|
||||
} else if ((*level >= 2) && type == Cache_L2) {
|
||||
*level = 2;
|
||||
*lineSize = CacheMap[val].lineSize;
|
||||
} else if ((*level >= 3) && type == Cache_L3) {
|
||||
*level = 3;
|
||||
*lineSize = CacheMap[val].lineSize;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Split a 32-bit register value into its four descriptor bytes and pass
 * each one, most significant byte first, to getIntelCacheEntryLineSize().
 */
static void
getIntelRegisterCacheLineSize(unsigned long val,
                              int *level, unsigned long *lineSize)
{
    int shift;

    for (shift = 24; shift >= 0; shift -= 8) {
        getIntelCacheEntryLineSize((val >> shift) & 0xff, level, lineSize);
    }
}
|
||||
|
||||
/*
 * Determine the data cache line size from the cache descriptors returned
 * by cpuid command '2'.
 *
 * cpuidLevel is the highest cpuid command the processor accepts.
 *
 * returns '0' if no recognized cache is found, or if the cache
 * information is not supported by this processor (cpuidLevel < 2).
 */
static unsigned long
getIntelCacheLineSize(int cpuidLevel)
{
    int level = 4;              /* above L3: no cache recorded yet */
    unsigned long lineSize = 0;
    unsigned long eax, ebx, ecx, edx;
    int repeat, count;

    if (cpuidLevel < 2) {
        return 0;
    }

    /* command '2' of the cpuid is intel's cache info call. Each byte of the
     * 4 registers contain a potential descriptor for the cache. The CacheMap
     * table maps the cache entry with the processor cache. Register 'al'
     * contains a count value that cpuid '2' needs to be called in order to
     * find all the cache descriptors. Only registers with the high bit set
     * to 'zero' have valid descriptors. This code loops through all the
     * required calls to cpuid '2' and passes any valid descriptors it finds
     * to the getIntelRegisterCacheLineSize code, which breaks the registers
     * down into their component descriptors. In the end the lineSize of the
     * lowest level cache data cache is returned. */
    cpuid(2, &eax, &ebx, &ecx, &edx);
    repeat = eax & 0xff;        /* 'al' is the whole low byte of eax */
    for (count = 0; count < repeat; count++) {
        if ((eax & 0x80000000) == 0) {
            /* the low byte of eax is the count, not a descriptor */
            getIntelRegisterCacheLineSize(eax & 0xffffff00, &level, &lineSize);
        }
        if ((ebx & 0x80000000) == 0) {
            getIntelRegisterCacheLineSize(ebx, &level, &lineSize);
        }
        if ((ecx & 0x80000000) == 0) {
            getIntelRegisterCacheLineSize(ecx, &level, &lineSize);
        }
        if ((edx & 0x80000000) == 0) {
            getIntelRegisterCacheLineSize(edx, &level, &lineSize);
        }
        /* fetch the next set of descriptors unless this was the last pass */
        if (count+1 != repeat) {
            cpuid(2, &eax, &ebx, &ecx, &edx);
        }
    }
    return lineSize;
}
|
||||
|
||||
/*
 * Line size of the L1 data cache according to the extended cpuid commands.
 *
 * returns '0' if the cache info is not supported by this processor.
 * This is based on the AMD extended cache commands for cpuid.
 * (see "AMD Processor Recognition Application Note" Publication 20734).
 * Some other processors use the identical scheme.
 * (see "Processor Recognition, Transmeta Corporation").
 *
 * The incoming cpuidLevel value is not consulted; it is replaced by the
 * extended cpuid level reported by command 0x80000000.
 */
static unsigned long
getOtherCacheLineSize(unsigned long cpuidLevel)
{
    unsigned long eax, ebx, ecx, edx;

    /* get the Extended CPUID level */
    cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
    cpuidLevel = eax;

    if (cpuidLevel < 0x80000005) {
        return 0;
    }

    cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
    return ecx & 0xff; /* line Size, L1 Data Cache */
}
|
||||
|
||||
/*
 * Vendor identification strings compared against the 12-byte string built
 * from cpuid command 0. The macro defined in front of each string is that
 * string's index in manMap, so the values must stay in step with the order
 * of the entries.
 */
char *manMap[] = {
#define INTEL 0
    "GenuineIntel",
#define AMD 1
    "AuthenticAMD",
#define CYRIX 2
    "CyrixInstead",
#define CENTAUR 3
    "CentaurHauls",
#define NEXGEN 4
    "NexGenDriven",
#define TRANSMETA 5
    "GenuineTMx86",
#define RISE 6
    "RiseRiseRise",
#define UMC 7
    "UMC UMC UMC ",
#define SIS 8
    "Sis Sis Sis ",
#define NATIONAL 9
    "Geode by NSC",
};

/* number of entries in manMap */
int n_manufacturers = sizeof(manMap)/sizeof(manMap[0]);

/* one past the last valid manMap index, so it can never match an entry */
#define MAN_UNKNOWN 10
|
||||
|
||||
|
||||
unsigned long
|
||||
mpi_getProcessorLineSize()
|
||||
{
|
||||
unsigned long eax, ebx, ecx, edx;
|
||||
unsigned long cpuidLevel;
|
||||
unsigned long cacheLineSize = 0;
|
||||
int manufacturer = MAN_UNKNOWN;
|
||||
int i;
|
||||
char string[65];
|
||||
|
||||
if (is386()) {
|
||||
return 0; /* 386 had no cache */
|
||||
} if (is486()) {
|
||||
return 32; /* really? need more info */
|
||||
}
|
||||
|
||||
/* Pentium, cpuid command is available */
|
||||
cpuid(0, &eax, &ebx, &ecx, &edx);
|
||||
cpuidLevel = eax;
|
||||
*(int *)string = ebx;
|
||||
*(int *)&string[4] = edx;
|
||||
*(int *)&string[8] = ecx;
|
||||
string[12] = 0;
|
||||
|
||||
manufacturer = MAN_UNKNOWN;
|
||||
for (i=0; i < n_manufacturers; i++) {
|
||||
if ( strcmp(manMap[i],string) == 0) {
|
||||
manufacturer = i;
|
||||
}
|
||||
}
|
||||
|
||||
if (manufacturer == INTEL) {
|
||||
cacheLineSize = getIntelCacheLineSize(cpuidLevel);
|
||||
} else {
|
||||
cacheLineSize = getOtherCacheLineSize(cpuidLevel);
|
||||
}
|
||||
/* doesn't support cache info based on cpuid. This means
|
||||
* an old pentium class processor, which have cache lines of
|
||||
* 32. If we learn differently, we can use a switch based on
|
||||
* the Manufacturer id */
|
||||
if (cacheLineSize == 0) {
|
||||
cacheLineSize = 32;
|
||||
}
|
||||
return cacheLineSize;
|
||||
}
|
||||
#define MPI_GET_PROCESSER_LINE_SIZE_DEFINED 1
|
||||
#endif
|
||||
|
||||
#if defined(__ppc64__)
|
||||
/*
|
||||
* Sigh, The PPC has some really nice features to help us determine cache
|
||||
* size, since it had lots of direct control functions to do so. The POWER
|
||||
* processor even has an instruction to do this, but it was dropped in
|
||||
* PowerPC. Unfortunately most of them are not available in user mode.
|
||||
*
|
||||
* The dcbz function would be a great way to determine cache line size except
|
||||
* 1) it only works on write-back memory (it throws an exception otherwise),
|
||||
* and 2) because so many mac programs 'knew' the processor cache size was
|
||||
* 32 bytes, they used this instruction as a fast 'zero 32 bytes'. Now the new
|
||||
* G5 processor has 128 byte cache, but dcbz only clears 32 bytes to keep
|
||||
* these programs happy. dcbzl work if 64 bit instructions are supported.
|
||||
* If you know 64 bit instructions are supported, and that stack is
|
||||
* write-back, you can use this code.
|
||||
*/
|
||||
#include "memory.h"
|
||||
|
||||
/* clear the cache line that contains 'array' */
|
||||
static inline void dcbzl(char *array)
|
||||
{
|
||||
register char *a asm("r2") = array;
|
||||
__asm__ __volatile__( "dcbzl %0,r0" : "=r" (a): "0"(a) );
|
||||
}
|
||||
|
||||
|
||||
/* Round pointer x up to the next multiple of y (y must be a power of two). */
#define PPC_DO_ALIGN(x,y) ((char *)\
                   ((((long long) (x))+((y)-1))&~((y)-1)))

/* largest cache line size, in bytes, this probe can report */
#define PPC_MAX_LINE_SIZE 256

/*
 * ppc64 implementation: fill an aligned buffer with 0xff, clear one cache
 * block at its start with dcbzl(), then report how much of it was zeroed.
 * Returns 0 when no zeroed byte is found.
 */
unsigned long
mpi_getProcessorLineSize()
{
    char buffer[2*PPC_MAX_LINE_SIZE+1];
    char *line;
    int span;

    /* align on a maximum line size boundary, so we know the cleared block
     * starts at the first address */
    line = PPC_DO_ALIGN(buffer, PPC_MAX_LINE_SIZE);
    /* set all the values to 1's */
    memset(line, 0xff, PPC_MAX_LINE_SIZE);
    /* clear one cache block starting at 'line' */
    dcbzl(line);

    /* probe the last byte of each candidate size, largest first; the first
     * one found cleared gives the block size */
    span = PPC_MAX_LINE_SIZE;
    while (span != 0) {
        if (line[span-1] == 0) {
            return span;
        }
        span = span/2;
    }
    return 0;
}
|
||||
|
||||
#define MPI_GET_PROCESSER_LINE_SIZE_DEFINED 1
|
||||
#endif
|
||||
|
||||
|
||||
/*
 * put other processor and platform specific cache code here
 * return the smallest cache line size in bytes on the processor
 * (usually the L1 cache). If the OS has a call, this would be
 * a great place to put it.
 *
 * If there is no cache, return 0;
 *
 * define MPI_GET_PROCESSER_LINE_SIZE_DEFINED so the generic functions
 * below aren't compiled. (The macro name keeps the spelling "PROCESSER"
 * used by the #define lines of the platform specific sections; both guards
 * below must test exactly that name.)
 *
 */

/* target.mk can define LINE_SIZE if it's common for the family or OS */
#if defined(LINE_SIZE) && !defined(MPI_GET_PROCESSER_LINE_SIZE_DEFINED)

unsigned long
mpi_getProcessorLineSize()
{
    return LINE_SIZE;
}
#define MPI_GET_PROCESSER_LINE_SIZE_DEFINED 1
#endif

/* If no way to get the processor cache line size has been defined, assume
 * it's 32 bytes (most common value, does not significantly impact
 * performance).
 */
#ifndef MPI_GET_PROCESSER_LINE_SIZE_DEFINED
unsigned long
mpi_getProcessorLineSize()
{
    return 32;
}
#endif
|
||||
|
||||
#ifdef TEST_IT
#include <stdio.h>

/* Stand-alone check: print the line size reported for this processor. */
int main(void)
{
    /* mpi_getProcessorLineSize() returns unsigned long, hence %lu */
    printf("line size = %lu\n", mpi_getProcessorLineSize());
    return 0;
}
#endif
|
||||
@@ -1,112 +0,0 @@
|
||||
/* Default configuration for MPI library
|
||||
*
|
||||
* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* Michael J. Fromberger.
|
||||
* Portions created by the Initial Developer are Copyright (C) 1997
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
* Netscape Communications Corporation
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
/* $Id: mpi-config.h,v 1.5 2004-04-25 15:03:10 gerv%gerv.net Exp $ */
|
||||
|
||||
#ifndef MPI_CONFIG_H_
|
||||
#define MPI_CONFIG_H_
|
||||
|
||||
/*
|
||||
For boolean options,
|
||||
0 = no
|
||||
1 = yes
|
||||
|
||||
Other options are documented individually.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef MP_IOFUNC
|
||||
#define MP_IOFUNC 0 /* include mp_print() ? */
|
||||
#endif
|
||||
|
||||
#ifndef MP_MODARITH
|
||||
#define MP_MODARITH 1 /* include modular arithmetic ? */
|
||||
#endif
|
||||
|
||||
#ifndef MP_NUMTH
|
||||
#define MP_NUMTH 1 /* include number theoretic functions? */
|
||||
#endif
|
||||
|
||||
#ifndef MP_LOGTAB
|
||||
#define MP_LOGTAB 1 /* use table of logs instead of log()? */
|
||||
#endif
|
||||
|
||||
#ifndef MP_MEMSET
|
||||
#define MP_MEMSET 1 /* use memset() to zero buffers? */
|
||||
#endif
|
||||
|
||||
#ifndef MP_MEMCPY
|
||||
#define MP_MEMCPY 1 /* use memcpy() to copy buffers? */
|
||||
#endif
|
||||
|
||||
#ifndef MP_CRYPTO
|
||||
#define MP_CRYPTO 1 /* erase memory on free? */
|
||||
#endif
|
||||
|
||||
#ifndef MP_ARGCHK
|
||||
/*
|
||||
0 = no parameter checks
|
||||
1 = runtime checks, continue execution and return an error to caller
|
||||
2 = assertions; dump core on parameter errors
|
||||
*/
|
||||
#ifdef DEBUG
|
||||
#define MP_ARGCHK 2 /* how to check input arguments */
|
||||
#else
|
||||
#define MP_ARGCHK 1 /* how to check input arguments */
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef MP_DEBUG
|
||||
#define MP_DEBUG 0 /* print diagnostic output? */
|
||||
#endif
|
||||
|
||||
#ifndef MP_DEFPREC
|
||||
#define MP_DEFPREC 64 /* default precision, in digits */
|
||||
#endif
|
||||
|
||||
#ifndef MP_MACRO
|
||||
#define MP_MACRO 0 /* use macros for frequent calls? */
|
||||
#endif
|
||||
|
||||
#ifndef MP_SQUARE
|
||||
#define MP_SQUARE 1 /* use separate squaring code? */
|
||||
#endif
|
||||
|
||||
#endif /* ifndef MPI_CONFIG_H_ */
|
||||
|
||||
|
||||
@@ -1,289 +0,0 @@
|
||||
/*
|
||||
* mpi-priv.h - Private header file for MPI
|
||||
* Arbitrary precision integer arithmetic library
|
||||
*
|
||||
* NOTE WELL: the content of this header file is NOT part of the "public"
|
||||
* API for the MPI library, and may change at any time.
|
||||
* Application programs that use libmpi should NOT include this header file.
|
||||
*
|
||||
* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* Michael J. Fromberger.
|
||||
* Portions created by the Initial Developer are Copyright (C) 1998
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
* Netscape Communications Corporation
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
/* $Id: mpi-priv.h,v 1.18 2005-02-25 04:30:11 julien.pierre.bugs%sun.com Exp $ */
|
||||
#ifndef _MPI_PRIV_H_
|
||||
#define _MPI_PRIV_H_ 1
|
||||
|
||||
#include "mpi.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#if MP_DEBUG
|
||||
#include <stdio.h>
|
||||
|
||||
#define DIAG(T,V) {fprintf(stderr,T);mp_print(V,stderr);fputc('\n',stderr);}
|
||||
#else
|
||||
#define DIAG(T,V)
|
||||
#endif
|
||||
|
||||
/* If we aren't using a wired-in logarithm table, we need to include
|
||||
the math library to get the log() function
|
||||
*/
|
||||
|
||||
/* {{{ s_logv_2[] - log table for 2 in various bases */
|
||||
|
||||
#if MP_LOGTAB
|
||||
/*
|
||||
A table of the logs of 2 for various bases (the 0 and 1 entries of
|
||||
this table are meaningless and should not be referenced).
|
||||
|
||||
This table is used to compute output lengths for the mp_toradix()
|
||||
function. Since a number n in radix r takes up about log_r(n)
|
||||
digits, we estimate the output size by taking the least integer
|
||||
greater than log_r(n), where:
|
||||
|
||||
log_r(n) = log_2(n) * log_r(2)
|
||||
|
||||
This table, therefore, is a table of log_r(2) for 2 <= r <= 36,
|
||||
which are the output bases supported.
|
||||
*/
|
||||
|
||||
extern const float s_logv_2[];
|
||||
#define LOG_V_2(R) s_logv_2[(R)]
|
||||
|
||||
#else
|
||||
|
||||
/*
|
||||
If MP_LOGTAB is zero, use the math library to compute the
|
||||
logarithms on the fly. Otherwise, use the table.
|
||||
Pick which works best for your system.
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#define LOG_V_2(R) (log(2.0)/log(R))
|
||||
|
||||
#endif /* if MP_LOGTAB */
|
||||
|
||||
/* }}} */
|
||||
|
||||
/* {{{ Digit arithmetic macros */
|
||||
|
||||
/*
|
||||
When adding and multiplying digits, the results can be larger than
|
||||
can be contained in an mp_digit. Thus, an mp_word is used. These
|
||||
macros mask off the upper and lower digits of the mp_word (the
|
||||
mp_word may be more than 2 mp_digits wide, but we only concern
|
||||
ourselves with the low-order 2 mp_digits)
|
||||
*/
|
||||
|
||||
#define CARRYOUT(W) (mp_digit)((W)>>DIGIT_BIT)
|
||||
#define ACCUM(W) (mp_digit)(W)
|
||||
|
||||
#define MP_MIN(a,b) (((a) < (b)) ? (a) : (b))
|
||||
#define MP_MAX(a,b) (((a) > (b)) ? (a) : (b))
|
||||
#define MP_HOWMANY(a,b) (((a) + (b) - 1)/(b))
|
||||
#define MP_ROUNDUP(a,b) (MP_HOWMANY(a,b) * (b))
|
||||
|
||||
/* }}} */
|
||||
|
||||
/* {{{ Comparison constants */
|
||||
|
||||
#define MP_LT -1
|
||||
#define MP_EQ 0
|
||||
#define MP_GT 1
|
||||
|
||||
/* }}} */
|
||||
|
||||
/* {{{ private function declarations */
|
||||
|
||||
/*
|
||||
If MP_MACRO is false, these will be defined as actual functions;
|
||||
otherwise, suitable macro definitions will be used. This works
|
||||
around the fact that ANSI C89 doesn't support an 'inline' keyword
|
||||
(although I hear C9x will ... about bloody time). At present, the
|
||||
macro definitions are identical to the function bodies, but they'll
|
||||
expand in place, instead of generating a function call.
|
||||
|
||||
I chose these particular functions to be made into macros because
|
||||
some profiling showed they are called a lot on a typical workload,
|
||||
and yet they are primarily housekeeping.
|
||||
*/
|
||||
#if MP_MACRO == 0
|
||||
void s_mp_setz(mp_digit *dp, mp_size count); /* zero digits */
|
||||
void s_mp_copy(const mp_digit *sp, mp_digit *dp, mp_size count); /* copy */
|
||||
void *s_mp_alloc(size_t nb, size_t ni); /* general allocator */
|
||||
void s_mp_free(void *ptr); /* general free function */
|
||||
extern unsigned long mp_allocs;
|
||||
extern unsigned long mp_frees;
|
||||
extern unsigned long mp_copies;
|
||||
#else
|
||||
|
||||
/* Even if these are defined as macros, we need to respect the settings
|
||||
of the MP_MEMSET and MP_MEMCPY configuration options...
|
||||
*/
|
||||
#if MP_MEMSET == 0
|
||||
#define s_mp_setz(dp, count) \
|
||||
{int ix;for(ix=0;ix<(count);ix++)(dp)[ix]=0;}
|
||||
#else
|
||||
#define s_mp_setz(dp, count) memset(dp, 0, (count) * sizeof(mp_digit))
|
||||
#endif /* MP_MEMSET */
|
||||
|
||||
#if MP_MEMCPY == 0
|
||||
#define s_mp_copy(sp, dp, count) \
|
||||
{int ix;for(ix=0;ix<(count);ix++)(dp)[ix]=(sp)[ix];}
|
||||
#else
|
||||
#define s_mp_copy(sp, dp, count) memcpy(dp, sp, (count) * sizeof(mp_digit))
|
||||
#endif /* MP_MEMCPY */
|
||||
|
||||
#define s_mp_alloc(nb, ni) calloc(nb, ni)
|
||||
#define s_mp_free(ptr) {if(ptr) free(ptr);}
|
||||
#endif /* MP_MACRO */
|
||||
|
||||
mp_err s_mp_grow(mp_int *mp, mp_size min); /* increase allocated size */
|
||||
mp_err s_mp_pad(mp_int *mp, mp_size min); /* left pad with zeroes */
|
||||
|
||||
#if MP_MACRO == 0
|
||||
void s_mp_clamp(mp_int *mp); /* clip leading zeroes */
|
||||
#else
|
||||
#define s_mp_clamp(mp)\
|
||||
{ mp_size used = MP_USED(mp); \
|
||||
while (used > 1 && DIGIT(mp, used - 1) == 0) --used; \
|
||||
MP_USED(mp) = used; \
|
||||
}
|
||||
#endif /* MP_MACRO */
|
||||
|
||||
void s_mp_exch(mp_int *a, mp_int *b); /* swap a and b in place */
|
||||
|
||||
mp_err s_mp_lshd(mp_int *mp, mp_size p); /* left-shift by p digits */
|
||||
void s_mp_rshd(mp_int *mp, mp_size p); /* right-shift by p digits */
|
||||
mp_err s_mp_mul_2d(mp_int *mp, mp_digit d); /* multiply by 2^d in place */
|
||||
void s_mp_div_2d(mp_int *mp, mp_digit d); /* divide by 2^d in place */
|
||||
void s_mp_mod_2d(mp_int *mp, mp_digit d); /* modulo 2^d in place */
|
||||
void s_mp_div_2(mp_int *mp); /* divide by 2 in place */
|
||||
mp_err s_mp_mul_2(mp_int *mp); /* multiply by 2 in place */
|
||||
mp_err s_mp_norm(mp_int *a, mp_int *b, mp_digit *pd);
|
||||
/* normalize for division */
|
||||
mp_err s_mp_add_d(mp_int *mp, mp_digit d); /* unsigned digit addition */
|
||||
mp_err s_mp_sub_d(mp_int *mp, mp_digit d); /* unsigned digit subtract */
|
||||
mp_err s_mp_mul_d(mp_int *mp, mp_digit d); /* unsigned digit multiply */
|
||||
mp_err s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r);
|
||||
/* unsigned digit divide */
|
||||
mp_err s_mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu);
|
||||
/* Barrett reduction */
|
||||
mp_err s_mp_add(mp_int *a, const mp_int *b); /* magnitude addition */
|
||||
mp_err s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c);
|
||||
mp_err s_mp_sub(mp_int *a, const mp_int *b); /* magnitude subtract */
|
||||
mp_err s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c);
|
||||
mp_err s_mp_add_offset(mp_int *a, mp_int *b, mp_size offset);
|
||||
/* a += b * RADIX^offset */
|
||||
mp_err s_mp_mul(mp_int *a, const mp_int *b); /* magnitude multiply */
|
||||
#if MP_SQUARE
|
||||
mp_err s_mp_sqr(mp_int *a); /* magnitude square */
|
||||
#else
|
||||
#define s_mp_sqr(a) s_mp_mul(a, a)
|
||||
#endif
|
||||
mp_err s_mp_div(mp_int *rem, mp_int *div, mp_int *quot); /* magnitude div */
|
||||
mp_err s_mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
|
||||
mp_err s_mp_2expt(mp_int *a, mp_digit k); /* a = 2^k */
|
||||
int s_mp_cmp(const mp_int *a, const mp_int *b); /* magnitude comparison */
|
||||
int s_mp_cmp_d(const mp_int *a, mp_digit d); /* magnitude digit compare */
|
||||
int s_mp_ispow2(const mp_int *v); /* is v a power of 2? */
|
||||
int s_mp_ispow2d(mp_digit d); /* is d a power of 2? */
|
||||
|
||||
int s_mp_tovalue(char ch, int r); /* convert ch to value */
|
||||
char s_mp_todigit(mp_digit val, int r, int low); /* convert val to digit */
|
||||
int s_mp_outlen(int bits, int r); /* output length in bytes */
|
||||
mp_digit s_mp_invmod_radix(mp_digit P); /* returns (P ** -1) mod RADIX */
|
||||
mp_err s_mp_invmod_odd_m( const mp_int *a, const mp_int *m, mp_int *c);
|
||||
mp_err s_mp_invmod_2d( const mp_int *a, mp_size k, mp_int *c);
|
||||
mp_err s_mp_invmod_even_m(const mp_int *a, const mp_int *m, mp_int *c);
|
||||
|
||||
/* ------ mpv functions, operate on arrays of digits, not on mp_int's ------ */
|
||||
#if defined (__OS2__) && defined (__IBMC__)
|
||||
#define MPI_ASM_DECL __cdecl
|
||||
#else
|
||||
#define MPI_ASM_DECL
|
||||
#endif
|
||||
|
||||
#ifdef MPI_AMD64
|
||||
|
||||
mp_digit MPI_ASM_DECL s_mpv_mul_set_vec64(mp_digit*, mp_digit *, mp_size, mp_digit);
|
||||
mp_digit MPI_ASM_DECL s_mpv_mul_add_vec64(mp_digit*, const mp_digit*, mp_size, mp_digit);
|
||||
|
||||
/* c = a * b */
|
||||
#define s_mpv_mul_d(a, a_len, b, c) \
|
||||
((unsigned long*)c)[a_len] = s_mpv_mul_set_vec64(c, a, a_len, b)
|
||||
|
||||
/* c += a * b */
|
||||
#define s_mpv_mul_d_add(a, a_len, b, c) \
|
||||
((unsigned long*)c)[a_len] = s_mpv_mul_add_vec64(c, a, a_len, b)
|
||||
|
||||
#else
|
||||
|
||||
void MPI_ASM_DECL s_mpv_mul_d(const mp_digit *a, mp_size a_len,
|
||||
mp_digit b, mp_digit *c);
|
||||
void MPI_ASM_DECL s_mpv_mul_d_add(const mp_digit *a, mp_size a_len,
|
||||
mp_digit b, mp_digit *c);
|
||||
|
||||
#endif
|
||||
|
||||
void MPI_ASM_DECL s_mpv_mul_d_add_prop(const mp_digit *a,
|
||||
mp_size a_len, mp_digit b,
|
||||
mp_digit *c);
|
||||
void MPI_ASM_DECL s_mpv_sqr_add_prop(const mp_digit *a,
|
||||
mp_size a_len,
|
||||
mp_digit *sqrs);
|
||||
|
||||
mp_err MPI_ASM_DECL s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo,
|
||||
mp_digit divisor, mp_digit *quot, mp_digit *rem);
|
||||
|
||||
/* c += a * b * (MP_RADIX ** offset); */
|
||||
#define s_mp_mul_d_add_offset(a, b, c, off) \
|
||||
(s_mpv_mul_d_add_prop(MP_DIGITS(a), MP_USED(a), b, MP_DIGITS(c) + off), MP_OKAY)
|
||||
|
||||
/*
 * Per-modulus parameters for the Montgomery routines declared just below:
 * s_mp_mul_mont() and s_mp_redc() each take a pointer to one of these.
 * The field comments give the relationships between N, n0' and b.
 */
typedef struct {
|
||||
mp_int N; /* modulus N */
|
||||
mp_digit n0prime; /* n0' = - (n0 ** -1) mod MP_RADIX */
|
||||
mp_size b; /* R == 2 ** b, also b = # significant bits in N */
|
||||
} mp_mont_modulus;
|
||||
|
||||
mp_err s_mp_mul_mont(const mp_int *a, const mp_int *b, mp_int *c,
|
||||
mp_mont_modulus *mmm);
|
||||
mp_err s_mp_redc(mp_int *T, mp_mont_modulus *mmm);
|
||||
|
||||
/* }}} */
|
||||
#endif
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,349 +0,0 @@
|
||||
/*
|
||||
* mpi.h
|
||||
*
|
||||
* Arbitrary precision integer arithmetic library
|
||||
*
|
||||
* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* Michael J. Fromberger.
|
||||
* Portions created by the Initial Developer are Copyright (C) 1998
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
* Netscape Communications Corporation
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
/* $Id: mpi.h,v 1.22.8.1 2005-06-09 20:44:58 relyea%netscape.com Exp $ */
|
||||
|
||||
#ifndef _H_MPI_
|
||||
#define _H_MPI_
|
||||
|
||||
#include "mpi-config.h"
|
||||
|
||||
#if MP_DEBUG
|
||||
#undef MP_IOFUNC
|
||||
#define MP_IOFUNC 1
|
||||
#endif
|
||||
|
||||
#if MP_IOFUNC
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
#endif
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
#if defined(BSDI)
|
||||
#undef ULLONG_MAX
|
||||
#endif
|
||||
|
||||
#if defined( macintosh )
|
||||
#include <Types.h>
|
||||
#elif defined( _WIN32_WCE)
|
||||
/* #include <sys/types.h> What do we need here ?? */
|
||||
#else
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
|
||||
#define MP_NEG 1
|
||||
#define MP_ZPOS 0
|
||||
|
||||
#define MP_OKAY 0 /* no error, all is well */
|
||||
#define MP_YES 0 /* yes (boolean result) */
|
||||
#define MP_NO -1 /* no (boolean result) */
|
||||
#define MP_MEM -2 /* out of memory */
|
||||
#define MP_RANGE -3 /* argument out of range */
|
||||
#define MP_BADARG -4 /* invalid parameter */
|
||||
#define MP_UNDEF -5 /* answer is undefined */
|
||||
#define MP_LAST_CODE MP_UNDEF
|
||||
|
||||
typedef unsigned int mp_sign;
|
||||
typedef unsigned int mp_size;
|
||||
typedef int mp_err;
|
||||
|
||||
#define MP_32BIT_MAX 4294967295U
|
||||
|
||||
#if !defined(ULONG_MAX)
|
||||
#error "ULONG_MAX not defined"
|
||||
#elif !defined(UINT_MAX)
|
||||
#error "UINT_MAX not defined"
|
||||
#elif !defined(USHRT_MAX)
|
||||
#error "USHRT_MAX not defined"
|
||||
#endif
|
||||
|
||||
#if defined(ULONG_LONG_MAX) /* GCC, HPUX */
|
||||
#define MP_ULONG_LONG_MAX ULONG_LONG_MAX
|
||||
#elif defined(ULLONG_MAX) /* Solaris */
|
||||
#define MP_ULONG_LONG_MAX ULLONG_MAX
|
||||
/* MP_ULONG_LONG_MAX was defined to be ULLONG_MAX */
|
||||
#elif defined(ULONGLONG_MAX) /* IRIX, AIX */
|
||||
#define MP_ULONG_LONG_MAX ULONGLONG_MAX
|
||||
#endif
|
||||
|
||||
/* We use unsigned long for mp_digit iff long is more than 32 bits. */
|
||||
#if !defined(MP_USE_UINT_DIGIT) && ULONG_MAX > MP_32BIT_MAX
|
||||
typedef unsigned long mp_digit;
|
||||
#define MP_DIGIT_MAX ULONG_MAX
|
||||
#define MP_DIGIT_FMT "%016lX" /* printf() format for 1 digit */
|
||||
#define MP_HALF_DIGIT_MAX UINT_MAX
|
||||
#undef MP_NO_MP_WORD
|
||||
#define MP_NO_MP_WORD 1
|
||||
#undef MP_USE_LONG_DIGIT
|
||||
#define MP_USE_LONG_DIGIT 1
|
||||
#undef MP_USE_LONG_LONG_DIGIT
|
||||
|
||||
#elif !defined(MP_USE_UINT_DIGIT) && defined(MP_ULONG_LONG_MAX)
|
||||
typedef unsigned long long mp_digit;
|
||||
#define MP_DIGIT_MAX MP_ULONG_LONG_MAX
|
||||
#define MP_DIGIT_FMT "%016llX" /* printf() format for 1 digit */
|
||||
#define MP_HALF_DIGIT_MAX UINT_MAX
|
||||
#undef MP_NO_MP_WORD
|
||||
#define MP_NO_MP_WORD 1
|
||||
#undef MP_USE_LONG_LONG_DIGIT
|
||||
#define MP_USE_LONG_LONG_DIGIT 1
|
||||
#undef MP_USE_LONG_DIGIT
|
||||
|
||||
#else
|
||||
typedef unsigned int mp_digit;
|
||||
#define MP_DIGIT_MAX UINT_MAX
|
||||
#define MP_DIGIT_FMT "%08X" /* printf() format for 1 digit */
|
||||
#define MP_HALF_DIGIT_MAX USHRT_MAX
|
||||
#undef MP_USE_UINT_DIGIT
|
||||
#define MP_USE_UINT_DIGIT 1
|
||||
#undef MP_USE_LONG_LONG_DIGIT
|
||||
#undef MP_USE_LONG_DIGIT
|
||||
#endif
|
||||
|
||||
#if !defined(MP_NO_MP_WORD)
|
||||
#if defined(MP_USE_UINT_DIGIT) && \
|
||||
(defined(MP_ULONG_LONG_MAX) || (ULONG_MAX > UINT_MAX))
|
||||
|
||||
#if (ULONG_MAX > UINT_MAX)
|
||||
typedef unsigned long mp_word;
|
||||
typedef long mp_sword;
|
||||
#define MP_WORD_MAX ULONG_MAX
|
||||
|
||||
#else
|
||||
typedef unsigned long long mp_word;
|
||||
typedef long long mp_sword;
|
||||
#define MP_WORD_MAX MP_ULONG_LONG_MAX
|
||||
#endif
|
||||
|
||||
#else
|
||||
#define MP_NO_MP_WORD 1
|
||||
#endif
|
||||
#endif /* !defined(MP_NO_MP_WORD) */
|
||||
|
||||
#if !defined(MP_WORD_MAX) && defined(MP_DEFINE_SMALL_WORD)
|
||||
typedef unsigned int mp_word;
|
||||
typedef int mp_sword;
|
||||
#define MP_WORD_MAX UINT_MAX
|
||||
#endif
|
||||
|
||||
#define MP_DIGIT_BIT (CHAR_BIT*sizeof(mp_digit))
|
||||
#define MP_WORD_BIT (CHAR_BIT*sizeof(mp_word))
|
||||
#define MP_RADIX (1+(mp_word)MP_DIGIT_MAX)
|
||||
|
||||
#define MP_HALF_DIGIT_BIT (MP_DIGIT_BIT/2)
|
||||
#define MP_HALF_RADIX (1+(mp_digit)MP_HALF_DIGIT_MAX)
|
||||
/* MP_HALF_RADIX really ought to be called MP_SQRT_RADIX, but it's named
|
||||
** MP_HALF_RADIX because it's the radix for MP_HALF_DIGITs, and it's
|
||||
** consistent with the other _HALF_ names.
|
||||
*/
|
||||
|
||||
|
||||
/* Macros for accessing the mp_int internals */
|
||||
#define MP_SIGN(MP) ((MP)->sign)
|
||||
#define MP_USED(MP) ((MP)->used)
|
||||
#define MP_ALLOC(MP) ((MP)->alloc)
|
||||
#define MP_DIGITS(MP) ((MP)->dp)
|
||||
#define MP_DIGIT(MP,N) (MP)->dp[(N)]
|
||||
|
||||
/* This defines the maximum I/O base (minimum is 2) */
|
||||
#define MP_MAX_RADIX 64
|
||||
|
||||
/*
 * The arbitrary-precision integer type used throughout the library:
 * a sign, a pointer to a digit buffer, and two counts describing how many
 * digits of that buffer are allocated and how many are in use.
 * The MP_SIGN / MP_USED / MP_ALLOC / MP_DIGITS / MP_DIGIT macros above
 * access these fields.
 * NOTE(review): sign presumably holds MP_NEG or MP_ZPOS -- confirm.
 */
typedef struct {
|
||||
mp_sign sign; /* sign of this quantity */
|
||||
mp_size alloc; /* how many digits allocated */
|
||||
mp_size used; /* how many digits used */
|
||||
mp_digit *dp; /* the digits themselves */
|
||||
} mp_int;
|
||||
|
||||
/* Default precision */
|
||||
mp_size mp_get_prec(void);
|
||||
void mp_set_prec(mp_size prec);
|
||||
|
||||
/* Memory management */
|
||||
mp_err mp_init(mp_int *mp);
|
||||
mp_err mp_init_size(mp_int *mp, mp_size prec);
|
||||
mp_err mp_init_copy(mp_int *mp, const mp_int *from);
|
||||
mp_err mp_copy(const mp_int *from, mp_int *to);
|
||||
void mp_exch(mp_int *mp1, mp_int *mp2);
|
||||
void mp_clear(mp_int *mp);
|
||||
void mp_zero(mp_int *mp);
|
||||
void mp_set(mp_int *mp, mp_digit d);
|
||||
mp_err mp_set_int(mp_int *mp, long z);
|
||||
#define mp_set_long(mp,z) mp_set_int(mp,z)
|
||||
mp_err mp_set_ulong(mp_int *mp, unsigned long z);
|
||||
|
||||
/* Single digit arithmetic */
|
||||
mp_err mp_add_d(const mp_int *a, mp_digit d, mp_int *b);
|
||||
mp_err mp_sub_d(const mp_int *a, mp_digit d, mp_int *b);
|
||||
mp_err mp_mul_d(const mp_int *a, mp_digit d, mp_int *b);
|
||||
mp_err mp_mul_2(const mp_int *a, mp_int *c);
|
||||
mp_err mp_div_d(const mp_int *a, mp_digit d, mp_int *q, mp_digit *r);
|
||||
mp_err mp_div_2(const mp_int *a, mp_int *c);
|
||||
mp_err mp_expt_d(const mp_int *a, mp_digit d, mp_int *c);
|
||||
|
||||
/* Sign manipulations */
|
||||
mp_err mp_abs(const mp_int *a, mp_int *b);
|
||||
mp_err mp_neg(const mp_int *a, mp_int *b);
|
||||
|
||||
/* Full arithmetic */
|
||||
mp_err mp_add(const mp_int *a, const mp_int *b, mp_int *c);
|
||||
mp_err mp_sub(const mp_int *a, const mp_int *b, mp_int *c);
|
||||
mp_err mp_mul(const mp_int *a, const mp_int *b, mp_int *c);
|
||||
#if MP_SQUARE
|
||||
mp_err mp_sqr(const mp_int *a, mp_int *b);
|
||||
#else
|
||||
#define mp_sqr(a, b) mp_mul(a, a, b)
|
||||
#endif
|
||||
mp_err mp_div(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r);
|
||||
mp_err mp_div_2d(const mp_int *a, mp_digit d, mp_int *q, mp_int *r);
|
||||
mp_err mp_expt(mp_int *a, mp_int *b, mp_int *c);
|
||||
mp_err mp_2expt(mp_int *a, mp_digit k);
|
||||
mp_err mp_sqrt(const mp_int *a, mp_int *b);
|
||||
|
||||
/* Modular arithmetic */
|
||||
#if MP_MODARITH
|
||||
mp_err mp_mod(const mp_int *a, const mp_int *m, mp_int *c);
|
||||
mp_err mp_mod_d(const mp_int *a, mp_digit d, mp_digit *c);
|
||||
mp_err mp_addmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
|
||||
mp_err mp_submod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
|
||||
mp_err mp_mulmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
|
||||
#if MP_SQUARE
|
||||
mp_err mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c);
|
||||
#else
|
||||
#define mp_sqrmod(a, m, c) mp_mulmod(a, a, m, c)
|
||||
#endif
|
||||
mp_err mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
|
||||
mp_err mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c);
|
||||
#endif /* MP_MODARITH */
|
||||
|
||||
/* Comparisons */
|
||||
int mp_cmp_z(const mp_int *a);
|
||||
int mp_cmp_d(const mp_int *a, mp_digit d);
|
||||
int mp_cmp(const mp_int *a, const mp_int *b);
|
||||
int mp_cmp_mag(mp_int *a, mp_int *b);
|
||||
int mp_cmp_int(const mp_int *a, long z);
|
||||
int mp_isodd(const mp_int *a);
|
||||
int mp_iseven(const mp_int *a);
|
||||
|
||||
/* Number theoretic */
|
||||
#if MP_NUMTH
|
||||
mp_err mp_gcd(mp_int *a, mp_int *b, mp_int *c);
|
||||
mp_err mp_lcm(mp_int *a, mp_int *b, mp_int *c);
|
||||
mp_err mp_xgcd(const mp_int *a, const mp_int *b, mp_int *g, mp_int *x, mp_int *y);
|
||||
mp_err mp_invmod(const mp_int *a, const mp_int *m, mp_int *c);
|
||||
mp_err mp_invmod_xgcd(const mp_int *a, const mp_int *m, mp_int *c);
|
||||
#endif /* end MP_NUMTH */
|
||||
|
||||
/* Input and output */
|
||||
#if MP_IOFUNC
|
||||
void mp_print(mp_int *mp, FILE *ofp);
|
||||
#endif /* end MP_IOFUNC */
|
||||
|
||||
/* Base conversion */
|
||||
mp_err mp_read_raw(mp_int *mp, char *str, int len);
|
||||
int mp_raw_size(mp_int *mp);
|
||||
mp_err mp_toraw(mp_int *mp, char *str);
|
||||
mp_err mp_read_radix(mp_int *mp, const char *str, int radix);
|
||||
mp_err mp_read_variable_radix(mp_int *a, const char * str, int default_radix);
|
||||
int mp_radix_size(mp_int *mp, int radix);
|
||||
mp_err mp_toradix(mp_int *mp, char *str, int radix);
|
||||
int mp_tovalue(char ch, int r);
|
||||
|
||||
#define mp_tobinary(M, S) mp_toradix((M), (S), 2)
|
||||
#define mp_tooctal(M, S) mp_toradix((M), (S), 8)
|
||||
#define mp_todecimal(M, S) mp_toradix((M), (S), 10)
|
||||
#define mp_tohex(M, S) mp_toradix((M), (S), 16)
|
||||
|
||||
/* Error strings */
|
||||
const char *mp_strerror(mp_err ec);
|
||||
|
||||
/* Octet string conversion functions */
|
||||
mp_err mp_read_unsigned_octets(mp_int *mp, const unsigned char *str, mp_size len);
|
||||
int mp_unsigned_octet_size(const mp_int *mp);
|
||||
mp_err mp_to_unsigned_octets(const mp_int *mp, unsigned char *str, mp_size maxlen);
|
||||
mp_err mp_to_signed_octets(const mp_int *mp, unsigned char *str, mp_size maxlen);
|
||||
mp_err mp_to_fixlen_octets(const mp_int *mp, unsigned char *str, mp_size len);
|
||||
|
||||
/* Miscellaneous */
|
||||
mp_size mp_trailing_zeros(const mp_int *mp);
|
||||
|
||||
#define MP_CHECKOK(x) if (MP_OKAY > (res = (x))) goto CLEANUP
|
||||
#define MP_CHECKERR(x) if (MP_OKAY > (res = (x))) goto CLEANUP
|
||||
|
||||
#if defined(MP_API_COMPATIBLE)
|
||||
#define NEG MP_NEG
|
||||
#define ZPOS MP_ZPOS
|
||||
#define DIGIT_MAX MP_DIGIT_MAX
|
||||
#define DIGIT_BIT MP_DIGIT_BIT
|
||||
#define DIGIT_FMT MP_DIGIT_FMT
|
||||
#define RADIX MP_RADIX
|
||||
#define MAX_RADIX MP_MAX_RADIX
|
||||
#define SIGN(MP) MP_SIGN(MP)
|
||||
#define USED(MP) MP_USED(MP)
|
||||
#define ALLOC(MP) MP_ALLOC(MP)
|
||||
#define DIGITS(MP) MP_DIGITS(MP)
|
||||
#define DIGIT(MP,N) MP_DIGIT(MP,N)
|
||||
|
||||
#if MP_ARGCHK == 1
|
||||
#define ARGCHK(X,Y) {if(!(X)){return (Y);}}
|
||||
#elif MP_ARGCHK == 2
|
||||
#include <assert.h>
|
||||
#define ARGCHK(X,Y) assert(X)
|
||||
#else
|
||||
#define ARGCHK(X,Y) /* */
|
||||
#endif
|
||||
#endif /* defined MP_API_COMPATIBLE */
|
||||
|
||||
/*
|
||||
* mpi_getProcessorLineSize() returns the size in bytes of the cache line
|
||||
* if a cache exists, or zero if there is no cache. If more than one
|
||||
* cache line exists, it should return the smallest line size (which is
|
||||
* usually the L1 cache).
|
||||
*
|
||||
* mp_exptmod uses this information to make sure that private key information
|
||||
* isn't being leaked through the cache.
|
||||
*
|
||||
* see mpcpucache.c for the implementation.
|
||||
*/
|
||||
unsigned long mpi_getProcessorLineSize(void); /* (void): a true C prototype; takes no arguments */
|
||||
|
||||
#endif /* end _H_MPI_ */
|
||||
@@ -1,65 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is the Solaris software cryptographic token.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* Sun Microsystems, Inc.
|
||||
* Portions created by the Initial Developer are Copyright (C) 2005
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
* Sun Microsystems, Inc.
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef MPI_AMD64
|
||||
#error This file only works on AMD64 platforms.
|
||||
#endif
|
||||
|
||||
#include <mpi-priv.h>
|
||||
|
||||
/*
|
||||
* MPI glue
|
||||
*
|
||||
*/
|
||||
|
||||
/* Presently, this is only used by the Montgomery arithmetic code. */
|
||||
/* c += a * b */
|
||||
/*
 * Adds a * b into the digit array c, then propagates the leftover carry
 * into the digits of c that follow the first a_len digits.
 *
 * a, a_len: source digits and their count; passed straight through to
 *           s_mpv_mul_add_vec64().
 * b:        single-digit multiplier.
 * c:        accumulator digits, updated in place.
 *
 * NOTE(review): the carry loop has no bound of its own -- it presumably
 * relies on the caller sizing c so that the carry dies out; confirm.
 */
void MPI_ASM_DECL s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len,
|
||||
mp_digit b, mp_digit *c)
|
||||
{
|
||||
mp_digit w;
|
||||
mp_digit d;
|
||||
|
||||
d = s_mpv_mul_add_vec64(c, a, a_len, b); /* returned digit is used below as the carry */
|
||||
c += a_len; /* continue at the digit after the first a_len digits of c */
|
||||
while (d) { /* keep going while a carry is pending */
|
||||
w = c[0] + d;
|
||||
d = (w < c[0] || w < d); /* 1 if the unsigned addition wrapped around, else 0 */
|
||||
*c++ = w; /* store the sum and advance to the next digit */
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,418 +0,0 @@
|
||||
# ***** BEGIN LICENSE BLOCK *****
|
||||
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
#
|
||||
# The contents of this file are subject to the Mozilla Public License Version
|
||||
# 1.1 (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
# http://www.mozilla.org/MPL/
|
||||
#
|
||||
# Software distributed under the License is distributed on an "AS IS" basis,
|
||||
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
# for the specific language governing rights and limitations under the
|
||||
# License.
|
||||
#
|
||||
# The Original Code is the Solaris software cryptographic token.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Sun Microsystems, Inc.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2005
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Sun Microsystems, Inc.
|
||||
#
|
||||
# Alternatively, the contents of this file may be used under the terms of
|
||||
# either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
# in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
# of those above. If you wish to allow use of your version of this file only
|
||||
# under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
# use your version of this file under the terms of the MPL, indicate your
|
||||
# decision by deleting the provisions above and replace them with the notice
|
||||
# and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
# the provisions above, a recipient may use your version of this file under
|
||||
# the terms of any one of the MPL, the GPL or the LGPL.
|
||||
#
|
||||
# ***** END LICENSE BLOCK ***** */
|
||||
|
||||
|
||||
# ------------------------------------------------------------------------
|
||||
#
|
||||
# Implementation of s_mpv_mul_set_vec which exploits
|
||||
# the 64X64->128 bit unsigned multiply instruction.
|
||||
#
|
||||
# ------------------------------------------------------------------------
|
||||
|
||||
# r = a * digit, r and a are vectors of length len
|
||||
# returns the carry digit
|
||||
# r and a are 64 bit aligned.
|
||||
#
|
||||
# uint64_t
|
||||
# s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
|
||||
#
|
||||
|
||||
# Register use, as established by the code below:
#   %rdi = r, %rsi = a, %rdx = len on entry (copied to %r8), %rcx = digit
#   %r9  = running carry (cy), returned in %rax
#   %r11 = next digit of a, loaded ahead of its use
#   %rax/%rdx = low/high halves of each product
# .L15 processes 8 digits per pass; .L16 finishes the last 1..7 digits.
# cy is cleared before the len == 0 test so that the shared exit at .L17,
# which returns cy, yields 0 for an empty vector.
# NOTE(review): the documented C prototype declares len as int, yet the
# full 64-bit %rdx is tested and copied -- confirm callers zero-extend it.
.text; .align 16; .globl s_mpv_mul_set_vec64; .type s_mpv_mul_set_vec64, @function; s_mpv_mul_set_vec64:

	xorq	%r9, %r9		# cy = 0 (xorq rewrites flags, so do it first)
	testq	%rdx, %rdx		# if (len == 0) return (0)
	jz	.L17

	movq	%rdx, %r8		# Use r8 for len; %rdx is used by mul

.L15:
	cmpq	$8, %r8			# flags from len - 8
	jb	.L16			# fewer than 8 digits left
	movq	0(%rsi), %rax		# rax = a[0]
	movq	8(%rsi), %r11		# prefetch a[1]
	mulq	%rcx			# p = a[0] * digit
	addq	%r9, %rax
	adcq	$0, %rdx		# p += cy
	movq	%rax, 0(%rdi)		# r[0] = lo(p)
	movq	%rdx, %r9		# cy = hi(p)

	movq	%r11, %rax
	movq	16(%rsi), %r11		# prefetch a[2]
	mulq	%rcx			# p = a[1] * digit
	addq	%r9, %rax
	adcq	$0, %rdx		# p += cy
	movq	%rax, 8(%rdi)		# r[1] = lo(p)
	movq	%rdx, %r9		# cy = hi(p)

	movq	%r11, %rax
	movq	24(%rsi), %r11		# prefetch a[3]
	mulq	%rcx			# p = a[2] * digit
	addq	%r9, %rax
	adcq	$0, %rdx		# p += cy
	movq	%rax, 16(%rdi)		# r[2] = lo(p)
	movq	%rdx, %r9		# cy = hi(p)

	movq	%r11, %rax
	movq	32(%rsi), %r11		# prefetch a[4]
	mulq	%rcx			# p = a[3] * digit
	addq	%r9, %rax
	adcq	$0, %rdx		# p += cy
	movq	%rax, 24(%rdi)		# r[3] = lo(p)
	movq	%rdx, %r9		# cy = hi(p)

	movq	%r11, %rax
	movq	40(%rsi), %r11		# prefetch a[5]
	mulq	%rcx			# p = a[4] * digit
	addq	%r9, %rax
	adcq	$0, %rdx		# p += cy
	movq	%rax, 32(%rdi)		# r[4] = lo(p)
	movq	%rdx, %r9		# cy = hi(p)

	movq	%r11, %rax
	movq	48(%rsi), %r11		# prefetch a[6]
	mulq	%rcx			# p = a[5] * digit
	addq	%r9, %rax
	adcq	$0, %rdx		# p += cy
	movq	%rax, 40(%rdi)		# r[5] = lo(p)
	movq	%rdx, %r9		# cy = hi(p)

	movq	%r11, %rax
	movq	56(%rsi), %r11		# prefetch a[7]
	mulq	%rcx			# p = a[6] * digit
	addq	%r9, %rax
	adcq	$0, %rdx		# p += cy
	movq	%rax, 48(%rdi)		# r[6] = lo(p)
	movq	%rdx, %r9		# cy = hi(p)

	movq	%r11, %rax
	mulq	%rcx			# p = a[7] * digit
	addq	%r9, %rax
	adcq	$0, %rdx		# p += cy
	movq	%rax, 56(%rdi)		# r[7] = lo(p)
	movq	%rdx, %r9		# cy = hi(p)

	addq	$64, %rsi		# advance a by 8 digits
	addq	$64, %rdi		# advance r by 8 digits
	subq	$8, %r8			# len -= 8

	jz	.L17
	jmp	.L15

.L16:					# tail: 1..7 digits remain
	movq	0(%rsi), %rax
	mulq	%rcx			# p = a[0] * digit
	addq	%r9, %rax
	adcq	$0, %rdx		# p += cy
	movq	%rax, 0(%rdi)		# r[0] = lo(p)
	movq	%rdx, %r9		# cy = hi(p)
	decq	%r8
	jz	.L17

	movq	8(%rsi), %rax
	mulq	%rcx			# p = a[1] * digit
	addq	%r9, %rax
	adcq	$0, %rdx		# p += cy
	movq	%rax, 8(%rdi)		# r[1] = lo(p)
	movq	%rdx, %r9		# cy = hi(p)
	decq	%r8
	jz	.L17

	movq	16(%rsi), %rax
	mulq	%rcx			# p = a[2] * digit
	addq	%r9, %rax
	adcq	$0, %rdx		# p += cy
	movq	%rax, 16(%rdi)		# r[2] = lo(p)
	movq	%rdx, %r9		# cy = hi(p)
	decq	%r8
	jz	.L17

	movq	24(%rsi), %rax
	mulq	%rcx			# p = a[3] * digit
	addq	%r9, %rax
	adcq	$0, %rdx		# p += cy
	movq	%rax, 24(%rdi)		# r[3] = lo(p)
	movq	%rdx, %r9		# cy = hi(p)
	decq	%r8
	jz	.L17

	movq	32(%rsi), %rax
	mulq	%rcx			# p = a[4] * digit
	addq	%r9, %rax
	adcq	$0, %rdx		# p += cy
	movq	%rax, 32(%rdi)		# r[4] = lo(p)
	movq	%rdx, %r9		# cy = hi(p)
	decq	%r8
	jz	.L17

	movq	40(%rsi), %rax
	mulq	%rcx			# p = a[5] * digit
	addq	%r9, %rax
	adcq	$0, %rdx		# p += cy
	movq	%rax, 40(%rdi)		# r[5] = lo(p)
	movq	%rdx, %r9		# cy = hi(p)
	decq	%r8
	jz	.L17

	movq	48(%rsi), %rax
	mulq	%rcx			# p = a[6] * digit
	addq	%r9, %rax
	adcq	$0, %rdx		# p += cy
	movq	%rax, 48(%rdi)		# r[6] = lo(p)
	movq	%rdx, %r9		# cy = hi(p)
	decq	%r8
	jz	.L17


.L17:
	movq	%r9, %rax		# return cy
	ret

.size s_mpv_mul_set_vec64, [.-s_mpv_mul_set_vec64]
|
||||
|
||||
# ------------------------------------------------------------------------
|
||||
#
|
||||
# Implementation of s_mpv_mul_add_vec which exploits
|
||||
# the 64X64->128 bit unsigned multiply instruction.
|
||||
#
|
||||
# ------------------------------------------------------------------------
|
||||
|
||||
# r += a * digit, r and a are vectors of length len
|
||||
# returns the carry digit
|
||||
# r and a are 64 bit aligned.
|
||||
#
|
||||
# uint64_t
|
||||
# s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
|
||||
#
|
||||
|
||||
# Register use, as established by the code below:
#   %rdi = r, %rsi = a, %rdx = len on entry (copied to %r8), %rcx = digit
#   %r9  = running carry (cy), returned in %rax
#   %r10 = current digit of r, %r11 = next digit of a (both loaded ahead)
#   %rax/%rdx = low/high halves of each product
# .L25 processes 8 digits per pass; .L26 finishes the last 1..7 digits.
# cy is cleared before the len == 0 test so that the shared exit at .L27,
# which returns cy, yields 0 for an empty vector.
# NOTE(review): the documented C prototype declares len as int, yet the
# full 64-bit %rdx is tested and copied -- confirm callers zero-extend it.
.text; .align 16; .globl s_mpv_mul_add_vec64; .type s_mpv_mul_add_vec64, @function; s_mpv_mul_add_vec64:

	xorq	%r9, %r9		# cy = 0 (xorq rewrites flags, so do it first)
	testq	%rdx, %rdx		# if (len == 0) return (0)
	jz	.L27

	movq	%rdx, %r8		# Use r8 for len; %rdx is used by mul

.L25:
	cmpq	$8, %r8			# flags from len - 8
	jb	.L26			# fewer than 8 digits left
	movq	0(%rsi), %rax		# rax = a[0]
	movq	0(%rdi), %r10		# r10 = r[0]
	movq	8(%rsi), %r11		# prefetch a[1]
	mulq	%rcx			# p = a[0] * digit
	addq	%r10, %rax
	adcq	$0, %rdx		# p += r[0]
	movq	8(%rdi), %r10		# prefetch r[1]
	addq	%r9, %rax
	adcq	$0, %rdx		# p += cy
	movq	%rax, 0(%rdi)		# r[0] = lo(p)
	movq	%rdx, %r9		# cy = hi(p)

	movq	%r11, %rax
	movq	16(%rsi), %r11		# prefetch a[2]
	mulq	%rcx			# p = a[1] * digit
	addq	%r10, %rax
	adcq	$0, %rdx		# p += r[1]
	movq	16(%rdi), %r10		# prefetch r[2]
	addq	%r9, %rax
	adcq	$0, %rdx		# p += cy
	movq	%rax, 8(%rdi)		# r[1] = lo(p)
	movq	%rdx, %r9		# cy = hi(p)

	movq	%r11, %rax
	movq	24(%rsi), %r11		# prefetch a[3]
	mulq	%rcx			# p = a[2] * digit
	addq	%r10, %rax
	adcq	$0, %rdx		# p += r[2]
	movq	24(%rdi), %r10		# prefetch r[3]
	addq	%r9, %rax
	adcq	$0, %rdx		# p += cy
	movq	%rax, 16(%rdi)		# r[2] = lo(p)
	movq	%rdx, %r9		# cy = hi(p)

	movq	%r11, %rax
	movq	32(%rsi), %r11		# prefetch a[4]
	mulq	%rcx			# p = a[3] * digit
	addq	%r10, %rax
	adcq	$0, %rdx		# p += r[3]
	movq	32(%rdi), %r10		# prefetch r[4]
	addq	%r9, %rax
	adcq	$0, %rdx		# p += cy
	movq	%rax, 24(%rdi)		# r[3] = lo(p)
	movq	%rdx, %r9		# cy = hi(p)

	movq	%r11, %rax
	movq	40(%rsi), %r11		# prefetch a[5]
	mulq	%rcx			# p = a[4] * digit
	addq	%r10, %rax
	adcq	$0, %rdx		# p += r[4]
	movq	40(%rdi), %r10		# prefetch r[5]
	addq	%r9, %rax
	adcq	$0, %rdx		# p += cy
	movq	%rax, 32(%rdi)		# r[4] = lo(p)
	movq	%rdx, %r9		# cy = hi(p)

	movq	%r11, %rax
	movq	48(%rsi), %r11		# prefetch a[6]
	mulq	%rcx			# p = a[5] * digit
	addq	%r10, %rax
	adcq	$0, %rdx		# p += r[5]
	movq	48(%rdi), %r10		# prefetch r[6]
	addq	%r9, %rax
	adcq	$0, %rdx		# p += cy
	movq	%rax, 40(%rdi)		# r[5] = lo(p)
	movq	%rdx, %r9		# cy = hi(p)

	movq	%r11, %rax
	movq	56(%rsi), %r11		# prefetch a[7]
	mulq	%rcx			# p = a[6] * digit
	addq	%r10, %rax
	adcq	$0, %rdx		# p += r[6]
	movq	56(%rdi), %r10		# prefetch r[7]
	addq	%r9, %rax
	adcq	$0, %rdx		# p += cy
	movq	%rax, 48(%rdi)		# r[6] = lo(p)
	movq	%rdx, %r9		# cy = hi(p)

	movq	%r11, %rax
	mulq	%rcx			# p = a[7] * digit
	addq	%r10, %rax
	adcq	$0, %rdx		# p += r[7]
	addq	%r9, %rax
	adcq	$0, %rdx		# p += cy
	movq	%rax, 56(%rdi)		# r[7] = lo(p)
	movq	%rdx, %r9		# cy = hi(p)

	addq	$64, %rsi		# advance a by 8 digits
	addq	$64, %rdi		# advance r by 8 digits
	subq	$8, %r8			# len -= 8

	jz	.L27
	jmp	.L25

.L26:					# tail: 1..7 digits remain
	movq	0(%rsi), %rax
	movq	0(%rdi), %r10
	mulq	%rcx			# p = a[0] * digit
	addq	%r10, %rax
	adcq	$0, %rdx		# p += r[0]
	addq	%r9, %rax
	adcq	$0, %rdx		# p += cy
	movq	%rax, 0(%rdi)		# r[0] = lo(p)
	movq	%rdx, %r9		# cy = hi(p)
	decq	%r8
	jz	.L27

	movq	8(%rsi), %rax
	movq	8(%rdi), %r10
	mulq	%rcx			# p = a[1] * digit
	addq	%r10, %rax
	adcq	$0, %rdx		# p += r[1]
	addq	%r9, %rax
	adcq	$0, %rdx		# p += cy
	movq	%rax, 8(%rdi)		# r[1] = lo(p)
	movq	%rdx, %r9		# cy = hi(p)
	decq	%r8
	jz	.L27

	movq	16(%rsi), %rax
	movq	16(%rdi), %r10
	mulq	%rcx			# p = a[2] * digit
	addq	%r10, %rax
	adcq	$0, %rdx		# p += r[2]
	addq	%r9, %rax
	adcq	$0, %rdx		# p += cy
	movq	%rax, 16(%rdi)		# r[2] = lo(p)
	movq	%rdx, %r9		# cy = hi(p)
	decq	%r8
	jz	.L27

	movq	24(%rsi), %rax
	movq	24(%rdi), %r10
	mulq	%rcx			# p = a[3] * digit
	addq	%r10, %rax
	adcq	$0, %rdx		# p += r[3]
	addq	%r9, %rax
	adcq	$0, %rdx		# p += cy
	movq	%rax, 24(%rdi)		# r[3] = lo(p)
	movq	%rdx, %r9		# cy = hi(p)
	decq	%r8
	jz	.L27

	movq	32(%rsi), %rax
	movq	32(%rdi), %r10
	mulq	%rcx			# p = a[4] * digit
	addq	%r10, %rax
	adcq	$0, %rdx		# p += r[4]
	addq	%r9, %rax
	adcq	$0, %rdx		# p += cy
	movq	%rax, 32(%rdi)		# r[4] = lo(p)
	movq	%rdx, %r9		# cy = hi(p)
	decq	%r8
	jz	.L27

	movq	40(%rsi), %rax
	movq	40(%rdi), %r10
	mulq	%rcx			# p = a[5] * digit
	addq	%r10, %rax
	adcq	$0, %rdx		# p += r[5]
	addq	%r9, %rax
	adcq	$0, %rdx		# p += cy
	movq	%rax, 40(%rdi)		# r[5] = lo(p)
	movq	%rdx, %r9		# cy = hi(p)
	decq	%r8
	jz	.L27

	movq	48(%rsi), %rax
	movq	48(%rdi), %r10
	mulq	%rcx			# p = a[6] * digit
	addq	%r10, %rax
	adcq	$0, %rdx		# p += r[6]
	addq	%r9, %rax
	adcq	$0, %rdx		# p += cy
	movq	%rax, 48(%rdi)		# r[6] = lo(p)
	movq	%rdx, %r9		# cy = hi(p)
	decq	%r8
	jz	.L27


.L27:
	movq	%r9, %rax		# return cy
	ret

.size s_mpv_mul_add_vec64, [.-s_mpv_mul_add_vec64]
|
||||
@@ -1,418 +0,0 @@
|
||||
/ ***** BEGIN LICENSE BLOCK *****
|
||||
/ Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
/
|
||||
/ The contents of this file are subject to the Mozilla Public License Version
|
||||
/ 1.1 (the "License"); you may not use this file except in compliance with
|
||||
/ the License. You may obtain a copy of the License at
|
||||
/ http://www.mozilla.org/MPL/
|
||||
/
|
||||
/ Software distributed under the License is distributed on an "AS IS" basis,
|
||||
/ WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
/ for the specific language governing rights and limitations under the
|
||||
/ License.
|
||||
/
|
||||
/ The Original Code is the Solaris software cryptographic token.
|
||||
/
|
||||
/ The Initial Developer of the Original Code is
|
||||
/ Sun Microsystems, Inc.
|
||||
/ Portions created by the Initial Developer are Copyright (C) 2005
|
||||
/ the Initial Developer. All Rights Reserved.
|
||||
/
|
||||
/ Contributor(s):
|
||||
/ Sun Microsystems, Inc.
|
||||
/
|
||||
/ Alternatively, the contents of this file may be used under the terms of
|
||||
/ either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
/ the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
/ in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
/ of those above. If you wish to allow use of your version of this file only
|
||||
/ under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
/ use your version of this file under the terms of the MPL, indicate your
|
||||
/ decision by deleting the provisions above and replace them with the notice
|
||||
/ and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
/ the provisions above, a recipient may use your version of this file under
|
||||
/ the terms of any one of the MPL, the GPL or the LGPL.
|
||||
/
|
||||
/ ***** END LICENSE BLOCK ***** */
|
||||
|
||||
|
||||
/ ------------------------------------------------------------------------
|
||||
/
|
||||
/ Implementation of s_mpv_mul_set_vec which exploits
|
||||
/ the 64X64->128 bit unsigned multiply instruction.
|
||||
/
|
||||
/ ------------------------------------------------------------------------
|
||||
|
||||
/ r = a * digit, r and a are vectors of length len
|
||||
/ returns the carry digit
|
||||
/ r and a are 64 bit aligned.
|
||||
/
|
||||
/ uint64_t
|
||||
/ s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
|
||||
/
|
||||
|
||||
/ Register use, as established by the code below:
/   %rdi = r, %rsi = a, %rdx = len on entry (copied to %r8), %rcx = digit
/   %r9  = running carry (cy), returned in %rax
/   %r11 = next digit of a, loaded ahead of its use
/   %rax/%rdx = low/high halves of each product
/ .L15 processes 8 digits per pass; .L16 finishes the last 1..7 digits.
/ cy is cleared before the len == 0 test so that the shared exit at .L17,
/ which returns cy, yields 0 for an empty vector.
/ NOTE(review): the documented C prototype declares len as int, yet the
/ full 64-bit %rdx is tested and copied -- confirm callers zero-extend it.
.text; .align 16; .globl s_mpv_mul_set_vec64; .type s_mpv_mul_set_vec64, @function; s_mpv_mul_set_vec64:

	xorq	%r9, %r9		/ cy = 0 (xorq rewrites flags, so do it first)
	testq	%rdx, %rdx		/ if (len == 0) return (0)
	jz	.L17

	movq	%rdx, %r8		/ Use r8 for len; %rdx is used by mul

.L15:
	cmpq	$8, %r8			/ flags from len - 8
	jb	.L16			/ fewer than 8 digits left
	movq	0(%rsi), %rax		/ rax = a[0]
	movq	8(%rsi), %r11		/ prefetch a[1]
	mulq	%rcx			/ p = a[0] * digit
	addq	%r9, %rax
	adcq	$0, %rdx		/ p += cy
	movq	%rax, 0(%rdi)		/ r[0] = lo(p)
	movq	%rdx, %r9		/ cy = hi(p)

	movq	%r11, %rax
	movq	16(%rsi), %r11		/ prefetch a[2]
	mulq	%rcx			/ p = a[1] * digit
	addq	%r9, %rax
	adcq	$0, %rdx		/ p += cy
	movq	%rax, 8(%rdi)		/ r[1] = lo(p)
	movq	%rdx, %r9		/ cy = hi(p)

	movq	%r11, %rax
	movq	24(%rsi), %r11		/ prefetch a[3]
	mulq	%rcx			/ p = a[2] * digit
	addq	%r9, %rax
	adcq	$0, %rdx		/ p += cy
	movq	%rax, 16(%rdi)		/ r[2] = lo(p)
	movq	%rdx, %r9		/ cy = hi(p)

	movq	%r11, %rax
	movq	32(%rsi), %r11		/ prefetch a[4]
	mulq	%rcx			/ p = a[3] * digit
	addq	%r9, %rax
	adcq	$0, %rdx		/ p += cy
	movq	%rax, 24(%rdi)		/ r[3] = lo(p)
	movq	%rdx, %r9		/ cy = hi(p)

	movq	%r11, %rax
	movq	40(%rsi), %r11		/ prefetch a[5]
	mulq	%rcx			/ p = a[4] * digit
	addq	%r9, %rax
	adcq	$0, %rdx		/ p += cy
	movq	%rax, 32(%rdi)		/ r[4] = lo(p)
	movq	%rdx, %r9		/ cy = hi(p)

	movq	%r11, %rax
	movq	48(%rsi), %r11		/ prefetch a[6]
	mulq	%rcx			/ p = a[5] * digit
	addq	%r9, %rax
	adcq	$0, %rdx		/ p += cy
	movq	%rax, 40(%rdi)		/ r[5] = lo(p)
	movq	%rdx, %r9		/ cy = hi(p)

	movq	%r11, %rax
	movq	56(%rsi), %r11		/ prefetch a[7]
	mulq	%rcx			/ p = a[6] * digit
	addq	%r9, %rax
	adcq	$0, %rdx		/ p += cy
	movq	%rax, 48(%rdi)		/ r[6] = lo(p)
	movq	%rdx, %r9		/ cy = hi(p)

	movq	%r11, %rax
	mulq	%rcx			/ p = a[7] * digit
	addq	%r9, %rax
	adcq	$0, %rdx		/ p += cy
	movq	%rax, 56(%rdi)		/ r[7] = lo(p)
	movq	%rdx, %r9		/ cy = hi(p)

	addq	$64, %rsi		/ advance a by 8 digits
	addq	$64, %rdi		/ advance r by 8 digits
	subq	$8, %r8			/ len -= 8

	jz	.L17
	jmp	.L15

.L16:					/ tail: 1..7 digits remain
	movq	0(%rsi), %rax
	mulq	%rcx			/ p = a[0] * digit
	addq	%r9, %rax
	adcq	$0, %rdx		/ p += cy
	movq	%rax, 0(%rdi)		/ r[0] = lo(p)
	movq	%rdx, %r9		/ cy = hi(p)
	decq	%r8
	jz	.L17

	movq	8(%rsi), %rax
	mulq	%rcx			/ p = a[1] * digit
	addq	%r9, %rax
	adcq	$0, %rdx		/ p += cy
	movq	%rax, 8(%rdi)		/ r[1] = lo(p)
	movq	%rdx, %r9		/ cy = hi(p)
	decq	%r8
	jz	.L17

	movq	16(%rsi), %rax
	mulq	%rcx			/ p = a[2] * digit
	addq	%r9, %rax
	adcq	$0, %rdx		/ p += cy
	movq	%rax, 16(%rdi)		/ r[2] = lo(p)
	movq	%rdx, %r9		/ cy = hi(p)
	decq	%r8
	jz	.L17

	movq	24(%rsi), %rax
	mulq	%rcx			/ p = a[3] * digit
	addq	%r9, %rax
	adcq	$0, %rdx		/ p += cy
	movq	%rax, 24(%rdi)		/ r[3] = lo(p)
	movq	%rdx, %r9		/ cy = hi(p)
	decq	%r8
	jz	.L17

	movq	32(%rsi), %rax
	mulq	%rcx			/ p = a[4] * digit
	addq	%r9, %rax
	adcq	$0, %rdx		/ p += cy
	movq	%rax, 32(%rdi)		/ r[4] = lo(p)
	movq	%rdx, %r9		/ cy = hi(p)
	decq	%r8
	jz	.L17

	movq	40(%rsi), %rax
	mulq	%rcx			/ p = a[5] * digit
	addq	%r9, %rax
	adcq	$0, %rdx		/ p += cy
	movq	%rax, 40(%rdi)		/ r[5] = lo(p)
	movq	%rdx, %r9		/ cy = hi(p)
	decq	%r8
	jz	.L17

	movq	48(%rsi), %rax
	mulq	%rcx			/ p = a[6] * digit
	addq	%r9, %rax
	adcq	$0, %rdx		/ p += cy
	movq	%rax, 48(%rdi)		/ r[6] = lo(p)
	movq	%rdx, %r9		/ cy = hi(p)
	decq	%r8
	jz	.L17


.L17:
	movq	%r9, %rax		/ return cy
	ret

.size s_mpv_mul_set_vec64, [.-s_mpv_mul_set_vec64]
|
||||
|
||||
/ ------------------------------------------------------------------------
|
||||
/
|
||||
/ Implementation of s_mpv_mul_add_vec which exploits
|
||||
/ the 64X64->128 bit unsigned multiply instruction.
|
||||
/
|
||||
/ ------------------------------------------------------------------------
|
||||
|
||||
/ r += a * digit, r and a are vectors of length len
|
||||
/ returns the carry digit
|
||||
/ r and a are 64 bit aligned.
|
||||
/
|
||||
/ uint64_t
|
||||
/ s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
|
||||
/
|
||||
|
||||
/ Register use, as established by the code below:
/   %rdi = r, %rsi = a, %rdx = len on entry (copied to %r8), %rcx = digit
/   %r9  = running carry (cy), returned in %rax
/   %r10 = current digit of r, %r11 = next digit of a (both loaded ahead)
/   %rax/%rdx = low/high halves of each product
/ .L25 processes 8 digits per pass; .L26 finishes the last 1..7 digits.
/ cy is cleared before the len == 0 test so that the shared exit at .L27,
/ which returns cy, yields 0 for an empty vector.
/ NOTE(review): the documented C prototype declares len as int, yet the
/ full 64-bit %rdx is tested and copied -- confirm callers zero-extend it.
.text; .align 16; .globl s_mpv_mul_add_vec64; .type s_mpv_mul_add_vec64, @function; s_mpv_mul_add_vec64:

	xorq	%r9, %r9		/ cy = 0 (xorq rewrites flags, so do it first)
	testq	%rdx, %rdx		/ if (len == 0) return (0)
	jz	.L27

	movq	%rdx, %r8		/ Use r8 for len; %rdx is used by mul

.L25:
	cmpq	$8, %r8			/ flags from len - 8
	jb	.L26			/ fewer than 8 digits left
	movq	0(%rsi), %rax		/ rax = a[0]
	movq	0(%rdi), %r10		/ r10 = r[0]
	movq	8(%rsi), %r11		/ prefetch a[1]
	mulq	%rcx			/ p = a[0] * digit
	addq	%r10, %rax
	adcq	$0, %rdx		/ p += r[0]
	movq	8(%rdi), %r10		/ prefetch r[1]
	addq	%r9, %rax
	adcq	$0, %rdx		/ p += cy
	movq	%rax, 0(%rdi)		/ r[0] = lo(p)
	movq	%rdx, %r9		/ cy = hi(p)

	movq	%r11, %rax
	movq	16(%rsi), %r11		/ prefetch a[2]
	mulq	%rcx			/ p = a[1] * digit
	addq	%r10, %rax
	adcq	$0, %rdx		/ p += r[1]
	movq	16(%rdi), %r10		/ prefetch r[2]
	addq	%r9, %rax
	adcq	$0, %rdx		/ p += cy
	movq	%rax, 8(%rdi)		/ r[1] = lo(p)
	movq	%rdx, %r9		/ cy = hi(p)

	movq	%r11, %rax
	movq	24(%rsi), %r11		/ prefetch a[3]
	mulq	%rcx			/ p = a[2] * digit
	addq	%r10, %rax
	adcq	$0, %rdx		/ p += r[2]
	movq	24(%rdi), %r10		/ prefetch r[3]
	addq	%r9, %rax
	adcq	$0, %rdx		/ p += cy
	movq	%rax, 16(%rdi)		/ r[2] = lo(p)
	movq	%rdx, %r9		/ cy = hi(p)

	movq	%r11, %rax
	movq	32(%rsi), %r11		/ prefetch a[4]
	mulq	%rcx			/ p = a[3] * digit
	addq	%r10, %rax
	adcq	$0, %rdx		/ p += r[3]
	movq	32(%rdi), %r10		/ prefetch r[4]
	addq	%r9, %rax
	adcq	$0, %rdx		/ p += cy
	movq	%rax, 24(%rdi)		/ r[3] = lo(p)
	movq	%rdx, %r9		/ cy = hi(p)

	movq	%r11, %rax
	movq	40(%rsi), %r11		/ prefetch a[5]
	mulq	%rcx			/ p = a[4] * digit
	addq	%r10, %rax
	adcq	$0, %rdx		/ p += r[4]
	movq	40(%rdi), %r10		/ prefetch r[5]
	addq	%r9, %rax
	adcq	$0, %rdx		/ p += cy
	movq	%rax, 32(%rdi)		/ r[4] = lo(p)
	movq	%rdx, %r9		/ cy = hi(p)

	movq	%r11, %rax
	movq	48(%rsi), %r11		/ prefetch a[6]
	mulq	%rcx			/ p = a[5] * digit
	addq	%r10, %rax
	adcq	$0, %rdx		/ p += r[5]
	movq	48(%rdi), %r10		/ prefetch r[6]
	addq	%r9, %rax
	adcq	$0, %rdx		/ p += cy
	movq	%rax, 40(%rdi)		/ r[5] = lo(p)
	movq	%rdx, %r9		/ cy = hi(p)

	movq	%r11, %rax
	movq	56(%rsi), %r11		/ prefetch a[7]
	mulq	%rcx			/ p = a[6] * digit
	addq	%r10, %rax
	adcq	$0, %rdx		/ p += r[6]
	movq	56(%rdi), %r10		/ prefetch r[7]
	addq	%r9, %rax
	adcq	$0, %rdx		/ p += cy
	movq	%rax, 48(%rdi)		/ r[6] = lo(p)
	movq	%rdx, %r9		/ cy = hi(p)

	movq	%r11, %rax
	mulq	%rcx			/ p = a[7] * digit
	addq	%r10, %rax
	adcq	$0, %rdx		/ p += r[7]
	addq	%r9, %rax
	adcq	$0, %rdx		/ p += cy
	movq	%rax, 56(%rdi)		/ r[7] = lo(p)
	movq	%rdx, %r9		/ cy = hi(p)

	addq	$64, %rsi		/ advance a by 8 digits
	addq	$64, %rdi		/ advance r by 8 digits
	subq	$8, %r8			/ len -= 8

	jz	.L27
	jmp	.L25

.L26:					/ tail: 1..7 digits remain
	movq	0(%rsi), %rax
	movq	0(%rdi), %r10
	mulq	%rcx			/ p = a[0] * digit
	addq	%r10, %rax
	adcq	$0, %rdx		/ p += r[0]
	addq	%r9, %rax
	adcq	$0, %rdx		/ p += cy
	movq	%rax, 0(%rdi)		/ r[0] = lo(p)
	movq	%rdx, %r9		/ cy = hi(p)
	decq	%r8
	jz	.L27

	movq	8(%rsi), %rax
	movq	8(%rdi), %r10
	mulq	%rcx			/ p = a[1] * digit
	addq	%r10, %rax
	adcq	$0, %rdx		/ p += r[1]
	addq	%r9, %rax
	adcq	$0, %rdx		/ p += cy
	movq	%rax, 8(%rdi)		/ r[1] = lo(p)
	movq	%rdx, %r9		/ cy = hi(p)
	decq	%r8
	jz	.L27

	movq	16(%rsi), %rax
	movq	16(%rdi), %r10
	mulq	%rcx			/ p = a[2] * digit
	addq	%r10, %rax
	adcq	$0, %rdx		/ p += r[2]
	addq	%r9, %rax
	adcq	$0, %rdx		/ p += cy
	movq	%rax, 16(%rdi)		/ r[2] = lo(p)
	movq	%rdx, %r9		/ cy = hi(p)
	decq	%r8
	jz	.L27

	movq	24(%rsi), %rax
	movq	24(%rdi), %r10
	mulq	%rcx			/ p = a[3] * digit
	addq	%r10, %rax
	adcq	$0, %rdx		/ p += r[3]
	addq	%r9, %rax
	adcq	$0, %rdx		/ p += cy
	movq	%rax, 24(%rdi)		/ r[3] = lo(p)
	movq	%rdx, %r9		/ cy = hi(p)
	decq	%r8
	jz	.L27

	movq	32(%rsi), %rax
	movq	32(%rdi), %r10
	mulq	%rcx			/ p = a[4] * digit
	addq	%r10, %rax
	adcq	$0, %rdx		/ p += r[4]
	addq	%r9, %rax
	adcq	$0, %rdx		/ p += cy
	movq	%rax, 32(%rdi)		/ r[4] = lo(p)
	movq	%rdx, %r9		/ cy = hi(p)
	decq	%r8
	jz	.L27

	movq	40(%rsi), %rax
	movq	40(%rdi), %r10
	mulq	%rcx			/ p = a[5] * digit
	addq	%r10, %rax
	adcq	$0, %rdx		/ p += r[5]
	addq	%r9, %rax
	adcq	$0, %rdx		/ p += cy
	movq	%rax, 40(%rdi)		/ r[5] = lo(p)
	movq	%rdx, %r9		/ cy = hi(p)
	decq	%r8
	jz	.L27

	movq	48(%rsi), %rax
	movq	48(%rdi), %r10
	mulq	%rcx			/ p = a[6] * digit
	addq	%r10, %rax
	adcq	$0, %rdx		/ p += r[6]
	addq	%r9, %rax
	adcq	$0, %rdx		/ p += cy
	movq	%rax, 48(%rdi)		/ r[6] = lo(p)
	movq	%rdx, %r9		/ cy = hi(p)
	decq	%r8
	jz	.L27


.L27:
	movq	%r9, %rax		/ return cy
	ret

.size s_mpv_mul_add_vec64, [.-s_mpv_mul_add_vec64]
|
||||
@@ -1,115 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is the Netscape security libraries.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* Netscape Communications Corporation.
|
||||
* Portions created by the Initial Developer are Copyright (C) 2000
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
/* $Id: mpi_hp.c,v 1.5 2004-04-27 23:04:36 gerv%gerv.net Exp $ */
|
||||
|
||||
/* This file contains routines that perform vector multiplication. */
|
||||
|
||||
#include "mpi-priv.h"
|
||||
#include <unistd.h>
|
||||
|
||||
#include <stddef.h>
|
||||
/* #include <sys/systeminfo.h> */
|
||||
#include <strings.h>
|
||||
|
||||
extern void multacc512(
|
||||
int length, /* doublewords in multiplicand vector. */
|
||||
const mp_digit *scalaraddr, /* Address of scalar. */
|
||||
const mp_digit *multiplicand, /* The multiplicand vector. */
|
||||
mp_digit * result); /* Where to accumulate the result. */
|
||||
|
||||
extern void maxpy_little(
|
||||
int length, /* doublewords in multiplicand vector. */
|
||||
const mp_digit *scalaraddr, /* Address of scalar. */
|
||||
const mp_digit *multiplicand, /* The multiplicand vector. */
|
||||
mp_digit * result); /* Where to accumulate the result. */
|
||||
|
||||
extern void add_diag_little(
|
||||
int length, /* doublewords in input vector. */
|
||||
const mp_digit *root, /* The vector to square. */
|
||||
mp_digit * result); /* Where to accumulate the result. */
|
||||
|
||||
void
|
||||
s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps)
|
||||
{
|
||||
add_diag_little(a_len, pa, ps);
|
||||
}
|
||||
|
||||
#define MAX_STACK_DIGITS 258
|
||||
#define MULTACC512_LEN (512 / MP_DIGIT_BIT)
|
||||
#define HP_MPY_ADD_FN (a_len == MULTACC512_LEN ? multacc512 : maxpy_little)
|
||||
|
||||
/* c = a * b */
|
||||
void
|
||||
s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
|
||||
{
|
||||
mp_digit x[MAX_STACK_DIGITS];
|
||||
mp_digit *px = x;
|
||||
size_t xSize = 0;
|
||||
|
||||
if (a == c) {
|
||||
if (a_len > MAX_STACK_DIGITS) {
|
||||
xSize = sizeof(mp_digit) * (a_len + 2);
|
||||
px = malloc(xSize);
|
||||
if (!px)
|
||||
return;
|
||||
}
|
||||
memcpy(px, a, a_len * sizeof(*a));
|
||||
a = px;
|
||||
}
|
||||
s_mp_setz(c, a_len + 1);
|
||||
HP_MPY_ADD_FN(a_len, &b, a, c);
|
||||
if (px != x && px) {
|
||||
memset(px, 0, xSize);
|
||||
free(px);
|
||||
}
|
||||
}
|
||||
|
||||
/* c += a * b, where a is a_len words long. */
|
||||
void
|
||||
s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
|
||||
{
|
||||
c[a_len] = 0; /* so carry propagation stops here. */
|
||||
HP_MPY_ADD_FN(a_len, &b, a, c);
|
||||
}
|
||||
|
||||
/* c += a * b, where a is y words long. */
|
||||
void
|
||||
s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b,
|
||||
mp_digit *c)
|
||||
{
|
||||
HP_MPY_ADD_FN(a_len, &b, a, c);
|
||||
}
|
||||
|
||||
@@ -1,342 +0,0 @@
|
||||
/
|
||||
/ The contents of this file are subject to the Mozilla Public
|
||||
/ License Version 1.1 (the "License"); you may not use this file
|
||||
/ except in compliance with the License. You may obtain a copy of
|
||||
/ the License at http://www.mozilla.org/MPL/
|
||||
/
|
||||
/ Software distributed under the License is distributed on an "AS
|
||||
/ IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
||||
/ implied. See the License for the specific language governing
|
||||
/ rights and limitations under the License.
|
||||
/
|
||||
/ The Original Code is the Netscape security libraries.
|
||||
/
|
||||
/ The Initial Developer of the Original Code is Netscape
|
||||
/ Communications Corporation. Portions created by Netscape are
|
||||
/ Copyright (C) 2001 Netscape Communications Corporation. All
|
||||
/ Rights Reserved.
|
||||
/
|
||||
/ Contributor(s):
|
||||
/
|
||||
/ Alternatively, the contents of this file may be used under the
|
||||
/ terms of the GNU General Public License Version 2 or later (the
|
||||
/ "GPL"), in which case the provisions of the GPL are applicable
|
||||
/ instead of those above. If you wish to allow use of your
|
||||
/ version of this file only under the terms of the GPL and not to
|
||||
/ allow others to use your version of this file under the MPL,
|
||||
/ indicate your decision by deleting the provisions above and
|
||||
/ replace them with the notice and other provisions required by
|
||||
/ the GPL. If you do not delete the provisions above, a recipient
|
||||
/ may use your version of this file under either the MPL or the
|
||||
/ GPL.
|
||||
/ $Id: mpi_i86pc.s,v 1.1 2001-04-27 20:47:39 nelsonb%netscape.com Exp $
|
||||
/
|
||||
|
||||
.text
|
||||
|
||||
/ ebp - 36: caller's esi
|
||||
/ ebp - 32: caller's edi
|
||||
/ ebp - 28:
|
||||
/ ebp - 24:
|
||||
/ ebp - 20:
|
||||
/ ebp - 16:
|
||||
/ ebp - 12:
|
||||
/ ebp - 8:
|
||||
/ ebp - 4:
|
||||
/ ebp + 0: caller's ebp
|
||||
/ ebp + 4: return address
|
||||
/ ebp + 8: a argument
|
||||
/ ebp + 12: a_len argument
|
||||
/ ebp + 16: b argument
|
||||
/ ebp + 20: c argument
|
||||
/ registers:
|
||||
/ eax:
|
||||
/ ebx: carry
|
||||
/ ecx: a_len
|
||||
/ edx:
|
||||
/ esi: a ptr
|
||||
/ edi: c ptr
|
||||
/ s_mpv_mul_d(a, a_len, b, c): c = a * b.
/ Multiplies each of the a_len 32-bit digits at a by the single digit b,
/ feeding the high half of every product into the next digit as carry.
/ a_len product digits are written to c, followed by the final carry, so
/ a_len + 1 digits of c are written.  With a_len == 0 only a zero carry is
/ stored at c[0].
.globl s_mpv_mul_d
|
||||
.type s_mpv_mul_d,@function
|
||||
s_mpv_mul_d:
|
||||
push %ebp
|
||||
mov %esp,%ebp
|
||||
sub $28,%esp / reserve 28 bytes; no ebp-relative local is referenced below
|
||||
push %edi / save caller's edi, esi and ebx
|
||||
push %esi
|
||||
push %ebx
|
||||
movl $0,%ebx / carry = 0
|
||||
mov 12(%ebp),%ecx / ecx = a_len
|
||||
mov 20(%ebp),%edi / edi = c
|
||||
cmp $0,%ecx
|
||||
je L2 / jmp if a_len == 0
|
||||
mov 8(%ebp),%esi / esi = a
|
||||
cld / make lodsl/stosl step upward through memory
|
||||
L1:
|
||||
lodsl / eax = [ds:esi]; esi += 4
|
||||
mov 16(%ebp),%edx / edx = b
|
||||
mull %edx / edx:eax = Phi:Plo = a_i * b
|
||||
|
|
||||
add %ebx,%eax / add carry (%ebx) to edx:eax
|
||||
adc $0,%edx
|
||||
mov %edx,%ebx / high half of product becomes next carry
|
||||
|
|
||||
stosl / [es:edi] = eax; edi += 4;
|
||||
dec %ecx / --a_len
|
||||
jnz L1 / jmp if a_len != 0
|
||||
L2:
|
||||
mov %ebx,0(%edi) / *c = carry
|
||||
pop %ebx / restore in reverse order of the pushes
|
||||
pop %esi
|
||||
pop %edi
|
||||
leave
|
||||
ret
|
||||
nop
|
||||
|
||||
/ ebp - 36: caller's esi
|
||||
/ ebp - 32: caller's edi
|
||||
/ ebp - 28:
|
||||
/ ebp - 24:
|
||||
/ ebp - 20:
|
||||
/ ebp - 16:
|
||||
/ ebp - 12:
|
||||
/ ebp - 8:
|
||||
/ ebp - 4:
|
||||
/ ebp + 0: caller's ebp
|
||||
/ ebp + 4: return address
|
||||
/ ebp + 8: a argument
|
||||
/ ebp + 12: a_len argument
|
||||
/ ebp + 16: b argument
|
||||
/ ebp + 20: c argument
|
||||
/ registers:
|
||||
/ eax:
|
||||
/ ebx: carry
|
||||
/ ecx: a_len
|
||||
/ edx:
|
||||
/ esi: a ptr
|
||||
/ edi: c ptr
|
||||
/ s_mpv_mul_d_add(a, a_len, b, c): c += a * b.
/ For each of the a_len 32-bit digits of a, adds a_i * b plus the running
/ carry to the matching digit of c.  The final carry is then stored into
/ c[a_len]; it overwrites that digit rather than being added to it.
.globl s_mpv_mul_d_add
|
||||
.type s_mpv_mul_d_add,@function
|
||||
s_mpv_mul_d_add:
|
||||
push %ebp
|
||||
mov %esp,%ebp
|
||||
sub $28,%esp / reserve 28 bytes; no ebp-relative local is referenced below
|
||||
push %edi / save caller's edi, esi and ebx
|
||||
push %esi
|
||||
push %ebx
|
||||
movl $0,%ebx / carry = 0
|
||||
mov 12(%ebp),%ecx / ecx = a_len
|
||||
mov 20(%ebp),%edi / edi = c
|
||||
cmp $0,%ecx
|
||||
je L4 / jmp if a_len == 0
|
||||
mov 8(%ebp),%esi / esi = a
|
||||
cld / make lodsl/stosl step upward through memory
|
||||
L3:
|
||||
lodsl / eax = [ds:esi]; esi += 4
|
||||
mov 16(%ebp),%edx / edx = b
|
||||
mull %edx / edx:eax = Phi:Plo = a_i * b
|
||||
|
|
||||
add %ebx,%eax / add carry (%ebx) to edx:eax
|
||||
adc $0,%edx
|
||||
mov 0(%edi),%ebx / add in current word from *c
|
||||
add %ebx,%eax
|
||||
adc $0,%edx
|
||||
mov %edx,%ebx / high half of product becomes next carry
|
||||
|
|
||||
stosl / [es:edi] = eax; edi += 4;
|
||||
dec %ecx / --a_len
|
||||
jnz L3 / jmp if a_len != 0
|
||||
L4:
|
||||
mov %ebx,0(%edi) / *c = carry
|
||||
pop %ebx / restore in reverse order of the pushes
|
||||
pop %esi
|
||||
pop %edi
|
||||
leave
|
||||
ret
|
||||
nop
|
||||
|
||||
/ ebp - 36: caller's esi
|
||||
/ ebp - 32: caller's edi
|
||||
/ ebp - 28:
|
||||
/ ebp - 24:
|
||||
/ ebp - 20:
|
||||
/ ebp - 16:
|
||||
/ ebp - 12:
|
||||
/ ebp - 8:
|
||||
/ ebp - 4:
|
||||
/ ebp + 0: caller's ebp
|
||||
/ ebp + 4: return address
|
||||
/ ebp + 8: a argument
|
||||
/ ebp + 12: a_len argument
|
||||
/ ebp + 16: b argument
|
||||
/ ebp + 20: c argument
|
||||
/ registers:
|
||||
/ eax:
|
||||
/ ebx: carry
|
||||
/ ecx: a_len
|
||||
/ edx:
|
||||
/ esi: a ptr
|
||||
/ edi: c ptr
|
||||
/ s_mpv_mul_d_add_prop(a, a_len, b, c): c += a * b with carry propagation.
/ The main loop matches s_mpv_mul_d_add.  Afterwards a non-zero carry is
/ added into the next digit of c, and the addition keeps rippling upward
/ through c for as long as each add produces a carry out.
.globl s_mpv_mul_d_add_prop
|
||||
.type s_mpv_mul_d_add_prop,@function
|
||||
s_mpv_mul_d_add_prop:
|
||||
push %ebp
|
||||
mov %esp,%ebp
|
||||
sub $28,%esp / reserve 28 bytes; no ebp-relative local is referenced below
|
||||
push %edi / save caller's edi, esi and ebx
|
||||
push %esi
|
||||
push %ebx
|
||||
movl $0,%ebx / carry = 0
|
||||
mov 12(%ebp),%ecx / ecx = a_len
|
||||
mov 20(%ebp),%edi / edi = c
|
||||
cmp $0,%ecx
|
||||
je L6 / jmp if a_len == 0
|
||||
cld / make lodsl/stosl step upward through memory
|
||||
mov 8(%ebp),%esi / esi = a
|
||||
L5:
|
||||
lodsl / eax = [ds:esi]; esi += 4
|
||||
mov 16(%ebp),%edx / edx = b
|
||||
mull %edx / edx:eax = Phi:Plo = a_i * b
|
||||
|
|
||||
add %ebx,%eax / add carry (%ebx) to edx:eax
|
||||
adc $0,%edx
|
||||
mov 0(%edi),%ebx / add in current word from *c
|
||||
add %ebx,%eax
|
||||
adc $0,%edx
|
||||
mov %edx,%ebx / high half of product becomes next carry
|
||||
|
|
||||
stosl / [es:edi] = eax; edi += 4;
|
||||
dec %ecx / --a_len
|
||||
jnz L5 / jmp if a_len != 0
|
||||
L6:
|
||||
cmp $0,%ebx / is carry zero?
|
||||
jz L8
|
||||
mov 0(%edi),%eax / add in current word from *c
|
||||
add %ebx,%eax
|
||||
stosl / [es:edi] = eax; edi += 4; (leaves the carry flag untouched)
|
||||
jnc L8 / done unless the add carried out
|
||||
L7:
|
||||
mov 0(%edi),%eax / add in current word from *c
|
||||
adc $0,%eax / add the pending carry flag
|
||||
stosl / [es:edi] = eax; edi += 4;
|
||||
jc L7 / keep rippling while a carry remains
|
||||
L8:
|
||||
pop %ebx / restore in reverse order of the pushes
|
||||
pop %esi
|
||||
pop %edi
|
||||
leave
|
||||
ret
|
||||
nop
|
||||
|
||||
/ ebp - 20: caller's esi
|
||||
/ ebp - 16: caller's edi
|
||||
/ ebp - 12:
|
||||
/ ebp - 8: carry
|
||||
/ ebp - 4: a_len local
|
||||
/ ebp + 0: caller's ebp
|
||||
/ ebp + 4: return address
|
||||
/ ebp + 8: pa argument
|
||||
/ ebp + 12: a_len argument
|
||||
/ ebp + 16: ps argument
|
||||
/ ebp + 20:
|
||||
/ registers:
|
||||
/ eax:
|
||||
/ ebx: carry
|
||||
/ ecx: a_len
|
||||
/ edx:
|
||||
/ esi: a ptr
|
||||
/ edi: c ptr
|
||||
|
||||
/ s_mpv_sqr_add_prop(pa, a_len, ps): adds the square of every digit of pa
/ into ps.  The 64-bit square of pa[i], plus the carry from the previous
/ step, is added to the two consecutive 32-bit words at ps, and ps then
/ advances by two words.  A carry left after the last digit is rippled
/ upward through the following words of ps.
.globl s_mpv_sqr_add_prop
|
||||
.type s_mpv_sqr_add_prop,@function
|
||||
s_mpv_sqr_add_prop:
|
||||
push %ebp
|
||||
mov %esp,%ebp
|
||||
sub $12,%esp / reserve 12 bytes; no ebp-relative local is referenced below
|
||||
push %edi / save caller's edi, esi and ebx
|
||||
push %esi
|
||||
push %ebx
|
||||
movl $0,%ebx / carry = 0
|
||||
mov 12(%ebp),%ecx / a_len
|
||||
mov 16(%ebp),%edi / edi = ps
|
||||
cmp $0,%ecx
|
||||
je L11 / jump if a_len == 0
|
||||
cld / make lodsl/stosl step upward through memory
|
||||
mov 8(%ebp),%esi / esi = pa
|
||||
L10:
|
||||
lodsl / %eax = [ds:esi]; esi += 4;
|
||||
mull %eax / edx:eax = a_i * a_i
|
||||
|
|
||||
add %ebx,%eax / add "carry"
|
||||
adc $0,%edx
|
||||
mov 0(%edi),%ebx
|
||||
add %ebx,%eax / add low word from result
|
||||
mov 4(%edi),%ebx / fetch the high word before stosl advances edi (mov keeps flags)
|
||||
stosl / [es:edi] = %eax; edi += 4;
|
||||
adc %ebx,%edx / add high word from result
|
||||
movl $0,%ebx / clear ebx without disturbing the carry flag
|
||||
mov %edx,%eax
|
||||
adc $0,%ebx / ebx = carry out of the 64-bit add
|
||||
stosl / [es:edi] = %eax; edi += 4;
|
||||
dec %ecx / --a_len
|
||||
jnz L10 / jmp if a_len != 0
|
||||
L11:
|
||||
cmp $0,%ebx / is carry zero?
|
||||
jz L14
|
||||
mov 0(%edi),%eax / add in current word from *c
|
||||
add %ebx,%eax
|
||||
stosl / [es:edi] = eax; edi += 4;
|
||||
jnc L14 / done unless the add carried out
|
||||
L12:
|
||||
mov 0(%edi),%eax / add in current word from *c
|
||||
adc $0,%eax / add the pending carry flag
|
||||
stosl / [es:edi] = eax; edi += 4;
|
||||
jc L12 / keep rippling while a carry remains
|
||||
L14:
|
||||
pop %ebx / restore in reverse order of the pushes
|
||||
pop %esi
|
||||
pop %edi
|
||||
leave
|
||||
ret
|
||||
nop
|
||||
|
||||
/
|
||||
/ Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
|
||||
/ so its high bit is 1. This code is from NSPR.
|
||||
/
|
||||
/ mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
|
||||
/ mp_digit *qp, mp_digit *rp)
|
||||
|
||||
/ esp + 0: Caller's ebx
|
||||
/ esp + 4: return address
|
||||
/ esp + 8: Nhi argument
|
||||
/ esp + 12: Nlo argument
|
||||
/ esp + 16: divisor argument
|
||||
/ esp + 20: qp argument
|
||||
/ esp + 24: rp argument
|
||||
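/ registers:
|
||||
/ eax: Nlo before the div, quotient after it; zeroed as the return value
|
||||
/ ebx: divisor, then the qp and rp pointers in turn
|
||||
/ ecx: unused
|
||||
/ edx: Nhi before the div, remainder after it
|
||||
/ esi: unused
|
||||
/ edi: unused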
|
||||
/
|
||||
|
||||
/ s_mpv_div_2dx1d(Nhi, Nlo, divisor, qp, rp):
/ divides the 64-bit value Nhi:Nlo by the 32-bit divisor with one div
/ instruction, stores the quotient through qp and the remainder through rp,
/ and returns 0 in eax.  Only ebx is saved, so arguments are addressed
/ relative to esp (first argument at esp + 8 after the push).
/ NOTE(review): div traps if the quotient does not fit in 32 bits, i.e. when
/ Nhi >= divisor; presumably callers guarantee Nhi < divisor -- confirm.
.globl s_mpv_div_2dx1d
|
||||
.type s_mpv_div_2dx1d,@function
|
||||
s_mpv_div_2dx1d:
|
||||
push %ebx
|
||||
mov 8(%esp),%edx / edx = Nhi
|
||||
mov 12(%esp),%eax / eax = Nlo
|
||||
mov 16(%esp),%ebx / ebx = divisor
|
||||
div %ebx / eax = quotient, edx = remainder
|
||||
mov 20(%esp),%ebx / ebx = qp
|
||||
mov %eax,0(%ebx) / *qp = quotient
|
||||
mov 24(%esp),%ebx / ebx = rp
|
||||
mov %edx,0(%ebx) / *rp = remainder
|
||||
xor %eax,%eax / return zero
|
||||
pop %ebx
|
||||
ret
|
||||
nop
|
||||
|
||||
@@ -1,502 +0,0 @@
|
||||
/*
|
||||
* The contents of this file are subject to the Mozilla Public
|
||||
* License Version 1.1 (the "License"); you may not use this file
|
||||
* except in compliance with the License. You may obtain a copy of
|
||||
* the License at http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS
|
||||
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
||||
* implied. See the License for the specific language governing
|
||||
* rights and limitations under the License.
|
||||
*
|
||||
* The Original Code is the Netscape security libraries.
|
||||
*
|
||||
* The Initial Developer of the Original Code is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 2000 Netscape Communications Corporation. All
|
||||
* Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the
|
||||
* terms of the GNU General Public License Version 2 or later (the
|
||||
* "GPL"), in which case the provisions of the GPL are applicable
|
||||
* instead of those above. If you wish to allow use of your
|
||||
* version of this file only under the terms of the GPL and not to
|
||||
* allow others to use your version of this file under the MPL,
|
||||
* indicate your decision by deleting the provisions above and
|
||||
* replace them with the notice and other provisions required by
|
||||
* the GPL. If you do not delete the provisions above, a recipient
|
||||
* may use your version of this file under either the MPL or the
|
||||
* GPL.
|
||||
* $Id: mpi_mips.s,v 1.2 2000-08-31 02:40:32 nelsonb%netscape.com Exp $
|
||||
*/
|
||||
#include <regdef.h>
|
||||
.set noreorder
|
||||
.set noat
|
||||
|
||||
.section .text, 1, 0x00000006, 4, 4
|
||||
.text:
|
||||
.section .text
|
||||
|
||||
# s_mpv_mul_d_add: c += a * b, where a is a_len 32-bit digits and b is one
# digit.  The final carry is stored (not added) into the digit just above the
# last one updated.  Nothing is written when a_len is zero.
# Arguments are read from a0 = a, a1 = a_len, a2 = b, a3 = c.
# The C statements in the '#' comments are the reference logic; the
# instructions are interleaved so that each dmultu overlaps other work.
# This file is assembled with .set noreorder, so the instruction that
# follows every branch or jump is its delay slot and always executes.
.ent s_mpv_mul_d_add
|
||||
.globl s_mpv_mul_d_add
|
||||
|
|
||||
s_mpv_mul_d_add:
|
||||
#/* c += a * b */
|
||||
#void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
|
||||
# mp_digit *c)
|
||||
#{
|
||||
# mp_digit a0, a1; regs a4, a5
|
||||
# mp_digit c0, c1; regs a6, a7
|
||||
# mp_digit cy = 0; reg t2
|
||||
# mp_word w0, w1; regs t0, t1
|
||||
#
|
||||
# if (a_len) {
|
||||
beq a1,zero,.L.1
|
||||
move t2,zero # cy = 0
|
||||
dsll32 a2,a2,0 # "b" is sometimes negative (?!?!)
|
||||
dsrl32 a2,a2,0 # This clears the upper 32 bits.
|
||||
# a0 = a[0];
|
||||
lwu a4,0(a0)
|
||||
# w0 = ((mp_word)b * a0);
|
||||
dmultu a2,a4
|
||||
# if (--a_len) {
|
||||
addiu a1,a1,-1
|
||||
beq a1,zero,.L.2
|
||||
# while (a_len >= 2) {
|
||||
sltiu t3,a1,2
|
||||
bne t3,zero,.L.3
|
||||
# a1 = a[1];
|
||||
lwu a5,4(a0)
|
||||
.L.4:
|
||||
# a_len -= 2;
|
||||
addiu a1,a1,-2
|
||||
# c0 = c[0];
|
||||
lwu a6,0(a3)
|
||||
# w0 += cy;
|
||||
mflo t0
|
||||
daddu t0,t0,t2
|
||||
# w0 += c0;
|
||||
daddu t0,t0,a6
|
||||
# w1 = (mp_word)b * a1;
|
||||
dmultu a2,a5 #
|
||||
# cy = CARRYOUT(w0);
|
||||
dsrl32 t2,t0,0
|
||||
# c[0] = ACCUM(w0);
|
||||
sw t0,0(a3)
|
||||
# a0 = a[2];
|
||||
lwu a4,8(a0)
|
||||
# a += 2;
|
||||
addiu a0,a0,8
|
||||
# c1 = c[1];
|
||||
lwu a7,4(a3)
|
||||
# w1 += cy;
|
||||
mflo t1
|
||||
daddu t1,t1,t2
|
||||
# w1 += c1;
|
||||
daddu t1,t1,a7
|
||||
# w0 = (mp_word)b * a0;
|
||||
dmultu a2,a4 #
|
||||
# cy = CARRYOUT(w1);
|
||||
dsrl32 t2,t1,0
|
||||
# c[1] = ACCUM(w1);
|
||||
sw t1,4(a3)
|
||||
# c += 2;
|
||||
addiu a3,a3,8
|
||||
sltiu t3,a1,2
|
||||
beq t3,zero,.L.4
|
||||
# a1 = a[1];
|
||||
lwu a5,4(a0)
|
||||
# }
|
||||
.L.3:
|
||||
# c0 = c[0];
|
||||
lwu a6,0(a3)
|
||||
# w0 += cy;
|
||||
# if (a_len) {
|
||||
mflo t0
|
||||
beq a1,zero,.L.5
|
||||
daddu t0,t0,t2
|
||||
# w1 = (mp_word)b * a1;
|
||||
dmultu a2,a5
|
||||
# w0 += c0;
|
||||
daddu t0,t0,a6 #
|
||||
# cy = CARRYOUT(w0);
|
||||
dsrl32 t2,t0,0
|
||||
# c[0] = ACCUM(w0);
|
||||
sw t0,0(a3)
|
||||
# c1 = c[1];
|
||||
lwu a7,4(a3)
|
||||
# w1 += cy;
|
||||
mflo t1
|
||||
daddu t1,t1,t2
|
||||
# w1 += c1;
|
||||
daddu t1,t1,a7
|
||||
# c[1] = ACCUM(w1);
|
||||
sw t1,4(a3)
|
||||
# cy = CARRYOUT(w1);
|
||||
dsrl32 t2,t1,0
|
||||
# c += 1;
|
||||
b .L.6
|
||||
addiu a3,a3,4
|
||||
# } else {
|
||||
.L.5:
|
||||
# w0 += c0;
|
||||
daddu t0,t0,a6
|
||||
# c[0] = ACCUM(w0);
|
||||
sw t0,0(a3)
|
||||
# cy = CARRYOUT(w0);
|
||||
b .L.6
|
||||
dsrl32 t2,t0,0
|
||||
# }
|
||||
# } else {
|
||||
.L.2:
|
||||
# c0 = c[0];
|
||||
lwu a6,0(a3)
|
||||
# w0 += c0;
|
||||
mflo t0
|
||||
daddu t0,t0,a6
|
||||
# c[0] = ACCUM(w0);
|
||||
sw t0,0(a3)
|
||||
# cy = CARRYOUT(w0);
|
||||
dsrl32 t2,t0,0
|
||||
# }
|
||||
.L.6:
|
||||
# c[1] = cy;
|
||||
jr ra
|
||||
sw t2,4(a3)
|
||||
# }
|
||||
.L.1:
|
||||
jr ra
|
||||
nop
|
||||
#}
|
||||
#
|
||||
.end s_mpv_mul_d_add
|
||||
|
||||
# s_mpv_mul_d_add_prop: c += a * b, where a is a_len 32-bit digits and b is
# one digit.  After the multiply-accumulate pass, a non-zero carry is added
# into the following digits of c until it dies out.  Nothing is written when
# a_len is zero.
# Arguments are read from a0 = a, a1 = a_len, a2 = b, a3 = c.
# The C statements in the '#' comments are the reference logic.
# This file is assembled with .set noreorder, so the instruction that
# follows every branch or jump is its delay slot and always executes.
.ent s_mpv_mul_d_add_prop
|
||||
.globl s_mpv_mul_d_add_prop
|
||||
|
|
||||
s_mpv_mul_d_add_prop:
|
||||
#/* c += a * b */
|
||||
#void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b,
|
||||
# mp_digit *c)
|
||||
#{
|
||||
# mp_digit a0, a1; regs a4, a5
|
||||
# mp_digit c0, c1; regs a6, a7
|
||||
# mp_digit cy = 0; reg t2
|
||||
# mp_word w0, w1; regs t0, t1
|
||||
#
|
||||
# if (a_len) {
|
||||
beq a1,zero,.M.1
|
||||
move t2,zero # cy = 0
|
||||
dsll32 a2,a2,0 # "b" is sometimes negative (?!?!)
|
||||
dsrl32 a2,a2,0 # This clears the upper 32 bits.
|
||||
# a0 = a[0];
|
||||
lwu a4,0(a0)
|
||||
# w0 = ((mp_word)b * a0);
|
||||
dmultu a2,a4
|
||||
# if (--a_len) {
|
||||
addiu a1,a1,-1
|
||||
beq a1,zero,.M.2
|
||||
# while (a_len >= 2) {
|
||||
sltiu t3,a1,2
|
||||
bne t3,zero,.M.3
|
||||
# a1 = a[1];
|
||||
lwu a5,4(a0)
|
||||
.M.4:
|
||||
# a_len -= 2;
|
||||
addiu a1,a1,-2
|
||||
# c0 = c[0];
|
||||
lwu a6,0(a3)
|
||||
# w0 += cy;
|
||||
mflo t0
|
||||
daddu t0,t0,t2
|
||||
# w0 += c0;
|
||||
daddu t0,t0,a6
|
||||
# w1 = (mp_word)b * a1;
|
||||
dmultu a2,a5 #
|
||||
# cy = CARRYOUT(w0);
|
||||
dsrl32 t2,t0,0
|
||||
# c[0] = ACCUM(w0);
|
||||
sw t0,0(a3)
|
||||
# a0 = a[2];
|
||||
lwu a4,8(a0)
|
||||
# a += 2;
|
||||
addiu a0,a0,8
|
||||
# c1 = c[1];
|
||||
lwu a7,4(a3)
|
||||
# w1 += cy;
|
||||
mflo t1
|
||||
daddu t1,t1,t2
|
||||
# w1 += c1;
|
||||
daddu t1,t1,a7
|
||||
# w0 = (mp_word)b * a0;
|
||||
dmultu a2,a4 #
|
||||
# cy = CARRYOUT(w1);
|
||||
dsrl32 t2,t1,0
|
||||
# c[1] = ACCUM(w1);
|
||||
sw t1,4(a3)
|
||||
# c += 2;
|
||||
addiu a3,a3,8
|
||||
sltiu t3,a1,2
|
||||
beq t3,zero,.M.4
|
||||
# a1 = a[1];
|
||||
lwu a5,4(a0)
|
||||
# }
|
||||
.M.3:
|
||||
# c0 = c[0];
|
||||
lwu a6,0(a3)
|
||||
# w0 += cy;
|
||||
# if (a_len) {
|
||||
mflo t0
|
||||
beq a1,zero,.M.5
|
||||
daddu t0,t0,t2
|
||||
# w1 = (mp_word)b * a1;
|
||||
dmultu a2,a5
|
||||
# w0 += c0;
|
||||
daddu t0,t0,a6 #
|
||||
# cy = CARRYOUT(w0);
|
||||
dsrl32 t2,t0,0
|
||||
# c[0] = ACCUM(w0);
|
||||
sw t0,0(a3)
|
||||
# c1 = c[1];
|
||||
lwu a7,4(a3)
|
||||
# w1 += cy;
|
||||
mflo t1
|
||||
daddu t1,t1,t2
|
||||
# w1 += c1;
|
||||
daddu t1,t1,a7
|
||||
# c[1] = ACCUM(w1);
|
||||
sw t1,4(a3)
|
||||
# cy = CARRYOUT(w1);
|
||||
dsrl32 t2,t1,0
|
||||
# c += 2;   (both c[0] and c[1] were written; the carry loop starts above them)
|
||||
b .M.6
|
||||
addiu a3,a3,8
|
||||
# } else {
|
||||
.M.5:
|
||||
# w0 += c0;
|
||||
daddu t0,t0,a6
|
||||
# c[0] = ACCUM(w0);
|
||||
sw t0,0(a3)
|
||||
# cy = CARRYOUT(w0);
|
||||
dsrl32 t2,t0,0
|
||||
b .M.6
|
||||
addiu a3,a3,4
|
||||
# }
|
||||
# } else {
|
||||
.M.2:
|
||||
# c0 = c[0];
|
||||
lwu a6,0(a3)
|
||||
# w0 += c0;
|
||||
mflo t0
|
||||
daddu t0,t0,a6
|
||||
# c[0] = ACCUM(w0);
|
||||
sw t0,0(a3)
|
||||
# cy = CARRYOUT(w0);
|
||||
dsrl32 t2,t0,0
|
||||
addiu a3,a3,4
|
||||
# }
|
||||
.M.6:
|
||||
|
|
||||
# while (cy) {
|
||||
beq t2,zero,.M.1
|
||||
nop
|
||||
.M.7:
|
||||
# mp_word w = (mp_word)*c + cy;
|
||||
lwu a6,0(a3)
|
||||
daddu t2,t2,a6
|
||||
# *c++ = ACCUM(w);
|
||||
sw t2,0(a3)
|
||||
# cy = CARRYOUT(w);
|
||||
dsrl32 t2,t2,0
|
||||
bne t2,zero,.M.7
|
||||
addiu a3,a3,4
|
||||
|
|
||||
# }
|
||||
.M.1:
|
||||
jr ra
|
||||
nop
|
||||
#}
|
||||
#
|
||||
.end s_mpv_mul_d_add_prop
|
||||
|
||||
# s_mpv_mul_d: c = a * b, where a is a_len 32-bit digits and b is one digit.
# a_len product digits are stored to c, followed by the final carry digit.
# When a_len is zero the routine returns without storing anything to c.
# Arguments are read from a0 = a, a1 = a_len, a2 = b, a3 = c.
# The C statements in the '#' comments are the reference logic.
# This file is assembled with .set noreorder, so the instruction that
# follows every branch or jump is its delay slot and always executes.
.ent s_mpv_mul_d
|
||||
.globl s_mpv_mul_d
|
||||
|
|
||||
s_mpv_mul_d:
|
||||
#/* c = a * b */
|
||||
#void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b,
|
||||
# mp_digit *c)
|
||||
#{
|
||||
# mp_digit a0, a1; regs a4, a5
|
||||
# mp_digit cy = 0; reg t2
|
||||
# mp_word w0, w1; regs t0, t1
|
||||
#
|
||||
# if (a_len) {
|
||||
beq a1,zero,.N.1
|
||||
move t2,zero # cy = 0
|
||||
dsll32 a2,a2,0 # "b" is sometimes negative (?!?!)
|
||||
dsrl32 a2,a2,0 # This clears the upper 32 bits.
|
||||
# a0 = a[0];
|
||||
lwu a4,0(a0)
|
||||
# w0 = ((mp_word)b * a0);
|
||||
dmultu a2,a4
|
||||
# if (--a_len) {
|
||||
addiu a1,a1,-1
|
||||
beq a1,zero,.N.2
|
||||
# while (a_len >= 2) {
|
||||
sltiu t3,a1,2
|
||||
bne t3,zero,.N.3
|
||||
# a1 = a[1];
|
||||
lwu a5,4(a0)
|
||||
.N.4:
|
||||
# a_len -= 2;
|
||||
addiu a1,a1,-2
|
||||
# w0 += cy;
|
||||
mflo t0
|
||||
daddu t0,t0,t2
|
||||
# cy = CARRYOUT(w0);
|
||||
dsrl32 t2,t0,0
|
||||
# w1 = (mp_word)b * a1;
|
||||
dmultu a2,a5
|
||||
# c[0] = ACCUM(w0);
|
||||
sw t0,0(a3)
|
||||
# a0 = a[2];
|
||||
lwu a4,8(a0)
|
||||
# a += 2;
|
||||
addiu a0,a0,8
|
||||
# w1 += cy;
|
||||
mflo t1
|
||||
daddu t1,t1,t2
|
||||
# cy = CARRYOUT(w1);
|
||||
dsrl32 t2,t1,0
|
||||
# w0 = (mp_word)b * a0;
|
||||
dmultu a2,a4
|
||||
# c[1] = ACCUM(w1);
|
||||
sw t1,4(a3)
|
||||
# c += 2;
|
||||
addiu a3,a3,8
|
||||
sltiu t3,a1,2
|
||||
beq t3,zero,.N.4
|
||||
# a1 = a[1];
|
||||
lwu a5,4(a0)
|
||||
# }
|
||||
.N.3:
|
||||
# w0 += cy;
|
||||
# if (a_len) {
|
||||
mflo t0
|
||||
beq a1,zero,.N.5
|
||||
daddu t0,t0,t2
|
||||
# w1 = (mp_word)b * a1;
|
||||
dmultu a2,a5 #
|
||||
# cy = CARRYOUT(w0);
|
||||
dsrl32 t2,t0,0
|
||||
# c[0] = ACCUM(w0);
|
||||
sw t0,0(a3)
|
||||
# w1 += cy;
|
||||
mflo t1
|
||||
daddu t1,t1,t2
|
||||
# c[1] = ACCUM(w1);
|
||||
sw t1,4(a3)
|
||||
# cy = CARRYOUT(w1);
|
||||
dsrl32 t2,t1,0
|
||||
# c += 1;
|
||||
b .N.6
|
||||
addiu a3,a3,4
|
||||
# } else {
|
||||
.N.5:
|
||||
# c[0] = ACCUM(w0);
|
||||
sw t0,0(a3)
|
||||
# cy = CARRYOUT(w0);
|
||||
b .N.6
|
||||
dsrl32 t2,t0,0
|
||||
# }
|
||||
# } else {
|
||||
.N.2:
|
||||
mflo t0
|
||||
# c[0] = ACCUM(w0);
|
||||
sw t0,0(a3)
|
||||
# cy = CARRYOUT(w0);
|
||||
dsrl32 t2,t0,0
|
||||
# }
|
||||
.N.6:
|
||||
# c[1] = cy;
|
||||
jr ra
|
||||
sw t2,4(a3)
|
||||
# }
|
||||
.N.1:
|
||||
jr ra
|
||||
nop
|
||||
#}
|
||||
#
|
||||
.end s_mpv_mul_d
|
||||
|
||||
|
||||
# s_mpv_sqr_add_prop: for each 32-bit digit a_i of a, adds the 64-bit value
# a_i * a_i plus the running carry to the next two 32-bit words of sqrs.
# The first digit is loaded and a_len decremented before a_len is tested, so
# there is no separate path for a_len == 0 -- presumably callers never pass
# zero; confirm.
# This file is assembled with .set noreorder, so the instruction that
# follows every branch or jump is its delay slot and always executes.
.ent s_mpv_sqr_add_prop
|
||||
.globl s_mpv_sqr_add_prop
|
||||
#void s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs);
|
||||
# registers
|
||||
# a0 *a
|
||||
# a1 a_len
|
||||
# a2 *sqr
|
||||
# a3 digit from *a, a_i
|
||||
# a4 square of digit from a
|
||||
# a5,a6 next 2 digits in sqr
|
||||
# a7,t0 carry
|
||||
s_mpv_sqr_add_prop:
|
||||
move a7,zero
|
||||
move t0,zero
|
||||
lwu a3,0(a0)
|
||||
addiu a1,a1,-1 # --a_len
|
||||
dmultu a3,a3
|
||||
beq a1,zero,.P.3 # jump if we've already done the only sqr
|
||||
addiu a0,a0,4 # ++a
|
||||
.P.2:
|
||||
lwu a5,0(a2)
|
||||
lwu a6,4(a2)
|
||||
addiu a2,a2,8 # sqrs += 2;
|
||||
dsll32 a6,a6,0
|
||||
daddu a5,a5,a6 # a5 = the two sqrs words combined into one 64-bit value
|
||||
lwu a3,0(a0)
|
||||
addiu a0,a0,4 # ++a
|
||||
mflo a4
|
||||
daddu a6,a5,a4
|
||||
sltu a7,a6,a5 # a7 = a6 < a5 detect overflow
|
||||
dmultu a3,a3
|
||||
daddu a4,a6,t0
|
||||
sltu t0,a4,a6
|
||||
add t0,t0,a7
|
||||
sw a4,-8(a2)
|
||||
addiu a1,a1,-1 # --a_len
|
||||
dsrl32 a4,a4,0
|
||||
bne a1,zero,.P.2 # loop if a_len > 0
|
||||
sw a4,-4(a2)
|
||||
.P.3:
|
||||
lwu a5,0(a2)
|
||||
lwu a6,4(a2)
|
||||
addiu a2,a2,8 # sqrs += 2;
|
||||
dsll32 a6,a6,0
|
||||
daddu a5,a5,a6 # a5 = the two sqrs words combined into one 64-bit value
|
||||
mflo a4
|
||||
daddu a6,a5,a4
|
||||
sltu a7,a6,a5 # a7 = a6 < a5 detect overflow
|
||||
daddu a4,a6,t0
|
||||
sltu t0,a4,a6
|
||||
add t0,t0,a7
|
||||
sw a4,-8(a2)
|
||||
beq t0,zero,.P.9 # jump if no carry
|
||||
dsrl32 a4,a4,0
|
||||
# NOTE(review): in the loop below the sum a6 = *sqrs + carry is never stored
# and a4 is never updated, so the store at .P.9 writes the old a4 over the
# next digit.  sltu also compares full 64-bit registers, where a 32-bit
# value loaded by lwu plus a small carry cannot wrap.  This looks like the
# final carry is not actually added -- verify on real hardware or against
# the C reference.
.P.8:
|
||||
sw a4,-4(a2)
|
||||
/* propagate final carry */
|
||||
lwu a5,0(a2)
|
||||
daddu a6,a5,t0
|
||||
sltu t0,a6,a5
|
||||
bne t0,zero,.P.8 # loop if carry persists
|
||||
addiu a2,a2,4 # sqrs++
|
||||
.P.9:
|
||||
jr ra
|
||||
sw a4,-4(a2)
|
||||
|
|
||||
.end s_mpv_sqr_add_prop
|
||||
@@ -1,361 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is the Netscape security libraries.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* Netscape Communications Corporation.
|
||||
* Portions created by the Initial Developer are Copyright (C) 2000
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
/* $Id: mpi_sparc.c,v 1.6 2004-04-27 23:04:36 gerv%gerv.net Exp $ */
|
||||
|
||||
/* Multiplication performance enhancements for sparc v8+vis CPUs. */
|
||||
|
||||
#include "mpi-priv.h"
|
||||
#include <stddef.h>
|
||||
#include <sys/systeminfo.h>
|
||||
#include <strings.h>
|
||||
|
||||
/* In the functions below, */
|
||||
/* vector y must be 8-byte aligned, and n must be even */
|
||||
/* returns carry out of high order word of result */
|
||||
/* maximum n is 256 */
|
||||
|
||||
/* vector x += vector y * scalar a; where y is of length n words. */
/* Defined outside this file; the returned digit is the carry out. */
|
||||
extern mp_digit mul_add_inp(mp_digit *x, const mp_digit *y, int n, mp_digit a);
|
||||
|
|
||||
/* vector z = vector x + vector y * scalar a; where y is of length n words. */
/* Defined outside this file; not called anywhere in this file. */
|
||||
extern mp_digit mul_add(mp_digit *z, const mp_digit *x, const mp_digit *y,
|
||||
int n, mp_digit a);
|
||||
|
||||
/* v8 versions of these functions run on any Sparc v8 CPU. */
|
||||
|
||||
/*
 * This trick works on Sparc V8 CPUs with the Workshop compilers.
 *
 * Computes the double-width product a * b and splits it into its high
 * digit (Phi) and low digit (Plo).
 *
 * Every argument is parenthesized so that expression arguments such as
 * x + y are multiplied as a whole; the cast previously bound to only the
 * first operand of a.  The body is wrapped in do { } while (0) so the macro
 * behaves as a single statement, including inside an unbraced if/else.
 * Invoke it with a trailing semicolon.
 */
#define MP_MUL_DxD(a, b, Phi, Plo)                                    \
    do {                                                              \
        unsigned long long product = (unsigned long long)(a) * (b);   \
        (Plo) = (mp_digit)product;                                    \
        (Phi) = (mp_digit)(product >> MP_DIGIT_BIT);                  \
    } while (0)
|
||||
|
||||
/* c = a * b */
|
||||
static void
|
||||
v8_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
|
||||
{
|
||||
#if !defined(MP_NO_MP_WORD)
|
||||
mp_digit d = 0;
|
||||
|
||||
/* Inner product: Digits of a */
|
||||
while (a_len--) {
|
||||
mp_word w = ((mp_word)b * *a++) + d;
|
||||
*c++ = ACCUM(w);
|
||||
d = CARRYOUT(w);
|
||||
}
|
||||
*c = d;
|
||||
#else
|
||||
mp_digit carry = 0;
|
||||
while (a_len--) {
|
||||
mp_digit a_i = *a++;
|
||||
mp_digit a0b0, a1b1;
|
||||
|
||||
MP_MUL_DxD(a_i, b, a1b1, a0b0);
|
||||
|
||||
a0b0 += carry;
|
||||
if (a0b0 < carry)
|
||||
++a1b1;
|
||||
*c++ = a0b0;
|
||||
carry = a1b1;
|
||||
}
|
||||
*c = carry;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* c += a * b */
|
||||
static void
|
||||
v8_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
|
||||
{
|
||||
#if !defined(MP_NO_MP_WORD)
|
||||
mp_digit d = 0;
|
||||
|
||||
/* Inner product: Digits of a */
|
||||
while (a_len--) {
|
||||
mp_word w = ((mp_word)b * *a++) + *c + d;
|
||||
*c++ = ACCUM(w);
|
||||
d = CARRYOUT(w);
|
||||
}
|
||||
*c = d;
|
||||
#else
|
||||
mp_digit carry = 0;
|
||||
while (a_len--) {
|
||||
mp_digit a_i = *a++;
|
||||
mp_digit a0b0, a1b1;
|
||||
|
||||
MP_MUL_DxD(a_i, b, a1b1, a0b0);
|
||||
|
||||
a0b0 += carry;
|
||||
if (a0b0 < carry)
|
||||
++a1b1;
|
||||
a0b0 += a_i = *c;
|
||||
if (a0b0 < a_i)
|
||||
++a1b1;
|
||||
*c++ = a0b0;
|
||||
carry = a1b1;
|
||||
}
|
||||
*c = carry;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Presently, this is only used by the Montgomery arithmetic code. */
|
||||
/* c += a * b */
|
||||
static void
|
||||
v8_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
|
||||
{
|
||||
#if !defined(MP_NO_MP_WORD)
|
||||
mp_digit d = 0;
|
||||
|
||||
/* Inner product: Digits of a */
|
||||
while (a_len--) {
|
||||
mp_word w = ((mp_word)b * *a++) + *c + d;
|
||||
*c++ = ACCUM(w);
|
||||
d = CARRYOUT(w);
|
||||
}
|
||||
|
||||
while (d) {
|
||||
mp_word w = (mp_word)*c + d;
|
||||
*c++ = ACCUM(w);
|
||||
d = CARRYOUT(w);
|
||||
}
|
||||
#else
|
||||
mp_digit carry = 0;
|
||||
while (a_len--) {
|
||||
mp_digit a_i = *a++;
|
||||
mp_digit a0b0, a1b1;
|
||||
|
||||
MP_MUL_DxD(a_i, b, a1b1, a0b0);
|
||||
|
||||
a0b0 += carry;
|
||||
if (a0b0 < carry)
|
||||
++a1b1;
|
||||
|
||||
a0b0 += a_i = *c;
|
||||
if (a0b0 < a_i)
|
||||
++a1b1;
|
||||
|
||||
*c++ = a0b0;
|
||||
carry = a1b1;
|
||||
}
|
||||
while (carry) {
|
||||
mp_digit c_i = *c;
|
||||
carry += c_i;
|
||||
*c++ = carry;
|
||||
carry = carry < c_i;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* vis versions of these functions run only on v8+vis or v9+vis CPUs. */
|
||||
|
||||
/* c = a * b */
|
||||
static void
|
||||
vis_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
|
||||
{
|
||||
mp_digit d;
|
||||
mp_digit x[258];
|
||||
if (a_len <= 256) {
|
||||
if (a == c || ((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
|
||||
mp_digit * px;
|
||||
px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x;
|
||||
memcpy(px, a, a_len * sizeof(*a));
|
||||
a = px;
|
||||
if (a_len & 1) {
|
||||
px[a_len] = 0;
|
||||
}
|
||||
}
|
||||
s_mp_setz(c, a_len + 1);
|
||||
d = mul_add_inp(c, a, a_len, b);
|
||||
c[a_len] = d;
|
||||
} else {
|
||||
v8_mpv_mul_d(a, a_len, b, c);
|
||||
}
|
||||
}
|
||||
|
||||
/* c += a * b, where a is a_len words long. */
|
||||
static void
|
||||
vis_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
|
||||
{
|
||||
mp_digit d;
|
||||
mp_digit x[258];
|
||||
if (a_len <= 256) {
|
||||
if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
|
||||
mp_digit * px;
|
||||
px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x;
|
||||
memcpy(px, a, a_len * sizeof(*a));
|
||||
a = px;
|
||||
if (a_len & 1) {
|
||||
px[a_len] = 0;
|
||||
}
|
||||
}
|
||||
d = mul_add_inp(c, a, a_len, b);
|
||||
c[a_len] = d;
|
||||
} else {
|
||||
v8_mpv_mul_d_add(a, a_len, b, c);
|
||||
}
|
||||
}
|
||||
|
||||
/* c += a * b, where a is y words long. */
|
||||
static void
|
||||
vis_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b,
|
||||
mp_digit *c)
|
||||
{
|
||||
mp_digit d;
|
||||
mp_digit x[258];
|
||||
if (a_len <= 256) {
|
||||
if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
|
||||
mp_digit * px;
|
||||
px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x;
|
||||
memcpy(px, a, a_len * sizeof(*a));
|
||||
a = px;
|
||||
if (a_len & 1) {
|
||||
px[a_len] = 0;
|
||||
}
|
||||
}
|
||||
d = mul_add_inp(c, a, a_len, b);
|
||||
if (d) {
|
||||
c += a_len;
|
||||
do {
|
||||
mp_digit sum = d + *c;
|
||||
*c++ = sum;
|
||||
d = sum < d;
|
||||
} while (d);
|
||||
}
|
||||
} else {
|
||||
v8_mpv_mul_d_add_prop(a, a_len, b, c);
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(SOLARIS2_5)
|
||||
static int
|
||||
isSparcV8PlusVis(void)
|
||||
{
|
||||
long buflen;
|
||||
int rv = 0; /* false */
|
||||
char buf[256];
|
||||
buflen = sysinfo(SI_MACHINE, buf, sizeof buf);
|
||||
if (buflen > 0) {
|
||||
rv = (!strcmp(buf, "sun4u") || !strcmp(buf, "sun4u1"));
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
#else /* SunOS2.6or higher has SI_ISALIST */
|
||||
|
||||
static int
|
||||
isSparcV8PlusVis(void)
|
||||
{
|
||||
long buflen;
|
||||
int rv = 0; /* false */
|
||||
char buf[256];
|
||||
buflen = sysinfo(SI_ISALIST, buf, sizeof buf);
|
||||
if (buflen > 0) {
|
||||
#if defined(MP_USE_LONG_DIGIT)
|
||||
char * found = strstr(buf, "sparcv9+vis");
|
||||
#else
|
||||
char * found = strstr(buf, "sparcv8plus+vis");
|
||||
#endif
|
||||
rv = (found != 0);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Common signature shared by every vector-times-digit routine in this file. */
typedef void MPVmpy(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c);
|
||||
|
|
||||
/* forward static function declarations */
/* The sp_ routines call initPtrs() and then forward through the pointers below. */
|
||||
static MPVmpy sp_mpv_mul_d;
|
||||
static MPVmpy sp_mpv_mul_d_add;
|
||||
static MPVmpy sp_mpv_mul_d_add_prop;
|
||||
|
|
||||
/*
 * Dispatch pointers used by the exported s_mpv_ functions.  They start out
 * aimed at the sp_ routines, so the first call through any of them runs
 * initPtrs(), which re-aims all three at the vis_ or v8_ implementations.
 */
static MPVmpy *p_mpv_mul_d = &sp_mpv_mul_d;
|
||||
static MPVmpy *p_mpv_mul_d_add = &sp_mpv_mul_d_add;
|
||||
static MPVmpy *p_mpv_mul_d_add_prop = &sp_mpv_mul_d_add_prop;
|
||||
|
||||
static void
|
||||
initPtrs(void)
|
||||
{
|
||||
if (isSparcV8PlusVis()) {
|
||||
p_mpv_mul_d = &vis_mpv_mul_d;
|
||||
p_mpv_mul_d_add = &vis_mpv_mul_d_add;
|
||||
p_mpv_mul_d_add_prop = &vis_mpv_mul_d_add_prop;
|
||||
} else {
|
||||
p_mpv_mul_d = &v8_mpv_mul_d;
|
||||
p_mpv_mul_d_add = &v8_mpv_mul_d_add;
|
||||
p_mpv_mul_d_add_prop = &v8_mpv_mul_d_add_prop;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
sp_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
|
||||
{
|
||||
initPtrs();
|
||||
(* p_mpv_mul_d)(a, a_len, b, c);
|
||||
}
|
||||
|
||||
static void
|
||||
sp_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
|
||||
{
|
||||
initPtrs();
|
||||
(* p_mpv_mul_d_add)(a, a_len, b, c);
|
||||
}
|
||||
|
||||
static void
|
||||
sp_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
|
||||
{
|
||||
initPtrs();
|
||||
(* p_mpv_mul_d_add_prop)(a, a_len, b, c);
|
||||
}
|
||||
|
||||
|
||||
/* This is the external interface */
|
||||
|
||||
void
|
||||
s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
|
||||
{
|
||||
(* p_mpv_mul_d)(a, a_len, b, c);
|
||||
}
|
||||
|
||||
void
|
||||
s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
|
||||
{
|
||||
(* p_mpv_mul_d_add)(a, a_len, b, c);
|
||||
}
|
||||
|
||||
void
|
||||
s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
|
||||
{
|
||||
(* p_mpv_mul_d_add_prop)(a, a_len, b, c);
|
||||
}
|
||||
|
||||
|
||||
@@ -1,356 +0,0 @@
|
||||
;
|
||||
; mpi_x86.asm - assembly language implementation of s_mpv_ functions.
|
||||
;
|
||||
; ***** BEGIN LICENSE BLOCK *****
|
||||
; Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
;
|
||||
; The contents of this file are subject to the Mozilla Public License Version
|
||||
; 1.1 (the "License"); you may not use this file except in compliance with
|
||||
; the License. You may obtain a copy of the License at
|
||||
; http://www.mozilla.org/MPL/
|
||||
;
|
||||
; Software distributed under the License is distributed on an "AS IS" basis,
|
||||
; WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
; for the specific language governing rights and limitations under the
|
||||
; License.
|
||||
;
|
||||
; The Original Code is the Netscape security libraries.
|
||||
;
|
||||
; The Initial Developer of the Original Code is
|
||||
; Netscape Communications Corporation.
|
||||
; Portions created by the Initial Developer are Copyright (C) 2000
|
||||
; the Initial Developer. All Rights Reserved.
|
||||
;
|
||||
; Contributor(s):
|
||||
;
|
||||
; Alternatively, the contents of this file may be used under the terms of
|
||||
; either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
; the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
; in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
; of those above. If you wish to allow use of your version of this file only
|
||||
; under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
; use your version of this file under the terms of the MPL, indicate your
|
||||
; decision by deleting the provisions above and replace them with the notice
|
||||
; and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
; the provisions above, a recipient may use your version of this file under
|
||||
; the terms of any one of the MPL, the GPL or the LGPL.
|
||||
;
|
||||
; ***** END LICENSE BLOCK *****
|
||||
|
||||
; $Id: mpi_x86.asm,v 1.2 2005-02-02 22:28:22 gerv%gerv.net Exp $
|
||||
|
||||
.386p
|
||||
.MODEL FLAT
|
||||
ASSUME CS: FLAT, DS: FLAT, SS: FLAT
|
||||
_TEXT SEGMENT
|
||||
|
||||
; _s_mpv_mul_d(a, a_len, b, c)
;
; Multiplies each of the a_len 32-bit words at a by the word b, storing the
; low word of every (product + running carry) at successive words of c and
; the final carry word at c[a_len].
;
; Stack frame once the prologue has run:
;   ebp - 40:       caller's ebx
;   ebp - 36:       caller's esi
;   ebp - 32:       caller's edi
;   ebp - 28 .. -4: reserved by "sub esp,28", never referenced
;   ebp +  0:       caller's ebp
;   ebp +  4:       return address
;   ebp +  8:       a argument
;   ebp + 12:       a_len argument
;   ebp + 16:       b argument
;   ebp + 20:       c argument
; Register roles:
;   eax: current word of a, then low half of the product
;   ebx: running carry
;   ecx: words remaining
;   edx: b, then high half of the product
;   esi: read cursor into a
;   edi: write cursor into c

        public  _s_mpv_mul_d
_s_mpv_mul_d    PROC NEAR
        push    ebp
        mov     ebp,esp
        sub     esp,28
        push    edi
        push    esi
        push    ebx
        xor     ebx,ebx                 ; carry = 0
        mov     ecx,[ebp+12]            ; ecx = a_len
        mov     edi,[ebp+20]            ; edi = c
        test    ecx,ecx
        jz      mul_d_done              ; nothing to multiply when a_len == 0
        mov     esi,[ebp+8]             ; esi = a
        cld                             ; string ops walk upward
mul_d_loop:
        lodsd                           ; eax = [esi]; esi += 4
        mov     edx,[ebp+16]            ; edx = b
        mul     edx                     ; edx:eax = a_i * b

        add     eax,ebx                 ; fold the incoming carry into edx:eax
        adc     edx,0
        mov     ebx,edx                 ; high half is the next carry

        stosd                           ; [edi] = eax; edi += 4
        dec     ecx                     ; one word fewer to go
        jnz     mul_d_loop
mul_d_done:
        mov     [edi],ebx               ; c[a_len] = final carry
        pop     ebx
        pop     esi
        pop     edi
        leave
        ret
        nop
_s_mpv_mul_d    ENDP
|
||||
|
||||
; _s_mpv_mul_d_add(a, a_len, b, c)
;
; For each of the a_len words at a: adds a_i * b plus the running carry to
; the word already stored at the matching position of c.  The final carry
; word is stored (not added) at c[a_len].
;
; Stack frame once the prologue has run:
;   ebp - 40:       caller's ebx
;   ebp - 36:       caller's esi
;   ebp - 32:       caller's edi
;   ebp - 28 .. -4: reserved by "sub esp,28", never referenced
;   ebp +  0:       caller's ebp
;   ebp +  4:       return address
;   ebp +  8:       a argument
;   ebp + 12:       a_len argument
;   ebp + 16:       b argument
;   ebp + 20:       c argument
; Register roles:
;   eax: current word of a, then low half of the sum
;   ebx: running carry (also scratch for the current word of c)
;   ecx: words remaining
;   edx: b, then high half of the sum
;   esi: read cursor into a
;   edi: read/write cursor into c

        public  _s_mpv_mul_d_add
_s_mpv_mul_d_add PROC NEAR
        push    ebp
        mov     ebp,esp
        sub     esp,28
        push    edi
        push    esi
        push    ebx
        xor     ebx,ebx                 ; carry = 0
        mov     ecx,[ebp+12]            ; ecx = a_len
        mov     edi,[ebp+20]            ; edi = c
        test    ecx,ecx
        jz      mul_d_add_done          ; nothing to multiply when a_len == 0
        mov     esi,[ebp+8]             ; esi = a
        cld                             ; string ops walk upward
mul_d_add_loop:
        lodsd                           ; eax = [esi]; esi += 4
        mov     edx,[ebp+16]            ; edx = b
        mul     edx                     ; edx:eax = a_i * b

        add     eax,ebx                 ; fold the incoming carry into edx:eax
        adc     edx,0
        mov     ebx,[edi]               ; fetch the word already in c
        add     eax,ebx                 ; accumulate it as well
        adc     edx,0
        mov     ebx,edx                 ; high half is the next carry

        stosd                           ; [edi] = eax; edi += 4
        dec     ecx                     ; one word fewer to go
        jnz     mul_d_add_loop
mul_d_add_done:
        mov     [edi],ebx               ; c[a_len] = final carry
        pop     ebx
        pop     esi
        pop     edi
        leave
        ret
        nop
_s_mpv_mul_d_add ENDP
|
||||
|
||||
; _s_mpv_mul_d_add_prop(a, a_len, b, c)
;
; For each of the a_len words at a: adds a_i * b plus the running carry to
; the word already stored at the matching position of c.  A non-zero final
; carry is then added into c[a_len] and rippled through the following
; words of c for as long as each addition carries out.
;
; Stack frame once the prologue has run:
;   ebp - 40:       caller's ebx
;   ebp - 36:       caller's esi
;   ebp - 32:       caller's edi
;   ebp - 28 .. -4: reserved by "sub esp,28", never referenced
;   ebp +  0:       caller's ebp
;   ebp +  4:       return address
;   ebp +  8:       a argument
;   ebp + 12:       a_len argument
;   ebp + 16:       b argument
;   ebp + 20:       c argument
; Register roles:
;   eax: current word of a, then low half of the sum
;   ebx: running carry (also scratch for the current word of c)
;   ecx: words remaining
;   edx: b, then high half of the sum
;   esi: read cursor into a
;   edi: read/write cursor into c

        public  _s_mpv_mul_d_add_prop
_s_mpv_mul_d_add_prop PROC NEAR
        push    ebp
        mov     ebp,esp
        sub     esp,28
        push    edi
        push    esi
        push    ebx
        xor     ebx,ebx                 ; carry = 0
        mov     ecx,[ebp+12]            ; ecx = a_len
        mov     edi,[ebp+20]            ; edi = c
        test    ecx,ecx
        jz      mul_d_add_prop_tail     ; skip the multiply when a_len == 0
        cld                             ; string ops walk upward
        mov     esi,[ebp+8]             ; esi = a
mul_d_add_prop_loop:
        lodsd                           ; eax = [esi]; esi += 4
        mov     edx,[ebp+16]            ; edx = b
        mul     edx                     ; edx:eax = a_i * b

        add     eax,ebx                 ; fold the incoming carry into edx:eax
        adc     edx,0
        mov     ebx,[edi]               ; fetch the word already in c
        add     eax,ebx                 ; accumulate it as well
        adc     edx,0
        mov     ebx,edx                 ; high half is the next carry

        stosd                           ; [edi] = eax; edi += 4
        dec     ecx                     ; one word fewer to go
        jnz     mul_d_add_prop_loop
mul_d_add_prop_tail:
        test    ebx,ebx                 ; any carry left over?
        jz      mul_d_add_prop_done
        mov     eax,[edi]               ; add it into the next word of c
        add     eax,ebx
        stosd                           ; [edi] = eax; edi += 4 (flags untouched)
        jnc     mul_d_add_prop_done
mul_d_add_prop_ripple:
        mov     eax,[edi]               ; mov/stosd leave CF alone, so the
        adc     eax,0                   ; carry-out of one word feeds the next
        stosd                           ; [edi] = eax; edi += 4
        jc      mul_d_add_prop_ripple
mul_d_add_prop_done:
        pop     ebx
        pop     esi
        pop     edi
        leave
        ret
        nop
_s_mpv_mul_d_add_prop ENDP
|
||||
|
||||
; _s_mpv_sqr_add_prop(pa, a_len, ps)
;
; For each of the a_len words at pa: squares the word and adds the
; double-word square, plus the running carry, to the next two words at ps.
; A non-zero final carry is then added into the following word of ps and
; rippled onward for as long as each addition carries out.
;
; Stack frame once the prologue has run:
;   ebp - 24:       caller's ebx
;   ebp - 20:       caller's esi
;   ebp - 16:       caller's edi
;   ebp - 12 .. -4: reserved by "sub esp,12", never referenced
;   ebp +  0:       caller's ebp
;   ebp +  4:       return address
;   ebp +  8:       pa argument
;   ebp + 12:       a_len argument
;   ebp + 16:       ps argument
; Register roles:
;   eax: current word of pa, then the result word being stored
;   ebx: running carry (also scratch for the current words of ps)
;   ecx: words remaining
;   edx: high half of the square
;   esi: read cursor into pa
;   edi: read/write cursor into ps

        public  _s_mpv_sqr_add_prop
_s_mpv_sqr_add_prop PROC NEAR
        push    ebp
        mov     ebp,esp
        sub     esp,12
        push    edi
        push    esi
        push    ebx
        xor     ebx,ebx                 ; carry = 0
        mov     ecx,[ebp+12]            ; ecx = a_len
        mov     edi,[ebp+16]            ; edi = ps
        test    ecx,ecx
        jz      sqr_add_prop_tail       ; skip the squaring when a_len == 0
        cld                             ; string ops walk upward
        mov     esi,[ebp+8]             ; esi = pa
sqr_add_prop_loop:
        lodsd                           ; eax = [esi]; esi += 4
        mul     eax                     ; edx:eax = a_i * a_i

        add     eax,ebx                 ; fold the incoming carry into edx:eax
        adc     edx,0
        mov     ebx,[edi]
        add     eax,ebx                 ; add the low word already in ps
        mov     ebx,[edi+4]             ; fetch the high word before edi moves
        stosd                           ; [edi] = eax; edi += 4 (CF preserved)
        adc     edx,ebx                 ; add the high word plus the low carry
        mov     ebx,0                   ; must be mov: xor would destroy CF
        mov     eax,edx
        adc     ebx,0                   ; ebx = carry out of the high word
        stosd                           ; [edi] = eax; edi += 4
        dec     ecx                     ; one word fewer to go
        jnz     sqr_add_prop_loop
sqr_add_prop_tail:
        test    ebx,ebx                 ; any carry left over?
        jz      sqr_add_prop_done
        mov     eax,[edi]               ; add it into the next word of ps
        add     eax,ebx
        stosd                           ; [edi] = eax; edi += 4 (flags untouched)
        jnc     sqr_add_prop_done
sqr_add_prop_ripple:
        mov     eax,[edi]               ; mov/stosd leave CF alone, so the
        adc     eax,0                   ; carry-out of one word feeds the next
        stosd                           ; [edi] = eax; edi += 4
        jc      sqr_add_prop_ripple
sqr_add_prop_done:
        pop     ebx
        pop     esi
        pop     edi
        leave
        ret
        nop
_s_mpv_sqr_add_prop ENDP
|
||||
|
||||
;
; Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
; so its high bit is 1.  This code is from NSPR.
;
; mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
;                        mp_digit *qp, mp_digit *rp)
;
; Stores the quotient through qp and the remainder through rp, and always
; returns zero.  The x86 DIV instruction raises a divide error when the
; quotient does not fit in 32 bits, so the caller must keep Nhi below
; divisor.
;
; Stack layout after "push ebx" (no frame pointer is set up):
;   esp +  0: caller's ebx
;   esp +  4: return address
;   esp +  8: Nhi argument
;   esp + 12: Nlo argument
;   esp + 16: divisor argument
;   esp + 20: qp argument
;   esp + 24: rp argument
; Register roles:
;   edx:eax: dividend Nhi:Nlo going in; remainder:quotient coming out
;   ebx:     divisor, then scratch for the qp and rp pointers
;
        public  _s_mpv_div_2dx1d
_s_mpv_div_2dx1d PROC NEAR
        push    ebx
        mov     edx,[esp+8]             ; edx = Nhi
        mov     eax,[esp+12]            ; eax = Nlo
        mov     ebx,[esp+16]            ; ebx = divisor
        div     ebx                     ; eax = quotient, edx = remainder
        mov     ebx,[esp+20]
        mov     [ebx],eax               ; *qp = quotient
        mov     ebx,[esp+24]
        mov     [ebx],edx               ; *rp = remainder
        xor     eax,eax                 ; return zero
        pop     ebx
        ret
        nop
_s_mpv_div_2dx1d ENDP
|
||||
|
||||
_TEXT ENDS
|
||||
END
|
||||
@@ -1,345 +0,0 @@
|
||||
#
|
||||
# The contents of this file are subject to the Mozilla Public
|
||||
# License Version 1.1 (the "License"); you may not use this file
|
||||
# except in compliance with the License. You may obtain a copy of
|
||||
# the License at http://www.mozilla.org/MPL/
|
||||
#
|
||||
# Software distributed under the License is distributed on an "AS
|
||||
# IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
||||
# implied. See the License for the specific language governing
|
||||
# rights and limitations under the License.
|
||||
#
|
||||
# The Original Code is the Netscape security libraries.
|
||||
#
|
||||
# The Initial Developer of the Original Code is Netscape
|
||||
# Communications Corporation. Portions created by Netscape are
|
||||
# Copyright (C) 2000 Netscape Communications Corporation. All
|
||||
# Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
#
|
||||
# Alternatively, the contents of this file may be used under the
|
||||
# terms of the GNU General Public License Version 2 or later (the
|
||||
# "GPL"), in which case the provisions of the GPL are applicable
|
||||
# instead of those above. If you wish to allow use of your
|
||||
# version of this file only under the terms of the GPL and not to
|
||||
# allow others to use your version of this file under the MPL,
|
||||
# indicate your decision by deleting the provisions above and
|
||||
# replace them with the notice and other provisions required by
|
||||
# the GPL. If you do not delete the provisions above, a recipient
|
||||
# may use your version of this file under either the MPL or the
|
||||
# GPL.
|
||||
# $Id: mpi_x86.s,v 1.4 2003-10-24 04:47:23 wchang0222%aol.com Exp $
|
||||
#
|
||||
|
||||
.text
|
||||
|
||||
# s_mpv_mul_d(a, a_len, b, c)
#
# Multiplies each of the a_len 32-bit words at a by the word b, storing the
# low word of every (product + running carry) at successive words of c and
# the final carry word at c[a_len].
#
# Stack frame once the prologue has run:
#   ebp - 40:       caller's ebx
#   ebp - 36:       caller's esi
#   ebp - 32:       caller's edi
#   ebp - 28 .. -4: reserved by "sub $28,%esp", never referenced
#   ebp +  0:       caller's ebp
#   ebp +  4:       return address
#   ebp +  8:       a argument
#   ebp + 12:       a_len argument
#   ebp + 16:       b argument
#   ebp + 20:       c argument
# Register roles:
#   eax: current word of a, then low half of the product
#   ebx: running carry
#   ecx: words remaining
#   edx: b, then high half of the product
#   esi: read cursor into a
#   edi: write cursor into c
.globl  s_mpv_mul_d
.type   s_mpv_mul_d,@function
s_mpv_mul_d:
        push    %ebp
        mov     %esp,%ebp
        sub     $28,%esp
        push    %edi
        push    %esi
        push    %ebx
        xor     %ebx,%ebx               # carry = 0
        mov     12(%ebp),%ecx           # ecx = a_len
        mov     20(%ebp),%edi           # edi = c
        test    %ecx,%ecx
        jz      .Lmul_d_done            # nothing to multiply when a_len == 0
        mov     8(%ebp),%esi            # esi = a
        cld                             # string ops walk upward
.Lmul_d_loop:
        lodsl                           # eax = [esi]; esi += 4
        mov     16(%ebp),%edx           # edx = b
        mull    %edx                    # edx:eax = a_i * b

        add     %ebx,%eax               # fold the incoming carry into edx:eax
        adc     $0,%edx
        mov     %edx,%ebx               # high half is the next carry

        stosl                           # [edi] = eax; edi += 4
        dec     %ecx                    # one word fewer to go
        jnz     .Lmul_d_loop
.Lmul_d_done:
        mov     %ebx,0(%edi)            # c[a_len] = final carry
        pop     %ebx
        pop     %esi
        pop     %edi
        leave
        ret
        nop
|
||||
|
||||
# s_mpv_mul_d_add(a, a_len, b, c)
#
# For each of the a_len words at a: adds a_i * b plus the running carry to
# the word already stored at the matching position of c.  The final carry
# word is stored (not added) at c[a_len].
#
# Stack frame once the prologue has run:
#   ebp - 40:       caller's ebx
#   ebp - 36:       caller's esi
#   ebp - 32:       caller's edi
#   ebp - 28 .. -4: reserved by "sub $28,%esp", never referenced
#   ebp +  0:       caller's ebp
#   ebp +  4:       return address
#   ebp +  8:       a argument
#   ebp + 12:       a_len argument
#   ebp + 16:       b argument
#   ebp + 20:       c argument
# Register roles:
#   eax: current word of a, then low half of the sum
#   ebx: running carry (also scratch for the current word of c)
#   ecx: words remaining
#   edx: b, then high half of the sum
#   esi: read cursor into a
#   edi: read/write cursor into c
.globl  s_mpv_mul_d_add
.type   s_mpv_mul_d_add,@function
s_mpv_mul_d_add:
        push    %ebp
        mov     %esp,%ebp
        sub     $28,%esp
        push    %edi
        push    %esi
        push    %ebx
        xor     %ebx,%ebx               # carry = 0
        mov     12(%ebp),%ecx           # ecx = a_len
        mov     20(%ebp),%edi           # edi = c
        test    %ecx,%ecx
        jz      .Lmul_d_add_done        # nothing to multiply when a_len == 0
        mov     8(%ebp),%esi            # esi = a
        cld                             # string ops walk upward
.Lmul_d_add_loop:
        lodsl                           # eax = [esi]; esi += 4
        mov     16(%ebp),%edx           # edx = b
        mull    %edx                    # edx:eax = a_i * b

        add     %ebx,%eax               # fold the incoming carry into edx:eax
        adc     $0,%edx
        mov     0(%edi),%ebx            # fetch the word already in c
        add     %ebx,%eax               # accumulate it as well
        adc     $0,%edx
        mov     %edx,%ebx               # high half is the next carry

        stosl                           # [edi] = eax; edi += 4
        dec     %ecx                    # one word fewer to go
        jnz     .Lmul_d_add_loop
.Lmul_d_add_done:
        mov     %ebx,0(%edi)            # c[a_len] = final carry
        pop     %ebx
        pop     %esi
        pop     %edi
        leave
        ret
        nop
|
||||
|
||||
# s_mpv_mul_d_add_prop(a, a_len, b, c)
#
# For each of the a_len words at a: adds a_i * b plus the running carry to
# the word already stored at the matching position of c.  A non-zero final
# carry is then added into c[a_len] and rippled through the following
# words of c for as long as each addition carries out.
#
# Stack frame once the prologue has run:
#   ebp - 40:       caller's ebx
#   ebp - 36:       caller's esi
#   ebp - 32:       caller's edi
#   ebp - 28 .. -4: reserved by "sub $28,%esp", never referenced
#   ebp +  0:       caller's ebp
#   ebp +  4:       return address
#   ebp +  8:       a argument
#   ebp + 12:       a_len argument
#   ebp + 16:       b argument
#   ebp + 20:       c argument
# Register roles:
#   eax: current word of a, then low half of the sum
#   ebx: running carry (also scratch for the current word of c)
#   ecx: words remaining
#   edx: b, then high half of the sum
#   esi: read cursor into a
#   edi: read/write cursor into c
.globl  s_mpv_mul_d_add_prop
.type   s_mpv_mul_d_add_prop,@function
s_mpv_mul_d_add_prop:
        push    %ebp
        mov     %esp,%ebp
        sub     $28,%esp
        push    %edi
        push    %esi
        push    %ebx
        xor     %ebx,%ebx               # carry = 0
        mov     12(%ebp),%ecx           # ecx = a_len
        mov     20(%ebp),%edi           # edi = c
        test    %ecx,%ecx
        jz      .Lmul_d_add_prop_tail   # skip the multiply when a_len == 0
        cld                             # string ops walk upward
        mov     8(%ebp),%esi            # esi = a
.Lmul_d_add_prop_loop:
        lodsl                           # eax = [esi]; esi += 4
        mov     16(%ebp),%edx           # edx = b
        mull    %edx                    # edx:eax = a_i * b

        add     %ebx,%eax               # fold the incoming carry into edx:eax
        adc     $0,%edx
        mov     0(%edi),%ebx            # fetch the word already in c
        add     %ebx,%eax               # accumulate it as well
        adc     $0,%edx
        mov     %edx,%ebx               # high half is the next carry

        stosl                           # [edi] = eax; edi += 4
        dec     %ecx                    # one word fewer to go
        jnz     .Lmul_d_add_prop_loop
.Lmul_d_add_prop_tail:
        test    %ebx,%ebx               # any carry left over?
        jz      .Lmul_d_add_prop_done
        mov     0(%edi),%eax            # add it into the next word of c
        add     %ebx,%eax
        stosl                           # [edi] = eax; edi += 4 (flags untouched)
        jnc     .Lmul_d_add_prop_done
.Lmul_d_add_prop_ripple:
        mov     0(%edi),%eax            # mov/stosl leave CF alone, so the
        adc     $0,%eax                 # carry-out of one word feeds the next
        stosl                           # [edi] = eax; edi += 4
        jc      .Lmul_d_add_prop_ripple
.Lmul_d_add_prop_done:
        pop     %ebx
        pop     %esi
        pop     %edi
        leave
        ret
        nop
|
||||
|
||||
# s_mpv_sqr_add_prop(pa, a_len, ps)
#
# For each of the a_len words at pa: squares the word and adds the
# double-word square, plus the running carry, to the next two words at ps.
# A non-zero final carry is then added into the following word of ps and
# rippled onward for as long as each addition carries out.
#
# Stack frame once the prologue has run:
#   ebp - 24:       caller's ebx
#   ebp - 20:       caller's esi
#   ebp - 16:       caller's edi
#   ebp - 12 .. -4: reserved by "sub $12,%esp", never referenced
#   ebp +  0:       caller's ebp
#   ebp +  4:       return address
#   ebp +  8:       pa argument
#   ebp + 12:       a_len argument
#   ebp + 16:       ps argument
# Register roles:
#   eax: current word of pa, then the result word being stored
#   ebx: running carry (also scratch for the current words of ps)
#   ecx: words remaining
#   edx: high half of the square
#   esi: read cursor into pa
#   edi: read/write cursor into ps

.globl  s_mpv_sqr_add_prop
.type   s_mpv_sqr_add_prop,@function
s_mpv_sqr_add_prop:
        push    %ebp
        mov     %esp,%ebp
        sub     $12,%esp
        push    %edi
        push    %esi
        push    %ebx
        xor     %ebx,%ebx               # carry = 0
        mov     12(%ebp),%ecx           # ecx = a_len
        mov     16(%ebp),%edi           # edi = ps
        test    %ecx,%ecx
        jz      .Lsqr_add_prop_tail     # skip the squaring when a_len == 0
        cld                             # string ops walk upward
        mov     8(%ebp),%esi            # esi = pa
.Lsqr_add_prop_loop:
        lodsl                           # eax = [esi]; esi += 4
        mull    %eax                    # edx:eax = a_i * a_i

        add     %ebx,%eax               # fold the incoming carry into edx:eax
        adc     $0,%edx
        mov     0(%edi),%ebx
        add     %ebx,%eax               # add the low word already in ps
        mov     4(%edi),%ebx            # fetch the high word before edi moves
        stosl                           # [edi] = eax; edi += 4 (CF preserved)
        adc     %ebx,%edx               # add the high word plus the low carry
        movl    $0,%ebx                 # must be mov: xor would destroy CF
        mov     %edx,%eax
        adc     $0,%ebx                 # ebx = carry out of the high word
        stosl                           # [edi] = eax; edi += 4
        dec     %ecx                    # one word fewer to go
        jnz     .Lsqr_add_prop_loop
.Lsqr_add_prop_tail:
        test    %ebx,%ebx               # any carry left over?
        jz      .Lsqr_add_prop_done
        mov     0(%edi),%eax            # add it into the next word of ps
        add     %ebx,%eax
        stosl                           # [edi] = eax; edi += 4 (flags untouched)
        jnc     .Lsqr_add_prop_done
.Lsqr_add_prop_ripple:
        mov     0(%edi),%eax            # mov/stosl leave CF alone, so the
        adc     $0,%eax                 # carry-out of one word feeds the next
        stosl                           # [edi] = eax; edi += 4
        jc      .Lsqr_add_prop_ripple
.Lsqr_add_prop_done:
        pop     %ebx
        pop     %esi
        pop     %edi
        leave
        ret
        nop
|
||||
|
||||
#
# Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
# so its high bit is 1.  This code is from NSPR.
#
# mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
#                        mp_digit *qp, mp_digit *rp)
#
# Stores the quotient through qp and the remainder through rp, and always
# returns zero.  The x86 DIV instruction raises a divide error when the
# quotient does not fit in 32 bits, so the caller must keep Nhi below
# divisor.
#
# Stack layout after "push %ebx" (no frame pointer is set up):
#   esp +  0: caller's ebx
#   esp +  4: return address
#   esp +  8: Nhi argument
#   esp + 12: Nlo argument
#   esp + 16: divisor argument
#   esp + 20: qp argument
#   esp + 24: rp argument
# Register roles:
#   edx:eax: dividend Nhi:Nlo going in; remainder:quotient coming out
#   ebx:     divisor, then scratch for the qp and rp pointers
#

.globl  s_mpv_div_2dx1d
.type   s_mpv_div_2dx1d,@function
s_mpv_div_2dx1d:
        push    %ebx
        mov     8(%esp),%edx            # edx = Nhi
        mov     12(%esp),%eax           # eax = Nlo
        mov     16(%esp),%ebx           # ebx = divisor
        div     %ebx                    # eax = quotient, edx = remainder
        mov     20(%esp),%ebx
        mov     %eax,0(%ebx)            # *qp = quotient
        mov     24(%esp),%ebx
        mov     %edx,0(%ebx)            # *rp = remainder
        xor     %eax,%eax               # return zero
        pop     %ebx
        ret
        nop
|
||||
|
||||
# Magic indicating no need for an executable stack
|
||||
.section .note.GNU-stack, "", @progbits
|
||||
.previous
|
||||
@@ -1,465 +0,0 @@
|
||||
/*
|
||||
* mplogic.c
|
||||
*
|
||||
* Bitwise logical operations on MPI values
|
||||
*
|
||||
* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* Michael J. Fromberger.
|
||||
* Portions created by the Initial Developer are Copyright (C) 1998
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
/* $Id: mplogic.c,v 1.15 2004-04-27 23:04:36 gerv%gerv.net Exp $ */
|
||||
|
||||
#include "mpi-priv.h"
|
||||
#include "mplogic.h"
|
||||
|
||||
/* {{{ Lookup table for population count */
|
||||
|
||||
/*
  bitc[v] is the number of 1 bits in the 8-bit value v (0..255).
  The table is only ever read, so it is declared const.
 */
static const unsigned char bitc[] = {
  0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
  4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
};
|
||||
|
||||
/* }}} */
|
||||
|
||||
/*------------------------------------------------------------------------*/
|
||||
/*
|
||||
mpl_not(a, b) - compute b = ~a
|
||||
mpl_and(a, b, c) - compute c = a & b
|
||||
mpl_or(a, b, c) - compute c = a | b
|
||||
mpl_xor(a, b, c) - compute c = a ^ b
|
||||
*/
|
||||
|
||||
/* {{{ mpl_not(a, b) */
|
||||
|
||||
/*
  Compute b = ~a: a is copied into b, every used digit of b is then
  complemented, and b is clamped.  Returns the mp_copy() error if the copy
  fails, MP_OKAY otherwise.
 */
mp_err mpl_not(mp_int *a, mp_int *b)
{
  mp_err       res;
  unsigned int ix;

  ARGCHK(a != NULL && b != NULL, MP_BADARG);

  res = mp_copy(a, b);
  if(res != MP_OKAY)
    return res;

  /* Complementing in place is only meaningful because digits are unsigned */
  ix = USED(b);
  while(ix > 0) {
    --ix;
    DIGIT(b, ix) = ~DIGIT(b, ix);
  }

  s_mp_clamp(b);

  return MP_OKAY;

} /* end mpl_not() */
|
||||
|
||||
/* }}} */
|
||||
|
||||
/* {{{ mpl_and(a, b, c) */
|
||||
|
||||
/*
  Compute c = a & b.  The operand with fewer used digits is copied into c
  and each of its digits is then ANDed with the matching digit of the
  other operand; c is clamped afterwards.  Returns the mp_copy() error if
  the copy fails, MP_OKAY otherwise.
 */
mp_err mpl_and(mp_int *a, mp_int *b, mp_int *c)
{
  mp_int      *shorter, *longer;
  mp_err       res;
  unsigned int ix;

  ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);

  /* On a tie, a is treated as the shorter operand */
  shorter = (USED(a) <= USED(b)) ? a : b;
  longer  = (shorter == a) ? b : a;

  res = mp_copy(shorter, c);
  if(res != MP_OKAY)
    return res;

  for(ix = USED(shorter); ix > 0; ) {
    --ix;
    DIGIT(c, ix) &= DIGIT(longer, ix);
  }

  s_mp_clamp(c);

  return MP_OKAY;

} /* end mpl_and() */
|
||||
|
||||
/* }}} */
|
||||
|
||||
/* {{{ mpl_or(a, b, c) */
|
||||
|
||||
/*
  Compute c = a | b.  The operand with more used digits is copied into c,
  then the digits of the shorter operand are ORed in.  Returns the
  mp_copy() error if the copy fails, MP_OKAY otherwise.
 */
mp_err mpl_or(mp_int *a, mp_int *b, mp_int *c)
{
  mp_int *which, *other;
  mp_err  res;
  unsigned int ix;

  ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);

  /* which = operand with more used digits, other = the shorter one */
  if(USED(a) >= USED(b)) {
    which = a;
    other = b;
  } else {
    which = b;
    other = a;
  }

  if((res = mp_copy(which, c)) != MP_OKAY)
    return res;

  /* Only the digits that 'other' actually uses take part; walking up to
     USED(which) would read past the used digits of the shorter operand.
     The high digits of c already hold the copy of 'which'. */
  for(ix = 0; ix < USED(other); ix++)
    DIGIT(c, ix) |= DIGIT(other, ix);

  return MP_OKAY;

} /* end mpl_or() */
|
||||
|
||||
/* }}} */
|
||||
|
||||
/* {{{ mpl_xor(a, b, c) */
|
||||
|
||||
/*
  Compute c = a ^ b.  The operand with more used digits is copied into c,
  then the digits of the shorter operand are XORed in and c is clamped.
  Returns the mp_copy() error if the copy fails, MP_OKAY otherwise.
 */
mp_err mpl_xor(mp_int *a, mp_int *b, mp_int *c)
{
  mp_int *which, *other;
  mp_err  res;
  unsigned int ix;

  ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);

  /* which = operand with more used digits, other = the shorter one */
  if(USED(a) >= USED(b)) {
    which = a;
    other = b;
  } else {
    which = b;
    other = a;
  }

  if((res = mp_copy(which, c)) != MP_OKAY)
    return res;

  /* Only the digits that 'other' actually uses take part; walking up to
     USED(which) would read past the used digits of the shorter operand.
     The high digits of c already hold the copy of 'which'. */
  for(ix = 0; ix < USED(other); ix++)
    DIGIT(c, ix) ^= DIGIT(other, ix);

  /* Equal high digits cancel, so the result may have leading zeros */
  s_mp_clamp(c);

  return MP_OKAY;

} /* end mpl_xor() */
|
||||
|
||||
/* }}} */
|
||||
|
||||
/*------------------------------------------------------------------------*/
|
||||
/*
|
||||
mpl_rsh(a, b, d) - b = a >> d
|
||||
mpl_lsh(a, b, d) - b = a << d
|
||||
*/
|
||||
|
||||
/* {{{ mpl_rsh(a, b, d) */
|
||||
|
||||
/*
  Compute b = a >> d: a is copied into b, which is then passed to
  s_mp_div_2d() together with d.  Returns the mp_copy() error if the copy
  fails, MP_OKAY otherwise.
 */
mp_err mpl_rsh(const mp_int *a, mp_int *b, mp_digit d)
{
  mp_err res;

  ARGCHK(a != NULL && b != NULL, MP_BADARG);

  res = mp_copy(a, b);
  if(res == MP_OKAY)
    s_mp_div_2d(b, d);

  return res;

} /* end mpl_rsh() */
|
||||
|
||||
/* }}} */
|
||||
|
||||
/* {{{ mpl_lsh(a, b, d) */
|
||||
|
||||
/*
  Compute b = a << d: a is copied into b, which is then passed to
  s_mp_mul_2d() together with d.  Returns the mp_copy() error if the copy
  fails, otherwise whatever s_mp_mul_2d() returns.
 */
mp_err mpl_lsh(const mp_int *a, mp_int *b, mp_digit d)
{
  mp_err res;

  ARGCHK(a != NULL && b != NULL, MP_BADARG);

  res = mp_copy(a, b);
  if(res == MP_OKAY)
    res = s_mp_mul_2d(b, d);

  return res;

} /* end mpl_lsh() */
|
||||
|
||||
/* }}} */
|
||||
|
||||
/*------------------------------------------------------------------------*/
|
||||
/*
|
||||
mpl_num_set(a, num)
|
||||
|
||||
Count the number of set bits in the binary representation of a.
|
||||
Returns MP_OKAY and sets 'num' to be the number of such bits, if
|
||||
possible. If num is NULL, the result is thrown away, but it is
|
||||
not considered an error.
|
||||
|
||||
mpl_num_clear() does basically the same thing for clear bits.
|
||||
*/
|
||||
|
||||
/* {{{ mpl_num_set(a, num) */
|
||||
|
||||
/*
  Count the 1 bits in the used digits of a, one byte at a time via the
  bitc[] table.  The total is written to *num unless num is NULL.
  Always returns MP_OKAY once the argument check has passed.
 */
mp_err mpl_num_set(mp_int *a, int *num)
{
  unsigned int ix;
  int          total = 0;
  mp_digit     rest;

  ARGCHK(a != NULL, MP_BADARG);

  for(ix = 0; ix < USED(a); ix++) {
    /* Peel off the low byte until nothing is left; bytes that are zero
       would add bitc[0] == 0, so stopping early changes nothing */
    for(rest = DIGIT(a, ix); rest != 0; rest >>= CHAR_BIT)
      total += bitc[rest & UCHAR_MAX];
  }

  if(num != NULL)
    *num = total;

  return MP_OKAY;

} /* end mpl_num_set() */
|
||||
|
||||
/* }}} */
|
||||
|
||||
/* {{{ mpl_num_clear(a, num) */
|
||||
|
||||
/*
  Count the 0 bits in the used digits of a.  Each digit is complemented
  first, so that its clear bits can be counted as set bits one byte at a
  time via the bitc[] table.  The total is written to *num unless num is
  NULL.  Always returns MP_OKAY once the argument check has passed.
 */
mp_err mpl_num_clear(mp_int *a, int *num)
{
  unsigned int ix;
  int          total = 0;
  mp_digit     rest;

  ARGCHK(a != NULL, MP_BADARG);

  for(ix = 0; ix < USED(a); ix++) {
    /* A 1 bit in the complement is a 0 bit in the original digit */
    for(rest = (mp_digit)~DIGIT(a, ix); rest != 0; rest >>= CHAR_BIT)
      total += bitc[rest & UCHAR_MAX];
  }

  if(num != NULL)
    *num = total;

  return MP_OKAY;

} /* end mpl_num_clear() */
|
||||
|
||||
/* }}} */
|
||||
|
||||
/*------------------------------------------------------------------------*/
|
||||
/*
|
||||
mpl_parity(a)
|
||||
|
||||
Determines the bitwise parity of the value given. Returns MP_EVEN
|
||||
if an even number of bits are set, MP_ODD if an odd number are
|
||||
set.
|
||||
*/
|
||||
|
||||
/* {{{ mpl_parity(a) */
|
||||
|
||||
/*
  Return MP_EVEN when the used digits of a contain an even number of
  1 bits, MP_ODD otherwise.
 */
mp_err mpl_parity(mp_int *a)
{
  unsigned int ix;
  unsigned int shft;
  mp_digit     acc = 0;

  ARGCHK(a != NULL, MP_BADARG);

  /* XOR all digits together first: the parity of the combined word equals
     the XOR of the individual digit parities */
  for(ix = 0; ix < USED(a); ix++)
    acc ^= DIGIT(a, ix);

  /* Fold the word onto itself, halving the width each step, until bit 0
     holds the parity */
  for(shft = (sizeof(mp_digit) * CHAR_BIT) / 2; shft != 0; shft >>= 1)
    acc ^= (acc >> shft);

  return (acc & 1) ? MP_ODD : MP_EVEN;

} /* end mpl_parity() */
|
||||
|
||||
/* }}} */
|
||||
|
||||
/*
|
||||
mpl_set_bit
|
||||
|
||||
Returns MP_OKAY or some error code.
|
||||
Grows a if needed to set a bit to 1.
|
||||
*/
|
||||
/*
  Set bit number bitNum of a to 1 when value is non-zero, or to 0 when it
  is zero.  When the bit lies beyond the used digits, a is first padded
  with s_mp_pad() (whose error, if any, is returned); a is clamped before
  returning MP_OKAY.
 */
mp_err mpl_set_bit(mp_int *a, mp_size bitNum, mp_size value)
{
  mp_size  ix;
  mp_err   rv;
  mp_digit mask;

  ARGCHK(a != NULL, MP_BADARG);

  /* Digit that holds the requested bit */
  ix = bitNum / MP_DIGIT_BIT;
  if (MP_USED(a) < ix + 1) {
    if ((rv = s_mp_pad(a, ix + 1)) != MP_OKAY)
      return rv;
  }

  /* Position of the bit inside that digit */
  mask = (mp_digit)1 << (bitNum % MP_DIGIT_BIT);
  if (!value)
    MP_DIGIT(a,ix) &= ~mask;
  else
    MP_DIGIT(a,ix) |= mask;

  s_mp_clamp(a);
  return MP_OKAY;
}
|
||||
|
||||
/*
|
||||
mpl_get_bit
|
||||
|
||||
returns 0 or 1 or some (negative) error code.
|
||||
*/
|
||||
/*
  Return the value (0 or 1) of bit number bitNum of a.  The bit must lie
  within the used digits of a; otherwise the range check rejects the call
  with MP_RANGE.
 */
mp_err mpl_get_bit(const mp_int *a, mp_size bitNum)
{
  mp_size bit, ix;
  mp_err  rv;

  ARGCHK(a != NULL, MP_BADARG);

  ix = bitNum / MP_DIGIT_BIT;
  /* Written as ix < used rather than ix <= used - 1: the subtraction
     wraps when used is 0 and would then accept every index */
  ARGCHK(ix < MP_USED(a), MP_RANGE);

  bit = bitNum % MP_DIGIT_BIT;
  rv = (mp_err)((MP_DIGIT(a, ix) >> bit) & 1);
  return rv;
}
|
||||
|
||||
/*
|
||||
mpl_get_bits
|
||||
- Extracts numBits bits from a, where the least significant extracted bit
|
||||
is bit lsbNum. Returns a negative value if error occurs.
|
||||
- Because sign bit is used to indicate error, maximum number of bits to
|
||||
be returned is the lesser of (a) the number of bits in an mp_digit, or
|
||||
(b) one less than the number of bits in an mp_err.
|
||||
- lsbNum + numbits can be greater than the number of significant bits in
|
||||
integer a, as long as bit lsbNum is in the high order digit of a.
|
||||
*/
|
||||
/*
  Extract numBits bits from a, starting at bit lsbNum (the least
  significant extracted bit), and return them as a non-negative value.
  Arguments are validated before a is dereferenced and before the mask is
  built, and the mask is shifted at mp_digit width so that every numBits
  value accepted by the check yields a well-defined mask.
 */
mp_err mpl_get_bits(const mp_int *a, mp_size lsbNum, mp_size numBits)
{
  mp_size         rshift;
  mp_size         lsWndx;
  const mp_digit *digit;
  mp_digit        mask;

  ARGCHK(a != NULL, MP_BADARG);
  ARGCHK(numBits < CHAR_BIT * sizeof mask, MP_BADARG);
  ARGCHK(MP_HOWMANY(lsbNum, MP_DIGIT_BIT) <= MP_USED(a), MP_RANGE);

  rshift = (lsbNum % MP_DIGIT_BIT);   /* bit offset inside the first digit */
  lsWndx = (lsbNum / MP_DIGIT_BIT);   /* index of the first digit          */
  digit  = MP_DIGITS(a) + lsWndx;
  mask   = (((mp_digit)1 << numBits) - 1);

  if ((numBits + rshift <= MP_DIGIT_BIT) ||
      (lsWndx + 1 >= MP_USED(a))) {
    /* All requested bits live in one digit, or there is no higher used
       digit to borrow from */
    mask &= (digit[0] >> rshift);
  } else {
    /* The field straddles two digits; rshift is non-zero on this path
       because numBits alone is smaller than the digit width */
    mask &= ((digit[0] >> rshift) | (digit[1] << (MP_DIGIT_BIT - rshift)));
  }
  return (mp_err)mask;
}
|
||||
|
||||
/*
|
||||
mpl_significant_bits
|
||||
returns number of significant bits in abs(a).
|
||||
returns 1 if value is zero.
|
||||
*/
|
||||
mp_err mpl_significant_bits(const mp_int *a)
|
||||
{
|
||||
mp_err bits = 0;
|
||||
int ix;
|
||||
|
||||
ARGCHK(a != NULL, MP_BADARG);
|
||||
|
||||
ix = MP_USED(a);
|
||||
for (ix = MP_USED(a); ix > 0; ) {
|
||||
mp_digit d;
|
||||
d = MP_DIGIT(a, --ix);
|
||||
if (d) {
|
||||
while (d) {
|
||||
++bits;
|
||||
d >>= 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
bits += ix * MP_DIGIT_BIT;
|
||||
if (!bits)
|
||||
bits = 1;
|
||||
return bits;
|
||||
}
|
||||
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* HERE THERE BE DRAGONS */
|
||||
@@ -1,85 +0,0 @@
|
||||
/*
|
||||
* mplogic.h
|
||||
*
|
||||
* Bitwise logical operations on MPI values
|
||||
*
|
||||
* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* Michael J. Fromberger.
|
||||
* Portions created by the Initial Developer are Copyright (C) 1998
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
/* $Id: mplogic.h,v 1.7 2004-04-27 23:04:36 gerv%gerv.net Exp $ */
|
||||
|
||||
#ifndef _H_MPLOGIC_
|
||||
#define _H_MPLOGIC_
|
||||
|
||||
#include "mpi.h"
|
||||
|
||||
/*
|
||||
The logical operations treat an mp_int as if it were a bit vector,
|
||||
without regard to its sign (an mp_int is represented in a signed
|
||||
magnitude format). Values are treated as if they had an infinite
|
||||
string of zeros left of the most-significant bit.
|
||||
*/
|
||||
|
||||
/* Parity results */
|
||||
|
||||
#define MP_EVEN MP_YES
|
||||
#define MP_ODD MP_NO
|
||||
|
||||
/* Bitwise functions */
|
||||
|
||||
mp_err mpl_not(mp_int *a, mp_int *b); /* one's complement */
|
||||
mp_err mpl_and(mp_int *a, mp_int *b, mp_int *c); /* bitwise AND */
|
||||
mp_err mpl_or(mp_int *a, mp_int *b, mp_int *c); /* bitwise OR */
|
||||
mp_err mpl_xor(mp_int *a, mp_int *b, mp_int *c); /* bitwise XOR */
|
||||
|
||||
/* Shift functions */
|
||||
|
||||
mp_err mpl_rsh(const mp_int *a, mp_int *b, mp_digit d); /* right shift */
|
||||
mp_err mpl_lsh(const mp_int *a, mp_int *b, mp_digit d); /* left shift */
|
||||
|
||||
/* Bit count and parity */
|
||||
|
||||
mp_err mpl_num_set(mp_int *a, int *num); /* count set bits */
|
||||
mp_err mpl_num_clear(mp_int *a, int *num); /* count clear bits */
|
||||
mp_err mpl_parity(mp_int *a); /* determine parity */
|
||||
|
||||
/* Get & Set the value of a bit */
|
||||
|
||||
mp_err mpl_set_bit(mp_int *a, mp_size bitNum, mp_size value);
|
||||
mp_err mpl_get_bit(const mp_int *a, mp_size bitNum);
|
||||
mp_err mpl_get_bits(const mp_int *a, mp_size lsbNum, mp_size numBits);
|
||||
mp_err mpl_significant_bits(const mp_int *a);
|
||||
|
||||
#endif /* end _H_MPLOGIC_ */
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,626 +0,0 @@
|
||||
/*
|
||||
* mpprime.c
|
||||
*
|
||||
* Utilities for finding and working with prime and pseudo-prime
|
||||
* integers
|
||||
*
|
||||
* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* Michael J. Fromberger.
|
||||
* Portions created by the Initial Developer are Copyright (C) 1997
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
* Netscape Communications Corporation
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include "mpi-priv.h"
|
||||
#include "mpprime.h"
|
||||
#include "mplogic.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#define SMALL_TABLE 0 /* determines size of hard-wired prime table */
|
||||
|
||||
#define RANDOM() rand()
|
||||
|
||||
#include "primes.c" /* pull in the prime digit table */
|
||||
|
||||
/*
|
||||
Test if any of a given vector of digits divides a. If not, MP_NO
|
||||
is returned; otherwise, MP_YES is returned and 'which' is set to
|
||||
the index of the integer in the vector which divided a.
|
||||
*/
|
||||
mp_err s_mpp_divp(mp_int *a, const mp_digit *vec, int size, int *which);
|
||||
|
||||
/* {{{ mpp_divis(a, b) */
|
||||
|
||||
/*
|
||||
mpp_divis(a, b)
|
||||
|
||||
Returns MP_YES if a is divisible by b, or MP_NO if it is not.
|
||||
*/
|
||||
|
||||
mp_err mpp_divis(mp_int *a, mp_int *b)
|
||||
{
|
||||
mp_err res;
|
||||
mp_int rem;
|
||||
|
||||
if((res = mp_init(&rem)) != MP_OKAY)
|
||||
return res;
|
||||
|
||||
if((res = mp_mod(a, b, &rem)) != MP_OKAY)
|
||||
goto CLEANUP;
|
||||
|
||||
if(mp_cmp_z(&rem) == 0)
|
||||
res = MP_YES;
|
||||
else
|
||||
res = MP_NO;
|
||||
|
||||
CLEANUP:
|
||||
mp_clear(&rem);
|
||||
return res;
|
||||
|
||||
} /* end mpp_divis() */
|
||||
|
||||
/* }}} */
|
||||
|
||||
/* {{{ mpp_divis_d(a, d) */
|
||||
|
||||
/*
|
||||
mpp_divis_d(a, d)
|
||||
|
||||
Return MP_YES if a is divisible by d, or MP_NO if it is not.
|
||||
*/
|
||||
|
||||
mp_err mpp_divis_d(mp_int *a, mp_digit d)
|
||||
{
|
||||
mp_err res;
|
||||
mp_digit rem;
|
||||
|
||||
ARGCHK(a != NULL, MP_BADARG);
|
||||
|
||||
if(d == 0)
|
||||
return MP_NO;
|
||||
|
||||
if((res = mp_mod_d(a, d, &rem)) != MP_OKAY)
|
||||
return res;
|
||||
|
||||
if(rem == 0)
|
||||
return MP_YES;
|
||||
else
|
||||
return MP_NO;
|
||||
|
||||
} /* end mpp_divis_d() */
|
||||
|
||||
/* }}} */
|
||||
|
||||
/* {{{ mpp_random(a) */
|
||||
|
||||
/*
|
||||
mpp_random(a)
|
||||
|
||||
Assigns a random value to a. This value is generated using the
|
||||
standard C library's rand() function, so it should not be used for
|
||||
cryptographic purposes, but it should be fine for primality testing,
|
||||
since all we really care about there is good statistical properties.
|
||||
|
||||
As many digits as a currently has are filled with random digits.
|
||||
*/
|
||||
|
||||
mp_err mpp_random(mp_int *a)
|
||||
|
||||
{
|
||||
mp_digit next = 0;
|
||||
unsigned int ix, jx;
|
||||
|
||||
ARGCHK(a != NULL, MP_BADARG);
|
||||
|
||||
for(ix = 0; ix < USED(a); ix++) {
|
||||
for(jx = 0; jx < sizeof(mp_digit); jx++) {
|
||||
next = (next << CHAR_BIT) | (RANDOM() & UCHAR_MAX);
|
||||
}
|
||||
DIGIT(a, ix) = next;
|
||||
}
|
||||
|
||||
return MP_OKAY;
|
||||
|
||||
} /* end mpp_random() */
|
||||
|
||||
/* }}} */
|
||||
|
||||
/* {{{ mpp_random_size(a, prec) */
|
||||
|
||||
/*
  mpp_random_size(a, prec)

  Pads a out to prec digits with s_mp_pad() and then fills it with
  pseudo-random data via mpp_random().  prec must be non-zero.  An error
  from s_mp_pad() is returned unchanged.
 */
mp_err  mpp_random_size(mp_int *a, mp_size prec)
{
  mp_err  status;

  ARGCHK(a != NULL && prec > 0, MP_BADARG);

  status = s_mp_pad(a, prec);
  if(status == MP_OKAY)
    status = mpp_random(a);

  return status;

} /* end mpp_random_size() */
|
||||
|
||||
/* }}} */
|
||||
|
||||
/* {{{ mpp_divis_vector(a, vec, size, which) */
|
||||
|
||||
/*
|
||||
mpp_divis_vector(a, vec, size, which)
|
||||
|
||||
Determines if a is divisible by any of the 'size' digits in vec.
|
||||
Returns MP_YES and sets 'which' to the index of the offending digit,
|
||||
if it is; returns MP_NO if it is not.
|
||||
*/
|
||||
|
||||
mp_err mpp_divis_vector(mp_int *a, const mp_digit *vec, int size, int *which)
|
||||
{
|
||||
ARGCHK(a != NULL && vec != NULL && size > 0, MP_BADARG);
|
||||
|
||||
return s_mpp_divp(a, vec, size, which);
|
||||
|
||||
} /* end mpp_divis_vector() */
|
||||
|
||||
/* }}} */
|
||||
|
||||
/* {{{ mpp_divis_primes(a, np) */
|
||||
|
||||
/*
|
||||
mpp_divis_primes(a, np)
|
||||
|
||||
Test whether a is divisible by any of the first 'np' primes. If it
|
||||
is, returns MP_YES and sets *np to the value of the digit that did
|
||||
it. If not, returns MP_NO.
|
||||
*/
|
||||
mp_err mpp_divis_primes(mp_int *a, mp_digit *np)
|
||||
{
|
||||
int size, which;
|
||||
mp_err res;
|
||||
|
||||
ARGCHK(a != NULL && np != NULL, MP_BADARG);
|
||||
|
||||
size = (int)*np;
|
||||
if(size > prime_tab_size)
|
||||
size = prime_tab_size;
|
||||
|
||||
res = mpp_divis_vector(a, prime_tab, size, &which);
|
||||
if(res == MP_YES)
|
||||
*np = prime_tab[which];
|
||||
|
||||
return res;
|
||||
|
||||
} /* end mpp_divis_primes() */
|
||||
|
||||
/* }}} */
|
||||
|
||||
/* {{{ mpp_fermat(a, w) */
|
||||
|
||||
/*
|
||||
Using w as a witness, try pseudo-primality testing based on Fermat's
|
||||
little theorem. If a is prime, and (w, a) = 1, then w^a == w (mod
|
||||
a). So, we compute z = w^a (mod a) and compare z to w; if they are
|
||||
equal, the test passes and we return MP_YES. Otherwise, we return
|
||||
MP_NO.
|
||||
*/
|
||||
mp_err mpp_fermat(mp_int *a, mp_digit w)
|
||||
{
|
||||
mp_int base, test;
|
||||
mp_err res;
|
||||
|
||||
if((res = mp_init(&base)) != MP_OKAY)
|
||||
return res;
|
||||
|
||||
mp_set(&base, w);
|
||||
|
||||
if((res = mp_init(&test)) != MP_OKAY)
|
||||
goto TEST;
|
||||
|
||||
/* Compute test = base^a (mod a) */
|
||||
if((res = mp_exptmod(&base, a, a, &test)) != MP_OKAY)
|
||||
goto CLEANUP;
|
||||
|
||||
|
||||
if(mp_cmp(&base, &test) == 0)
|
||||
res = MP_YES;
|
||||
else
|
||||
res = MP_NO;
|
||||
|
||||
CLEANUP:
|
||||
mp_clear(&test);
|
||||
TEST:
|
||||
mp_clear(&base);
|
||||
|
||||
return res;
|
||||
|
||||
} /* end mpp_fermat() */
|
||||
|
||||
/* }}} */
|
||||
|
||||
/*
|
||||
Perform the fermat test on each of the primes in a list until
|
||||
a) one of them shows a is not prime, or
|
||||
b) the list is exhausted.
|
||||
Returns: MP_YES if it passes tests.
|
||||
MP_NO if fermat test reveals it is composite
|
||||
Some MP error code if some other error occurs.
|
||||
*/
|
||||
mp_err mpp_fermat_list(mp_int *a, const mp_digit *primes, mp_size nPrimes)
|
||||
{
|
||||
mp_err rv = MP_YES;
|
||||
|
||||
while (nPrimes-- > 0 && rv == MP_YES) {
|
||||
rv = mpp_fermat(a, *primes++);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
/* {{{ mpp_pprime(a, nt) */
|
||||
|
||||
/*
|
||||
mpp_pprime(a, nt)
|
||||
|
||||
Performs nt iteration of the Miller-Rabin probabilistic primality
|
||||
test on a. Returns MP_YES if the tests pass, MP_NO if one fails.
|
||||
If MP_NO is returned, the number is definitely composite. If MP_YES
|
||||
is returned, it is probably prime (but that is not guaranteed).
|
||||
*/
|
||||
|
||||
mp_err mpp_pprime(mp_int *a, int nt)
|
||||
{
|
||||
mp_err res;
|
||||
mp_int x, amo, m, z; /* "amo" = "a minus one" */
|
||||
int iter;
|
||||
unsigned int jx;
|
||||
mp_size b;
|
||||
|
||||
ARGCHK(a != NULL, MP_BADARG);
|
||||
|
||||
MP_DIGITS(&x) = 0;
|
||||
MP_DIGITS(&amo) = 0;
|
||||
MP_DIGITS(&m) = 0;
|
||||
MP_DIGITS(&z) = 0;
|
||||
|
||||
/* Initialize temporaries... */
|
||||
MP_CHECKOK( mp_init(&amo));
|
||||
/* Compute amo = a - 1 for what follows... */
|
||||
MP_CHECKOK( mp_sub_d(a, 1, &amo) );
|
||||
|
||||
b = mp_trailing_zeros(&amo);
|
||||
if (!b) { /* a was even ? */
|
||||
res = MP_NO;
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
MP_CHECKOK( mp_init_size(&x, MP_USED(a)) );
|
||||
MP_CHECKOK( mp_init(&z) );
|
||||
MP_CHECKOK( mp_init(&m) );
|
||||
MP_CHECKOK( mp_div_2d(&amo, b, &m, 0) );
|
||||
|
||||
/* Do the test nt times... */
|
||||
for(iter = 0; iter < nt; iter++) {
|
||||
|
||||
/* Choose a random value for x < a */
|
||||
s_mp_pad(&x, USED(a));
|
||||
mpp_random(&x);
|
||||
MP_CHECKOK( mp_mod(&x, a, &x) );
|
||||
|
||||
/* Compute z = (x ** m) mod a */
|
||||
MP_CHECKOK( mp_exptmod(&x, &m, a, &z) );
|
||||
|
||||
if(mp_cmp_d(&z, 1) == 0 || mp_cmp(&z, &amo) == 0) {
|
||||
res = MP_YES;
|
||||
continue;
|
||||
}
|
||||
|
||||
res = MP_NO; /* just in case the following for loop never executes. */
|
||||
for (jx = 1; jx < b; jx++) {
|
||||
/* z = z^2 (mod a) */
|
||||
MP_CHECKOK( mp_sqrmod(&z, a, &z) );
|
||||
res = MP_NO; /* previous line set res to MP_YES */
|
||||
|
||||
if(mp_cmp_d(&z, 1) == 0) {
|
||||
break;
|
||||
}
|
||||
if(mp_cmp(&z, &amo) == 0) {
|
||||
res = MP_YES;
|
||||
break;
|
||||
}
|
||||
} /* end testing loop */
|
||||
|
||||
/* If the test passes, we will continue iterating, but a failed
|
||||
test means the candidate is definitely NOT prime, so we will
|
||||
immediately break out of this loop
|
||||
*/
|
||||
if(res == MP_NO)
|
||||
break;
|
||||
|
||||
} /* end iterations loop */
|
||||
|
||||
CLEANUP:
|
||||
mp_clear(&m);
|
||||
mp_clear(&z);
|
||||
mp_clear(&x);
|
||||
mp_clear(&amo);
|
||||
return res;
|
||||
|
||||
} /* end mpp_pprime() */
|
||||
|
||||
/* }}} */
|
||||
|
||||
/* Produce table of composites from list of primes and trial value.
|
||||
** trial must be odd. List of primes must not include 2.
|
||||
** sieve should have dimension >= MAXPRIME/2, where MAXPRIME is largest
|
||||
** prime in list of primes. After this function is finished,
|
||||
** if sieve[i] is non-zero, then (trial + 2*i) is composite.
|
||||
** Each prime used in the sieve costs one division of trial, and eliminates
|
||||
** one or more values from the search space. (3 eliminates 1/3 of the values
|
||||
** alone!) Each value left in the search space costs 1 or more modular
|
||||
** exponentations. So, these divisions are a bargain!
|
||||
*/
|
||||
mp_err mpp_sieve(mp_int *trial, const mp_digit *primes, mp_size nPrimes,
|
||||
unsigned char *sieve, mp_size nSieve)
|
||||
{
|
||||
mp_err res;
|
||||
mp_digit rem;
|
||||
mp_size ix;
|
||||
unsigned long offset;
|
||||
|
||||
memset(sieve, 0, nSieve);
|
||||
|
||||
for(ix = 0; ix < nPrimes; ix++) {
|
||||
mp_digit prime = primes[ix];
|
||||
mp_size i;
|
||||
if((res = mp_mod_d(trial, prime, &rem)) != MP_OKAY)
|
||||
return res;
|
||||
|
||||
if (rem == 0) {
|
||||
offset = 0;
|
||||
} else {
|
||||
offset = prime - (rem / 2);
|
||||
}
|
||||
for (i = offset; i < nSieve ; i += prime) {
|
||||
sieve[i] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
return MP_OKAY;
|
||||
}
|
||||
|
||||
#define SIEVE_SIZE 32*1024
|
||||
|
||||
mp_err mpp_make_prime(mp_int *start, mp_size nBits, mp_size strong,
|
||||
unsigned long * nTries)
|
||||
{
|
||||
mp_digit np;
|
||||
mp_err res;
|
||||
int i = 0;
|
||||
mp_int trial;
|
||||
mp_int q;
|
||||
mp_size num_tests;
|
||||
/*
|
||||
* Always make sieve the last variabale allocated so that
|
||||
* Mac builds don't break by adding an extra variable
|
||||
* on the stack. -javi
|
||||
*/
|
||||
#if defined(macintosh) || defined (XP_OS2) \
|
||||
|| (defined(HPUX) && defined(__ia64))
|
||||
unsigned char *sieve;
|
||||
|
||||
sieve = malloc(SIEVE_SIZE);
|
||||
ARGCHK(sieve != NULL, MP_MEM);
|
||||
#else
|
||||
unsigned char sieve[SIEVE_SIZE];
|
||||
#endif
|
||||
|
||||
ARGCHK(start != 0, MP_BADARG);
|
||||
ARGCHK(nBits > 16, MP_RANGE);
|
||||
|
||||
MP_DIGITS(&trial) = 0;
|
||||
MP_DIGITS(&q) = 0;
|
||||
MP_CHECKOK( mp_init(&trial) );
|
||||
MP_CHECKOK( mp_init(&q) );
|
||||
/* values taken from table 4.4, HandBook of Applied Cryptography */
|
||||
if (nBits >= 1300) {
|
||||
num_tests = 2;
|
||||
} else if (nBits >= 850) {
|
||||
num_tests = 3;
|
||||
} else if (nBits >= 650) {
|
||||
num_tests = 4;
|
||||
} else if (nBits >= 550) {
|
||||
num_tests = 5;
|
||||
} else if (nBits >= 450) {
|
||||
num_tests = 6;
|
||||
} else if (nBits >= 400) {
|
||||
num_tests = 7;
|
||||
} else if (nBits >= 350) {
|
||||
num_tests = 8;
|
||||
} else if (nBits >= 300) {
|
||||
num_tests = 9;
|
||||
} else if (nBits >= 250) {
|
||||
num_tests = 12;
|
||||
} else if (nBits >= 200) {
|
||||
num_tests = 15;
|
||||
} else if (nBits >= 150) {
|
||||
num_tests = 18;
|
||||
} else if (nBits >= 100) {
|
||||
num_tests = 27;
|
||||
} else
|
||||
num_tests = 50;
|
||||
|
||||
if (strong)
|
||||
--nBits;
|
||||
MP_CHECKOK( mpl_set_bit(start, nBits - 1, 1) );
|
||||
MP_CHECKOK( mpl_set_bit(start, 0, 1) );
|
||||
for (i = mpl_significant_bits(start) - 1; i >= nBits; --i) {
|
||||
MP_CHECKOK( mpl_set_bit(start, i, 0) );
|
||||
}
|
||||
/* start sieveing with prime value of 3. */
|
||||
MP_CHECKOK(mpp_sieve(start, prime_tab + 1, prime_tab_size - 1,
|
||||
sieve, SIEVE_SIZE) );
|
||||
|
||||
#ifdef DEBUG_SIEVE
|
||||
res = 0;
|
||||
for (i = 0; i < SIEVE_SIZE; ++i) {
|
||||
if (!sieve[i])
|
||||
++res;
|
||||
}
|
||||
fprintf(stderr,"sieve found %d potential primes.\n", res);
|
||||
#define FPUTC(x,y) fputc(x,y)
|
||||
#else
|
||||
#define FPUTC(x,y)
|
||||
#endif
|
||||
|
||||
res = MP_NO;
|
||||
for(i = 0; i < SIEVE_SIZE; ++i) {
|
||||
if (sieve[i]) /* this number is composite */
|
||||
continue;
|
||||
MP_CHECKOK( mp_add_d(start, 2 * i, &trial) );
|
||||
FPUTC('.', stderr);
|
||||
/* run a Fermat test */
|
||||
res = mpp_fermat(&trial, 2);
|
||||
if (res != MP_OKAY) {
|
||||
if (res == MP_NO)
|
||||
continue; /* was composite */
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
FPUTC('+', stderr);
|
||||
/* If that passed, run some Miller-Rabin tests */
|
||||
res = mpp_pprime(&trial, num_tests);
|
||||
if (res != MP_OKAY) {
|
||||
if (res == MP_NO)
|
||||
continue; /* was composite */
|
||||
goto CLEANUP;
|
||||
}
|
||||
FPUTC('!', stderr);
|
||||
|
||||
if (!strong)
|
||||
break; /* success !! */
|
||||
|
||||
/* At this point, we have strong evidence that our candidate
|
||||
is itself prime. If we want a strong prime, we need now
|
||||
to test q = 2p + 1 for primality...
|
||||
*/
|
||||
MP_CHECKOK( mp_mul_2(&trial, &q) );
|
||||
MP_CHECKOK( mp_add_d(&q, 1, &q) );
|
||||
|
||||
/* Test q for small prime divisors ... */
|
||||
np = prime_tab_size;
|
||||
res = mpp_divis_primes(&q, &np);
|
||||
if (res == MP_YES) { /* is composite */
|
||||
mp_clear(&q);
|
||||
continue;
|
||||
}
|
||||
if (res != MP_NO)
|
||||
goto CLEANUP;
|
||||
|
||||
/* And test with Fermat, as with its parent ... */
|
||||
res = mpp_fermat(&q, 2);
|
||||
if (res != MP_YES) {
|
||||
mp_clear(&q);
|
||||
if (res == MP_NO)
|
||||
continue; /* was composite */
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
/* And test with Miller-Rabin, as with its parent ... */
|
||||
res = mpp_pprime(&q, num_tests);
|
||||
if (res != MP_YES) {
|
||||
mp_clear(&q);
|
||||
if (res == MP_NO)
|
||||
continue; /* was composite */
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
/* If it passed, we've got a winner */
|
||||
mp_exch(&q, &trial);
|
||||
mp_clear(&q);
|
||||
break;
|
||||
|
||||
} /* end of loop through sieved values */
|
||||
if (res == MP_YES)
|
||||
mp_exch(&trial, start);
|
||||
CLEANUP:
|
||||
mp_clear(&trial);
|
||||
mp_clear(&q);
|
||||
if (nTries)
|
||||
*nTries += i;
|
||||
#if defined(macintosh) || defined(XP_OS2) \
|
||||
|| (defined(HPUX) && defined(__ia64))
|
||||
if (sieve != NULL) {
|
||||
memset(sieve, 0, SIEVE_SIZE);
|
||||
free (sieve);
|
||||
}
|
||||
#endif
|
||||
return res;
|
||||
}
|
||||
|
||||
/*========================================================================*/
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* Static functions visible only to the library internally */
|
||||
|
||||
/* {{{ s_mpp_divp(a, vec, size, which) */
|
||||
|
||||
/*
|
||||
Test for divisibility by members of a vector of digits. Returns
|
||||
MP_NO if a is not divisible by any of them; returns MP_YES and sets
|
||||
'which' to the index of the offender, if it is. Will stop on the
|
||||
first digit against which a is divisible.
|
||||
*/
|
||||
|
||||
mp_err s_mpp_divp(mp_int *a, const mp_digit *vec, int size, int *which)
|
||||
{
|
||||
mp_err res;
|
||||
mp_digit rem;
|
||||
|
||||
int ix;
|
||||
|
||||
for(ix = 0; ix < size; ix++) {
|
||||
if((res = mp_mod_d(a, vec[ix], &rem)) != MP_OKAY)
|
||||
return res;
|
||||
|
||||
if(rem == 0) {
|
||||
if(which)
|
||||
*which = ix;
|
||||
return MP_YES;
|
||||
}
|
||||
}
|
||||
|
||||
return MP_NO;
|
||||
|
||||
} /* end s_mpp_divp() */
|
||||
|
||||
/* }}} */
|
||||
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* HERE THERE BE DRAGONS */
|
||||
@@ -1,70 +0,0 @@
|
||||
/*
|
||||
* mpprime.h
|
||||
*
|
||||
* Utilities for finding and working with prime and pseudo-prime
|
||||
* integers
|
||||
*
|
||||
* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* Michael J. Fromberger.
|
||||
* Portions created by the Initial Developer are Copyright (C) 1997
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef _H_MP_PRIME_
|
||||
#define _H_MP_PRIME_
|
||||
|
||||
#include "mpi.h"
|
||||
|
||||
extern const int prime_tab_size; /* number of primes available */
|
||||
extern const mp_digit prime_tab[];
|
||||
|
||||
/* Tests for divisibility */
|
||||
mp_err mpp_divis(mp_int *a, mp_int *b);
|
||||
mp_err mpp_divis_d(mp_int *a, mp_digit d);
|
||||
|
||||
/* Random selection */
|
||||
mp_err mpp_random(mp_int *a);
|
||||
mp_err mpp_random_size(mp_int *a, mp_size prec);
|
||||
|
||||
/* Pseudo-primality testing */
|
||||
mp_err mpp_divis_vector(mp_int *a, const mp_digit *vec, int size, int *which);
|
||||
mp_err mpp_divis_primes(mp_int *a, mp_digit *np);
|
||||
mp_err mpp_fermat(mp_int *a, mp_digit w);
|
||||
mp_err mpp_fermat_list(mp_int *a, const mp_digit *primes, mp_size nPrimes);
|
||||
mp_err mpp_pprime(mp_int *a, int nt);
|
||||
mp_err mpp_sieve(mp_int *trial, const mp_digit *primes, mp_size nPrimes,
|
||||
unsigned char *sieve, mp_size nSieve);
|
||||
mp_err mpp_make_prime(mp_int *start, mp_size nBits, mp_size strong,
|
||||
unsigned long * nTries);
|
||||
|
||||
#endif /* end _H_MP_PRIME_ */
|
||||
@@ -1,253 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is a SPARC/VIS optimized multiply and add function.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* Sun Microsystems Inc.
|
||||
* Portions created by the Initial Developer are Copyright (C) 1999-2000
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
/* $Id: mpv_sparc.c,v 1.4 2004-04-27 23:04:36 gerv%gerv.net Exp $ */
|
||||
|
||||
#include "vis_proto.h"
|
||||
|
||||
/***************************************************************/
|
||||
|
||||
typedef int t_s32;
|
||||
typedef unsigned int t_u32;
|
||||
#if defined(__sparcv9)
|
||||
typedef long t_s64;
|
||||
typedef unsigned long t_u64;
|
||||
#else
|
||||
typedef long long t_s64;
|
||||
typedef unsigned long long t_u64;
|
||||
#endif
|
||||
typedef double t_d64;
|
||||
|
||||
/***************************************************************/
|
||||
|
||||
typedef union {
|
||||
t_d64 d64;
|
||||
struct {
|
||||
t_s32 i0;
|
||||
t_s32 i1;
|
||||
} i32s;
|
||||
} d64_2_i32;
|
||||
|
||||
/***************************************************************/
|
||||
|
||||
#define BUFF_SIZE 256
|
||||
|
||||
#define A_BITS 19
|
||||
#define A_MASK ((1 << A_BITS) - 1)
|
||||
|
||||
/***************************************************************/
|
||||
|
||||
static t_u64 mask_cnst[] = {
|
||||
0x8000000080000000ull
|
||||
};
|
||||
|
||||
/***************************************************************/
|
||||
|
||||
/*
 * DEF_VARS(N): declares the locals that the MUL_ and ADD_ macros below
 * expect to find in scope.
 *   py    y viewed as an array of doubles, i.e. two t_u32 words per element
 *   mask  the bit pattern held in mask_cnst, reinterpreted as a double
 *   ca    2^31 - 1 as a double
 *   da    the multiplier a as a double
 *   buff  N 64-bit slots for the partial products
 *   s, dy scratch for the running sum and for splitting a double into two
 *         32-bit integers
 * It must appear where declarations are allowed, and y and a must already
 * be in scope.
 */
#define DEF_VARS(N) \
|
||||
t_d64 *py = (t_d64*)y; \
|
||||
t_d64 mask = *((t_d64*)mask_cnst); \
|
||||
t_d64 ca = (1u << 31) - 1; \
|
||||
t_d64 da = (t_d64)a; \
|
||||
t_s64 buff[N], s; \
|
||||
d64_2_i32 dy
|
||||
|
||||
/***************************************************************/
|
||||
|
||||
/*
 * MUL_U32_S64_2(i): takes the two 32-bit words held in py[i], multiplies
 * each by da, and stores the products in buff[2*i] and buff[2*i+1].
 *
 * NOTE(review): presumably vis_fxnor is the VIS bitwise XNOR.  If so,
 * XNOR with 0x80000000 inverts every bit of a word except bit 31, which on
 * a two's-complement machine reads back as the signed value 2^31 - 1 - y;
 * "ca - value" then recovers y as a non-negative double.  Confirm against
 * the VIS documentation.
 */
#define MUL_U32_S64_2(i) \
|
||||
dy.d64 = vis_fxnor(mask, py[i]); \
|
||||
buff[2*(i) ] = (ca - (t_d64)dy.i32s.i0) * da; \
|
||||
buff[2*(i)+1] = (ca - (t_d64)dy.i32s.i1) * da
|
||||
|
||||
/*
 * MUL_U32_S64_2_D(i): same word extraction as MUL_U32_S64_2, but each word
 * is multiplied by both da and db, giving four products in
 * buff[4*i] .. buff[4*i+3] (low-part and high-part product for the first
 * word, then for the second).  Requires d0, d1 and db in scope in addition
 * to the DEF_VARS locals.
 */
#define MUL_U32_S64_2_D(i) \
|
||||
dy.d64 = vis_fxnor(mask, py[i]); \
|
||||
d0 = ca - (t_d64)dy.i32s.i0; \
|
||||
d1 = ca - (t_d64)dy.i32s.i1; \
|
||||
buff[4*(i) ] = (t_s64)(d0 * da); \
|
||||
buff[4*(i)+1] = (t_s64)(d0 * db); \
|
||||
buff[4*(i)+2] = (t_s64)(d1 * da); \
|
||||
buff[4*(i)+3] = (t_s64)(d1 * db)
|
||||
|
||||
/***************************************************************/
|
||||
|
||||
/*
 * ADD_S64_U32(i): adds the product in buff[i], the word x[i] and the
 * incoming carry c; stores the sum into z[i] (the assignment to a t_u32
 * element keeps the low 32 bits) and leaves s >> 32 in c as the carry for
 * the next word.  Requires c in scope.
 */
#define ADD_S64_U32(i) \
|
||||
s = buff[i] + x[i] + c; \
|
||||
z[i] = s; \
|
||||
c = (s >> 32)
|
||||
|
||||
/*
 * ADD_S64_U32_D(i): counterpart for the split-multiplier path.  The two
 * products for word i sit in buff[2*i] and buff[2*i+1]; the second is
 * shifted left by A_BITS before being added, then x[i] and the carry uc
 * are added.  The carry out is taken from the sum viewed as unsigned.
 * Requires uc in scope.
 */
#define ADD_S64_U32_D(i) \
|
||||
s = buff[2*(i)] +(((t_s64)(buff[2*(i)+1]))<<A_BITS) + x[i] + uc; \
|
||||
z[i] = s; \
|
||||
uc = ((t_u64)s >> 32)
|
||||
|
||||
/***************************************************************/
|
||||
|
||||
#define MUL_U32_S64_8(i) \
|
||||
MUL_U32_S64_2(i); \
|
||||
MUL_U32_S64_2(i+1); \
|
||||
MUL_U32_S64_2(i+2); \
|
||||
MUL_U32_S64_2(i+3)
|
||||
|
||||
#define MUL_U32_S64_D_8(i) \
|
||||
MUL_U32_S64_2_D(i); \
|
||||
MUL_U32_S64_2_D(i+1); \
|
||||
MUL_U32_S64_2_D(i+2); \
|
||||
MUL_U32_S64_2_D(i+3)
|
||||
|
||||
/***************************************************************/
|
||||
|
||||
#define ADD_S64_U32_8(i) \
|
||||
ADD_S64_U32(i); \
|
||||
ADD_S64_U32(i+1); \
|
||||
ADD_S64_U32(i+2); \
|
||||
ADD_S64_U32(i+3); \
|
||||
ADD_S64_U32(i+4); \
|
||||
ADD_S64_U32(i+5); \
|
||||
ADD_S64_U32(i+6); \
|
||||
ADD_S64_U32(i+7)
|
||||
|
||||
#define ADD_S64_U32_D_8(i) \
|
||||
ADD_S64_U32_D(i); \
|
||||
ADD_S64_U32_D(i+1); \
|
||||
ADD_S64_U32_D(i+2); \
|
||||
ADD_S64_U32_D(i+3); \
|
||||
ADD_S64_U32_D(i+4); \
|
||||
ADD_S64_U32_D(i+5); \
|
||||
ADD_S64_U32_D(i+6); \
|
||||
ADD_S64_U32_D(i+7)
|
||||
|
||||
/***************************************************************/
|
||||
|
||||
/*
 * mul_add: multiply-accumulate over n 32-bit words.  For each i in [0, n)
 * the macros form x[i] + y[i]*a + carry, store the low word in z[i], and
 * pass the high part on as the next carry; the final carry is returned.
 *
 * Two strategies are used, selected by the size of a:
 *   - a below 2^A_BITS: one double-precision product per word.
 *   - otherwise: a is split into its low A_BITS bits (da) and the
 *     remaining high bits (db), and two products per word are recombined
 *     in ADD_S64_U32_D.
 * n == 8 and n == 16 have fully unrolled bodies; any other n goes through
 * the loops.
 *
 * NOTE(review): the looped cases write into a scratch array sized from
 * BUFF_SIZE with no check on n, and process y two words at a time (so an
 * odd n touches one word past y[n-1]).  Presumably callers guarantee
 * suitable n and buffers -- confirm.
 */
t_u32 mul_add(t_u32 *z, t_u32 *x, t_u32 *y, int n, t_u32 a)
{
  if (a < (1 << A_BITS)) {

    /* Small multiplier: single product per word. */
    switch (n) {
    case 8: {
      DEF_VARS(8);
      t_s32 c = 0;

      MUL_U32_S64_8(0);
      ADD_S64_U32_8(0);

      return c;
    }

    case 16: {
      DEF_VARS(16);
      t_s32 c = 0;

      MUL_U32_S64_8(0);
      MUL_U32_S64_8(4);
      ADD_S64_U32_8(0);
      ADD_S64_U32_8(8);

      return c;
    }

    default: {
      DEF_VARS(BUFF_SIZE);
      t_s32 k, c = 0;

#pragma pipeloop(0)
      for (k = 0; k < (n+1)/2; k ++) {
        MUL_U32_S64_2(k);
      }

#pragma pipeloop(0)
      for (k = 0; k < n; k ++) {
        ADD_S64_U32(k);
      }

      return c;
    }
    }
  }

  /* Large multiplier: split a into low (da) and high (db) parts. */
  switch (n) {
  case 8: {
    DEF_VARS(2*8);
    t_d64 d0, d1, db;
    t_u32 uc = 0;

    da = (t_d64)(a &  A_MASK);
    db = (t_d64)(a >> A_BITS);

    MUL_U32_S64_D_8(0);
    ADD_S64_U32_D_8(0);

    return uc;
  }

  case 16: {
    DEF_VARS(2*16);
    t_d64 d0, d1, db;
    t_u32 uc = 0;

    da = (t_d64)(a &  A_MASK);
    db = (t_d64)(a >> A_BITS);

    MUL_U32_S64_D_8(0);
    MUL_U32_S64_D_8(4);
    ADD_S64_U32_D_8(0);
    ADD_S64_U32_D_8(8);

    return uc;
  }

  default: {
    DEF_VARS(2*BUFF_SIZE);
    t_d64 d0, d1, db;
    t_u32 k, uc = 0;

    da = (t_d64)(a &  A_MASK);
    db = (t_d64)(a >> A_BITS);

#pragma pipeloop(0)
    for (k = 0; k < (n+1)/2; k ++) {
      MUL_U32_S64_2_D(k);
    }

#pragma pipeloop(0)
    for (k = 0; k < n; k ++) {
      ADD_S64_U32_D(k);
    }

    return uc;
  }
  }
}
|
||||
|
||||
/***************************************************************/
|
||||
|
||||
/*
 * mul_add_inp: in-place form of mul_add().  x is passed as both the
 * addend and the destination, so the result overwrites x.  Returns the
 * carry reported by mul_add().
 */
t_u32 mul_add_inp(t_u32 *x, t_u32 *y, int n, t_u32 a)
{
  t_u32 carry = mul_add(x, x, y, n, a);

  return carry;
}
|
||||
|
||||
/***************************************************************/
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user