/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
 * All rights reserved.
 *
 * This package is an SSL implementation written
 * by Eric Young (eay@cryptsoft.com).
 * The implementation was written so as to conform with Netscapes SSL.
 * 
 * This library is free for commercial and non-commercial use as long as
 * the following conditions are aheared to.  The following conditions
 * apply to all code found in this distribution, be it the RC4, RSA,
 * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
 * included with this distribution is covered by the same copyright terms
 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
 * 
 * Copyright remains Eric Young's, and as such any Copyright notices in
 * the code are not to be removed.
 * If this package is used in a product, Eric Young should be given attribution
 * as the author of the parts of the library used.
 * This can be in the form of a textual message at program startup or
 * in documentation (online or textual) provided with the package.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *    "This product includes cryptographic software written by
 *     Eric Young (eay@cryptsoft.com)"
 *    The word 'cryptographic' can be left out if the rouines from the library
 *    being used are not cryptographic related :-).
 * 4. If you include any Windows specific code (or a derivative thereof) from 
 *    the apps directory (application code) you must include an acknowledgement:
 *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
 * 
 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * 
 * The licence and distribution terms for any publically available version or
 * derivative of this code cannot be changed.  i.e. this code cannot simply be
 * copied and put under another distribution licence
 * [including the GNU Public Licence.]
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "bn_lcl.h"

/* crypto/bn/bn_add.c */

/* r can == a or b */
int BN_add(BIGNUM *r, BIGNUM *a, BIGNUM *b)
	{
	BIGNUM *tmp;

	bn_check_top(a);
	bn_check_top(b);

	/*  a +  b	a+b
	 *  a + -b	a-b
	 * -a +  b	b-a
	 * -a + -b	-(a+b)
	 */
	if (a->neg ^ b->neg)
		{
		/* only one is negative */
		if (a->neg)
			{ tmp=a; a=b; b=tmp; }

		/* we are now a - b */

		if (BN_ucmp(a,b) < 0)
			{
			if (!BN_usub(r,b,a)) return(0);
			r->neg=1;
			}
		else
			{
			if (!BN_usub(r,a,b)) return(0);
			r->neg=0;
			}
		return(1);
		}

	if (a->neg) /* both are neg */
		r->neg=1;
	else
		r->neg=0;

	if (!BN_uadd(r,a,b)) return(0);
	return(1);
	}

/* unsigned add of b to a, r must be large enough */
int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
	{
	register int i;
	int max,min;
	BN_ULONG *ap,*bp,*rp,carry,t1;
	const BIGNUM *tmp;

	bn_check_top(a);
	bn_check_top(b);

	if (a->top < b->top)
		{ tmp=a; a=b; b=tmp; }
	max=a->top;
	min=b->top;

	if (bn_wexpand(r,max+1) == NULL)
		return(0);

	r->top=max;


	ap=a->d;
	bp=b->d;
	rp=r->d;
	carry=0;

	carry=bn_add_words(rp,ap,bp,min);
	rp+=min;
	ap+=min;
	bp+=min;
	i=min;

	if (carry)
		{
		while (i < max)
			{
			i++;
			t1= *(ap++);
			if ((*(rp++)=(t1+1)&BN_MASK2) >= t1)
				{
				carry=0;
				break;
				}
			}
		if ((i >= max) && carry)
			{
			*(rp++)=1;
			r->top++;
			}
		}
	if (rp != ap)
		{
		for (; i<max; i++)
			*(rp++)= *(ap++);
		}
	/* memcpy(rp,ap,sizeof(*ap)*(max-i));*/
	return(1);
	}

/* unsigned subtraction of b from a, a must be larger than b. */
int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
	{
	int max,min;
	register BN_ULONG t1,t2,*ap,*bp,*rp;
	int i,carry;

	bn_check_top(a);
	bn_check_top(b);

	if (a->top < b->top) /* hmm... should not be happening */
		{
		return(0);
		}

	max=a->top;
	min=b->top;
	if (bn_wexpand(r,max) == NULL) return(0);

	ap=a->d;
	bp=b->d;
	rp=r->d;

	carry=0;
	for (i=0; i<min; i++)
		{
		t1= *(ap++);
		t2= *(bp++);
		if (carry)
			{
			carry=(t1 <= t2);
			t1=(t1-t2-1)&BN_MASK2;
			}
		else
			{
			carry=(t1 < t2);
			t1=(t1-t2)&BN_MASK2;
			}
		*(rp++)=t1&BN_MASK2;
		}
	if (carry) /* subtracted */
		{
		while (i < max)
			{
			i++;
			t1= *(ap++);
			t2=(t1-1)&BN_MASK2;
			*(rp++)=t2;
			if (t1 > t2) break;
			}
		}
	if (rp != ap)
		{
		for (;;)
			{
			if (i++ >= max) break;
			rp[0]=ap[0];
			if (i++ >= max) break;
			rp[1]=ap[1];
			if (i++ >= max) break;
			rp[2]=ap[2];
			if (i++ >= max) break;
			rp[3]=ap[3];
			rp+=4;
			ap+=4;
			}
		}
	r->top=max;
	bn_fix_top(r);
	return(1);
	}

int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
	{
	int max;
	int add=0,neg=0;
	const BIGNUM *tmp;

	bn_check_top(a);
	bn_check_top(b);

	/*  a -  b	a-b
	 *  a - -b	a+b
	 * -a -  b	-(a+b)
	 * -a - -b	b-a
	 */
	if (a->neg)
		{
		if (b->neg)
			{ tmp=a; a=b; b=tmp; }
		else
			{ add=1; neg=1; }
		}
	else
		{
		if (b->neg) { add=1; neg=0; }
		}

	if (add)
		{
		if (!BN_uadd(r,a,b)) return(0);
		r->neg=neg;
		return(1);
		}

	/* We are actually doing a - b :-) */

	max=(a->top > b->top)?a->top:b->top;
	if (bn_wexpand(r,max) == NULL) return(0);
	if (BN_ucmp(a,b) < 0)
		{
		if (!BN_usub(r,b,a)) return(0);
		r->neg=1;
		}
	else
		{
		if (!BN_usub(r,a,b)) return(0);
		r->neg=0;
		}
	return(1);
	}

/* crypto/bn/bn_asm.c */

BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
	{
	BN_ULONG c=0;
	BN_ULONG bl,bh;

	bn_check_num(num);
	if (num <= 0) return((BN_ULONG)0);

	bl=LBITS(w);
	bh=HBITS(w);

	for (;;)
		{
		mul_add(rp[0],ap[0],bl,bh,c);
		if (--num == 0) break;
		mul_add(rp[1],ap[1],bl,bh,c);
		if (--num == 0) break;
		mul_add(rp[2],ap[2],bl,bh,c);
		if (--num == 0) break;
		mul_add(rp[3],ap[3],bl,bh,c);
		if (--num == 0) break;
		ap+=4;
		rp+=4;
		}
	return(c);
	} 

BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
	{
	BN_ULONG carry=0;
	BN_ULONG bl,bh;

	bn_check_num(num);
	if (num <= 0) return((BN_ULONG)0);

	bl=LBITS(w);
	bh=HBITS(w);

	for (;;)
		{
		mul(rp[0],ap[0],bl,bh,carry);
		if (--num == 0) break;
		mul(rp[1],ap[1],bl,bh,carry);
		if (--num == 0) break;
		mul(rp[2],ap[2],bl,bh,carry);
		if (--num == 0) break;
		mul(rp[3],ap[3],bl,bh,carry);
		if (--num == 0) break;
		ap+=4;
		rp+=4;
		}
	return(carry);
	} 

void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n)
        {
	bn_check_num(n);
	if (n <= 0) return;
	for (;;)
		{
		sqr64(r[0],r[1],a[0]);
		if (--n == 0) break;

		sqr64(r[2],r[3],a[1]);
		if (--n == 0) break;

		sqr64(r[4],r[5],a[2]);
		if (--n == 0) break;

		sqr64(r[6],r[7],a[3]);
		if (--n == 0) break;

		a+=4;
		r+=8;
		}
	}

/* Divide h-l by d and return the result. */
/* I need to test this some more :-( */
BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
	{
	BN_ULONG dh,dl,q,ret=0,th,tl,t;
	int i,count=2;

	if (d == 0) return(BN_MASK2);

	i=BN_num_bits_word(d);
	if ((i != BN_BITS2) && (h > (BN_ULONG)1<<i))
		{
		abort();
		}
	i=BN_BITS2-i;
	if (h >= d) h-=d;

	if (i)
		{
		d<<=i;
		h=(h<<i)|(l>>(BN_BITS2-i));
		l<<=i;
		}
	dh=(d&BN_MASK2h)>>BN_BITS4;
	dl=(d&BN_MASK2l);
	for (;;)
		{
		if ((h>>BN_BITS4) == dh)
			q=BN_MASK2l;
		else
			q=h/dh;

		th=q*dh;
		tl=dl*q;
		for (;;)
			{
			t=h-th;
			if ((t&BN_MASK2h) ||
				((tl) <= (
					(t<<BN_BITS4)|
					((l&BN_MASK2h)>>BN_BITS4))))
				break;
			q--;
			th-=dh;
			tl-=dl;
			}
		t=(tl>>BN_BITS4);
		tl=(tl<<BN_BITS4)&BN_MASK2h;
		th+=t;

		if (l < tl) th++;
		l-=tl;
		if (h < th)
			{
			h+=d;
			q--;
			}
		h-=th;

		if (--count == 0) break;

		ret=q<<BN_BITS4;
		h=((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2;
		l=(l&BN_MASK2l)<<BN_BITS4;
		}
	ret|=q;
	return(ret);
	}

BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
        {
	BN_ULONG c,l,t;

	bn_check_num(n);
	if (n <= 0) return((BN_ULONG)0);

	c=0;
	for (;;)
		{
		t=a[0];
		t=(t+c)&BN_MASK2;
		c=(t < c);
		l=(t+b[0])&BN_MASK2;
		c+=(l < t);
		r[0]=l;
		if (--n <= 0) break;

		t=a[1];
		t=(t+c)&BN_MASK2;
		c=(t < c);
		l=(t+b[1])&BN_MASK2;
		c+=(l < t);
		r[1]=l;
		if (--n <= 0) break;

		t=a[2];
		t=(t+c)&BN_MASK2;
		c=(t < c);
		l=(t+b[2])&BN_MASK2;
		c+=(l < t);
		r[2]=l;
		if (--n <= 0) break;

		t=a[3];
		t=(t+c)&BN_MASK2;
		c=(t < c);
		l=(t+b[3])&BN_MASK2;
		c+=(l < t);
		r[3]=l;
		if (--n <= 0) break;

		a+=4;
		b+=4;
		r+=4;
		}
	return((BN_ULONG)c);
	}

BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
        {
	BN_ULONG t1,t2;
	int c=0;

	bn_check_num(n);
	if (n <= 0) return((BN_ULONG)0);

	for (;;)
		{
		t1=a[0]; t2=b[0];
		r[0]=(t1-t2-c)&BN_MASK2;
		if (t1 != t2) c=(t1 < t2);
		if (--n <= 0) break;

		t1=a[1]; t2=b[1];
		r[1]=(t1-t2-c)&BN_MASK2;
		if (t1 != t2) c=(t1 < t2);
		if (--n <= 0) break;

		t1=a[2]; t2=b[2];
		r[2]=(t1-t2-c)&BN_MASK2;
		if (t1 != t2) c=(t1 < t2);
		if (--n <= 0) break;

		t1=a[3]; t2=b[3];
		r[3]=(t1-t2-c)&BN_MASK2;
		if (t1 != t2) c=(t1 < t2);
		if (--n <= 0) break;

		a+=4;
		b+=4;
		r+=4;
		}
	return(c);
	}

/* hmm... is it faster just to do a multiply? */
void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
	{
	BN_ULONG t[8];
	bn_sqr_normal(r,a,4,t);
	}

void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
	{
	BN_ULONG t[16];
	bn_sqr_normal(r,a,8,t);
	}

void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
	{
	r[4]=bn_mul_words(    &(r[0]),a,4,b[0]);
	r[5]=bn_mul_add_words(&(r[1]),a,4,b[1]);
	r[6]=bn_mul_add_words(&(r[2]),a,4,b[2]);
	r[7]=bn_mul_add_words(&(r[3]),a,4,b[3]);
	}

void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
	{
	r[ 8]=bn_mul_words(    &(r[0]),a,8,b[0]);
	r[ 9]=bn_mul_add_words(&(r[1]),a,8,b[1]);
	r[10]=bn_mul_add_words(&(r[2]),a,8,b[2]);
	r[11]=bn_mul_add_words(&(r[3]),a,8,b[3]);
	r[12]=bn_mul_add_words(&(r[4]),a,8,b[4]);
	r[13]=bn_mul_add_words(&(r[5]),a,8,b[5]);
	r[14]=bn_mul_add_words(&(r[6]),a,8,b[6]);
	r[15]=bn_mul_add_words(&(r[7]),a,8,b[7]);
	}

/* crypto/bn/bn_div.c */

int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
	   BN_CTX *ctx)
	{
	int norm_shift,i,j,loop;
	BIGNUM *tmp,wnum,*snum,*sdiv,*res;
	BN_ULONG *resp,*wnump;
	BN_ULONG d0,d1;
	int num_n,div_n;

	bn_check_top(num);
	bn_check_top(divisor);

	if (BN_is_zero(divisor))
		{
		return(0);
		}

	if (BN_ucmp(num,divisor) < 0)
		{
		if (rm != NULL)
			{ if (BN_copy(rm,num) == NULL) return(0); }
		if (dv != NULL) BN_zero(dv);
		return(1);
		}

	tmp= &(ctx->bn[ctx->tos]);
	tmp->neg=0;
	snum= &(ctx->bn[ctx->tos+1]);
	sdiv= &(ctx->bn[ctx->tos+2]);
	if (dv == NULL)
		res= &(ctx->bn[ctx->tos+3]);
	else	res=dv;

	/* First we normalise the numbers */
	norm_shift=BN_BITS2-((BN_num_bits(divisor))%BN_BITS2);
	BN_lshift(sdiv,divisor,norm_shift);
	sdiv->neg=0;
	norm_shift+=BN_BITS2;
	BN_lshift(snum,num,norm_shift);
	snum->neg=0;
	div_n=sdiv->top;
	num_n=snum->top;
	loop=num_n-div_n;

	/* Lets setup a 'window' into snum
	 * This is the part that corresponds to the current
	 * 'area' being divided */
	BN_init(&wnum);
	wnum.d=	 &(snum->d[loop]);
	wnum.top= div_n;
	wnum.max= snum->max+1; /* a bit of a lie */

	/* Get the top 2 words of sdiv */
	/* i=sdiv->top; */
	d0=sdiv->d[div_n-1];
	d1=(div_n == 1)?0:sdiv->d[div_n-2];

	/* pointer to the 'top' of snum */
	wnump= &(snum->d[num_n-1]);

	/* Setup to 'res' */
	res->neg= (num->neg^divisor->neg);
	if (!bn_wexpand(res,(loop+1))) goto err;
	res->top=loop;
	resp= &(res->d[loop-1]);

	/* space for temp */
	if (!bn_wexpand(tmp,(div_n+1))) goto err;

	if (BN_ucmp(&wnum,sdiv) >= 0)
		{
		if (!BN_usub(&wnum,&wnum,sdiv)) goto err;
		*resp=1;
		res->d[res->top-1]=1;
		}
	else
		res->top--;
	resp--;

	for (i=0; i<loop-1; i++)
		{
		BN_ULONG q,l0;
		BN_ULONG n0,n1,rem=0;

		n0=wnump[0];
		n1=wnump[-1];
		if (n0 == d0)
			q=BN_MASK2;
		else
			q=bn_div_words(n0,n1,d0);
		{
		BN_ULONG t2l,t2h,ql,qh;

		/*
		 * It's more than enough with the only multiplication.
		 * See the comment above in BN_LLONG section...
		 */
		rem=(n1-q*d0)&BN_MASK2;
		t2l=LBITS(d1); t2h=HBITS(d1);
		ql =LBITS(q);  qh =HBITS(q);
		mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */

		for (;;)
			{
			if ((t2h < rem) ||
				((t2h == rem) && (t2l <= wnump[-2])))
				break;
			q--;
			rem += d0;
			if (rem < d0) break; /* don't let rem overflow */
			if (t2l < d1) t2h--; t2l -= d1;
			}
		}
		wnum.d--; wnum.top++;
		l0=bn_mul_words(tmp->d,sdiv->d,div_n,q);
		tmp->d[div_n]=l0;
		for (j=div_n+1; j>0; j--)
			if (tmp->d[j-1]) break;
		tmp->top=j;

		j=wnum.top;
		BN_sub(&wnum,&wnum,tmp);

		snum->top=snum->top+wnum.top-j;

		if (wnum.neg)
			{
			q--;
			j=wnum.top;
			BN_add(&wnum,&wnum,sdiv);
			snum->top+=wnum.top-j;
			}
		*(resp--)=q;
		wnump--;
		}
	if (rm != NULL)
		{
		BN_rshift(rm,snum,norm_shift);
		rm->neg=num->neg;
		}
	return(1);
err:
	return(0);
	}

/* rem != m */
int BN_mod(BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx)
	{
	return(BN_div(NULL,rem,m,d,ctx));
	}

/* solves ax == 1 (mod n) */
BIGNUM *BN_mod_inverse(BIGNUM *in, BIGNUM *a, const BIGNUM *n, BN_CTX *ctx)
	{
	BIGNUM *A,*B,*X,*Y,*M,*D,*R;
	BIGNUM *T,*ret=NULL;
	int sign;

	bn_check_top(a);
	bn_check_top(n);

	A= &(ctx->bn[ctx->tos]);
	B= &(ctx->bn[ctx->tos+1]);
	X= &(ctx->bn[ctx->tos+2]);
	D= &(ctx->bn[ctx->tos+3]);
	M= &(ctx->bn[ctx->tos+4]);
	Y= &(ctx->bn[ctx->tos+5]);
	ctx->tos+=6;
	if (in == NULL)
		R=BN_new();
	else
		R=in;
	if (R == NULL) goto err;

	BN_zero(X);
	BN_one(Y);
	if (BN_copy(A,a) == NULL) goto err;
	if (BN_copy(B,n) == NULL) goto err;
	sign=1;

	while (!BN_is_zero(B))
		{
		if (!BN_div(D,M,A,B,ctx)) goto err;
		T=A;
		A=B;
		B=M;
		/* T has a struct, M does not */

		if (!BN_mul(T,D,X,ctx)) goto err;
		if (!BN_add(T,T,Y)) goto err;
		M=Y;
		Y=X;
		X=T;
		sign= -sign;
		}
	if (sign < 0)
		{
		if (!BN_sub(Y,n,Y)) goto err;
		}

	if (BN_is_one(A))
		{ if (!BN_mod(R,Y,n,ctx)) goto err; }
	else
		{
		goto err;
		}
	ret=R;
err:
	if ((ret == NULL) && (in == NULL)) BN_free(R);
	ctx->tos-=6;
	return(ret);
	}

/* crypto/bn/bn_exp.c */

#define TABLE_SIZE	16

/* slow but works */
int BN_mod_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, const BIGNUM *m, BN_CTX *ctx)
	{
	BIGNUM *t;
	int r=0;

	bn_check_top(a);
	bn_check_top(b);
	bn_check_top(m);

	t= &(ctx->bn[ctx->tos++]);
	if (a == b)
		{ if (!BN_sqr(t,a,ctx)) goto err; }
	else
		{ if (!BN_mul(t,a,b,ctx)) goto err; }
	if (!BN_mod(ret,t,m,ctx)) goto err;
	r=1;
err:
	ctx->tos--;
	return(r);
	}

/* this one works - simple but works */
int BN_exp(BIGNUM *r, BIGNUM *a, BIGNUM *p, BN_CTX *ctx)
	{
	int i,bits,ret=0,tos;
	BIGNUM *v,*rr;

	tos=ctx->tos;
	v= &(ctx->bn[ctx->tos++]);
	if ((r == a) || (r == p))
		rr= &(ctx->bn[ctx->tos++]);
	else
		rr=r;

	if (BN_copy(v,a) == NULL) goto err;
	bits=BN_num_bits(p);

	if (BN_is_odd(p))
		{ if (BN_copy(rr,a) == NULL) goto err; }
	else	{ if (!BN_one(rr)) goto err; }

	for (i=1; i<bits; i++)
		{
		if (!BN_sqr(v,v,ctx)) goto err;
		if (BN_is_bit_set(p,i))
			{
			if (!BN_mul(rr,rr,v,ctx)) goto err;
			}
		}
	ret=1;
err:
	ctx->tos=tos;
	if (r != rr) BN_copy(r,rr);
	return(ret);
	}

int BN_mod_exp(BIGNUM *r, BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
	       BN_CTX *ctx)
	{
	int ret;

	bn_check_top(a);
	bn_check_top(p);
	bn_check_top(m);

	/* I have finally been able to take out this pre-condition of
	 * the top bit being set.  It was caused by an error in BN_div
	 * with negatives.  There was also another problem when for a^b%m
	 * a >= m.  eay 07-May-97 */
/*	if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */

	if (BN_is_odd(m))
		{ ret=BN_mod_exp_mont(r,a,p,m,ctx,NULL); }
	else
		{ ret=BN_mod_exp_recp(r,a,p,m,ctx); }

	return(ret);
	}

int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
		    const BIGNUM *m, BN_CTX *ctx)
	{
	int i,j,bits,ret=0,wstart,wend,window,wvalue;
	int start=1,ts=0;
	BIGNUM *aa;
	BIGNUM val[TABLE_SIZE];
	BN_RECP_CTX recp;

	aa= &(ctx->bn[ctx->tos++]);
	bits=BN_num_bits(p);

	if (bits == 0)
		{
		BN_one(r);
		return(1);
		}
	BN_RECP_CTX_init(&recp);
	if (BN_RECP_CTX_set(&recp,m,ctx) <= 0) goto err;

	BN_init(&(val[0]));
	ts=1;

	if (!BN_mod(&(val[0]),a,m,ctx)) goto err;		/* 1 */
	if (!BN_mod_mul_reciprocal(aa,&(val[0]),&(val[0]),&recp,ctx))
		goto err;				/* 2 */

	if (bits <= 17) /* This is probably 3 or 0x10001, so just do singles */
		window=1;
	else if (bits >= 256)
		window=5;	/* max size of window */
	else if (bits >= 128)
		window=4;
	else
		window=3;

	j=1<<(window-1);
	for (i=1; i<j; i++)
		{
		BN_init(&val[i]);
		if (!BN_mod_mul_reciprocal(&(val[i]),&(val[i-1]),aa,&recp,ctx))
			goto err;
		}
	ts=i;

	start=1;	/* This is used to avoid multiplication etc
			 * when there is only the value '1' in the
			 * buffer. */
	wvalue=0;	/* The 'value' of the window */
	wstart=bits-1;	/* The top bit of the window */
	wend=0;		/* The bottom bit of the window */

	if (!BN_one(r)) goto err;

	for (;;)
		{
		if (BN_is_bit_set(p,wstart) == 0)
			{
			if (!start)
				if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx))
				goto err;
			if (wstart == 0) break;
			wstart--;
			continue;
			}
		/* We now have wstart on a 'set' bit, we now need to work out
		 * how bit a window to do.  To do this we need to scan
		 * forward until the last set bit before the end of the
		 * window */
		j=wstart;
		wvalue=1;
		wend=0;
		for (i=1; i<window; i++)
			{
			if (wstart-i < 0) break;
			if (BN_is_bit_set(p,wstart-i))
				{
				wvalue<<=(i-wend);
				wvalue|=1;
				wend=i;
				}
			}

		/* wend is the size of the current window */
		j=wend+1;
		/* add the 'bytes above' */
		if (!start)
			for (i=0; i<j; i++)
				{
				if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx))
					goto err;
				}
		
		/* wvalue will be an odd number < 2^window */
		if (!BN_mod_mul_reciprocal(r,r,&(val[wvalue>>1]),&recp,ctx))
			goto err;

		/* move the 'window' down further */
		wstart-=wend+1;
		wvalue=0;
		start=0;
		if (wstart < 0) break;
		}
	ret=1;
err:
	ctx->tos--;
	for (i=0; i<ts; i++)
		BN_clear_free(&(val[i]));
	BN_RECP_CTX_free(&recp);
	return(ret);
	}

int BN_mod_exp_mont(BIGNUM *rr, BIGNUM *a, const BIGNUM *p,
		    const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
	{
	int i,j,bits,ret=0,wstart,wend,window,wvalue;
	int start=1,ts=0;
	BIGNUM *d,*r;
	BIGNUM *aa;
	BIGNUM val[TABLE_SIZE];
	BN_MONT_CTX *mont=NULL;

	bn_check_top(a);
	bn_check_top(p);
	bn_check_top(m);

	if (!(m->d[0] & 1))
		{
		return(0);
		}
	d= &(ctx->bn[ctx->tos++]);
	r= &(ctx->bn[ctx->tos++]);
	bits=BN_num_bits(p);
	if (bits == 0)
		{
		BN_one(r);
		return(1);
		}

	/* If this is not done, things will break in the montgomery
	 * part */

	if (in_mont != NULL)
		mont=in_mont;
	else
		{
		if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
		if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
		}

	BN_init(&val[0]);
	ts=1;
	if (BN_ucmp(a,m) >= 0)
		{
		BN_mod(&(val[0]),a,m,ctx);
		aa= &(val[0]);
		}
	else
		aa=a;
	if (!BN_to_montgomery(&(val[0]),aa,mont,ctx)) goto err; /* 1 */
	if (!BN_mod_mul_montgomery(d,&(val[0]),&(val[0]),mont,ctx)) goto err; /* 2 */

	if (bits <= 20) /* This is probably 3 or 0x10001, so just do singles */
		window=1;
	else if (bits >= 256)
		window=5;	/* max size of window */
	else if (bits >= 128)
		window=4;
	else
		window=3;

	j=1<<(window-1);
	for (i=1; i<j; i++)
		{
		BN_init(&(val[i]));
		if (!BN_mod_mul_montgomery(&(val[i]),&(val[i-1]),d,mont,ctx))
			goto err;
		}
	ts=i;

	start=1;	/* This is used to avoid multiplication etc
			 * when there is only the value '1' in the
			 * buffer. */
	wvalue=0;	/* The 'value' of the window */
	wstart=bits-1;	/* The top bit of the window */
	wend=0;		/* The bottom bit of the window */

        if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
	for (;;)
		{
		if (BN_is_bit_set(p,wstart) == 0)
			{
			if (!start)
				{
				if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
				goto err;
				}
			if (wstart == 0) break;
			wstart--;
			continue;
			}
		/* We now have wstart on a 'set' bit, we now need to work out
		 * how bit a window to do.  To do this we need to scan
		 * forward until the last set bit before the end of the
		 * window */
		j=wstart;
		wvalue=1;
		wend=0;
		for (i=1; i<window; i++)
			{
			if (wstart-i < 0) break;
			if (BN_is_bit_set(p,wstart-i))
				{
				wvalue<<=(i-wend);
				wvalue|=1;
				wend=i;
				}
			}

		/* wend is the size of the current window */
		j=wend+1;
		/* add the 'bytes above' */
		if (!start)
			for (i=0; i<j; i++)
				{
				if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
					goto err;
				}
		
		/* wvalue will be an odd number < 2^window */
		if (!BN_mod_mul_montgomery(r,r,&(val[wvalue>>1]),mont,ctx))
			goto err;

		/* move the 'window' down further */
		wstart-=wend+1;
		wvalue=0;
		start=0;
		if (wstart < 0) break;
		}
	BN_from_montgomery(rr,r,mont,ctx);
	ret=1;
err:
	if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
	ctx->tos-=2;
	for (i=0; i<ts; i++)
		BN_clear_free(&(val[i]));
	return(ret);
	}

/* crypto/bn/bn_lib.c */

/* For a 32 bit machine
 * 2 -   4 ==  128
 * 3 -   8 ==  256
 * 4 -  16 ==  512
 * 5 -  32 == 1024
 * 6 -  64 == 2048
 * 7 - 128 == 4096
 * 8 - 256 == 8192
 */
int bn_limit_bits=0;
int bn_limit_num=8;        /* (1<<bn_limit_bits) */
int bn_limit_bits_low=0;
int bn_limit_num_low=8;    /* (1<<bn_limit_bits_low) */
int bn_limit_bits_high=0;
int bn_limit_num_high=8;   /* (1<<bn_limit_bits_high) */
int bn_limit_bits_mont=0;
int bn_limit_num_mont=8;   /* (1<<bn_limit_bits_mont) */
BIGNUM *BN_value_one(void)
	{
	static BN_ULONG data_one=1L;
	static BIGNUM const_one={&data_one,1,1,0};

	return(&const_one);
	}

int BN_num_bits_word(BN_ULONG l)
	{
	static const char bits[256]={
		0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,
		5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
		6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
		6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
		7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
		7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
		7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
		7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
		8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
		8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
		8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
		8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
		8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
		8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
		8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
		8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
		};

#if defined(SIXTY_FOUR_BIT_LONG)
	if (l & 0xffffffff00000000L)
		{
		if (l & 0xffff000000000000L)
			{
			if (l & 0xff00000000000000L)
				{
				return(bits[(int)(l>>56)]+56);
				}
			else	return(bits[(int)(l>>48)]+48);
			}
		else
			{
			if (l & 0x0000ff0000000000L)
				{
				return(bits[(int)(l>>40)]+40);
				}
			else	return(bits[(int)(l>>32)]+32);
			}
		}
	else
#else
#ifdef SIXTY_FOUR_BIT
	if (l & 0xffffffff00000000LL)
		{
		if (l & 0xffff000000000000LL)
			{
			if (l & 0xff00000000000000LL)
				{
				return(bits[(int)(l>>56)]+56);
				}
			else	return(bits[(int)(l>>48)]+48);
			}
		else
			{
			if (l & 0x0000ff0000000000LL)
				{
				return(bits[(int)(l>>40)]+40);
				}
			else	return(bits[(int)(l>>32)]+32);
			}
		}
	else
#endif
#endif
		{
#if defined(THIRTY_TWO_BIT) || defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)
		if (l & 0xffff0000L)
			{
			if (l & 0xff000000L)
				return(bits[(int)(l>>24L)]+24);
			else	return(bits[(int)(l>>16L)]+16);
			}
		else
#endif
			{
#if defined(SIXTEEN_BIT) || defined(THIRTY_TWO_BIT) || defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)
			if (l & 0xff00L)
				return(bits[(int)(l>>8)]+8);
			else	
#endif
				return(bits[(int)(l   )]  );
			}
		}
	}

int BN_num_bits(const BIGNUM *a)
	{
	BN_ULONG l;
	int i;

	bn_check_top(a);

	if (a->top == 0) return(0);
	l=a->d[a->top-1];
	i=(a->top-1)*BN_BITS2;
	if (l == 0)
		{
		abort();
		}
	return(i+BN_num_bits_word(l));
	}

void BN_clear_free(BIGNUM *a)
	{
	int i;

	if (a == NULL) return;
	if (a->d != NULL)
		{
		memset(a->d,0,a->max*sizeof(a->d[0]));
		if (!(BN_get_flags(a,BN_FLG_STATIC_DATA)))
			free(a->d);
		}
	i=BN_get_flags(a,BN_FLG_MALLOCED);
	memset(a,0,sizeof(BIGNUM));
	if (i)
		free(a);
	}

void BN_free(BIGNUM *a)
	{
	if (a == NULL) return;
	if ((a->d != NULL) && !(BN_get_flags(a,BN_FLG_STATIC_DATA)))
		free(a->d);
	a->flags|=BN_FLG_FREE; /* REMOVE? */
	if (a->flags & BN_FLG_MALLOCED)
		free(a);
	}

void BN_init(BIGNUM *a)
	{
	memset(a,0,sizeof(BIGNUM));
	}

BIGNUM *BN_new(void)
	{
	BIGNUM *ret;

	if ((ret=(BIGNUM *)malloc(sizeof(BIGNUM))) == NULL)
		{
		return(NULL);
		}
	ret->flags=BN_FLG_MALLOCED;
	ret->top=0;
	ret->neg=0;
	ret->max=0;
	ret->d=NULL;
	return(ret);
	}


BN_CTX *BN_CTX_new(void)
	{
	BN_CTX *ret;

	ret=(BN_CTX *)malloc(sizeof(BN_CTX));
	if (ret == NULL)
		{
		return(NULL);
		}

	BN_CTX_init(ret);
	ret->flags=BN_FLG_MALLOCED;
	return(ret);
	}

void BN_CTX_init(BN_CTX *ctx)
	{
	memset(ctx,0,sizeof(BN_CTX));
	ctx->tos=0;
	ctx->flags=0;
	}

void BN_CTX_free(BN_CTX *c)
	{
	int i;

	if(c == NULL)
	    return;

	for (i=0; i<BN_CTX_NUM; i++)
		BN_clear_free(&(c->bn[i]));
	if (c->flags & BN_FLG_MALLOCED)
		free(c);
	}

BIGNUM *bn_expand2(BIGNUM *b, int words)
	{
	BN_ULONG *A,*a;
	const BN_ULONG *B;
	int i;

	bn_check_top(b);

	if (words > b->max)
		{
		bn_check_top(b);	
		if (BN_get_flags(b,BN_FLG_STATIC_DATA))
			{
			return(NULL);
			}
		a=A=(BN_ULONG *)malloc(sizeof(BN_ULONG)*(words+1));
		if (A == NULL)
			{
			return(NULL);
			}
		B=b->d;
		/* Check if the previous number needs to be copied */
		if (B != NULL)
			{
			for (i=b->top>>2; i>0; i--,A+=4,B+=4)
				{
				/*
				 * The fact that the loop is unrolled
				 * 4-wise is a tribute to Intel. It's
				 * the one that doesn't have enough
				 * registers to accomodate more data.
				 * I'd unroll it 8-wise otherwise:-)
				 *
				 *		<appro@fy.chalmers.se>
				 */
				BN_ULONG a0,a1,a2,a3;
				a0=B[0]; a1=B[1]; a2=B[2]; a3=B[3];
				A[0]=a0; A[1]=a1; A[2]=a2; A[3]=a3;
				}
			switch (b->top&3)
				{
				case 3:	A[2]=B[2];
				case 2:	A[1]=B[1];
				case 1:	A[0]=B[0];
				case 0:	; /* ultrix cc workaround, see above */
				}
			free(b->d);
			}

		b->d=a;
		b->max=words;

		/* Now need to zero any data between b->top and b->max */

		A= &(b->d[b->top]);
		for (i=(b->max - b->top)>>3; i>0; i--,A+=8)
			{
			A[0]=0; A[1]=0; A[2]=0; A[3]=0;
			A[4]=0; A[5]=0; A[6]=0; A[7]=0;
			}
		for (i=(b->max - b->top)&7; i>0; i--,A++)
			A[0]=0;

/*		memset(&(p[b->max]),0,((words+1)-b->max)*sizeof(BN_ULONG)); */
/*	{ int i; for (i=b->max; i<words+1; i++) p[i]=i;} */

		}
	return(b);
	}

BIGNUM *BN_dup(const BIGNUM *a)
	{
	BIGNUM *r;

	if (a == NULL) return NULL;

	bn_check_top(a);

	r=BN_new();
	if (r == NULL) return(NULL);
	return((BIGNUM *)BN_copy(r,a));
	}

BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b)
	{
	int i;
	BN_ULONG *A;
	const BN_ULONG *B;

	bn_check_top(b);

	if (a == b) return(a);
	if (bn_wexpand(a,b->top) == NULL) return(NULL);

	A=a->d;
	B=b->d;
	for (i=b->top>>2; i>0; i--,A+=4,B+=4)
		{
		BN_ULONG a0,a1,a2,a3;
		a0=B[0]; a1=B[1]; a2=B[2]; a3=B[3];
		A[0]=a0; A[1]=a1; A[2]=a2; A[3]=a3;
		}
	switch (b->top&3)
		{
		case 3: A[2]=B[2];
		case 2: A[1]=B[1];
		case 1: A[0]=B[0];
		case 0: ; /* ultrix cc workaround, see comments in bn_expand2 */
		}

/*	memset(&(a->d[b->top]),0,sizeof(a->d[0])*(a->max-b->top));*/
	a->top=b->top;
	if ((a->top == 0) && (a->d != NULL))
		a->d[0]=0;
	a->neg=b->neg;
	return(a);
	}

void BN_clear(BIGNUM *a)
	{
	if (a->d != NULL)
		memset(a->d,0,a->max*sizeof(a->d[0]));
	a->top=0;
	a->neg=0;
	}

BN_ULONG BN_get_word(BIGNUM *a)
	{
	int i,n;
	BN_ULONG ret=0;

	n=BN_num_bytes(a);
	if (n > sizeof(BN_ULONG))
		return(BN_MASK2);
	for (i=a->top-1; i>=0; i--)
		{
#ifndef SIXTY_FOUR_BIT /* the data item > unsigned long */
		ret<<=BN_BITS4; /* stops the compiler complaining */
		ret<<=BN_BITS4;
#else
		ret=0;
#endif
		ret|=a->d[i];
		}
	return(ret);
	}

int BN_set_word(BIGNUM *a, BN_ULONG w)
	{
	int i,n;
	if (bn_expand(a,sizeof(BN_ULONG)*8) == NULL) return(0);

	n=sizeof(BN_ULONG)/BN_BYTES;
	a->neg=0;
	a->top=0;
	a->d[0]=(BN_ULONG)w&BN_MASK2;
	if (a->d[0] != 0) a->top=1;
	for (i=1; i<n; i++)
		{
		/* the following is done instead of
		 * w>>=BN_BITS2 so compilers don't complain
		 * on builds where sizeof(long) == BN_TYPES */
#ifndef SIXTY_FOUR_BIT /* the data item > unsigned long */
		w>>=BN_BITS4;
		w>>=BN_BITS4;
#else
		w=0;
#endif
		a->d[i]=(BN_ULONG)w&BN_MASK2;
		if (a->d[i] != 0) a->top=i+1;
		}
	return(1);
	}

/* ignore negative */
BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret)
	{
	unsigned int i,m;
	unsigned int n;
	BN_ULONG l;

	if (ret == NULL) ret=BN_new();
	if (ret == NULL) return(NULL);
	l=0;
	n=len;
	if (n == 0)
		{
		ret->top=0;
		return(ret);
		}
	if (bn_expand(ret,(int)(n+2)*8) == NULL)
		return(NULL);
	i=((n-1)/BN_BYTES)+1;
	m=((n-1)%(BN_BYTES));
	ret->top=i;
	while (n-- > 0)
		{
		l=(l<<8L)| *(s++);
		if (m-- == 0)
			{
			ret->d[--i]=l;
			l=0;
			m=BN_BYTES-1;
			}
		}
	/* need to call this due to clear byte at top if avoiding
	 * having the top bit set (-ve number) */
	bn_fix_top(ret);
	return(ret);
	}

/* ignore negative */
int BN_bn2bin(const BIGNUM *a, unsigned char *to)
{
    int n,i;
    BN_ULONG l;

    n=i=BN_num_bytes(a);
    while (i-- > 0)
    {
        l=a->d[i/BN_BYTES];
        *(to++)= (unsigned char) (l >> (8*(i%BN_BYTES)) ) &0xff;
    }
    return(n);
}

int BN_ucmp(const BIGNUM *a, const BIGNUM *b)
	{
	int i;
	BN_ULONG t1,t2,*ap,*bp;

	bn_check_top(a);
	bn_check_top(b);

	i=a->top-b->top;
	if (i != 0) return(i);
	ap=a->d;
	bp=b->d;
	for (i=a->top-1; i>=0; i--)
		{
		t1= ap[i];
		t2= bp[i];
		if (t1 != t2)
			return(t1 > t2?1:-1);
		}
	return(0);
	}

int BN_cmp(const BIGNUM *a, const BIGNUM *b)
	{
	int i;
	int gt,lt;
	BN_ULONG t1,t2;

	if ((a == NULL) || (b == NULL))
		{
		if (a != NULL)
			return(-1);
		else if (b != NULL)
			return(1);
		else
			return(0);
		}

	bn_check_top(a);
	bn_check_top(b);

	if (a->neg != b->neg)
		{
		if (a->neg)
			return(-1);
		else	return(1);
		}
	if (a->neg == 0)
		{ gt=1; lt= -1; }
	else	{ gt= -1; lt=1; }

	if (a->top > b->top) return(gt);
	if (a->top < b->top) return(lt);
	for (i=a->top-1; i>=0; i--)
		{
		t1=a->d[i];
		t2=b->d[i];
		if (t1 > t2) return(gt);
		if (t1 < t2) return(lt);
		}
	return(0);
	}

int BN_set_bit(BIGNUM *a, int n)
	{
	int i,j,k;

	i=n/BN_BITS2;
	j=n%BN_BITS2;
	if (a->top <= i)
		{
		if (bn_wexpand(a,i+1) == NULL) return(0);
		for(k=a->top; k<i+1; k++)
			a->d[k]=0;
		a->top=i+1;
		}

	a->d[i]|=(((BN_ULONG)1)<<j);
	return(1);
	}

int BN_clear_bit(BIGNUM *a, int n)
	{
	int i,j;

	i=n/BN_BITS2;
	j=n%BN_BITS2;
	if (a->top <= i) return(0);

	a->d[i]&=(~(((BN_ULONG)1)<<j));
	bn_fix_top(a);
	return(1);
	}

int BN_is_bit_set(const BIGNUM *a, int n)
	{
	int i,j;

	if (n < 0) return(0);
	i=n/BN_BITS2;
	j=n%BN_BITS2;
	if (a->top <= i) return(0);
	return((a->d[i]&(((BN_ULONG)1)<<j))?1:0);
	}

int BN_mask_bits(BIGNUM *a, int n)
	{
	int b,w;

	w=n/BN_BITS2;
	b=n%BN_BITS2;
	if (w >= a->top) return(0);
	if (b == 0)
		a->top=w;
	else
		{
		a->top=w+1;
		a->d[w]&= ~(BN_MASK2<<b);
		}
	bn_fix_top(a);
	return(1);
	}

int bn_cmp_words(BN_ULONG *a, BN_ULONG *b, int n)
	{
	int i;
	BN_ULONG aa,bb;

	aa=a[n-1];
	bb=b[n-1];
	if (aa != bb) return((aa > bb)?1:-1);
	for (i=n-2; i>=0; i--)
		{
		aa=a[i];
		bb=b[i];
		if (aa != bb) return((aa > bb)?1:-1);
		}
	return(0);
	}

/* crypto/bn/bn_mont.c */

/*
 * Details about Montgomery multiplication algorithms can be found at:
 * http://www.ece.orst.edu/ISL/Publications.html
 * http://www.ece.orst.edu/ISL/Koc/papers/j37acmon.pdf
 */

int BN_mod_mul_montgomery(BIGNUM *r, BIGNUM *a, BIGNUM *b,
			  BN_MONT_CTX *mont, BN_CTX *ctx)
	{
	BIGNUM *tmp,*tmp2;

        tmp= &(ctx->bn[ctx->tos]);
        tmp2= &(ctx->bn[ctx->tos]);
	ctx->tos+=2;

	bn_check_top(tmp);
	bn_check_top(tmp2);

	if (a == b)
		{
		if (!BN_sqr(tmp,a,ctx)) goto err;
		}
	else
		{
		if (!BN_mul(tmp,a,b,ctx)) goto err;
		}
	/* reduce from aRR to aR */
	if (!BN_from_montgomery(r,tmp,mont,ctx)) goto err;
	ctx->tos-=2;
	return(1);
err:
	return(0);
	}

int BN_from_montgomery(BIGNUM *ret, BIGNUM *a, BN_MONT_CTX *mont,
	     BN_CTX *ctx)
	{
		{
		BIGNUM *n,*r;
		BN_ULONG *ap,*np,*rp,n0,v,*nrp;
		int al,nl,max,i,x,ri;
		int retn=0;

		r= &(ctx->bn[ctx->tos]);

		if (!BN_copy(r,a)) goto err1;
		n= &(mont->N);

		ap=a->d;
		/* mont->ri is the size of mont->N in bits/words */
		al=ri=mont->ri/BN_BITS2;

		nl=n->top;
		if ((al == 0) || (nl == 0)) { r->top=0; return(1); }

		max=(nl+al+1); /* allow for overflow (no?) XXX */
		if (bn_wexpand(r,max) == NULL) goto err1;
		if (bn_wexpand(ret,max) == NULL) goto err1;

		r->neg=a->neg^n->neg;
		np=n->d;
		rp=r->d;
		nrp= &(r->d[nl]);

		/* clear the top words of T */
		for (i=r->top; i<max; i++) /* memset? XXX */
			r->d[i]=0;

		r->top=max;
		n0=mont->n0;

		for (i=0; i<nl; i++)
			{
			v=bn_mul_add_words(rp,np,nl,(rp[0]*n0)&BN_MASK2);
			nrp++;
			rp++;
			if (((nrp[-1]+=v)&BN_MASK2) >= v)
				continue;
			else
				{
				if (((++nrp[0])&BN_MASK2) != 0) continue;
				if (((++nrp[1])&BN_MASK2) != 0) continue;
				for (x=2; (((++nrp[x])&BN_MASK2) == 0); x++) ;
				}
			}
		bn_fix_top(r);

		/* mont->ri will be a multiple of the word size */
		x=ri;
		rp=ret->d;
		ap= &(r->d[x]);
		if (r->top < x)
			al=0;
		else
			al=r->top-x;
		ret->top=al;
		al-=4;
		for (i=0; i<al; i+=4)
			{
			BN_ULONG t1,t2,t3,t4;

			t1=ap[i+0];
			t2=ap[i+1];
			t3=ap[i+2];
			t4=ap[i+3];
			rp[i+0]=t1;
			rp[i+1]=t2;
			rp[i+2]=t3;
			rp[i+3]=t4;
			}
		al+=4;
		for (; i<al; i++)
			rp[i]=ap[i];

		if (BN_ucmp(ret, &(mont->N)) >= 0)
			{
			BN_usub(ret,ret,&(mont->N)); /* XXX */
			}
		retn=1;
err1:
		return(retn);
		}
	}

BN_MONT_CTX *BN_MONT_CTX_new(void)
	{
	BN_MONT_CTX *ret;

	if ((ret=(BN_MONT_CTX *)malloc(sizeof(BN_MONT_CTX))) == NULL)
		return(NULL);

	BN_MONT_CTX_init(ret);
	ret->flags=BN_FLG_MALLOCED;
	return(ret);
	}

void BN_MONT_CTX_init(BN_MONT_CTX *ctx)
	{
	ctx->use_word=0;
	ctx->ri=0;
	BN_init(&(ctx->RR));
	BN_init(&(ctx->N));
	BN_init(&(ctx->Ni));
	ctx->flags=0;
	}

void BN_MONT_CTX_free(BN_MONT_CTX *mont)
	{
	if(mont == NULL)
	    return;

	BN_free(&(mont->RR));
	BN_free(&(mont->N));
	BN_free(&(mont->Ni));
	if (mont->flags & BN_FLG_MALLOCED)
		free(mont);
	}

int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
	{
	BIGNUM Ri,*R;

	BN_init(&Ri);
	R= &(mont->RR);					/* grab RR as a temp */
	BN_copy(&(mont->N),mod);			/* Set N */

		{
		BIGNUM tmod;
		BN_ULONG buf[2];

		mont->use_word=1;

		mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2;
		BN_zero(R);
		BN_set_bit(R,BN_BITS2);
		/* I was bad, this modification of a passed variable was
		 * breaking the multithreaded stuff :-(
		 * z=mod->top;
		 * mod->top=1; */

		buf[0]=mod->d[0];
		buf[1]=0;
		tmod.d=buf;
		tmod.top=1;
		tmod.max=mod->max;
		tmod.neg=mod->neg;

		if ((BN_mod_inverse(&Ri,R,&tmod,ctx)) == NULL)
			goto err;
		BN_lshift(&Ri,&Ri,BN_BITS2);			/* R*Ri */
		if (!BN_is_zero(&Ri))
			{
			BN_sub_word(&Ri,1);
			}
		else
			{
			/* This is not common..., 1 in BN_MASK2,
			 * It happens when buf[0] was == 1.  So for 8 bit,
			 * this is 1/256, 16bit, 1 in 2^16 etc.
			 */
			BN_set_word(&Ri,BN_MASK2);
			}
		BN_div(&Ri,NULL,&Ri,&tmod,ctx);
		mont->n0=Ri.d[0];
		BN_free(&Ri);
		/* mod->top=z; */
		}

	/* setup RR for conversions */
	BN_zero(&(mont->RR));
	BN_set_bit(&(mont->RR),mont->ri*2);
	BN_mod(&(mont->RR),&(mont->RR),&(mont->N),ctx);

	return(1);
err:
	return(0);
	}

BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from)
	{
	if (to == from) return(to);

	BN_copy(&(to->RR),&(from->RR));
	BN_copy(&(to->N),&(from->N));
	BN_copy(&(to->Ni),&(from->Ni));
	to->use_word=from->use_word;
	to->ri=from->ri;
	to->n0=from->n0;
	return(to);
	}

/* crypto/bn/bn_mul.c */

/* r is 2*n2 words in size,
 * a and b are both n2 words in size.
 * n2 must be a power of 2.
 * We multiply and return the result.
 * t must be 2*n2 words in size
 * We calulate
 * a[0]*b[0]
 * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
 * a[1]*b[1]
 */
void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
	     BN_ULONG *t)
	{
	int n=n2/2,c1,c2;
	unsigned int neg,zero;
	BN_ULONG ln,lo,*p;

	if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL)
		{
		/* This should not happen */
		bn_mul_normal(r,a,n2,b,n2);
		return;
		}
	/* r=(a[0]-a[1])*(b[1]-b[0]) */
	c1=bn_cmp_words(a,&(a[n]),n);
	c2=bn_cmp_words(&(b[n]),b,n);
	zero=neg=0;
	switch (c1*3+c2)
		{
	case -4:
		bn_sub_words(t,      &(a[n]),a,      n); /* - */
		bn_sub_words(&(t[n]),b,      &(b[n]),n); /* - */
		break;
	case -3:
		zero=1;
		break;
	case -2:
		bn_sub_words(t,      &(a[n]),a,      n); /* - */
		bn_sub_words(&(t[n]),&(b[n]),b,      n); /* + */
		neg=1;
		break;
	case -1:
	case 0:
	case 1:
		zero=1;
		break;
	case 2:
		bn_sub_words(t,      a,      &(a[n]),n); /* + */
		bn_sub_words(&(t[n]),b,      &(b[n]),n); /* - */
		neg=1;
		break;
	case 3:
		zero=1;
		break;
	case 4:
		bn_sub_words(t,      a,      &(a[n]),n);
		bn_sub_words(&(t[n]),&(b[n]),b,      n);
		break;
		}
		{
		p= &(t[n2*2]);
		if (!zero)
			bn_mul_recursive(&(t[n2]),t,&(t[n]),n,p);
		else
			memset(&(t[n2]),0,n2*sizeof(BN_ULONG));
		bn_mul_recursive(r,a,b,n,p);
		bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,p);
		}

	/* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
	 * r[10] holds (a[0]*b[0])
	 * r[32] holds (b[1]*b[1])
	 */

	c1=(int)(bn_add_words(t,r,&(r[n2]),n2));

	if (neg) /* if t[32] is negative */
		{
		c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
		}
	else
		{
		/* Might have a carry */
		c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
		}

	/* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
	 * r[10] holds (a[0]*b[0])
	 * r[32] holds (b[1]*b[1])
	 * c1 holds the carry bits
	 */
	c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
	if (c1)
		{
		p= &(r[n+n2]);
		lo= *p;
		ln=(lo+c1)&BN_MASK2;
		*p=ln;

		/* The overflow will stop before we over write
		 * words we should not overwrite */
		if (ln < (BN_ULONG)c1)
			{
			do	{
				p++;
				lo= *p;
				ln=(lo+1)&BN_MASK2;
				*p=ln;
				} while (ln == 0);
			}
		}
	}

/* n+tn is the word length
 * t needs to be n*4 is size, as does r */
void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int tn,
	     int n, BN_ULONG *t)
	{
	int i,j,n2=n*2;
	unsigned int c1;
	BN_ULONG ln,lo,*p;

	if (n < 8)
		{
		i=tn+n;
		bn_mul_normal(r,a,i,b,i);
		return;
		}

	/* r=(a[0]-a[1])*(b[1]-b[0]) */
	bn_sub_words(t,      a,      &(a[n]),n); /* + */
	bn_sub_words(&(t[n]),b,      &(b[n]),n); /* - */

/*	if (n == 4)
		{
		bn_mul_comba4(&(t[n2]),t,&(t[n]));
		bn_mul_comba4(r,a,b);
		bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn);
		memset(&(r[n2+tn*2]),0,sizeof(BN_ULONG)*(n2-tn*2));
		}
	else */ if (n == 8)
		{
		bn_mul_comba8(&(t[n2]),t,&(t[n]));
		bn_mul_comba8(r,a,b);
		bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn);
		memset(&(r[n2+tn*2]),0,sizeof(BN_ULONG)*(n2-tn*2));
		}
	else
		{
		p= &(t[n2*2]);
		bn_mul_recursive(&(t[n2]),t,&(t[n]),n,p);
		bn_mul_recursive(r,a,b,n,p);
		i=n/2;
		/* If there is only a bottom half to the number,
		 * just do it */
		j=tn-i;
		if (j == 0)
			{
			bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),i,p);
			memset(&(r[n2+i*2]),0,sizeof(BN_ULONG)*(n2-i*2));
			}
		else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */
				{
				bn_mul_part_recursive(&(r[n2]),&(a[n]),&(b[n]),
					j,i,p);
				memset(&(r[n2+tn*2]),0,
					sizeof(BN_ULONG)*(n2-tn*2));
				}
		else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
			{
			memset(&(r[n2]),0,sizeof(BN_ULONG)*n2);
			if (tn < BN_MUL_RECURSIVE_SIZE_NORMAL)
				{
				bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn);
				}
			else
				{
				for (;;)
					{
					i/=2;
					if (i < tn)
						{
						bn_mul_part_recursive(&(r[n2]),
							&(a[n]),&(b[n]),
							tn-i,i,p);
						break;
						}
					else if (i == tn)
						{
						bn_mul_recursive(&(r[n2]),
							&(a[n]),&(b[n]),
							i,p);
						break;
						}
					}
				}
			}
		}

	/* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
	 * r[10] holds (a[0]*b[0])
	 * r[32] holds (b[1]*b[1])
	 */

	c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
	c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));

	/* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
	 * r[10] holds (a[0]*b[0])
	 * r[32] holds (b[1]*b[1])
	 * c1 holds the carry bits
	 */
	c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
	if (c1)
		{
		p= &(r[n+n2]);
		lo= *p;
		ln=(lo+c1)&BN_MASK2;
		*p=ln;

		/* The overflow will stop before we over write
		 * words we should not overwrite */
		if (ln < c1)
			{
			do	{
				p++;
				lo= *p;
				ln=(lo+1)&BN_MASK2;
				*p=ln;
				} while (ln == 0);
			}
		}
	}

/* a and b must be the same size, which is n2.
 * r needs to be n2 words and t needs to be n2*2
 */
void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
	     BN_ULONG *t)
	{
	int n=n2/2;

	bn_mul_recursive(r,a,b,n,&(t[0]));
	if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL)
		{
		bn_mul_low_recursive(&(t[0]),&(a[0]),&(b[n]),n,&(t[n2]));
		bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
		bn_mul_low_recursive(&(t[0]),&(a[n]),&(b[0]),n,&(t[n2]));
		bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
		}
	else
		{
		bn_mul_low_normal(&(t[0]),&(a[0]),&(b[n]),n);
		bn_mul_low_normal(&(t[n]),&(a[n]),&(b[0]),n);
		bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
		bn_add_words(&(r[n]),&(r[n]),&(t[n]),n);
		}
	}

/* a and b must be the same size, which is n2.
 * r needs to be n2 words and t needs to be n2*2
 * l is the low words of the output.
 * t needs to be n2*3
 */
void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
	     BN_ULONG *t)
	{
	int i,n;
	int c1,c2;
	int neg,oneg,zero;
	BN_ULONG ll,lc,*lp,*mp;

	n=n2/2;

	/* Calculate (al-ah)*(bh-bl) */
	neg=zero=0;
	c1=bn_cmp_words(&(a[0]),&(a[n]),n);
	c2=bn_cmp_words(&(b[n]),&(b[0]),n);
	switch (c1*3+c2)
		{
	case -4:
		bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
		bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
		break;
	case -3:
		zero=1;
		break;
	case -2:
		bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
		bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
		neg=1;
		break;
	case -1:
	case 0:
	case 1:
		zero=1;
		break;
	case 2:
		bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
		bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
		neg=1;
		break;
	case 3:
		zero=1;
		break;
	case 4:
		bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
		bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
		break;
		}
		
	oneg=neg;
	/* t[10] = (a[0]-a[1])*(b[1]-b[0]) */
	/* r[10] = (a[1]*b[1]) */
		{
		bn_mul_recursive(&(t[0]),&(r[0]),&(r[n]),n,&(t[n2]));
		bn_mul_recursive(r,&(a[n]),&(b[n]),n,&(t[n2]));
		}

	/* s0 == low(al*bl)
	 * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl)
	 * We know s0 and s1 so the only unknown is high(al*bl)
	 * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl))
	 * high(al*bl) == s1 - (r[0]+l[0]+t[0])
	 */
	if (l != NULL)
		{
		lp= &(t[n2+n]);
		c1=(int)(bn_add_words(lp,&(r[0]),&(l[0]),n));
		}
	else
		{
		c1=0;
		lp= &(r[0]);
		}

	if (neg)
		neg=(int)(bn_sub_words(&(t[n2]),lp,&(t[0]),n));
	else
		{
		bn_add_words(&(t[n2]),lp,&(t[0]),n);
		neg=0;
		}

	if (l != NULL)
		{
		bn_sub_words(&(t[n2+n]),&(l[n]),&(t[n2]),n);
		}
	else
		{
		lp= &(t[n2+n]);
		mp= &(t[n2]);
		for (i=0; i<n; i++)
			lp[i]=((~mp[i])+1)&BN_MASK2;
		}

	/* s[0] = low(al*bl)
	 * t[3] = high(al*bl)
	 * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign
	 * r[10] = (a[1]*b[1])
	 */
	/* R[10] = al*bl
	 * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0])
	 * R[32] = ah*bh
	 */
	/* R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow)
	 * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow)
	 * R[3]=r[1]+(carry/borrow)
	 */
	if (l != NULL)
		{
		lp= &(t[n2]);
		c1= (int)(bn_add_words(lp,&(t[n2+n]),&(l[0]),n));
		}
	else
		{
		lp= &(t[n2+n]);
		c1=0;
		}
	c1+=(int)(bn_add_words(&(t[n2]),lp,  &(r[0]),n));
	if (oneg)
		c1-=(int)(bn_sub_words(&(t[n2]),&(t[n2]),&(t[0]),n));
	else
		c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),&(t[0]),n));

	c2 =(int)(bn_add_words(&(r[0]),&(r[0]),&(t[n2+n]),n));
	c2+=(int)(bn_add_words(&(r[0]),&(r[0]),&(r[n]),n));
	if (oneg)
		c2-=(int)(bn_sub_words(&(r[0]),&(r[0]),&(t[n]),n));
	else
		c2+=(int)(bn_add_words(&(r[0]),&(r[0]),&(t[n]),n));
	
	if (c1 != 0) /* Add starting at r[0], could be +ve or -ve */
		{
		i=0;
		if (c1 > 0)
			{
			lc=c1;
			do	{
				ll=(r[i]+lc)&BN_MASK2;
				r[i++]=ll;
				lc=(lc > ll);
				} while (lc);
			}
		else
			{
			lc= -c1;
			do	{
				ll=r[i];
				r[i++]=(ll-lc)&BN_MASK2;
				lc=(lc > ll);
				} while (lc);
			}
		}
	if (c2 != 0) /* Add starting at r[1] */
		{
		i=n;
		if (c2 > 0)
			{
			lc=c2;
			do	{
				ll=(r[i]+lc)&BN_MASK2;
				r[i++]=ll;
				lc=(lc > ll);
				} while (lc);
			}
		else
			{
			lc= -c2;
			do	{
				ll=r[i];
				r[i++]=(ll-lc)&BN_MASK2;
				lc=(lc > ll);
				} while (lc);
			}
		}
	}

int BN_mul(BIGNUM *r, BIGNUM *a, BIGNUM *b, BN_CTX *ctx)
	{
	int top,al,bl;
	BIGNUM *rr;
	BIGNUM *t;
	int i,j,k;

	bn_check_top(a);
	bn_check_top(b);
	bn_check_top(r);

	al=a->top;
	bl=b->top;
	r->neg=a->neg^b->neg;

	if ((al == 0) || (bl == 0))
		{
		BN_zero(r);
		return(1);
		}
	top=al+bl;

	if ((r == a) || (r == b))
		rr= &(ctx->bn[ctx->tos+1]);
	else
		rr=r;

	if (al == bl)
		{
		if (al < BN_MULL_SIZE_NORMAL)
			{
			if (bn_wexpand(rr,top) == NULL) return(0);
			rr->top=top;
			bn_mul_normal(rr->d,a->d,al,b->d,bl);
			goto end;
			}
		goto symetric;
		}
	else if ((al < BN_MULL_SIZE_NORMAL) || (bl < BN_MULL_SIZE_NORMAL))
		{
		if (bn_wexpand(rr,top) == NULL) return(0);
		rr->top=top;
		bn_mul_normal(rr->d,a->d,al,b->d,bl);
		goto end;
		}
	else
		{
		i=(al-bl);
		if ((i ==  1) && !BN_get_flags(b,BN_FLG_STATIC_DATA))
			{
			bn_wexpand(b,al);
			b->d[bl]=0;
			bl++;
			goto symetric;
			}
		else if ((i ==  -1) && !BN_get_flags(a,BN_FLG_STATIC_DATA))
			{
			bn_wexpand(a,bl);
			a->d[al]=0;
			al++;
			goto symetric;
			}
		}

	/* asymetric and >= 4 */ 
	if (bn_wexpand(rr,top) == NULL) return(0);
	rr->top=top;
	bn_mul_normal(rr->d,a->d,al,b->d,bl);

	if (0)
		{
symetric:
		/* symetric and > 4 */
		/* 16 or larger */
		j=BN_num_bits_word((BN_ULONG)al);
		j=1<<(j-1);
		k=j+j;
		t= &(ctx->bn[ctx->tos]);
		if (al == j) /* exact multiple */
			{
			bn_wexpand(t,k*2);
			bn_wexpand(rr,k*2);
			bn_mul_recursive(rr->d,a->d,b->d,al,t->d);
			}
		else
			{
			bn_wexpand(a,k);
			bn_wexpand(b,k);
			bn_wexpand(t,k*4);
			bn_wexpand(rr,k*4);
			for (i=a->top; i<k; i++)
				a->d[i]=0;
			for (i=b->top; i<k; i++)
				b->d[i]=0;
			bn_mul_part_recursive(rr->d,a->d,b->d,al-j,j,t->d);
			}
		rr->top=top;
		}
end:
	bn_fix_top(rr);
	if (r != rr) BN_copy(r,rr);
	return(1);
	}

void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
	{
	BN_ULONG *rr;

	if (na < nb)
		{
		int itmp;
		BN_ULONG *ltmp;

		itmp=na; na=nb; nb=itmp;
		ltmp=a;   a=b;   b=ltmp;

		}
	rr= &(r[na]);
	rr[0]=bn_mul_words(r,a,na,b[0]);

	for (;;)
		{
		if (--nb <= 0) return;
		rr[1]=bn_mul_add_words(&(r[1]),a,na,b[1]);
		if (--nb <= 0) return;
		rr[2]=bn_mul_add_words(&(r[2]),a,na,b[2]);
		if (--nb <= 0) return;
		rr[3]=bn_mul_add_words(&(r[3]),a,na,b[3]);
		if (--nb <= 0) return;
		rr[4]=bn_mul_add_words(&(r[4]),a,na,b[4]);
		rr+=4;
		r+=4;
		b+=4;
		}
	}

void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
	{
	bn_mul_words(r,a,n,b[0]);

	for (;;)
		{
		if (--n <= 0) return;
		bn_mul_add_words(&(r[1]),a,n,b[1]);
		if (--n <= 0) return;
		bn_mul_add_words(&(r[2]),a,n,b[2]);
		if (--n <= 0) return;
		bn_mul_add_words(&(r[3]),a,n,b[3]);
		if (--n <= 0) return;
		bn_mul_add_words(&(r[4]),a,n,b[4]);
		r+=4;
		b+=4;
		}
	}

/* crypto/bn/bn_print.c */

static const char *Hex="0123456789ABCDEF";

/* Must 'free' the returned data */
char *BN_bn2hex(const BIGNUM *a)
{
    int i,j,v,z=0;
    char *buf;
    char *p;

    buf = malloc (a->top*BN_BYTES*2+2);
    if (buf == NULL)
    {
        goto err;
    }
    p=buf;
    if (a->neg) *(p++)='-';
    if (a->top == 0) *(p++)='0';
    for (i=a->top-1; i >=0; i--)
    {
        for (j=BN_BITS2-8; j >= 0; j-=8)
        {
            /* strip leading zeros */
            v=((int)(a->d[i]>>(long)j))&0xff;
            if (z || (v != 0))
            {
                *(p++)=Hex[v>>4];
                *(p++)=Hex[v&0x0f];
                z=1;
            }
        }
    }
    *p='\0';
err:
    return(buf);
}

/* Must 'free' the returned data */
char *BN_bn2dec(const BIGNUM *a)
{
	int i=0,num;
	char *buf=NULL;
	char *p;
	BIGNUM *t=NULL;
	BN_ULONG *bn_data=NULL,*lp;

	i=BN_num_bits(a)*3;
	num=(i/10+i/1000+3)+1;
	bn_data=(BN_ULONG *)malloc((num/BN_DEC_NUM+1)*sizeof(BN_ULONG));
	buf=(char *)malloc(num+3);
	if ((buf == NULL) || (bn_data == NULL))
		{
		goto err;
		}
	if ((t=BN_dup(a)) == NULL) goto err;

	p=buf;
	lp=bn_data;
	if (t->neg) *(p++)='-';
	if (t->top == 0)
		{
		*(p++)='0';
		*(p++)='\0';
		}
	else
		{
		i=0;
		while (!BN_is_zero(t))
			{
			*lp=BN_div_word(t,BN_DEC_CONV);
			lp++;
			}
		lp--;
		/* We now have a series of blocks, BN_DEC_NUM chars
		 * in length, where the last one needs trucation.
		 * The blocks need to be reversed in order. */
		sprintf(p,BN_DEC_FMT1,*lp);
		while (*p) p++;
		while (lp != bn_data)
			{
			lp--;
			sprintf(p,BN_DEC_FMT2,*lp);
			while (*p) p++;
			}
		}
err:
	if (bn_data != NULL) free(bn_data);
	if (t != NULL) BN_free(t);
	return(buf);
	}

int BN_hex2bn(BIGNUM **bn, const char *a)
{
    BIGNUM *ret=NULL;
    BN_ULONG l=0;
    int neg=0,h,m,i,j,k,c;
    int num;

    if ((a == NULL) || (*a == '\0')) return(0);

    if (*a == '-') { neg=1; a++; }

    for (i=0; isxdigit((unsigned char) a[i]); i++)
        ;

    num=i+neg;
    if (bn == NULL) return(num);

    /* a is the start of the hex digets, and it is 'i' long */
    if (*bn == NULL)
    {
        if ((ret=BN_new()) == NULL) return(0);
    }
    else
    {
        ret= *bn;
        BN_zero(ret);
    }

    /* i is the number of hex digests; */
    if (bn_expand(ret,i*4) == NULL) goto err;

    j=i; /* least significate 'hex' */
    m=0;
    h=0;
    while (j > 0)
    {
        m=((BN_BYTES*2) <= j)?(BN_BYTES*2):j;
        l=0;
        for (;;)
        {
            c=a[j-m];
            if ((c >= '0') && (c <= '9')) k=c-'0';
            else if ((c >= 'a') && (c <= 'f')) k=c-'a'+10;
            else if ((c >= 'A') && (c <= 'F')) k=c-'A'+10;
            else k=0; /* paranoia */
            l=(l<<4)|k;

            if (--m <= 0)
            {
                ret->d[h++]=l;
                break;
            }
        }
        j-=(BN_BYTES*2);
    }
    ret->top=h;
    bn_fix_top(ret);
    ret->neg=neg;

    *bn=ret;
    return(num);
err:
    if (*bn == NULL) BN_free(ret);
    return(0);
}

int BN_dec2bn(BIGNUM **bn, const char *a)
	{
	BIGNUM *ret=NULL;
	BN_ULONG l=0;
	int neg=0,i,j;
	int num;

	if ((a == NULL) || (*a == '\0')) return(0);
	if (*a == '-') { neg=1; a++; }

	for (i=0; isdigit((unsigned char) a[i]); i++)
		;

	num=i+neg;
	if (bn == NULL) return(num);

	/* a is the start of the digets, and it is 'i' long.
	 * We chop it into BN_DEC_NUM digets at a time */
	if (*bn == NULL)
		{
		if ((ret=BN_new()) == NULL) return(0);
		}
	else
		{
		ret= *bn;
		BN_zero(ret);
		}

	/* i is the number of digests, a bit of an over expand; */
	if (bn_expand(ret,i*4) == NULL) goto err;

	j=BN_DEC_NUM-(i%BN_DEC_NUM);
	if (j == BN_DEC_NUM) j=0;
	l=0;
	while (*a)
		{
		l*=10;
		l+= *a-'0';
		a++;
		if (++j == BN_DEC_NUM)
			{
			BN_mul_word(ret,BN_DEC_CONV);
			BN_add_word(ret,l);
			l=0;
			j=0;
			}
		}
	ret->neg=neg;

	bn_fix_top(ret);
	*bn=ret;
	return(num);
err:
	if (*bn == NULL) BN_free(ret);
	return(0);
	}

/* crypto/bn/bn_recp.c */

void BN_RECP_CTX_init(BN_RECP_CTX *recp)
	{
	BN_init(&(recp->N));
	BN_init(&(recp->Nr));
	recp->num_bits=0;
	recp->flags=0;
	}

BN_RECP_CTX *BN_RECP_CTX_new(void)
	{
	BN_RECP_CTX *ret;

	if ((ret=(BN_RECP_CTX *)malloc(sizeof(BN_RECP_CTX))) == NULL)
		return(NULL);

	BN_RECP_CTX_init(ret);
	ret->flags=BN_FLG_MALLOCED;
	return(ret);
	}

void BN_RECP_CTX_free(BN_RECP_CTX *recp)
	{
	if(recp == NULL)
	    return;

	BN_free(&(recp->N));
	BN_free(&(recp->Nr));
	if (recp->flags & BN_FLG_MALLOCED)
		free(recp);
	}

int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx)
	{
	BN_copy(&(recp->N),d);
	BN_zero(&(recp->Nr));
	recp->num_bits=BN_num_bits(d);
	recp->shift=0;
	return(1);
	}

int BN_mod_mul_reciprocal(BIGNUM *r, BIGNUM *x, BIGNUM *y, BN_RECP_CTX *recp,
	     BN_CTX *ctx)
	{
	int ret=0;
	BIGNUM *a;

	a= &(ctx->bn[ctx->tos++]);
	if (y != NULL)
		{
		if (x == y)
			{ if (!BN_sqr(a,x,ctx)) goto err; }
		else
			{ if (!BN_mul(a,x,y,ctx)) goto err; }
		}
	else
		a=x; /* Just do the mod */

	BN_div_recp(NULL,r,a,recp,ctx);
	ret=1;
err:
	ctx->tos--;
	return(ret);
	}

int BN_div_recp(BIGNUM *dv, BIGNUM *rem, BIGNUM *m, BN_RECP_CTX *recp,
	     BN_CTX *ctx)
	{
	int i,j,tos,ret=0,ex;
	BIGNUM *a,*b,*d,*r;

	tos=ctx->tos;
	a= &(ctx->bn[ctx->tos++]);
	b= &(ctx->bn[ctx->tos++]);
	if (dv != NULL)
		d=dv;
	else
		d= &(ctx->bn[ctx->tos++]);
	if (rem != NULL)
		r=rem;
	else
		r= &(ctx->bn[ctx->tos++]);

	if (BN_ucmp(m,&(recp->N)) < 0)
		{
		BN_zero(d);
		BN_copy(r,m);
		ctx->tos=tos;
		return(1);
		}

	/* We want the remainder
	 * Given input of ABCDEF / ab
	 * we need multiply ABCDEF by 3 digests of the reciprocal of ab
	 *
	 */
	i=BN_num_bits(m);

	j=recp->num_bits*2;
	if (j > i)
		{
		i=j;
		ex=0;
		}
	else
		{
		ex=(i-j)/2;
		}

	j=i/2;

	if (i != recp->shift)
		recp->shift=BN_reciprocal(&(recp->Nr),&(recp->N),
			i,ctx);

	if (!BN_rshift(a,m,j-ex)) goto err;
	if (!BN_mul(b,a,&(recp->Nr),ctx)) goto err;
	if (!BN_rshift(d,b,j+ex)) goto err;
	d->neg=0;
	if (!BN_mul(b,&(recp->N),d,ctx)) goto err;
	if (!BN_usub(r,m,b)) goto err;
	r->neg=0;

	j=0;
	while (BN_ucmp(r,&(recp->N)) >= 0)
		{
		if (j++ > 2)
			{
			goto err;
			}
		if (!BN_usub(r,r,&(recp->N))) goto err;
		if (!BN_add_word(d,1)) goto err;
		}

	r->neg=BN_is_zero(r)?0:m->neg;
	d->neg=m->neg^recp->N.neg;
	ret=1;
err:
	ctx->tos=tos;
	return(ret);
	} 

/* len is the expected size of the result
 * We actually calculate with an extra word of precision, so
 * we can do faster division if the remainder is not required.
 */
int BN_reciprocal(BIGNUM *r, BIGNUM *m, int len, BN_CTX *ctx)
	{
	int ret= -1;
	BIGNUM t;

	BN_init(&t);

	BN_zero(&t);
	if (!BN_set_bit(&t,len)) goto err;

	if (!BN_div(r,NULL,&t,m,ctx)) goto err;
	ret=len;
err:
	BN_free(&t);
	return(ret);
	}

/* crypto/bn/bn_shift.c */
int BN_lshift1(BIGNUM *r, BIGNUM *a)
	{
	register BN_ULONG *ap,*rp,t,c;
	int i;

	if (r != a)
		{
		r->neg=a->neg;
		if (bn_wexpand(r,a->top+1) == NULL) return(0);
		r->top=a->top;
		}
	else
		{
		if (bn_wexpand(r,a->top+1) == NULL) return(0);
		}
	ap=a->d;
	rp=r->d;
	c=0;
	for (i=0; i<a->top; i++)
		{
		t= *(ap++);
		*(rp++)=((t<<1)|c)&BN_MASK2;
		c=(t & BN_TBIT)?1:0;
		}
	if (c)
		{
		*rp=1;
		r->top++;
		}
	return(1);
	}

int BN_rshift1(BIGNUM *r, BIGNUM *a)
	{
	BN_ULONG *ap,*rp,t,c;
	int i;

	if (BN_is_zero(a))
		{
		BN_zero(r);
		return(1);
		}
	if (a != r)
		{
		if (bn_wexpand(r,a->top) == NULL) return(0);
		r->top=a->top;
		r->neg=a->neg;
		}
	ap=a->d;
	rp=r->d;
	c=0;
	for (i=a->top-1; i>=0; i--)
		{
		t=ap[i];
		rp[i]=((t>>1)&BN_MASK2)|c;
		c=(t&1)?BN_TBIT:0;
		}
	bn_fix_top(r);
	return(1);
	}

int BN_lshift(BIGNUM *r, const BIGNUM *a, int n)
	{
	int i,nw,lb,rb;
	BN_ULONG *t,*f;
	BN_ULONG l;

	r->neg=a->neg;
	if (bn_wexpand(r,a->top+(n/BN_BITS2)+1) == NULL) return(0);
	nw=n/BN_BITS2;
	lb=n%BN_BITS2;
	rb=BN_BITS2-lb;
	f=a->d;
	t=r->d;
	t[a->top+nw]=0;
	if (lb == 0)
		for (i=a->top-1; i>=0; i--)
			t[nw+i]=f[i];
	else
		for (i=a->top-1; i>=0; i--)
			{
			l=f[i];
			t[nw+i+1]|=(l>>rb)&BN_MASK2;
			t[nw+i]=(l<<lb)&BN_MASK2;
			}
	memset(t,0,nw*sizeof(t[0]));
/*	for (i=0; i<nw; i++)
		t[i]=0;*/
	r->top=a->top+nw+1;
	bn_fix_top(r);
	return(1);
	}

int BN_rshift(BIGNUM *r, BIGNUM *a, int n)
	{
	int i,j,nw,lb,rb;
	BN_ULONG *t,*f;
	BN_ULONG l,tmp;

	nw=n/BN_BITS2;
	rb=n%BN_BITS2;
	lb=BN_BITS2-rb;
	if (nw > a->top)
		{
		BN_zero(r);
		return(1);
		}
	if (r != a)
		{
		r->neg=a->neg;
		if (bn_wexpand(r,a->top-nw+1) == NULL) return(0);
		}

	f= &(a->d[nw]);
	t=r->d;
	j=a->top-nw;
	r->top=j;

	if (rb == 0)
		{
		for (i=j+1; i > 0; i--)
			*(t++)= *(f++);
		}
	else
		{
		l= *(f++);
		for (i=1; i<j; i++)
			{
			tmp =(l>>rb)&BN_MASK2;
			l= *(f++);
			*(t++) =(tmp|(l<<lb))&BN_MASK2;
			}
		*(t++) =(l>>rb)&BN_MASK2;
		}
	*t=0;
	bn_fix_top(r);
	return(1);
	}
/* crypto/bn/bn_sqr.c */
/* r must not be a */
/* I've just gone over this and it is now %20 faster on x86 - eay - 27 Jun 96 */
int BN_sqr(BIGNUM *r, BIGNUM *a, BN_CTX *ctx)
	{
	int max,al;
	BIGNUM *tmp,*rr;

	bn_check_top(a);
	tmp= &(ctx->bn[ctx->tos]);
	rr=(a != r)?r: (&ctx->bn[ctx->tos+1]);

	al=a->top;
	if (al <= 0)
		{
		r->top=0;
		return(1);
		}

	max=(al+al);
	if (bn_wexpand(rr,max+1) == NULL) return(0);

	r->neg=0;
	if (al == 4)
		{
		BN_ULONG t[8];
		bn_sqr_normal(rr->d,a->d,4,t);
		}
	else if (al == 8)
		{
		BN_ULONG t[16];
		bn_sqr_normal(rr->d,a->d,8,t);
		}
	else 
		{
		if (al < BN_SQR_RECURSIVE_SIZE_NORMAL)
			{
			BN_ULONG t[BN_SQR_RECURSIVE_SIZE_NORMAL*2];
			bn_sqr_normal(rr->d,a->d,al,t);
			}
		else
			{
			int j,k;

			j=BN_num_bits_word((BN_ULONG)al);
			j=1<<(j-1);
			k=j+j;
			if (al == j)
				{
				if (bn_wexpand(a,k*2) == NULL) return(0);
				if (bn_wexpand(tmp,k*2) == NULL) return(0);
				bn_sqr_recursive(rr->d,a->d,al,tmp->d);
				}
			else
				{
				if (bn_wexpand(tmp,max) == NULL) return(0);
				bn_sqr_normal(rr->d,a->d,al,tmp->d);
				}
			}
		}

	rr->top=max;
	if ((max > 0) && (rr->d[max-1] == 0)) rr->top--;
	if (rr != r) BN_copy(r,rr);
	return(1);
	}

/* tmp must have 2*n words */
void bn_sqr_normal(BN_ULONG *r, BN_ULONG *a, int n, BN_ULONG *tmp)
	{
	int i,j,max;
	BN_ULONG *ap,*rp;

	max=n*2;
	ap=a;
	rp=r;
	rp[0]=rp[max-1]=0;
	rp++;
	j=n;

	if (--j > 0)
		{
		ap++;
		rp[j]=bn_mul_words(rp,ap,j,ap[-1]);
		rp+=2;
		}

	for (i=n-2; i>0; i--)
		{
		j--;
		ap++;
		rp[j]=bn_mul_add_words(rp,ap,j,ap[-1]);
		rp+=2;
		}

	bn_add_words(r,r,r,max);

	/* There will not be a carry */

	bn_sqr_words(tmp,a,n);

	bn_add_words(r,r,tmp,max);
	}

/* r is 2*n words in size,
 * a and b are both n words in size.
 * n must be a power of 2.
 * We multiply and return the result.
 * t must be 2*n words in size
 * We calulate
 * a[0]*b[0]
 * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
 * a[1]*b[1]
 */
void bn_sqr_recursive(BN_ULONG *r, BN_ULONG *a, int n2, BN_ULONG *t)
	{
	int n=n2/2;
	int zero,c1;
	BN_ULONG ln,lo,*p;

	if (n2 == 4)
		{
		bn_sqr_normal(r,a,4,t);
		return;
		}
	else if (n2 == 8)
		{
		bn_sqr_normal(r,a,8,t);
		return;
		}
	if (n2 < BN_SQR_RECURSIVE_SIZE_NORMAL)
		{
		bn_sqr_normal(r,a,n2,t);
		return;
		}
	/* r=(a[0]-a[1])*(a[1]-a[0]) */
	c1=bn_cmp_words(a,&(a[n]),n);
	zero=0;
	if (c1 > 0)
		bn_sub_words(t,a,&(a[n]),n);
	else if (c1 < 0)
		bn_sub_words(t,&(a[n]),a,n);
	else
		zero=1;

	/* The result will always be negative unless it is zero */
	p= &(t[n2*2]);

	if (!zero)
		bn_sqr_recursive(&(t[n2]),t,n,p);
	else
		memset(&(t[n2]),0,n*sizeof(BN_ULONG));
	bn_sqr_recursive(r,a,n,p);
	bn_sqr_recursive(&(r[n2]),&(a[n]),n,p);

	/* t[32] holds (a[0]-a[1])*(a[1]-a[0]), it is negative or zero
	 * r[10] holds (a[0]*b[0])
	 * r[32] holds (b[1]*b[1])
	 */

	c1=(int)(bn_add_words(t,r,&(r[n2]),n2));

	/* t[32] is negative */
	c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));

	/* t[32] holds (a[0]-a[1])*(a[1]-a[0])+(a[0]*a[0])+(a[1]*a[1])
	 * r[10] holds (a[0]*a[0])
	 * r[32] holds (a[1]*a[1])
	 * c1 holds the carry bits
	 */
	c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
	if (c1)
		{
		p= &(r[n+n2]);
		lo= *p;
		ln=(lo+c1)&BN_MASK2;
		*p=ln;

		/* The overflow will stop before we over write
		 * words we should not overwrite */
		if (ln < (BN_ULONG)c1)
			{
			do	{
				p++;
				lo= *p;
				ln=(lo+1)&BN_MASK2;
				*p=ln;
				} while (ln == 0);
			}
		}
	}
/* crypto/bn/bn_word.c */

BN_ULONG BN_mod_word(BIGNUM *a, BN_ULONG w)
	{
	BN_ULONG ret=0;
	int i;

	w&=BN_MASK2;
	for (i=a->top-1; i>=0; i--)
		{
		ret=((ret<<BN_BITS4)|((a->d[i]>>BN_BITS4)&BN_MASK2l))%w;
		ret=((ret<<BN_BITS4)|(a->d[i]&BN_MASK2l))%w;
		}
	return((BN_ULONG)ret);
	}

BN_ULONG BN_div_word(BIGNUM *a, BN_ULONG w)
	{
	BN_ULONG ret;
	int i;

	if (a->top == 0) return(0);
	ret=0;
	w&=BN_MASK2;
	for (i=a->top-1; i>=0; i--)
		{
		BN_ULONG l,d;
		
		l=a->d[i];
		d=bn_div_words(ret,l,w);
		ret=(l-((d*w)&BN_MASK2))&BN_MASK2;
		a->d[i]=d;
		}
	if ((a->top > 0) && (a->d[a->top-1] == 0))
		a->top--;
	return(ret);
	}

int BN_add_word(BIGNUM *a, BN_ULONG w)
	{
	BN_ULONG l;
	int i;

	if (a->neg)
		{
		a->neg=0;
		i=BN_sub_word(a,w);
		if (!BN_is_zero(a))
			a->neg=1;
		return(i);
		}
	w&=BN_MASK2;
	if (bn_wexpand(a,a->top+1) == NULL) return(0);
	i=0;
	for (;;)
		{
		l=(a->d[i]+(BN_ULONG)w)&BN_MASK2;
		a->d[i]=l;
		if (w > l)
			w=1;
		else
			break;
		i++;
		}
	if (i >= a->top)
		a->top++;
	return(1);
	}

int BN_sub_word(BIGNUM *a, BN_ULONG w)
	{
	int i;

	if (a->neg)
		{
		a->neg=0;
		i=BN_add_word(a,w);
		a->neg=1;
		return(i);
		}

	w&=BN_MASK2;
	if ((a->top == 1) && (a->d[0] < w))
		{
		a->d[0]=w-a->d[0];
		a->neg=1;
		return(1);
		}
	i=0;
	for (;;)
		{
		if (a->d[i] >= w)
			{
			a->d[i]-=w;
			break;
			}
		else
			{
			a->d[i]=(a->d[i]-w)&BN_MASK2;
			i++;
			w=1;
			}
		}
	if ((a->d[i] == 0) && (i == (a->top-1)))
		a->top--;
	return(1);
	}

int BN_mul_word(BIGNUM *a, BN_ULONG w)
	{
	BN_ULONG ll;

	w&=BN_MASK2;
	if (a->top)
		{
		ll=bn_mul_words(a->d,a->d,a->top,w);
		if (ll)
			{
			if (bn_wexpand(a,a->top+1) == NULL) return(0);
			a->d[a->top++]=ll;
			}
		}
	return(1);
        }

int BN_print (const BIGNUM *a)
{
    int i,j,v,z=0;
    int ret=0;

    if (a->neg)      putc ('-', stdout);
    if (a->top == 0) putc ('0',stdout);

    for (i=a->top-1; i >=0; i--)
    {
        for (j=BN_BITS2-4; j >= 0; j-=4)
        {
            /* strip leading zeros */
            v=((int)(a->d[i]>>(long)j))&0x0f;
            if (z || (v != 0))
            {
                putc (Hex[v], stdout);
                z=1;
            }
        }
    }
    ret=1;
    return(ret);
}

