add directory gnu

2024-02-19 00:24:47 -05:00
parent 32616db5a4
commit a40f4cadb0
5086 changed files with 1860970 additions and 0 deletions
--- a/gnu/glibc/glibc-1.03/sysdeps/sparc/DEFS.h
+++ b/gnu/glibc/glibc-1.03/sysdeps/sparc/DEFS.h
@@ -0,0 +1,4 @@
+#define	FUNC(name)	\
+	.global name;	\
+	.align 4;	\
+	name:
--- a/gnu/glibc/glibc-1.03/sysdeps/sparc/Dist
+++ b/gnu/glibc/glibc-1.03/sysdeps/sparc/Dist
@@ -0,0 +1 @@
+DEFS.h divrem.m4 mul.S umul.S
--- a/gnu/glibc/glibc-1.03/sysdeps/sparc/Implies
+++ b/gnu/glibc/glibc-1.03/sysdeps/sparc/Implies
@@ -0,0 +1,2 @@
+# SPARC uses IEEE 754 floating point.
+ieee754
--- a/gnu/glibc/glibc-1.03/sysdeps/sparc/Makefile
+++ b/gnu/glibc/glibc-1.03/sysdeps/sparc/Makefile
@@ -0,0 +1,45 @@
+# Copyright (C) 1991 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Library General Public License
+# as published by the Free Software Foundation; either version 2 of
+# the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Library General Public License for more details.
+
+# You should have received a copy of the GNU Library General Public
+# License along with the GNU C Library; see the file COPYING.LIB.  If
+# not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+# Cambridge, MA 02139, USA.
+
+ifeq ($(subdir),gnulib)
+
+divrem := sdiv udiv rem urem
+routines := mul umul $(divrem) 
+nodist-routines = $(divrem)
+
+divrem-NAME-sdiv := div
+divrem-NAME-udiv := udiv
+divrem-NAME-rem := rem
+divrem-NAME-urem := urem
+divrem-NAME = $(+divrem-NAME-$(basename $(notdir $@)))
+divrem-OP-div := div
+divrem-OP-udiv := div
+divrem-OP-rem := rem
+divrem-OP-urem := rem
+divrem-S-div := true
+divrem-S-rem := true
+divrem-S-udiv := false
+divrem-S-urem := false
+$(addprefix $(objpfx),$(divrem:%=%.S)): divrem.m4
+	(echo "define(NAME,\`.$(+divrem-NAME)')\
+	       define(OP,\`$(+divrem-OP-$(+divrem-NAME))')\
+	       define(S,\`$(+divrem-S-$(+divrem-NAME))')"; \
+	 cat $<) | m4 > $@-tmp
+	mv $@-tmp $@
+
+endif	# gnulib
--- a/gnu/glibc/glibc-1.03/sysdeps/sparc/__longjmp.S
+++ b/gnu/glibc/glibc-1.03/sysdeps/sparc/__longjmp.S
@@ -0,0 +1,41 @@
+/* Copyright (C) 1991 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB.  If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA.  */
+
+#include "DEFS.h"
+#include <machine/trap.h>
+
+FUNC (___longjmp)
+	/* Do a "flush register windows trap".  The trap handler in the
+	   kernel writes all the register windows to their stack slots, and
+	   marks them all as invalid (needing to be sucked up from the
+	   stack when used).  This ensures that all information needed to
+	   unwind to these callers is in memory, not in the register
+	   windows.  */
+	ta ST_FLUSH_WINDOWS
+	ld [%o0], %o7		/* Return PC.  */
+	ld [%o0 + 4], %fp
+	sub %fp, 64, %sp
+
+	/* if (%o1 == 0) %o1 = 1; */
+	tst %o1
+	be,a 0f
+	mov 1, %o1
+
+0:	retl
+	/* On the way out, put the return value in %o0.  */
+	restore %o1, 0, %o0
--- a/gnu/glibc/glibc-1.03/sysdeps/sparc/divrem.m4
+++ b/gnu/glibc/glibc-1.03/sysdeps/sparc/divrem.m4
@@ -0,0 +1,228 @@
+/*
+ * Division and remainder, from Appendix E of the Sparc Version 8
+ * Architecture Manual, with fixes from Gordon Irlam.
+ */
+
+/*
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ *
+ * m4 parameters:
+ *  NAME	name of function to generate
+ *  OP		OP=div => %o0 / %o1; OP=rem => %o0 % %o1
+ *  S		S=true => signed; S=false => unsigned
+ *
+ * Algorithm parameters:
+ *  N		how many bits per iteration we try to get (4)
+ *  WORDSIZE	total number of bits (32)
+ *
+ * Derived constants:
+ *  TOPBITS	number of bits in the top `decade' of a number
+ *
+ * Important variables:
+ *  Q		the partial quotient under development (initially 0)
+ *  R		the remainder so far, initially the dividend
+ *  ITER	number of main division loop iterations required;
+ *		equal to ceil(log2(quotient) / N).  Note that this
+ *		is the log base (2^N) of the quotient.
+ *  V		the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ *  Current estimate for non-large dividend is
+ *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ *  different path, as the upper bits of the quotient must be developed
+ *  one bit at a time.
+ */
+
+define(N, `4')
+define(WORDSIZE, `32')
+define(TOPBITS, eval(WORDSIZE - N*((WORDSIZE-1)/N)))
+
+define(dividend, `%o0')
+define(divisor, `%o1')
+define(Q, `%o2')
+define(R, `%o3')
+define(ITER, `%o4')
+define(V, `%o5')
+
+/* m4 reminder: ifelse(a,b,c,d) => if a is b, then c, else d */
+define(T, `%g1')
+define(SC, `%g7')
+ifelse(S, `true', `define(SIGN, `%g6')')
+
+/*
+ * This is the recursive definition for developing quotient digits.
+ *
+ * Parameters:
+ *  $1	the current depth, 1 <= $1 <= N
+ *  $2	the current accumulation of quotient bits
+ *  N	max depth
+ *
+ * We add a new bit to $2 and either recurse or insert the bits in
+ * the quotient.  R, Q, and V are inputs and outputs as defined above;
+ * the condition codes are expected to reflect the input R, and are
+ * modified to reflect the output R.
+ */
+define(DEVELOP_QUOTIENT_BITS,
+`	! depth $1, accumulated bits $2
+	bl	L.$1.eval(2^N+$2)
+	srl	V,1,V
+	! remainder is positive
+	subcc	R,V,R
+	ifelse($1, N,
+	`	b	9f
+		add	Q, ($2*2+1), Q
+	', `	DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2+1)')')
+L.$1.eval(2^N+$2):
+	! remainder is negative
+	addcc	R,V,R
+	ifelse($1, N,
+	`	b	9f
+		add	Q, ($2*2-1), Q
+	', `	DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2-1)')')
+	ifelse($1, 1, `9:')')
+
+#include "DEFS.h"
+#include <machine/trap.h>
+
+FUNC(NAME)
+ifelse(S, `true',
+`	! compute sign of result; if neither is negative, no problem
+	orcc	divisor, dividend, %g0	! either negative?
+	bge	2f			! no, go do the divide
+	xor	divisor, dividend, SIGN	! compute sign in any case
+	tst	divisor
+	bge	1f
+	tst	dividend
+	! divisor is definitely negative; dividend might also be negative
+	bge	2f			! if dividend not negative...
+	sub	%g0, divisor, divisor	! in any case, make divisor nonneg
+1:	! dividend is negative, divisor is nonnegative
+	sub	%g0, dividend, dividend	! make dividend nonnegative
+2:
+')
+	! Ready to divide.  Compute size of quotient; scale comparand.
+	orcc	divisor, %g0, V
+	bne	1f
+	mov	dividend, R
+
+		! Divide by zero trap.  If it returns, return 0 (about as
+		! wrong as possible, but that is what SunOS does...).
+		ta	ST_DIV0
+		retl
+		clr	%o0
+
+1:
+	cmp	R, V			! if divisor exceeds dividend, done
+	blu	Lgot_result		! (and algorithm fails otherwise)
+	clr	Q
+	sethi	%hi(1 << (WORDSIZE - TOPBITS - 1)), T
+	cmp	R, T
+	blu	Lnot_really_big
+	clr	ITER
+
+	! `Here the dividend is >= 2^(31-N) or so.  We must be careful here,
+	! as our usual N-at-a-shot divide step will cause overflow and havoc.
+	! The number of bits in the result here is N*ITER+SC, where SC <= N.
+	! Compute ITER in an unorthodox manner: know we need to shift V into
+	! the top decade: so do not even bother to compare to R.'
+	1:
+		cmp	V, T
+		bgeu	3f
+		mov	1, SC
+		sll	V, N, V
+		b	1b
+		add	ITER, 1, ITER
+
+	! Now compute SC.
+	2:	addcc	V, V, V
+		bcc	Lnot_too_big
+		add	SC, 1, SC
+
+		! We get here if the divisor overflowed while shifting.
+		! This means that R has the high-order bit set.
+		! Restore V and subtract from R.
+		sll	T, TOPBITS, T	! high order bit
+		srl	V, 1, V		! rest of V
+		add	V, T, V
+		b	Ldo_single_div
+		sub	SC, 1, SC
+
+	Lnot_too_big:
+	3:	cmp	V, R
+		blu	2b
+		nop
+		be	Ldo_single_div
+		nop
+	/* NB: these are commented out in the V8-Sparc manual as well */
+	/* (I do not understand this) */
+	! V > R: went too far: back up 1 step
+	!	srl	V, 1, V
+	!	dec	SC
+	! do single-bit divide steps
+	!
+	! We have to be careful here.  We know that R >= V, so we can do the
+	! first divide step without thinking.  BUT, the others are conditional,
+	! and are only done if R >= 0.  Because both R and V may have the high-
+	! order bit set in the first step, just falling into the regular
+	! division loop will mess up the first time around.
+	! So we unroll slightly...
+	Ldo_single_div:
+		subcc	SC, 1, SC
+		bl	Lend_regular_divide
+		nop
+		sub	R, V, R
+		mov	1, Q
+		b	Lend_single_divloop
+		nop
+	Lsingle_divloop:
+		sll	Q, 1, Q
+		bl	1f
+		srl	V, 1, V
+		! R >= 0
+		sub	R, V, R
+		b	2f
+		add	Q, 1, Q
+	1:	! R < 0
+		add	R, V, R
+		sub	Q, 1, Q
+	2:
+	Lend_single_divloop:
+		subcc	SC, 1, SC
+		bge	Lsingle_divloop
+		tst	R
+		b,a	Lend_regular_divide
+
+Lnot_really_big:
+1:
+	sll	V, N, V
+	cmp	V, R
+	bleu	1b
+	addcc	ITER, 1, ITER
+	be	Lgot_result
+	sub	ITER, 1, ITER
+
+	tst	R	! set up for initial iteration
+Ldivloop:
+	sll	Q, N, Q
+	DEVELOP_QUOTIENT_BITS(1, 0)
+Lend_regular_divide:
+	subcc	ITER, 1, ITER
+	bge	Ldivloop
+	tst	R
+	bl,a	Lgot_result
+	! non-restoring fixup here (one instruction only!)
+ifelse(OP, `div',
+`	sub	Q, 1, Q
+', `	add	R, divisor, R
+')
+
+Lgot_result:
+ifelse(S, `true',
+`	! check to see if answer should be < 0
+	tst	SIGN
+	bl,a	1f
+	ifelse(OP, `div', `sub %g0, Q, Q', `sub %g0, R, R')
+1:')
+	retl
+	ifelse(OP, `div', `mov Q, %o0', `mov R, %o0')
--- a/gnu/glibc/glibc-1.03/sysdeps/sparc/jmp_buf.h
+++ b/gnu/glibc/glibc-1.03/sysdeps/sparc/jmp_buf.h
@@ -0,0 +1,9 @@
+/* Define the machine-dependent type `jmp_buf'.  Sun 4 version.  */
+
+typedef struct
+  {
+    /* Return PC (register o7).  */
+    PTR __pc;
+    /* Saved FP.  */
+    PTR __fp;
+  } __jmp_buf[1];
--- a/gnu/glibc/glibc-1.03/sysdeps/sparc/memcopy.h
+++ b/gnu/glibc/glibc-1.03/sysdeps/sparc/memcopy.h
@@ -0,0 +1,21 @@
+/* Copyright (C) 1991 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB.  If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA.  */
+
+#include <sysdeps/generic/memcopy.h>
+#undef	reg_char
+#define	reg_char	int
--- a/gnu/glibc/glibc-1.03/sysdeps/sparc/mul.S
+++ b/gnu/glibc/glibc-1.03/sysdeps/sparc/mul.S
@@ -0,0 +1,113 @@
+/*
+ * Signed multiply, from Appendix E of the Sparc Version 8
+ * Architecture Manual.
+ */
+
+/*
+ * Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the upper 32 bits of
+ * the 64-bit product).
+ *
+ * This code optimizes short (less than 13-bit) multiplies.
+ */
+
+#include "DEFS.h"
+FUNC(.mul)
+	mov	%o0, %y		! multiplier -> Y
+	andncc	%o0, 0xfff, %g0	! test bits 12..31
+	be	Lmul_shortway	! if zero, can do it the short way
+	andcc	%g0, %g0, %o4	! zero the partial product and clear N and V
+
+	/*
+	 * Long multiply.  32 steps, followed by a final shift step.
+	 */
+	mulscc	%o4, %o1, %o4	! 1
+	mulscc	%o4, %o1, %o4	! 2
+	mulscc	%o4, %o1, %o4	! 3
+	mulscc	%o4, %o1, %o4	! 4
+	mulscc	%o4, %o1, %o4	! 5
+	mulscc	%o4, %o1, %o4	! 6
+	mulscc	%o4, %o1, %o4	! 7
+	mulscc	%o4, %o1, %o4	! 8
+	mulscc	%o4, %o1, %o4	! 9
+	mulscc	%o4, %o1, %o4	! 10
+	mulscc	%o4, %o1, %o4	! 11
+	mulscc	%o4, %o1, %o4	! 12
+	mulscc	%o4, %o1, %o4	! 13
+	mulscc	%o4, %o1, %o4	! 14
+	mulscc	%o4, %o1, %o4	! 15
+	mulscc	%o4, %o1, %o4	! 16
+	mulscc	%o4, %o1, %o4	! 17
+	mulscc	%o4, %o1, %o4	! 18
+	mulscc	%o4, %o1, %o4	! 19
+	mulscc	%o4, %o1, %o4	! 20
+	mulscc	%o4, %o1, %o4	! 21
+	mulscc	%o4, %o1, %o4	! 22
+	mulscc	%o4, %o1, %o4	! 23
+	mulscc	%o4, %o1, %o4	! 24
+	mulscc	%o4, %o1, %o4	! 25
+	mulscc	%o4, %o1, %o4	! 26
+	mulscc	%o4, %o1, %o4	! 27
+	mulscc	%o4, %o1, %o4	! 28
+	mulscc	%o4, %o1, %o4	! 29
+	mulscc	%o4, %o1, %o4	! 30
+	mulscc	%o4, %o1, %o4	! 31
+	mulscc	%o4, %o1, %o4	! 32
+	mulscc	%o4, %g0, %o4	! final shift
+
+	! If %o0 was negative, the result is
+	!	(%o0 * %o1) + (%o1 << 32))
+	! We fix that here.
+
+	tst	%o0
+	bge	1f
+	rd	%y, %o0
+
+	! %o0 was indeed negative; fix upper 32 bits of result by subtracting 
+	! %o1 (i.e., return %o4 - %o1 in %o1).
+	retl
+	sub	%o4, %o1, %o1
+
+1:
+	retl
+	mov	%o4, %o1
+
+Lmul_shortway:
+	/*
+	 * Short multiply.  12 steps, followed by a final shift step.
+	 * The resulting bits are off by 12 and (32-12) = 20 bit positions,
+	 * but there is no problem with %o0 being negative (unlike above).
+	 */
+	mulscc	%o4, %o1, %o4	! 1
+	mulscc	%o4, %o1, %o4	! 2
+	mulscc	%o4, %o1, %o4	! 3
+	mulscc	%o4, %o1, %o4	! 4
+	mulscc	%o4, %o1, %o4	! 5
+	mulscc	%o4, %o1, %o4	! 6
+	mulscc	%o4, %o1, %o4	! 7
+	mulscc	%o4, %o1, %o4	! 8
+	mulscc	%o4, %o1, %o4	! 9
+	mulscc	%o4, %o1, %o4	! 10
+	mulscc	%o4, %o1, %o4	! 11
+	mulscc	%o4, %o1, %o4	! 12
+	mulscc	%o4, %g0, %o4	! final shift
+
+	/*
+	 *  %o4 has 20 of the bits that should be in the low part of the
+	 * result; %y has the bottom 12 (as %y's top 12).  That is:
+	 *
+	 *	  %o4		    %y
+	 * +----------------+----------------+
+	 * | -12- |   -20-  | -12- |   -20-  |
+	 * +------(---------+------)---------+
+	 *  --hi-- ----low-part----
+	 *
+	 * The upper 12 bits of %o4 should be sign-extended to form the
+	 * high part of the product (i.e., highpart = %o4 >> 20).
+	 */
+
+	rd	%y, %o5
+	sll	%o4, 12, %o0	! shift middle bits left 12
+	srl	%o5, 20, %o5	! shift low bits right 20, zero fill at left
+	or	%o5, %o0, %o0	! construct low part of result
+	retl
+	sra	%o4, 20, %o1	! ... and extract high part of result
--- a/gnu/glibc/glibc-1.03/sysdeps/sparc/setjmp.S
+++ b/gnu/glibc/glibc-1.03/sysdeps/sparc/setjmp.S
@@ -0,0 +1,26 @@
+/* Copyright (C) 1991 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB.  If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA.  */
+
+#include "DEFS.h"
+
+FUNC (___setjmp)
+	/* Save our return PC and SP.  */
+	st %o7, [%o0]
+	st %sp, [%o0 + 4]
+	retl
+	clr %o0
--- a/gnu/glibc/glibc-1.03/sysdeps/sparc/sqrt.c
+++ b/gnu/glibc/glibc-1.03/sysdeps/sparc/sqrt.c
@@ -0,0 +1,34 @@
+/* Copyright (C) 1991, 1992 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB.  If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA.  */
+
+#include <ansidecl.h>
+#include <errno.h>
+#include <math.h>
+
+#ifndef	__GNUC__
+  #error This file uses GNU C extensions; you must compile with GCC.
+#endif
+
+/* Return the square root of X.  */
+double
+DEFUN(sqrt, (x), double x)
+{
+  register double result;
+  asm("fsqrtd %1, %0" : "=f" (result) : "f" (x));
+  return result;
+}
--- a/gnu/glibc/glibc-1.03/sysdeps/sparc/umul.S
+++ b/gnu/glibc/glibc-1.03/sysdeps/sparc/umul.S
@@ -0,0 +1,144 @@
+/*
+ * Unsigned multiply.  Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the
+ * upper 32 bits of the 64-bit product).
+ *
+ * This code optimizes short (less than 13-bit) multiplies.  Short
+ * multiplies require 25 instruction cycles, and long ones require
+ * 45 instruction cycles.
+ *
+ * On return, overflow has occurred (%o1 is not zero) if and only if
+ * the Z condition code is clear, allowing, e.g., the following:
+ *
+ *	call	.umul
+ *	nop
+ *	bnz	overflow	(or tnz)
+ */
+
+#include "DEFS.h"
+FUNC(.umul)
+	or	%o0, %o1, %o4
+	mov	%o0, %y		! multiplier -> Y
+	andncc	%o4, 0xfff, %g0	! test bits 12..31 of *both* args
+	be	Lmul_shortway	! if zero, can do it the short way
+	andcc	%g0, %g0, %o4	! zero the partial product and clear N and V
+
+	/*
+	 * Long multiply.  32 steps, followed by a final shift step.
+	 */
+	mulscc	%o4, %o1, %o4	! 1
+	mulscc	%o4, %o1, %o4	! 2
+	mulscc	%o4, %o1, %o4	! 3
+	mulscc	%o4, %o1, %o4	! 4
+	mulscc	%o4, %o1, %o4	! 5
+	mulscc	%o4, %o1, %o4	! 6
+	mulscc	%o4, %o1, %o4	! 7
+	mulscc	%o4, %o1, %o4	! 8
+	mulscc	%o4, %o1, %o4	! 9
+	mulscc	%o4, %o1, %o4	! 10
+	mulscc	%o4, %o1, %o4	! 11
+	mulscc	%o4, %o1, %o4	! 12
+	mulscc	%o4, %o1, %o4	! 13
+	mulscc	%o4, %o1, %o4	! 14
+	mulscc	%o4, %o1, %o4	! 15
+	mulscc	%o4, %o1, %o4	! 16
+	mulscc	%o4, %o1, %o4	! 17
+	mulscc	%o4, %o1, %o4	! 18
+	mulscc	%o4, %o1, %o4	! 19
+	mulscc	%o4, %o1, %o4	! 20
+	mulscc	%o4, %o1, %o4	! 21
+	mulscc	%o4, %o1, %o4	! 22
+	mulscc	%o4, %o1, %o4	! 23
+	mulscc	%o4, %o1, %o4	! 24
+	mulscc	%o4, %o1, %o4	! 25
+	mulscc	%o4, %o1, %o4	! 26
+	mulscc	%o4, %o1, %o4	! 27
+	mulscc	%o4, %o1, %o4	! 28
+	mulscc	%o4, %o1, %o4	! 29
+	mulscc	%o4, %o1, %o4	! 30
+	mulscc	%o4, %o1, %o4	! 31
+	mulscc	%o4, %o1, %o4	! 32
+	mulscc	%o4, %g0, %o4	! final shift
+
+
+	/*
+	 * Normally, with the shift-and-add approach, if both numbers are
+	 * positive you get the correct result.  WIth 32-bit two's-complement
+	 * numbers, -x is represented as
+	 *
+	 *		  x		    32
+	 *	( 2  -  ------ ) mod 2  *  2
+	 *		   32
+	 *		  2
+	 *
+	 * (the `mod 2' subtracts 1 from 1.bbbb).  To avoid lots of 2^32s,
+	 * we can treat this as if the radix point were just to the left
+	 * of the sign bit (multiply by 2^32), and get
+	 *
+	 *	-x  =  (2 - x) mod 2
+	 *
+	 * Then, ignoring the `mod 2's for convenience:
+	 *
+	 *   x *  y	= xy
+	 *  -x *  y	= 2y - xy
+	 *   x * -y	= 2x - xy
+	 *  -x * -y	= 4 - 2x - 2y + xy
+	 *
+	 * For signed multiplies, we subtract (x << 32) from the partial
+	 * product to fix this problem for negative multipliers (see mul.s).
+	 * Because of the way the shift into the partial product is calculated
+	 * (N xor V), this term is automatically removed for the multiplicand,
+	 * so we don't have to adjust.
+	 *
+	 * But for unsigned multiplies, the high order bit wasn't a sign bit,
+	 * and the correction is wrong.  So for unsigned multiplies where the
+	 * high order bit is one, we end up with xy - (y << 32).  To fix it
+	 * we add y << 32.
+	 */
+	tst	%o1
+	bl,a	1f		! if %o1 < 0 (high order bit = 1),
+	add	%o4, %o0, %o4	! %o4 += %o0 (add y to upper half)
+1:	rd	%y, %o0		! get lower half of product
+	retl
+	addcc	%o4, %g0, %o1	! put upper half in place and set Z for %o1==0
+
+Lmul_shortway:
+	/*
+	 * Short multiply.  12 steps, followed by a final shift step.
+	 * The resulting bits are off by 12 and (32-12) = 20 bit positions,
+	 * but there is no problem with %o0 being negative (unlike above),
+	 * and overflow is impossible (the answer is at most 24 bits long).
+	 */
+	mulscc	%o4, %o1, %o4	! 1
+	mulscc	%o4, %o1, %o4	! 2
+	mulscc	%o4, %o1, %o4	! 3
+	mulscc	%o4, %o1, %o4	! 4
+	mulscc	%o4, %o1, %o4	! 5
+	mulscc	%o4, %o1, %o4	! 6
+	mulscc	%o4, %o1, %o4	! 7
+	mulscc	%o4, %o1, %o4	! 8
+	mulscc	%o4, %o1, %o4	! 9
+	mulscc	%o4, %o1, %o4	! 10
+	mulscc	%o4, %o1, %o4	! 11
+	mulscc	%o4, %o1, %o4	! 12
+	mulscc	%o4, %g0, %o4	! final shift
+
+	/*
+	 * %o4 has 20 of the bits that should be in the result; %y has
+	 * the bottom 12 (as %y's top 12).  That is:
+	 *
+	 *	  %o4		    %y
+	 * +----------------+----------------+
+	 * | -12- |   -20-  | -12- |   -20-  |
+	 * +------(---------+------)---------+
+	 *	   -----result-----
+	 *
+	 * The 12 bits of %o4 left of the `result' area are all zero;
+	 * in fact, all top 20 bits of %o4 are zero.
+	 */
+
+	rd	%y, %o5
+	sll	%o4, 12, %o0	! shift middle bits left 12
+	srl	%o5, 20, %o5	! shift low bits right 20
+	or	%o5, %o0, %o0
+	retl
+	addcc	%g0, %g0, %o1	! %o1 = zero, and set Z