*BSD News Article 8898

Newsgroups: comp.unix.bsd
Path: sserve!manuel.anu.edu.au!munnari.oz.au!metro!ipso!runxtsa!bde
From: bde@runx.oz.au (Bruce Evans)
Subject: Re: [386bsd]  How To Nuke 386bsd!!!
Message-ID: <1992Dec15.210710.28798@runx.oz.au>
Organization: RUNX Un*x Timeshare.  Sydney, Australia.
References: <ByntK9.1D6@news.chalmers.se> <1992Dec7.000113.20304@mel.dit.csiro.au> <1992Dec7.034604.14841@netcom.com>
Date: Tue, 15 Dec 92 21:07:10 GMT
Lines: 182

In article <1992Dec7.034604.14841@netcom.com> hasty@netcom.com (Amancio Hasty Jr) writes:
>[...]
>The problem maybe related to the way that you motherboard handles
>floating point exception. Try a simple program that does a floating
>point divide by 0.0. If the system re-boots you got a bad motherboard.

There are so many bugs in the kernel and libraries involving floating
point exceptions that it's hard to tell if the motherboard is bad.  I
seem to have a "bad" 486 motherboard (386BSD-0.1 panics) but I have
rewritten the exception handler so that there are no problems.

Here is a test program that does a lot of floating point divides by 0.0
and panics 386BSD-0.1 on a bad (any?) motherboard fairly quickly.  Compile
it with -DMAX_FAILURES=16 to limit the output.  Comment out any test that
always fails to emphasize the other tests.

I am interested in how non-386BSD 386-486 systems handle this test.  It
requires gcc to compile so I haven't been able to run it on any, but
previous tests suggest that failure of all the "no-wait" tests except
the one for fnclex is typical, and when the fnclex test doesn't fail all
the time, it fails intermittently.

---
#include <signal.h>
#include <stdio.h>

/*
 * TEST(x, y): run one test case and yield the result of check().
 *
 *   x - string literal holding the assembler instruction(s) to execute; it is
 *       also passed to check() as the text to print when the test fails.
 *   y - nonzero if the instruction(s) are expected to raise SIGFPE.
 *
 * The comma expression counts the test, calls setup(), executes the
 * instructions through a GCC statement expression (so this macro needs gcc),
 * and finally evaluates to check(x, y): 1 for a pass, 0 for a failure.
 */
#define TEST(x, y) ( ++tests, setup(), ({ asm(x); 0; }), check(x, y) )
#define CW_ZM	(1 << 2)	/* divide by zero mask */
#define SW_BUSY	(1 << 15)	/* FPU busy */
#define SW_ES	(1 << 7)	/* exception summary */
#define SW_ZE	(1 << 2)	/* divide by zero (pending) exception seen */

/*
 * File-scope test state.  The fp_* objects and double_in_mem are referenced
 * by symbol name (with a leading underscore) from the inline asm strings in
 * this file, so their names must not change.
 */
static double double_in_mem;		/* destination of the fstpl test */
static unsigned short fp_cw;		/* control word for fnstcw/fldcw */
static unsigned fp_env[7];		/* buffer for fnstenv/fldenv */
/* Buffer for fnsave/frstor: 7 words plus room for 8 ten-byte items. */
static unsigned fp_state[7 + 8 * 10 / sizeof(unsigned)];
static unsigned short fp_sw;		/* status word for fnstsw */
static unsigned failures;		/* number of failed tests so far */
static unsigned long tests;		/* number of tests started so far */
/* Flags set to 1 by the signal handlers and polled by the test code. */
static volatile /* sig_atomic_t */ int sigfpe_handled;
static volatile /* sig_atomic_t */ int sigint_handled;

/* Forward declarations for the functions defined in this file. */
static void delay(void);
static int check(char *insn, int sigfpe_expected);
static void setup(void);
static void sigfpe_handler(int sig_num);
static void sigint_handler(int sig_num);

/*
 * Compare what happened with what was expected for the test just run.
 *
 *   insn            - description of the test, printed if it failed.
 *   sigfpe_expected - nonzero if the test should have raised SIGFPE.
 *
 * If a SIGFPE was recorded, the handler is installed again and the flag is
 * cleared ready for the next test.  Returns 1 when the outcome matches the
 * expectation.  Otherwise counts a failure, reports it on stderr and
 * returns 0.
 */
static int check(char *insn, int sigfpe_expected)
{
    int got_sigfpe = sigfpe_handled;
    const char *prefix;

    if (got_sigfpe)
    {
	signal(SIGFPE, sigfpe_handler);
	sigfpe_handled = 0;
    }

    /* Pass when "a signal arrived" and "a signal was wanted" agree. */
    if (!got_sigfpe == !sigfpe_expected)
	return 1;

    /* "no SIGFPE" = expected one but got none; blank = got an unexpected one. */
    if (sigfpe_expected)
	prefix = "no";
    else
	prefix = "  ";

    ++failures;
    fprintf(stderr, "T %lu F %u: %s SIGFPE for `%s'\n",
	    tests, failures, prefix, insn);
    return 0;
}

/*
 * Busy-wait for a short, fixed number of iterations.
 *
 * The counter is volatile so that the loop is not optimized away.  Besides
 * the direct calls from C, this function is also called from an inline asm
 * string as `_delay'.
 */
static void delay(void)
{
    volatile unsigned ticks_left = 100;

    while (ticks_left != 0)
	ticks_left = ticks_left - 1;
}

/*
 * Run the floating point exception tests over and over until interrupted
 * by SIGINT or, if MAX_FAILURES is defined, until that many failures have
 * been counted.  Each failure is reported on stderr by check(); a summary
 * line is printed at the end.
 *
 * The second argument of TEST() and check() is nonzero when SIGFPE is
 * expected from the instruction(s) under test.
 *
 * argc and argv are not used.
 *
 * Returns 1 if any test failed, otherwise 0.
 */
int main(argc, argv)
int argc;
char **argv;
{
    signal(SIGINT, sigint_handler);
#ifdef MAX_FAILURES
    while (failures < MAX_FAILURES && !sigint_handled)
#else
    while (!sigint_handled)
#endif
    {
	/*
	 * Cause a divide by zero error.  This should not trigger an exception
	 * by itself.  Only the next waiting FP instruction (or fwait) should
	 * trigger the exception.  The remaining divide-by-zero tests are only
	 * run if this basic test passes.
	 */
	if (TEST("fldz; fld1; fdiv %st,%st(1)", 0))
	{
	    /*
	     * The wait instruction should always trigger a pending exception.
	     *
	     * One way for this to fail is if the kernel uses CR0_EM instead
	     * of CR0_TS | CR0_MP to handle FP context switching.  This fails
	     * to trap fwaits immediately after an FP context switch.  It is
	     * especially bad when FP is being emulated.  Then all fwaits are
	     * ignored!
	     */
	    TEST("fldz; fld1; fdiv %st,%st(1); call _delay; fwait", 1);

	    /*
	     * No-wait instructions should never trigger a pending exception.
	     *
	     * On my 486 system, they are all broken when the IRQ13 FP
	     * exception reporting method is used.  On at least one 386 system,
	     * fnclex usually works but some of the others are broken, and
	     * fnclex fails after a context switch, presumably because frstor
	     * fails in the kernel.
	     */
	    TEST("fldz; fld1; fdiv %st,%st(1); fninit", 0);
	    TEST("fldz; fld1; fdiv %st,%st(1); fnstcw _fp_cw", 0);
	    TEST("fldz; fld1; fdiv %st,%st(1); fnstsw _fp_sw", 0);
	    TEST("fldz; fld1; fdiv %st,%st(1); fnclex", 0);
	    TEST("fldz; fld1; fdiv %st,%st(1); fnstenv _fp_env", 0);
	    TEST("fldz; fld1; fdiv %st,%st(1); fnsave _fp_state", 0);
	}

	/*
	 * fldenv and frstor of an error state should not trigger an exception,
	 * and they should not lose the pending exception.  Fake the pending
	 * exception so that these tests can be done even if the tests for
	 * fnstenv and fnsave of the pending exception failed.
	 *
	 * These two tests are written out by hand instead of using TEST()
	 * because the saved image has to be modified between setup() and the
	 * instruction under test.  Word 1 of the saved image is treated as
	 * the status word.
	 */
	setup();
	asm("fnstenv _fp_env");	/* an almost clean env */
	fp_env[1] |= SW_BUSY | SW_ES | SW_ZE;	/* fake exception */
	++tests;
	asm("fldenv _fp_env");
	if (check("fldenv of pending exception", 0))
	{
	    /* The faked exception must still be pending: fwait should trap. */
	    delay();
	    asm("fwait");
	    ++tests;
	    check("fwait after fldenv of pending exception", 1);
	}
	setup();
	asm("fnsave _fp_state");	/* an almost clean state */
	fp_state[1] |= SW_BUSY | SW_ES | SW_ZE;	/* fake exception */
	++tests;
	asm("frstor _fp_state");
	if (check("frstor of pending exception", 0))
	{
	    /* The faked exception must still be pending: fwait should trap. */
	    delay();
	    asm("fwait");
	    ++tests;
	    check("fwait after frstor of pending exception", 1);
	}

	/*
	 * fstpl to memory when the FP stack is empty sometimes causes an
	 * IRQ13 a little after the instruction.  When the fstpl is traced,
	 * the exception appears to come from the trace trap handler!  frstor
	 * of a pending error may also cause an IRQ13 after the instruction.
	 */
	TEST("fstpl _double_in_mem", 0);
    }
    fprintf(stderr, "%lu tests, %u failures\n", tests, failures);
    return failures ? 1 : 0;
}

/*
 * Prepare the FPU and the signal state for one test.
 *
 * Reinitializes the FPU, clears the divide-by-zero mask bit (CW_ZM) in the
 * control word so that the divide-by-zero exception is unmasked, installs
 * sigfpe_handler for SIGFPE and clears the sigfpe_handled flag.
 */
static void setup(void)
{
    /*
     * Read the control word after fninit, clear CW_ZM in memory, then load
     * the modified value back.
     *
     * NOTE(review): these asm statements have no operand or clobber lists,
     * so the compiler is not told that they write and read fp_cw.  Confirm
     * that the compiler in use keeps the C read-modify-write between them.
     */
    asm("fwait; fninit; fnstcw _fp_cw");
    fp_cw &= ~CW_ZM;
    asm("fldcw _fp_cw");
    signal(SIGFPE, sigfpe_handler);
    sigfpe_handled = 0;
}

/*
 * SIGFPE handler: record that a floating point exception signal arrived.
 *
 * The handler only sets a flag.  It does not install itself again; that is
 * done by check() and setup().
 *
 *   sig_num - signal number (unused).
 */
static void sigfpe_handler(int sig_num)
{
    (void)sig_num;
    sigfpe_handled = 1;
}

/*
 * SIGINT handler: record that the user asked the test loop to stop.
 *
 * The handler only sets a flag; main() polls it in its loop condition.
 *
 *   sig_num - signal number (unused).
 */
static void sigint_handler(int sig_num)
{
    (void)sig_num;
    sigint_handled = 1;
}
---
-- 
Bruce Evans  (bde@runx.oz.au)