 
Return to BSD News archive
Received: by minnie.vk1xwt.ampr.org with NNTP
	id AA882 ; Mon, 08 Feb 93 22:00:44 EST
Newsgroups: comp.unix.bsd
Path: sserve!manuel.anu.edu.au!munnari.oz.au!hp9000.csc.cuhk.hk!saimiri.primate.wisc.edu!ames!pacbell.com!amdahl!amdcad!BitBlocks.com!bvs
From: bvs@BitBlocks.com (Bakul Shah)
Subject: Re: 386BSD: i386/i386/in_cksum.c
Message-ID: <C23q7A.9tq@BitBlocks.com>
Organization: Bit Blocks, Inc.
References: <DERAADT.93Feb4032813@newt.newt.cuc.ab.ca>
Date: Sun, 7 Feb 1993 23:21:09 GMT
Lines: 238
deraadt@newt.cuc.ab.ca (Theo de Raadt) writes:
>RE: the discussion at the 386BSD BOF at Usenix, has anyone got a 386
>specific version of in_cksum.c?
>It does not have to be fully written in assembly. Actually, even just
>unrolling it with the carry-bit instructions as shown by the
>NET2 vax/vax/in_cksum.c should be sufficient, I <suspect>.
[I am reposting most of an old article.... ]
The enclosed in_cksum.c is about 5 times faster for 128 byte
mbufs (and up to 10 times faster for large mbufs).  However, its
effect on the overall throughput is rather small, which is not
too surprising.  The best ttcp performance I get with the new
in_cksum is about 264KB/s (SunOS3.5/Sun 3 -> 25MHz i486 +
WE8013EPC) & 242KB/s (pc->sun), up from 256KB/s & 238KB/s.
This in_cksum is derived from the CCI version of the same from
the tahoe release.  All I had to do was replace the carry related
macros with i386 specific ones (+ I made some cosmetic changes).
Thanks to gcc I didn't have to guess which registers contained
the checksum and the mbuf data ptr!  Use at your own risk.  If
you do decide to use it, make sure you move the original in_cksum.c
out of the way or else this shar won't overwrite it.
-- Bakul Shah <bvs@BitBlocks.com>
#!/bin/sh
# This is a shell archive (produced by shar 3.49)
# To extract the files from this archive, save it to a file, remove
# everything above the "!/bin/sh" line above, and type "sh file_name".
#
# made 07/25/1992 22:47 UTC by bvs@bitblocks.com
# Source directory /kabul/src/sys.386bsd
#
# existing files will NOT be overwritten unless -c is specified
#
# This shar contains:
# length  mode       name
# ------ ---------- ------------------------------------------
#   5048 -rw-rw-r-- i386/i386/in_cksum.c
#
# ============= i386/i386/in_cksum.c ==============
# Create the target directories, parent first, announcing each one made.
for shar_dir in 'i386' 'i386/i386'; do
    if test ! -d "$shar_dir"; then
        echo "x - creating directory $shar_dir"
        mkdir "$shar_dir"
    fi
done
if test -f 'i386/i386/in_cksum.c' -a X"$1" != X"-c"; then
	echo 'x - skipping i386/i386/in_cksum.c (File already exists)'
else
echo 'x - extracting i386/i386/in_cksum.c (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'i386/i386/in_cksum.c' &&
/*-
X * Copyright (c) 1990 The Regents of the University of California.
X * All rights reserved.
X *
X * Redistribution and use in source and binary forms, with or without
X * modification, are permitted provided that the following conditions
X * are met:
X * 1. Redistributions of source code must retain the above copyright
X *    notice, this list of conditions and the following disclaimer.
X * 2. Redistributions in binary form must reproduce the above copyright
X *    notice, this list of conditions and the following disclaimer in the
X *    documentation and/or other materials provided with the distribution.
X * 3. All advertising materials mentioning features or use of this software
X *    must display the following acknowledgement:
X *	This product includes software developed by the University of
X *	California, Berkeley and its contributors.
X * 4. Neither the name of the University nor the names of its contributors
X *    may be used to endorse or promote products derived from this software
X *    without specific prior written permission.
X *
X * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
X * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
X * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
X * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
X * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
X * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
X * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
X * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
X * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
X * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
X * SUCH DAMAGE.
X *
X * from tahoe:	in_cksum.c	1.2	86/01/05
X *	@(#)in_cksum.c	1.3 (Berkeley) 1/19/91
X *
X * 920724 	i386 changes by Bakul Shah <bvs@bitblocks.com>
X */
X
#include "param.h"
#include "sys/mbuf.h"
X
/*
X * Checksum routine for Internet Protocol family headers.
X *
X * This routine is very heavily used in the network
X * code and should be modified for each CPU to be as fast as possible.
X * 
X * This implementation is 386 version.
X */
X
#undef	ADDCARRY
#define ADDCARRY(x)     if ((x) > 0xffff) (x) -= 0xffff
#define REDUCE          {sum = (sum & 0xffff) + (sum >> 16); ADDCARRY(sum);}
X
/*
X * Thanks to gcc we don't have to guess
X * which registers contain sum & w.
X */
#define CLC     asm("clc")
#define ADD(n)  asm("adcl " #n "(%2), %0": "=r"(sum): "0"(sum), "r"(w))
#define MOP     asm("adcl $0, %0":         "=r"(sum): "0"(sum))
X
in_cksum(m, len)
X	register struct mbuf *m;
X	register int len;
{
X	register u_short *w;
X	register unsigned sum = 0;
X	register int mlen = 0;
X	int byte_swapped = 0;
X	union { char	c[2]; u_short	s; } su;
X
X	for (;m && len; m = m->m_next) {
X		if (m->m_len == 0)
X			continue;
X		w = mtod(m, u_short *);
X		if (mlen == -1) {
X			/*
X			 * The first byte of this mbuf is the continuation
X			 * of a word spanning between this mbuf and the
X			 * last mbuf.
X			 */
X
X			/* su.c[0] is already saved when scanning previous 
X			 * mbuf.  sum was REDUCEd when we found mlen == -1
X			 */
X			su.c[1] = *(u_char *)w;
X			sum += su.s;
X			w = (u_short *)((char *)w + 1);
X			mlen = m->m_len - 1;
X			len--;
X		} else
X			mlen = m->m_len;
X		if (len < mlen)
X			mlen = len;
X		len -= mlen;
X		/*
X		 * Force to long boundary so we do longword aligned
X		 * memory operations
X		 */
X		if (3 & (int) w) {
X			REDUCE;
X			if ((1 & (int) w) && (mlen > 0)) {
X				sum <<= 8;
X				su.c[0] = *(char *)w;
X				w = (u_short *)((char *)w + 1);
X				mlen--;
X				byte_swapped = 1;
X			}
X			if ((2 & (int) w) && (mlen >= 2)) {
X				sum += *w++;
X				mlen -= 2;
X			}
X		}
X		/*
X		 * Do as much of the checksum as possible 32 bits at at time.
X		 * In fact, this loop is unrolled to make overhead from
X		 * branches &c small.
X		 */
X		while ((mlen -= 32) >= 0) {
X			/*
X			 * Clear the carry flag, add with carry 16 words
X			 * and fold-in last carry by adding a 0 with carry.
X			 */
X			CLC;
X			ADD(0);  ADD(4);  ADD(8);  ADD(12);
X			ADD(16); ADD(20); ADD(24); ADD(28);
X			MOP; w += 16;
X		}
X		mlen += 32;
X		while ((mlen -= 8) >= 0) {
X			CLC;
X			ADD(0); ADD(4);
X			MOP;
X			w += 4;
X		}
X		mlen += 8;
X		if (mlen == 0 && byte_swapped == 0)
X			continue;       /* worth 1% maybe ?? */
X		REDUCE;
X		while ((mlen -= 2) >= 0) {
X			sum += *w++;
X		}
X		if (byte_swapped) {
X			sum <<= 8;
X			byte_swapped = 0;
X			if (mlen == -1) {
X				su.c[1] = *(char *)w;
X				sum += su.s;
X				mlen = 0;
X			} else
X				mlen = -1;
X		} else if (mlen == -1)
X			/*
X			 * This mbuf has odd number of bytes.
X			 * There could be a word split betwen
X			 * this mbuf and the next mbuf.
X			 * Save the last byte (to prepend to next mbuf).
X			 */
X			su.c[0] = *(char *)w;
X	}
X
X	if (len)
X		printf("cksum: out of data\n");
X	if (mlen == -1) {
X		/* The last mbuf has odd # of bytes. Follow the
X		   standard (the odd byte is shifted left by 8 bits) */
X		su.c[1] = 0;
X		sum += su.s;
X	}
X	REDUCE;
X	return (~sum & 0xffff);
}
SHAR_EOF
chmod 0664 i386/i386/in_cksum.c ||
echo 'restore of i386/i386/in_cksum.c failed'
Wc_c="`wc -c < 'i386/i386/in_cksum.c'`"
test 5048 -eq "$Wc_c" ||
	echo 'i386/i386/in_cksum.c: original size 5048, current size' "$Wc_c"
fi
exit 0