Received: from mail.onstor.com ([66.201.51.107]) by onstor-exch02.onstor.net with Microsoft SMTPSVC(6.0.3790.3959);
	 Fri, 19 Sep 2008 05:33:05 -0700
Received: from ausesmta2-2.messageone.com ([64.20.241.45]) by mail.onstor.com with Microsoft SMTPSVC(6.0.3790.3959);
	 Fri, 19 Sep 2008 05:33:03 -0700
Received: from ftp.linux-mips.org (ftp.linux-mips.org [213.58.128.207])
	by ausesmta2-2.messageone.com (8.13.8/8.13.8) with ESMTP id m8JCWvHR004863
	for <andy.sharp@onstor.com>; Fri, 19 Sep 2008 07:32:59 -0500
Received: from localhost.localdomain ([127.0.0.1]:34793 "EHLO
	ftp.linux-mips.org") by ftp.linux-mips.org with ESMTP
	id S29042102AbYISLsE (ORCPT <rfc822;andy.sharp@onstor.com>);
	Fri, 19 Sep 2008 12:48:04 +0100
Received: with ECARTIS (v1.0.0; list linux-mips); Fri, 19 Sep 2008 12:47:47 +0100 (BST)
Received: from localhost.localdomain ([127.0.0.1]:32233 "EHLO
	ditditdahdahdah-dahdahdahditdit.dl5rb.org.uk") by ftp.linux-mips.org
	with ESMTP id S29041902AbYISLrp (ORCPT
	<rfc822;linux-mips@linux-mips.org>); Fri, 19 Sep 2008 12:47:45 +0100
Received: from denk.linux-mips.net (denk.linux-mips.net [127.0.0.1])
	by ditditdahdahdah-dahdahdahditdit.dl5rb.org.uk (8.14.2/8.14.1) with ESMTP id m8JBli9l019467;
	Fri, 19 Sep 2008 13:47:44 +0200
Received: (from ralf@localhost)
	by denk.linux-mips.net (8.14.2/8.14.2/Submit) id m8JBlh4h019465;
	Fri, 19 Sep 2008 13:47:43 +0200
Date: 	Fri, 19 Sep 2008 13:47:43 +0200
From: Ralf Baechle <ralf@linux-mips.org>
To: "Maciej W. Rozycki" <macro@linux-mips.org>
Cc: Atsushi Nemoto <anemo@mba.ocn.ne.jp>, u1@terran.org,
        linux-mips@linux-mips.org, netdev@vger.kernel.org
Subject: [PATCH] MIPS checksum fix
Message-ID: <20080919114743.GA19359@linux-mips.org>
References: <Pine.LNX.4.55.0809171104290.17103@cliff.in.clinika.pl> <20080917.222350.41199051.anemo@mba.ocn.ne.jp> <Pine.LNX.4.55.0809171501450.17103@cliff.in.clinika.pl> <20080918.002705.78730226.anemo@mba.ocn.ne.jp> <Pine.LNX.4.55.0809171917580.17103@cliff.in.clinika.pl> <20080918220734.GA19222@linux-mips.org> <Pine.LNX.4.55.0809190112090.22686@cliff.in.clinika.pl> <20080919112304.GB13440@linux-mips.org>
MIME-Version: 1.0
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
In-Reply-To: <20080919112304.GB13440@linux-mips.org>
User-Agent: Mutt/1.5.18 (2008-05-17)
X-archive-position: 20552
X-ecartis-version: Ecartis v1.0.0
Sender: linux-mips-bounce@linux-mips.org
Errors-to: linux-mips-bounce@linux-mips.org
X-original-sender: ralf@linux-mips.org
Precedence: bulk
X-list: 	linux-mips
X-MessageOne-Virus-Version: vendor=fsecure engine=4.65.7161:2.4.4,1.2.40,4.0.164 definitions=2008-09-19_08:2008-09-19,2008-09-19,2008-09-19 signatures=0
X-MessageOne-Virus-Scanned: Clean
X-MessageOne-Envelope-Sender: linux-mips-bounce@linux-mips.org
X-MessageOne-Spam-Details: rule=m773emszm_notspam policy=m773emszm score=0 spamscore=0 ipscore=0 phishscore=0 bulkscore=0 adultscore=0 classifier=spam adjust=0 reason=mlx engine=3.1.0-0805090000 definitions=main-0809190048
X-MessageOne-Spam-Score: 0
X-MessageOne-Spam-Bar: 
Return-Path: linux-mips-bounce@linux-mips.org
X-OriginalArrivalTime: 19 Sep 2008 12:33:03.0408 (UTC) FILETIME=[DC1A2B00:01C91A53]

On Fri, Sep 19, 2008 at 01:23:04PM +0200, Ralf Baechle wrote:

> >  I can see you have done the microoptimisation I had in mind meanwhile --
> > thanks for saving me the effort. ;)  There is a delay slot to fill left
> > though -- will you take care of it too?
> 
> Will do - just couldn't be bothered (too) late last night ...

Voila.

I'm interested in test reports of this on all sorts of configurations -
32-bit, 64-bit, big / little endian, R2 processors and pre-R2.  In
particular Cavium being the only MIPS64 R2 implementation would be
interesting.  This definately is stuff which should go upstream for 2.6.27.

  Ralf

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>

diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index 8d77841..eac0d61 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -53,12 +53,14 @@
 #define UNIT(unit)  ((unit)*NBYTES)
 
 #define ADDC(sum,reg)						\
-	.set	push;						\
-	.set	noat;						\
 	ADD	sum, reg;					\
 	sltu	v1, sum, reg;					\
 	ADD	sum, v1;					\
-	.set	pop
+
+#define ADDC32(sum,reg)						\
+	addu	sum, reg;					\
+	sltu	v1, sum, reg;					\
+	addu	sum, v1;					\
 
 #define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)	\
 	LOAD	_t0, (offset + UNIT(0))(src);			\
@@ -254,8 +256,6 @@ LEAF(csum_partial)
 1:	ADDC(sum, t1)
 
 	/* fold checksum */
-	.set	push
-	.set	noat
 #ifdef USE_DOUBLE
 	dsll32	v1, sum, 0
 	daddu	sum, v1
@@ -263,24 +263,25 @@ LEAF(csum_partial)
 	dsra32	sum, sum, 0
 	addu	sum, v1
 #endif
-	sll	v1, sum, 16
-	addu	sum, v1
-	sltu	v1, sum, v1
-	srl	sum, sum, 16
-	addu	sum, v1
 
 	/* odd buffer alignment? */
-	beqz	t7, 1f
-	 nop
-	sll	v1, sum, 8
+#ifdef CPU_MIPSR2
+	wsbh	sum, sum	
+	movn	sum, v1, t7
+#else
+	beqz	t7, 1f			/* odd buffer alignment? */
+	 lui	v1, 0x00ff
+	addu	v1, 0x00ff
+	and	t0, sum, v1
+	sll	t0, t0, 8
 	srl	sum, sum, 8
-	or	sum, v1
-	andi	sum, 0xffff
-	.set	pop
+	and	sum, sum, v1
+	or	sum, sum, t0
 1:
+#endif
 	.set	reorder
 	/* Add the passed partial csum.  */
-	ADDC(sum, a2)
+	ADDC32(sum, a2)
 	jr	ra
 	.set	noreorder
 	END(csum_partial)
@@ -656,8 +657,6 @@ EXC(	sb	t0, NBYTES-2(dst), .Ls_exc)
 	ADDC(sum, t2)
 .Ldone:
 	/* fold checksum */
-	.set	push
-	.set	noat
 #ifdef USE_DOUBLE
 	dsll32	v1, sum, 0
 	daddu	sum, v1
@@ -665,23 +664,23 @@ EXC(	sb	t0, NBYTES-2(dst), .Ls_exc)
 	dsra32	sum, sum, 0
 	addu	sum, v1
 #endif
-	sll	v1, sum, 16
-	addu	sum, v1
-	sltu	v1, sum, v1
-	srl	sum, sum, 16
-	addu	sum, v1
 
-	/* odd buffer alignment? */
-	beqz	odd, 1f
-	 nop
-	sll	v1, sum, 8
+#ifdef CPU_MIPSR2
+	wsbh	v1, sum
+	movn	sum, v1, odd
+#else
+	beqz	odd, 1f			/* odd buffer alignment? */
+	 lui	v1, 0x00ff
+	addu	v1, 0x00ff
+	and	t0, sum, v1
+	sll	t0, t0, 8
 	srl	sum, sum, 8
-	or	sum, v1
-	andi	sum, 0xffff
-	.set	pop
+	and	sum, sum, v1
+	or	sum, sum, t0
 1:
+#endif
 	.set reorder
-	ADDC(sum, psum)
+	ADDC32(sum, psum)
 	jr	ra
 	.set noreorder
 

