Index: Documentation/Configure.help
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/Documentation/Configure.help,v
retrieving revision 1.1.1.42
retrieving revision 1.1.1.42.2.1
diff -u -r1.1.1.42 -r1.1.1.42.2.1
--- a/Documentation/Configure.help	14 Apr 2004 13:05:24 -0000	1.1.1.42
+++ b/Documentation/Configure.help	16 Apr 2004 13:16:05 -0000	1.1.1.42.2.1
@@ -5916,6 +5916,14 @@
   and you should also say Y to "Kernel/User network link driver",
   below. If unsure, say N.
 
+PF_KEY sockets
+CONFIG_NET_KEY
+  PF_KEYv2 socket family, compatible with the KAME ones.
+  They are required if you are going to use IPsec tools ported
+  from KAME.
+
+  Say Y unless you know what you are doing.
+
 TCP/IP networking
 CONFIG_INET
   These are the protocols used on the Internet and on most local
@@ -6176,6 +6184,32 @@
   gated-5). This routing protocol is not used widely, so say N unless
   you want to play with it.
 
+IP: AH transformation
+CONFIG_INET_AH
+  Support for IPsec AH.
+
+  If unsure, say Y.
+
+IP: ESP transformation
+CONFIG_INET_ESP
+  Support for IPsec ESP.
+
+  If unsure, say Y.
+
+IP: IPComp transformation
+CONFIG_INET_IPCOMP
+  Support for IP Payload Compression (RFC3173), typically needed
+  for IPsec.
+
+  If unsure, say Y.
+
+IP: IPsec user configuration interface
+CONFIG_XFRM_USER
+  Support for IPsec user configuration interface used
+  by native Linux tools.
+
+  If unsure, say Y.
+
 Unix domain sockets
 CONFIG_UNIX
   If you say Y here, you will include support for Unix domain sockets;
@@ -6221,6 +6255,20 @@
 
   It is safe to say N here for now.
 
+IPv6: Privacy Extensions (RFC 3041) support
+CONFIG_IPV6_PRIVACY
+  Privacy Extensions for Stateless Address Autoconfiguration in IPv6
+  support.  With this option, additional periodically-altered
+  pseudo-random global-scope unicast address(es) will be assigned to
+  your interface(s).
+
+  By default, the kernel does not generate temporary addresses.
+  To use temporary addresses, do
+
+        echo 2 >/proc/sys/net/ipv6/conf/all/use_tempaddr 
+
+  See <file:Documentation/networking/ip-sysctl.txt> for details.
+
 The SCTP Protocol (EXPERIMENTAL)
 CONFIG_IP_SCTP
   Stream Control Transmission Protocol
Index: Documentation/networking/ip-sysctl.txt
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/Documentation/networking/ip-sysctl.txt,v
retrieving revision 1.1.1.17
retrieving revision 1.1.1.17.2.1
diff -u -r1.1.1.17 -r1.1.1.17.2.1
--- a/Documentation/networking/ip-sysctl.txt	14 Apr 2004 13:05:25 -0000	1.1.1.17
+++ b/Documentation/networking/ip-sysctl.txt	16 Apr 2004 13:16:15 -0000	1.1.1.17.2.1
@@ -668,6 +668,37 @@
 	0 to disable any limiting, otherwise the maximal rate in jiffies(1)
 	Default: 100
 
+use_tempaddr - INTEGER
+	Preference for Privacy Extensions (RFC3041).
+	  <= 0 : disable Privacy Extensions
+	  == 1 : enable Privacy Extensions, but prefer public
+	         addresses over temporary addresses.
+	  >  1 : enable Privacy Extensions and prefer temporary
+	         addresses over public addresses.
+	Default:  0 (for most devices)
+		 -1 (for point-to-point devices and loopback devices)
+
+temp_valid_lft - INTEGER
+	Valid lifetime (in seconds) for temporary addresses.
+	Default: 604800 (7 days)
+
+temp_prefered_lft - INTEGER
+	Preferred lifetime (in seconds) for temporary addresses.
+	Default: 86400 (1 day)
+
+max_desync_factor - INTEGER
+	Maximum value for DESYNC_FACTOR, which is a random value
+	that ensures that clients don't synchronize with each
+	other and generate new addresses at exactly the same time.
+	Value is in seconds.
+	Default: 600
+	
+regen_max_retry - INTEGER
+	Number of attempts before giving up attempting to generate
+	valid temporary addresses.
+	Default: 5
+
+
 IPv6 Update by:
 Pekka Savola <pekkas@netcore.fi>
 YOSHIFUJI Hideaki / USAGI Project <yoshfuji@linux-ipv6.org>
Index: arch/alpha/defconfig
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/arch/alpha/defconfig,v
retrieving revision 1.1.1.12
retrieving revision 1.1.1.12.2.1
diff -u -r1.1.1.12 -r1.1.1.12.2.1
--- a/arch/alpha/defconfig	18 Feb 2004 13:36:30 -0000	1.1.1.12
+++ b/arch/alpha/defconfig	16 Apr 2004 13:16:16 -0000	1.1.1.12.2.1
@@ -127,6 +127,7 @@
 # CONFIG_NETFILTER_DEBUG is not set
 # CONFIG_FILTER is not set
 CONFIG_UNIX=y
+CONFIG_NET_KEY=y
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 # CONFIG_IP_ADVANCED_ROUTER is not set
Index: arch/arm/defconfig
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/arch/arm/defconfig,v
retrieving revision 1.1.1.7
retrieving revision 1.1.1.7.2.1
diff -u -r1.1.1.7 -r1.1.1.7.2.1
--- a/arch/arm/defconfig	18 Feb 2004 13:36:30 -0000	1.1.1.7
+++ b/arch/arm/defconfig	16 Apr 2004 13:16:16 -0000	1.1.1.7.2.1
@@ -170,6 +170,7 @@
 # CONFIG_NETFILTER is not set
 # CONFIG_FILTER is not set
 CONFIG_UNIX=y
+CONFIG_NET_KEY=y
 CONFIG_INET=y
 # CONFIG_IP_MULTICAST is not set
 # CONFIG_IP_ADVANCED_ROUTER is not set
Index: arch/cris/defconfig
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/arch/cris/defconfig,v
retrieving revision 1.1.1.11
retrieving revision 1.1.1.11.2.1
diff -u -r1.1.1.11 -r1.1.1.11.2.1
--- a/arch/cris/defconfig	18 Feb 2004 13:36:30 -0000	1.1.1.11
+++ b/arch/cris/defconfig	16 Apr 2004 13:16:16 -0000	1.1.1.11.2.1
@@ -214,6 +214,7 @@
 # CONFIG_NETFILTER is not set
 # CONFIG_FILTER is not set
 CONFIG_UNIX=y
+CONFIG_NET_KEY=y
 CONFIG_INET=y
 # CONFIG_IP_MULTICAST is not set
 # CONFIG_IP_ADVANCED_ROUTER is not set
Index: arch/i386/defconfig
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/arch/i386/defconfig,v
retrieving revision 1.1.1.28
retrieving revision 1.1.1.28.2.1
diff -u -r1.1.1.28 -r1.1.1.28.2.1
--- a/arch/i386/defconfig	14 Apr 2004 13:05:25 -0000	1.1.1.28
+++ b/arch/i386/defconfig	16 Apr 2004 13:16:16 -0000	1.1.1.28.2.1
@@ -178,6 +178,7 @@
 # CONFIG_NETFILTER is not set
 # CONFIG_FILTER is not set
 CONFIG_UNIX=y
+CONFIG_NET_KEY=y
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 # CONFIG_IP_ADVANCED_ROUTER is not set
Index: arch/ia64/defconfig
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/arch/ia64/defconfig,v
retrieving revision 1.1.1.12
retrieving revision 1.1.1.12.2.1
diff -u -r1.1.1.12 -r1.1.1.12.2.1
--- a/arch/ia64/defconfig	18 Feb 2004 13:36:30 -0000	1.1.1.12
+++ b/arch/ia64/defconfig	16 Apr 2004 13:16:16 -0000	1.1.1.12.2.1
@@ -101,6 +101,7 @@
 # CONFIG_NETFILTER is not set
 CONFIG_FILTER=y
 CONFIG_UNIX=y
+CONFIG_NET_KEY=y
 CONFIG_INET=y
 # CONFIG_IP_MULTICAST is not set
 # CONFIG_IP_ADVANCED_ROUTER is not set
Index: arch/m68k/defconfig
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/arch/m68k/defconfig,v
retrieving revision 1.1.1.7
retrieving revision 1.1.1.7.2.1
diff -u -r1.1.1.7 -r1.1.1.7.2.1
--- a/arch/m68k/defconfig	18 Feb 2004 13:36:30 -0000	1.1.1.7
+++ b/arch/m68k/defconfig	16 Apr 2004 13:16:16 -0000	1.1.1.7.2.1
@@ -82,6 +82,7 @@
 # CONFIG_NETFILTER is not set
 # CONFIG_FILTER is not set
 CONFIG_UNIX=y
+CONFIG_NET_KEY=y
 CONFIG_INET=y
 # CONFIG_IP_MULTICAST is not set
 # CONFIG_IP_ADVANCED_ROUTER is not set
Index: arch/mips/defconfig
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/arch/mips/defconfig,v
retrieving revision 1.1.1.15
retrieving revision 1.1.1.15.2.1
diff -u -r1.1.1.15 -r1.1.1.15.2.1
--- a/arch/mips/defconfig	18 Feb 2004 13:36:30 -0000	1.1.1.15
+++ b/arch/mips/defconfig	16 Apr 2004 13:16:16 -0000	1.1.1.15.2.1
@@ -201,6 +201,7 @@
 # CONFIG_NETFILTER is not set
 # CONFIG_FILTER is not set
 CONFIG_UNIX=y
+CONFIG_NET_KEY=y
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 # CONFIG_IP_ADVANCED_ROUTER is not set
Index: arch/mips64/defconfig
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/arch/mips64/defconfig,v
retrieving revision 1.1.1.19
retrieving revision 1.1.1.19.2.1
diff -u -r1.1.1.19 -r1.1.1.19.2.1
--- a/arch/mips64/defconfig	18 Feb 2004 13:36:30 -0000	1.1.1.19
+++ b/arch/mips64/defconfig	16 Apr 2004 13:16:17 -0000	1.1.1.19.2.1
@@ -199,6 +199,7 @@
 # CONFIG_NETFILTER is not set
 # CONFIG_FILTER is not set
 CONFIG_UNIX=y
+CONFIG_NET_KEY=y
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 # CONFIG_IP_ADVANCED_ROUTER is not set
Index: arch/parisc/defconfig
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/arch/parisc/defconfig,v
retrieving revision 1.1.1.8
retrieving revision 1.1.1.8.2.1
diff -u -r1.1.1.8 -r1.1.1.8.2.1
--- a/arch/parisc/defconfig	18 Feb 2004 13:36:30 -0000	1.1.1.8
+++ b/arch/parisc/defconfig	16 Apr 2004 13:16:17 -0000	1.1.1.8.2.1
@@ -116,6 +116,7 @@
 # CONFIG_NETFILTER is not set
 CONFIG_FILTER=y
 CONFIG_UNIX=y
+CONFIG_NET_KEY=y
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 # CONFIG_IP_ADVANCED_ROUTER is not set
Index: arch/ppc/defconfig
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/arch/ppc/defconfig,v
retrieving revision 1.1.1.17
retrieving revision 1.1.1.17.2.1
diff -u -r1.1.1.17 -r1.1.1.17.2.1
--- a/arch/ppc/defconfig	18 Feb 2004 13:36:30 -0000	1.1.1.17
+++ b/arch/ppc/defconfig	16 Apr 2004 13:16:17 -0000	1.1.1.17.2.1
@@ -134,6 +134,7 @@
 # CONFIG_NETFILTER_DEBUG is not set
 # CONFIG_FILTER is not set
 CONFIG_UNIX=y
+CONFIG_NET_KEY=y
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 # CONFIG_IP_ADVANCED_ROUTER is not set
Index: arch/ppc64/defconfig
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/arch/ppc64/defconfig,v
retrieving revision 1.1.1.8
retrieving revision 1.1.1.8.2.1
diff -u -r1.1.1.8 -r1.1.1.8.2.1
--- a/arch/ppc64/defconfig	18 Feb 2004 13:36:30 -0000	1.1.1.8
+++ b/arch/ppc64/defconfig	16 Apr 2004 13:16:17 -0000	1.1.1.8.2.1
@@ -110,6 +110,7 @@
 # CONFIG_NETFILTER is not set
 CONFIG_FILTER=y
 CONFIG_UNIX=y
+CONFIG_NET_KEY=y
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 # CONFIG_IP_ADVANCED_ROUTER is not set
Index: arch/s390/defconfig
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/arch/s390/defconfig,v
retrieving revision 1.1.1.15
retrieving revision 1.1.1.15.2.1
diff -u -r1.1.1.15 -r1.1.1.15.2.1
--- a/arch/s390/defconfig	18 Feb 2004 13:36:30 -0000	1.1.1.15
+++ b/arch/s390/defconfig	16 Apr 2004 13:16:17 -0000	1.1.1.15.2.1
@@ -142,6 +142,7 @@
 # CONFIG_NETFILTER_DEBUG is not set
 CONFIG_FILTER=y
 CONFIG_UNIX=y
+CONFIG_NET_KEY=y
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 # CONFIG_IP_ADVANCED_ROUTER is not set
Index: arch/s390x/defconfig
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/arch/s390x/defconfig,v
retrieving revision 1.1.1.13
retrieving revision 1.1.1.13.2.1
diff -u -r1.1.1.13 -r1.1.1.13.2.1
--- a/arch/s390x/defconfig	18 Feb 2004 13:36:30 -0000	1.1.1.13
+++ b/arch/s390x/defconfig	16 Apr 2004 13:16:17 -0000	1.1.1.13.2.1
@@ -142,6 +142,7 @@
 # CONFIG_NETFILTER_DEBUG is not set
 CONFIG_FILTER=y
 CONFIG_UNIX=y
+CONFIG_NET_KEY=y
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 # CONFIG_IP_ADVANCED_ROUTER is not set
Index: arch/sh64/defconfig
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/arch/sh64/defconfig,v
retrieving revision 1.1.1.5
retrieving revision 1.1.1.5.2.1
diff -u -r1.1.1.5 -r1.1.1.5.2.1
--- a/arch/sh64/defconfig	18 Feb 2004 13:36:30 -0000	1.1.1.5
+++ b/arch/sh64/defconfig	16 Apr 2004 13:16:17 -0000	1.1.1.5.2.1
@@ -113,6 +113,7 @@
 # CONFIG_NETFILTER is not set
 # CONFIG_FILTER is not set
 CONFIG_UNIX=y
+CONFIG_NET_KEY=y
 CONFIG_INET=y
 # CONFIG_IP_MULTICAST is not set
 # CONFIG_IP_ADVANCED_ROUTER is not set
Index: arch/sparc/defconfig
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/arch/sparc/defconfig,v
retrieving revision 1.1.1.19
retrieving revision 1.1.1.19.2.1
diff -u -r1.1.1.19 -r1.1.1.19.2.1
--- a/arch/sparc/defconfig	14 Apr 2004 13:05:27 -0000	1.1.1.19
+++ b/arch/sparc/defconfig	16 Apr 2004 13:16:17 -0000	1.1.1.19.2.1
@@ -144,6 +144,7 @@
 # CONFIG_NETFILTER is not set
 # CONFIG_FILTER is not set
 CONFIG_UNIX=y
+CONFIG_NET_KEY=y
 CONFIG_INET=y
 # CONFIG_IP_MULTICAST is not set
 # CONFIG_IP_ADVANCED_ROUTER is not set
Index: arch/sparc64/defconfig
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/arch/sparc64/defconfig,v
retrieving revision 1.1.1.30
retrieving revision 1.1.1.30.2.1
diff -u -r1.1.1.30 -r1.1.1.30.2.1
--- a/arch/sparc64/defconfig	14 Apr 2004 13:05:27 -0000	1.1.1.30
+++ b/arch/sparc64/defconfig	16 Apr 2004 13:16:17 -0000	1.1.1.30.2.1
@@ -189,6 +189,7 @@
 # CONFIG_NETFILTER_DEBUG is not set
 CONFIG_FILTER=y
 CONFIG_UNIX=y
+CONFIG_NET_KEY=y
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 # CONFIG_IP_ADVANCED_ROUTER is not set
Index: arch/x86_64/defconfig
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/arch/x86_64/defconfig,v
retrieving revision 1.1.1.7
retrieving revision 1.1.1.7.2.1
diff -u -r1.1.1.7 -r1.1.1.7.2.1
--- a/arch/x86_64/defconfig	14 Apr 2004 13:05:28 -0000	1.1.1.7
+++ b/arch/x86_64/defconfig	16 Apr 2004 13:16:18 -0000	1.1.1.7.2.1
@@ -144,6 +144,7 @@
 # CONFIG_NETFILTER is not set
 # CONFIG_FILTER is not set
 CONFIG_UNIX=y
+CONFIG_NET_KEY=y
 CONFIG_INET=y
 # CONFIG_IP_MULTICAST is not set
 # CONFIG_IP_ADVANCED_ROUTER is not set
Index: crypto/Config.in
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/crypto/Config.in,v
retrieving revision 1.1.1.17
retrieving revision 1.1.1.17.2.1
diff -u -r1.1.1.17 -r1.1.1.17.2.1
--- a/crypto/Config.in	14 Apr 2004 13:05:28 -0000	1.1.1.17
+++ b/crypto/Config.in	16 Apr 2004 13:16:18 -0000	1.1.1.17.2.1
@@ -11,7 +11,8 @@
      "$CONFIG_INET6_AH" = "y" -o \
      "$CONFIG_INET6_AH" = "m" -o \
      "$CONFIG_INET6_ESP" = "y" -o \
-     "$CONFIG_INET6_ESP" = "m" ]; then
+     "$CONFIG_INET6_ESP" = "m" -o \
+     "$CONFIG_IPV6_PRIVACY" = "y" ]; then
   define_bool CONFIG_CRYPTO y
 else
   bool 'Cryptographic API' CONFIG_CRYPTO
@@ -25,7 +26,8 @@
        "$CONFIG_INET6_AH" = "y" -o \
        "$CONFIG_INET6_AH" = "m" -o \
        "$CONFIG_INET6_ESP" = "y" -o \
-       "$CONFIG_INET6_ESP" = "m" ]; then
+       "$CONFIG_INET6_ESP" = "m" -o \
+       "$CONFIG_IPV6_PRIVACY" = "y" ]; then
     define_bool CONFIG_CRYPTO_HMAC y
   else
     bool           '  HMAC support' CONFIG_CRYPTO_HMAC
@@ -33,38 +35,55 @@
   tristate       '  NULL algorithms' CONFIG_CRYPTO_NULL
   tristate       '  MD4 digest algorithm' CONFIG_CRYPTO_MD4
   if [ "$CONFIG_INET_AH" = "y" -o \
-       "$CONFIG_INET_AH" = "m" -o \
        "$CONFIG_INET_ESP" = "y" -o \
-       "$CONFIG_INET_ESP" = "m" -o \
        "$CONFIG_INET6_AH" = "y" -o \
-       "$CONFIG_INET6_AH" = "m" -o \
-       "$CONFIG_INET6_ESP" = "y" -o \
-       "$CONFIG_INET6_ESP" = "m" ]; then
-    define_bool CONFIG_CRYPTO_MD5 y
+       "$CONFIG_INET6_ESP" = "y" ]; then
+    define_tristate CONFIG_CRYPTO_MD5 y
   else
-    tristate       '  MD5 digest algorithm' CONFIG_CRYPTO_MD5
+    if [ "$CONFIG_IPV6" = "y" -a "$CONFIG_IPV6_PRIVACY" = "y" ]; then
+      define_tristate CONFIG_CRYPTO_MD5 y
+    else
+      if [ "$CONFIG_INET_AH" = "m" -o \
+	   "$CONFIG_INET_ESP" = "m" -o \
+	   "$CONFIG_INET6_AH" = "m" -o \
+	   "$CONFIG_INET6_ESP" = "m" ]; then
+	define_tristate CONFIG_CRYPTO_MD5 m
+      else
+	if [ "$CONFIG_IPV6" = "m" -a "$CONFIG_IPV6_PRIVACY" = "y" ]; then
+	  define_tristate CONFIG_CRYPTO_MD5 m
+	else
+	  tristate       '  MD5 digest algorithm' CONFIG_CRYPTO_MD5
+	fi
+      fi
+    fi
   fi
   if [ "$CONFIG_INET_AH" = "y" -o \
-       "$CONFIG_INET_AH" = "m" -o \
        "$CONFIG_INET_ESP" = "y" -o \
-       "$CONFIG_INET_ESP" = "m" -o \
        "$CONFIG_INET6_AH" = "y" -o \
-       "$CONFIG_INET6_AH" = "m" -o \
-       "$CONFIG_INET6_ESP" = "y" -o \
-       "$CONFIG_INET6_ESP" = "m" ]; then
-    define_bool CONFIG_CRYPTO_SHA1 y
+       "$CONFIG_INET6_ESP" = "y" ]; then
+    define_tristate CONFIG_CRYPTO_SHA1 y
   else
-    tristate       '  SHA1 digest algorithm' CONFIG_CRYPTO_SHA1
+    if [ "$CONFIG_INET_AH" = "m" -o \
+	 "$CONFIG_INET_ESP" = "m" -o \
+	 "$CONFIG_INET6_AH" = "m" -o \
+	 "$CONFIG_INET6_ESP" = "m" ]; then
+      define_tristate CONFIG_CRYPTO_SHA1 m
+    else
+      tristate       '  SHA1 digest algorithm' CONFIG_CRYPTO_SHA1
+    fi
   fi
   tristate       '  SHA256 digest algorithm' CONFIG_CRYPTO_SHA256
   tristate       '  SHA384 and SHA512 digest algorithms' CONFIG_CRYPTO_SHA512
   if [ "$CONFIG_INET_ESP" = "y" -o \
-       "$CONFIG_INET_ESP" = "m" -o \
-       "$CONFIG_INET6_ESP" = "y" -o \
-       "$CONFIG_INET6_ESP" = "m" ]; then
-    define_bool CONFIG_CRYPTO_DES y
+       "$CONFIG_INET6_ESP" = "y" ]; then
+    define_tristate CONFIG_CRYPTO_DES y
   else
-    tristate       '  DES and Triple DES EDE cipher algorithms' CONFIG_CRYPTO_DES
+    if [ "$CONFIG_INET_ESP" = "m" -o \
+	 "$CONFIG_INET6_ESP" = "m" ]; then
+      define_tristate CONFIG_CRYPTO_DES m
+    else
+      tristate       '  DES and Triple DES EDE cipher algorithms' CONFIG_CRYPTO_DES
+    fi
   fi
   tristate       '  Blowfish cipher algorithm' CONFIG_CRYPTO_BLOWFISH
   tristate       '  Twofish cipher algorithm' CONFIG_CRYPTO_TWOFISH
@@ -74,12 +93,15 @@
   tristate       '  CAST6 (CAST-256) cipher algorithm' CONFIG_CRYPTO_CAST6
   tristate       '  ARC4 cipher algorithm' CONFIG_CRYPTO_ARC4
   if [ "$CONFIG_INET_IPCOMP" = "y" -o \
-       "$CONFIG_INET_IPCOMP" = "m" -o \
-       "$CONFIG_INET6_IPCOMP" = "y" -o \
-       "$CONFIG_INET6_IPCOMP" = "m" ]; then
-    define_bool CONFIG_CRYPTO_DEFLATE y
+       "$CONFIG_INET6_IPCOMP" = "y" ]; then
+    define_tristate CONFIG_CRYPTO_DEFLATE y
   else
-    tristate       '  Deflate compression algorithm' CONFIG_CRYPTO_DEFLATE
+    if [ "$CONFIG_INET_IPCOMP" = "m" -o \
+	 "$CONFIG_INET6_IPCOMP" = "m" ]; then
+      define_tristate CONFIG_CRYPTO_DEFLATE m
+    else
+      tristate       '  Deflate compression algorithm' CONFIG_CRYPTO_DEFLATE
+    fi
   fi
   tristate       '  Testing module' CONFIG_CRYPTO_TEST
 fi
Index: drivers/net/3c59x.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/drivers/net/3c59x.c,v
retrieving revision 1.1.1.27
retrieving revision 1.1.1.27.2.1
diff -u -r1.1.1.27 -r1.1.1.27.2.1
--- a/drivers/net/3c59x.c	28 Nov 2003 18:26:20 -0000	1.1.1.27
+++ b/drivers/net/3c59x.c	16 Apr 2004 13:16:18 -0000	1.1.1.27.2.1
@@ -2029,7 +2029,7 @@
 	if (skb->ip_summed != CHECKSUM_HW)
 			vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded);
 	else
-			vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded | AddTCPChksum);
+			vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded | AddTCPChksum | AddUDPChksum);
 
 	if (!skb_shinfo(skb)->nr_frags) {
 		vp->tx_ring[entry].frag[0].addr = cpu_to_le32(pci_map_single(vp->pdev, skb->data,
Index: drivers/net/ppp_generic.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/drivers/net/ppp_generic.c,v
retrieving revision 1.1.1.26
retrieving revision 1.1.1.26.2.1
diff -u -r1.1.1.26 -r1.1.1.26.2.1
--- a/drivers/net/ppp_generic.c	25 Aug 2003 11:44:42 -0000	1.1.1.26
+++ b/drivers/net/ppp_generic.c	16 Apr 2004 13:16:18 -0000	1.1.1.26.2.1
@@ -57,7 +57,9 @@
 #define NP_IPV6	1		/* Internet Protocol V6 */
 #define NP_IPX	2		/* IPX protocol */
 #define NP_AT	3		/* Appletalk protocol */
-#define NUM_NP	4		/* Number of NPs. */
+#define NP_MPLS_UC 4		/* MPLS unicast */
+#define NP_MPLS_MC 5		/* MPLS multicast */
+#define NUM_NP	6		/* Number of NPs. */
 
 #define MPHDRLEN	6	/* multilink protocol header length */
 #define MPHDRLEN_SSN	4	/* ditto with short sequence numbers */
@@ -281,6 +283,10 @@
 		return NP_IPX;
 	case PPP_AT:
 		return NP_AT;
+	case PPP_MPLS_UC:
+		return NP_MPLS_UC;
+	case PPP_MPLS_MC:
+		return NP_MPLS_MC;
 	}
 	return -EINVAL;
 }
@@ -291,6 +297,8 @@
 	PPP_IPV6,
 	PPP_IPX,
 	PPP_AT,
+	PPP_MPLS_UC,
+	PPP_MPLS_MC,
 };
 	
 /* Translates an ethertype into an NP index */
@@ -306,6 +314,10 @@
 	case ETH_P_PPPTALK:
 	case ETH_P_ATALK:
 		return NP_AT;
+	case ETH_P_MPLS_UC:
+		return NP_MPLS_UC;
+	case ETH_P_MPLS_MC:
+		return NP_MPLS_MC;
 	}
 	return -1;
 }
@@ -316,6 +328,8 @@
 	ETH_P_IPV6,
 	ETH_P_IPX,
 	ETH_P_PPPTALK,
+	ETH_P_MPLS_UC,
+	ETH_P_MPLS_MC,
 };
 
 /*
Index: include/asm-alpha/scatterlist.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/asm-alpha/scatterlist.h,v
retrieving revision 1.1.1.14
retrieving revision 1.1.1.14.2.1
diff -u -r1.1.1.14 -r1.1.1.14.2.1
--- a/include/asm-alpha/scatterlist.h	12 Oct 2001 22:35:54 -0000	1.1.1.14
+++ b/include/asm-alpha/scatterlist.h	16 Apr 2004 13:16:18 -0000	1.1.1.14.2.1
@@ -2,6 +2,7 @@
 #define _ALPHA_SCATTERLIST_H
 
 #include <asm/page.h>
+#include <linux/types.h>
   
 struct scatterlist {
 	/* This will disappear in 2.5.x */
Index: include/linux/if_arp.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/linux/if_arp.h,v
retrieving revision 1.1.1.17
retrieving revision 1.1.1.17.2.1
diff -u -r1.1.1.17 -r1.1.1.17.2.1
--- a/include/linux/if_arp.h	25 Feb 2002 19:38:13 -0000	1.1.1.17
+++ b/include/linux/if_arp.h	16 Apr 2004 13:16:18 -0000	1.1.1.17.2.1
@@ -59,7 +59,7 @@
 #define ARPHRD_RAWHDLC	518		/* Raw HDLC			*/
 
 #define ARPHRD_TUNNEL	768		/* IPIP tunnel			*/
-#define ARPHRD_TUNNEL6	769		/* IPIP6 tunnel			*/
+#define ARPHRD_TUNNEL6	769		/* IP6IP6 tunnel       		*/
 #define ARPHRD_FRAD	770             /* Frame Relay Access Device    */
 #define ARPHRD_SKIP	771		/* SKIP vif			*/
 #define ARPHRD_LOOPBACK	772		/* Loopback device		*/
Index: include/linux/if_ether.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/linux/if_ether.h,v
retrieving revision 1.1.1.19
retrieving revision 1.1.1.19.2.1
diff -u -r1.1.1.19 -r1.1.1.19.2.1
--- a/include/linux/if_ether.h	25 Aug 2003 11:44:44 -0000	1.1.1.19
+++ b/include/linux/if_ether.h	16 Apr 2004 13:16:18 -0000	1.1.1.19.2.1
@@ -61,6 +61,8 @@
 #define ETH_P_IPV6	0x86DD		/* IPv6 over bluebook		*/
 #define ETH_P_PPP_DISC	0x8863		/* PPPoE discovery messages     */
 #define ETH_P_PPP_SES	0x8864		/* PPPoE session messages	*/
+#define ETH_P_MPLS_UC	0x8847		/* MPLS Unicast traffic		*/
+#define ETH_P_MPLS_MC	0x8848		/* MPLS Multicast traffic	*/
 #define ETH_P_ATMMPOA	0x884c		/* MultiProtocol Over ATM	*/
 #define ETH_P_ATMFATE	0x8884		/* Frame-based ATM Transport
 					 * over Ethernet
Index: include/linux/in.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/linux/in.h,v
retrieving revision 1.1.1.17
retrieving revision 1.1.1.17.2.1
diff -u -r1.1.1.17 -r1.1.1.17.2.1
--- a/include/linux/in.h	28 Nov 2003 18:26:21 -0000	1.1.1.17
+++ b/include/linux/in.h	16 Apr 2004 13:16:18 -0000	1.1.1.17.2.1
@@ -18,6 +18,7 @@
 #ifndef _LINUX_IN_H
 #define _LINUX_IN_H
 
+#include <linux/socket.h>
 #include <linux/types.h>
 #include <linux/socket.h>
 
@@ -69,6 +70,8 @@
 #define	IP_RECVTOS	13
 #define IP_MTU		14
 #define IP_FREEBIND	15
+#define IP_IPSEC_POLICY	16
+#define IP_XFRM_POLICY	17
 
 /* BSD compatibility */
 #define IP_RECVRETOPTS	IP_RETOPTS
Index: include/linux/in6.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/linux/in6.h,v
retrieving revision 1.1.1.16
retrieving revision 1.1.1.16.2.1
diff -u -r1.1.1.16 -r1.1.1.16.2.1
--- a/include/linux/in6.h	13 Jun 2003 14:51:38 -0000	1.1.1.16
+++ b/include/linux/in6.h	16 Apr 2004 13:16:18 -0000	1.1.1.16.2.1
@@ -180,5 +180,8 @@
 #define IPV6_FLOWLABEL_MGR	32
 #define IPV6_FLOWINFO_SEND	33
 
+#define IPV6_IPSEC_POLICY	34
+#define IPV6_XFRM_POLICY	35
+
 
 #endif
Index: include/linux/inetdevice.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/linux/inetdevice.h,v
retrieving revision 1.1.1.16
retrieving revision 1.1.1.16.2.1
diff -u -r1.1.1.16 -r1.1.1.16.2.1
--- a/include/linux/inetdevice.h	14 Apr 2004 13:05:40 -0000	1.1.1.16
+++ b/include/linux/inetdevice.h	16 Apr 2004 13:16:18 -0000	1.1.1.16.2.1
@@ -21,6 +21,8 @@
 	int	arp_announce;
 	int	arp_ignore;
 	int	medium_id;
+	int	no_xfrm;
+	int	no_policy;
 	int	force_igmp_version;
 	void	*sysctl;
 };
Index: include/linux/ip.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/linux/ip.h,v
retrieving revision 1.1.1.13
retrieving revision 1.1.1.13.2.1
diff -u -r1.1.1.13 -r1.1.1.13.2.1
--- a/include/linux/ip.h	22 Nov 2001 19:47:11 -0000	1.1.1.13
+++ b/include/linux/ip.h	16 Apr 2004 13:16:18 -0000	1.1.1.13.2.1
@@ -18,8 +18,6 @@
 #define _LINUX_IP_H
 #include <asm/byteorder.h>
 
-/* SOL_IP socket options */
-
 #define IPTOS_TOS_MASK		0x1E
 #define IPTOS_TOS(tos)		((tos)&IPTOS_TOS_MASK)
 #define	IPTOS_LOWDELAY		0x10
@@ -67,14 +65,6 @@
 #define MAXTTL		255
 #define IPDEFTTL	64
 
-/* struct timestamp, struct route and MAX_ROUTES are removed.
-
-   REASONS: it is clear that nobody used them because:
-   - MAX_ROUTES value was wrong.
-   - "struct route" was wrong.
-   - "struct timestamp" had fatally misaligned bitfields and was completely unusable.
- */
-
 #define IPOPT_OPTVAL 0
 #define IPOPT_OLEN   1
 #define IPOPT_OFFSET 2
@@ -135,4 +125,25 @@
 	/*The options start here. */
 };
 
+struct ip_auth_hdr {
+	__u8  nexthdr;
+	__u8  hdrlen;		/* This one is measured in 32 bit units! */
+	__u16 reserved;
+	__u32 spi;
+	__u32 seq_no;		/* Sequence number */
+	__u8  auth_data[0];	/* Variable len but >=4. Mind the 64 bit alignment! */
+};
+
+struct ip_esp_hdr {
+	__u32 spi;
+	__u32 seq_no;		/* Sequence number */
+	__u8  enc_data[0];	/* Variable len but >=8. Mind the 64 bit alignment! */
+};
+
+struct ip_comp_hdr {
+	__u8 nexthdr;
+	__u8 flags;
+	__u16 cpi;
+};
+
 #endif	/* _LINUX_IP_H */
Index: include/linux/ip6_tunnel.h
===================================================================
RCS file: include/linux/ip6_tunnel.h
diff -N include/linux/ip6_tunnel.h
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ b/include/linux/ip6_tunnel.h	16 Apr 2004 13:16:18 -0000	1.2.18.1
@@ -0,0 +1,32 @@
+/*
+ * $Id$
+ */
+
+#ifndef _IP6_TUNNEL_H
+#define _IP6_TUNNEL_H
+
+#define IPV6_TLV_TNL_ENCAP_LIMIT 4
+#define IPV6_DEFAULT_TNL_ENCAP_LIMIT 4
+
+/* don't add encapsulation limit if one isn't present in inner packet */
+#define IP6_TNL_F_IGN_ENCAP_LIMIT 0x1
+/* copy the traffic class field from the inner packet */
+#define IP6_TNL_F_USE_ORIG_TCLASS 0x2
+/* copy the flowlabel from the inner packet */
+#define IP6_TNL_F_USE_ORIG_FLOWLABEL 0x4
+/* being used for Mobile IPv6 */
+#define IP6_TNL_F_MIP6_DEV 0x8
+
+struct ip6_tnl_parm {
+	char name[IFNAMSIZ];	/* name of tunnel device */
+	int link;		/* ifindex of underlying L2 interface */
+	__u8 proto;		/* tunnel protocol */
+	__u8 encap_limit;	/* encapsulation limit for tunnel */
+	__u8 hop_limit;		/* hop limit for tunnel */
+	__u32 flowinfo;		/* traffic class and flowlabel for tunnel */
+	__u32 flags;		/* tunnel flags */
+	struct in6_addr laddr;	/* local tunnel end-point address */
+	struct in6_addr raddr;	/* remote tunnel end-point address */
+};
+
+#endif
Index: include/linux/ipsec.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/linux/ipsec.h,v
retrieving revision 1.1.1.14
retrieving revision 1.1.1.14.2.1
diff -u -r1.1.1.14 -r1.1.1.14.2.1
--- a/include/linux/ipsec.h	22 Nov 2001 19:47:15 -0000	1.1.1.14
+++ b/include/linux/ipsec.h	16 Apr 2004 13:16:18 -0000	1.1.1.14.2.1
@@ -1,69 +1,46 @@
-/*
- *	Definitions for the SECurity layer
- *
- *	Author:
- *		Robert Muchsel <muchsel@acm.org>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- */
- 
 #ifndef _LINUX_IPSEC_H
 #define _LINUX_IPSEC_H
 
-#include <linux/config.h>
-#include <linux/socket.h>
-#include <net/sock.h>
-#include <linux/skbuff.h>
-
-/* Values for the set/getsockopt calls */
-
-/* These defines are compatible with NRL IPv6, however their semantics
-   is different */
-
-#define IPSEC_LEVEL_NONE	-1	/* send plaintext, accept any */
-#define IPSEC_LEVEL_DEFAULT	0	/* encrypt/authenticate if possible */
-					/* the default MUST be 0, because a */
-					/* socket is initialized with 0's */
-#define IPSEC_LEVEL_USE		1	/* use outbound, don't require inbound */
-#define IPSEC_LEVEL_REQUIRE	2	/* require both directions */
-#define IPSEC_LEVEL_UNIQUE	2	/* for compatibility only */
-
-#ifdef __KERNEL__
-
-/* skb bit flags set on packet input processing */
-
-#define RCV_SEC			0x0f	/* options on receive */
-#define RCV_AUTH		0x01	/* was authenticated */
-#define RCV_CRYPT		0x02	/* was encrypted */
-#define RCV_TUNNEL		0x04	/* was tunneled */
-#define SND_SEC			0xf0	/* options on send, these are */
-#define SND_AUTH		0x10	/* currently unused */
-#define SND_CRYPT		0x20
-#define SND_TUNNEL		0x40
-
-/*
- *	FIXME: ignores network encryption for now..
- */
- 
-#ifdef CONFIG_NET_SECURITY
-static __inline__ int ipsec_sk_policy(struct sock *sk, struct sk_buff *skb)
-{
-	return ((sk->authentication < IPSEC_LEVEL_REQUIRE) ||
-		(skb->security & RCV_AUTH)) &&
-		((sk->encryption < IPSEC_LEVEL_REQUIRE) ||
-		(skb->security & RCV_CRYPT));
-}
-
-#else
-
-static __inline__ int ipsec_sk_policy(struct sock *sk, struct sk_buff *skb)
-{
-	return 1;
-}
-#endif /* CONFIG */
+/* The definitions, required to talk to KAME racoon IKE. */
+
+#include <linux/pfkeyv2.h>
+
+#define IPSEC_PORT_ANY		0
+#define IPSEC_ULPROTO_ANY	255
+#define IPSEC_PROTO_ANY		255
+
+enum {
+	IPSEC_MODE_ANY		= 0,	/* We do not support this for SA */
+	IPSEC_MODE_TRANSPORT	= 1,
+	IPSEC_MODE_TUNNEL	= 2
+};
+
+enum {
+	IPSEC_DIR_ANY		= 0,
+	IPSEC_DIR_INBOUND	= 1,
+	IPSEC_DIR_OUTBOUND	= 2,
+	IPSEC_DIR_FWD		= 3,	/* It is our own */
+	IPSEC_DIR_MAX		= 4,
+	IPSEC_DIR_INVALID	= 5
+};
+
+enum {
+	IPSEC_POLICY_DISCARD	= 0,
+	IPSEC_POLICY_NONE	= 1,
+	IPSEC_POLICY_IPSEC	= 2,
+	IPSEC_POLICY_ENTRUST	= 3,
+	IPSEC_POLICY_BYPASS	= 4
+};
+
+enum {
+	IPSEC_LEVEL_DEFAULT	= 0,
+	IPSEC_LEVEL_USE		= 1,
+	IPSEC_LEVEL_REQUIRE	= 2,
+	IPSEC_LEVEL_UNIQUE	= 3
+};
+
+#define IPSEC_MANUAL_REQID_MAX	0x3fff
+
+#define IPSEC_REPLAYWSIZE  32
 
-#endif	/* __KERNEL__ */
 #endif	/* _LINUX_IPSEC_H */
Index: include/linux/ipv6.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/linux/ipv6.h,v
retrieving revision 1.1.1.15
retrieving revision 1.1.1.15.2.1
diff -u -r1.1.1.15 -r1.1.1.15.2.1
--- a/include/linux/ipv6.h	28 Nov 2003 18:26:21 -0000	1.1.1.15
+++ b/include/linux/ipv6.h	16 Apr 2004 13:16:18 -0000	1.1.1.15.2.1
@@ -73,6 +73,27 @@
 #define rt0_type		rt_hdr.type
 };
 
+struct ipv6_auth_hdr {
+	__u8  nexthdr;
+	__u8  hdrlen;           /* This one is measured in 32 bit units! */
+	__u16 reserved;
+	__u32 spi;
+	__u32 seq_no;           /* Sequence number */
+	__u8  auth_data[0];     /* Length variable but >=4. Mind the 64 bit alignment! */
+};
+
+struct ipv6_esp_hdr {
+	__u32 spi;
+	__u32 seq_no;           /* Sequence number */
+	__u8  enc_data[0];      /* Length variable but >=8. Mind the 64 bit alignment! */
+};
+
+struct ipv6_comp_hdr {
+	__u8 nexthdr;
+	__u8 flags;
+	__u16 cpi;
+};
+
 /*
  *	IPv6 fixed header
  *
Index: include/linux/ipv6_route.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/linux/ipv6_route.h,v
retrieving revision 1.1.1.13
retrieving revision 1.1.1.13.2.1
diff -u -r1.1.1.13 -r1.1.1.13.2.1
--- a/include/linux/ipv6_route.h	28 Nov 2003 18:26:21 -0000	1.1.1.13
+++ b/include/linux/ipv6_route.h	16 Apr 2004 13:16:18 -0000	1.1.1.13.2.1
@@ -13,15 +13,6 @@
 #ifndef _LINUX_IPV6_ROUTE_H
 #define _LINUX_IPV6_ROUTE_H
 
-enum
-{
-	RTA_IPV6_UNSPEC,
-	RTA_IPV6_HOPLIMIT,
-};
-
-#define	RTA_IPV6_MAX RTA_IPV6_HOPLIMIT
-
-
 #define RTF_DEFAULT	0x00010000	/* default - learned via ND	*/
 #define RTF_ALLONLINK	0x00020000	/* fallback, no routers on link	*/
 #define RTF_ADDRCONF	0x00040000	/* addrconf route - RA		*/
@@ -33,6 +24,7 @@
 #define RTF_CACHE	0x01000000	/* cache entry			*/
 #define RTF_FLOW	0x02000000	/* flow significant route	*/
 #define RTF_POLICY	0x04000000	/* policy route			*/
+#define RTF_NDISC	0x08000000	/* ndisc route			*/
 
 #define RTF_LOCAL	0x80000000
 
Index: include/linux/netdevice.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/linux/netdevice.h,v
retrieving revision 1.1.1.27
retrieving revision 1.1.1.27.2.1
diff -u -r1.1.1.27 -r1.1.1.27.2.1
--- a/include/linux/netdevice.h	18 Feb 2004 13:36:32 -0000	1.1.1.27
+++ b/include/linux/netdevice.h	16 Apr 2004 13:16:18 -0000	1.1.1.27.2.1
@@ -95,6 +95,11 @@
 #define MAX_HEADER (LL_MAX_HEADER + 48)
 #endif
 
+/* Reserve hard_header_len rounded down to a 16-byte multiple, plus 16 (so >= 16).
+ * Alternative is: dev->hard_header_len ? (dev->hard_header_len + 15)&~15 : 0
+ */
+#define LL_RESERVED_SPACE(dev) (((dev)->hard_header_len&~15) + 16)
+
 /*
  *	Network device statistics. Akin to the 2.0 ether stats but
  *	with byte counters.
@@ -494,6 +499,7 @@
 extern int		dev_queue_xmit(struct sk_buff *skb);
 extern int		register_netdevice(struct net_device *dev);
 extern int		unregister_netdevice(struct net_device *dev);
+extern void		synchronize_net(void);
 extern int 		register_netdevice_notifier(struct notifier_block *nb);
 extern int		unregister_netdevice_notifier(struct notifier_block *nb);
 extern int		dev_new_index(void);
Index: include/linux/netlink.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/linux/netlink.h,v
retrieving revision 1.1.1.19
retrieving revision 1.1.1.19.2.1
diff -u -r1.1.1.19 -r1.1.1.19.2.1
--- a/include/linux/netlink.h	28 Nov 2002 23:53:15 -0000	1.1.1.19
+++ b/include/linux/netlink.h	16 Apr 2004 13:16:18 -0000	1.1.1.19.2.1
@@ -7,6 +7,7 @@
 #define NETLINK_FIREWALL	3	/* Firewalling hook				*/
 #define NETLINK_TCPDIAG		4	/* TCP socket monitoring			*/
 #define NETLINK_NFLOG		5	/* netfilter/iptables ULOG */
+#define NETLINK_XFRM		6	/* ipsec */
 #define NETLINK_ARPD		8
 #define NETLINK_ROUTE6		11	/* af_inet6 route comm channel */
 #define NETLINK_IP6_FW		13
@@ -86,6 +87,8 @@
 
 #ifdef __KERNEL__
 
+#include <linux/capability.h>
+
 struct netlink_skb_parms
 {
 	struct ucred		creds;		/* Skb credentials	*/
@@ -107,8 +110,8 @@
 extern struct sock *netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len));
 extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err);
 extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock);
-extern void netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid,
-			      __u32 group, int allocation);
+extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid,
+			     __u32 group, int allocation);
 extern void netlink_set_err(struct sock *ssk, __u32 pid, __u32 group, int code);
 extern int netlink_register_notifier(struct notifier_block *nb);
 extern int netlink_unregister_notifier(struct notifier_block *nb);
Index: include/linux/pfkeyv2.h
===================================================================
RCS file: include/linux/pfkeyv2.h
diff -N include/linux/pfkeyv2.h
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ b/include/linux/pfkeyv2.h	16 Apr 2004 13:16:19 -0000	1.4.18.1
@@ -0,0 +1,335 @@
+/* PF_KEY user interface, this is defined by rfc2367 so
+ * do not make arbitrary modifications or else this header
+ * file will not be compliant.
+ */
+
+#ifndef _LINUX_PFKEY2_H
+#define _LINUX_PFKEY2_H
+
+#include <linux/types.h>
+
+#define PF_KEY_V2		2
+#define PFKEYV2_REVISION	199806L
+
+struct sadb_msg {
+	uint8_t		sadb_msg_version;
+	uint8_t		sadb_msg_type;
+	uint8_t		sadb_msg_errno;
+	uint8_t		sadb_msg_satype;
+	uint16_t	sadb_msg_len;
+	uint16_t	sadb_msg_reserved;
+	uint32_t	sadb_msg_seq;
+	uint32_t	sadb_msg_pid;
+} __attribute__((packed));
+/* sizeof(struct sadb_msg) == 16 */
+
+struct sadb_ext {
+	uint16_t	sadb_ext_len;
+	uint16_t	sadb_ext_type;
+} __attribute__((packed));
+/* sizeof(struct sadb_ext) == 4 */
+
+struct sadb_sa {
+	uint16_t	sadb_sa_len;
+	uint16_t	sadb_sa_exttype;
+	uint32_t	sadb_sa_spi;
+	uint8_t		sadb_sa_replay;
+	uint8_t		sadb_sa_state;
+	uint8_t		sadb_sa_auth;
+	uint8_t		sadb_sa_encrypt;
+	uint32_t	sadb_sa_flags;
+} __attribute__((packed));
+/* sizeof(struct sadb_sa) == 16 */
+
+struct sadb_lifetime {
+	uint16_t	sadb_lifetime_len;
+	uint16_t	sadb_lifetime_exttype;
+	uint32_t	sadb_lifetime_allocations;
+	uint64_t	sadb_lifetime_bytes;
+	uint64_t	sadb_lifetime_addtime;
+	uint64_t	sadb_lifetime_usetime;
+} __attribute__((packed));
+/* sizeof(struct sadb_lifetime) == 32 */
+
+struct sadb_address {
+	uint16_t	sadb_address_len;
+	uint16_t	sadb_address_exttype;
+	uint8_t		sadb_address_proto;
+	uint8_t		sadb_address_prefixlen;
+	uint16_t	sadb_address_reserved;
+} __attribute__((packed));
+/* sizeof(struct sadb_address) == 8 */
+
+struct sadb_key {
+	uint16_t	sadb_key_len;
+	uint16_t	sadb_key_exttype;
+	uint16_t	sadb_key_bits;
+	uint16_t	sadb_key_reserved;
+} __attribute__((packed));
+/* sizeof(struct sadb_key) == 8 */
+
+struct sadb_ident {
+	uint16_t	sadb_ident_len;
+	uint16_t	sadb_ident_exttype;
+	uint16_t	sadb_ident_type;
+	uint16_t	sadb_ident_reserved;
+	uint64_t	sadb_ident_id;
+} __attribute__((packed));
+/* sizeof(struct sadb_ident) == 16 */
+
+struct sadb_sens {
+	uint16_t	sadb_sens_len;
+	uint16_t	sadb_sens_exttype;
+	uint32_t	sadb_sens_dpd;
+	uint8_t		sadb_sens_sens_level;
+	uint8_t		sadb_sens_sens_len;
+	uint8_t		sadb_sens_integ_level;
+	uint8_t		sadb_sens_integ_len;
+	uint32_t	sadb_sens_reserved;
+} __attribute__((packed));
+/* sizeof(struct sadb_sens) == 16 */
+
+/* followed by:
+	uint64_t	sadb_sens_bitmap[sens_len];
+	uint64_t	sadb_integ_bitmap[integ_len];  */
+
+struct sadb_prop {
+	uint16_t	sadb_prop_len;
+	uint16_t	sadb_prop_exttype;
+	uint8_t		sadb_prop_replay;
+	uint8_t		sadb_prop_reserved[3];
+} __attribute__((packed));
+/* sizeof(struct sadb_prop) == 8 */
+
+/* followed by:
+	struct sadb_comb sadb_combs[(sadb_prop_len +
+		sizeof(uint64_t) - sizeof(struct sadb_prop)) /
+		sizeof(struct sadb_comb)]; */
+
+struct sadb_comb {
+	uint8_t		sadb_comb_auth;
+	uint8_t		sadb_comb_encrypt;
+	uint16_t	sadb_comb_flags;
+	uint16_t	sadb_comb_auth_minbits;
+	uint16_t	sadb_comb_auth_maxbits;
+	uint16_t	sadb_comb_encrypt_minbits;
+	uint16_t	sadb_comb_encrypt_maxbits;
+	uint32_t	sadb_comb_reserved;
+	uint32_t	sadb_comb_soft_allocations;
+	uint32_t	sadb_comb_hard_allocations;
+	uint64_t	sadb_comb_soft_bytes;
+	uint64_t	sadb_comb_hard_bytes;
+	uint64_t	sadb_comb_soft_addtime;
+	uint64_t	sadb_comb_hard_addtime;
+	uint64_t	sadb_comb_soft_usetime;
+	uint64_t	sadb_comb_hard_usetime;
+} __attribute__((packed));
+/* sizeof(struct sadb_comb) == 72 */
+
+struct sadb_supported {
+	uint16_t	sadb_supported_len;
+	uint16_t	sadb_supported_exttype;
+	uint32_t	sadb_supported_reserved;
+} __attribute__((packed));
+/* sizeof(struct sadb_supported) == 8 */
+
+/* followed by:
+	struct sadb_alg sadb_algs[(sadb_supported_len +
+		sizeof(uint64_t) - sizeof(struct sadb_supported)) /
+		sizeof(struct sadb_alg)]; */
+
+struct sadb_alg {
+	uint8_t		sadb_alg_id;
+	uint8_t		sadb_alg_ivlen;
+	uint16_t	sadb_alg_minbits;
+	uint16_t	sadb_alg_maxbits;
+	uint16_t	sadb_alg_reserved;
+} __attribute__((packed));
+/* sizeof(struct sadb_alg) == 8 */
+
+struct sadb_spirange {
+	uint16_t	sadb_spirange_len;
+	uint16_t	sadb_spirange_exttype;
+	uint32_t	sadb_spirange_min;
+	uint32_t	sadb_spirange_max;
+	uint32_t	sadb_spirange_reserved;
+} __attribute__((packed));
+/* sizeof(struct sadb_spirange) == 16 */
+
+struct sadb_x_kmprivate {
+	uint16_t	sadb_x_kmprivate_len;
+	uint16_t	sadb_x_kmprivate_exttype;
+	uint32_t	sadb_x_kmprivate_reserved;
+} __attribute__((packed));
+/* sizeof(struct sadb_x_kmprivate) == 8 */
+
+struct sadb_x_sa2 {
+	uint16_t	sadb_x_sa2_len;
+	uint16_t	sadb_x_sa2_exttype;
+	uint8_t		sadb_x_sa2_mode;
+	uint8_t		sadb_x_sa2_reserved1;
+	uint16_t	sadb_x_sa2_reserved2;
+	uint32_t	sadb_x_sa2_sequence;
+	uint32_t	sadb_x_sa2_reqid;
+} __attribute__((packed));
+/* sizeof(struct sadb_x_sa2) == 16 */
+
+struct sadb_x_policy {
+	uint16_t	sadb_x_policy_len;
+	uint16_t	sadb_x_policy_exttype;
+	uint16_t	sadb_x_policy_type;
+	uint8_t		sadb_x_policy_dir;
+	uint8_t		sadb_x_policy_reserved;
+	uint32_t	sadb_x_policy_id;
+	uint32_t	sadb_x_policy_reserved2;
+} __attribute__((packed));
+/* sizeof(struct sadb_x_policy) == 16 */
+
+struct sadb_x_ipsecrequest {
+	uint16_t	sadb_x_ipsecrequest_len;
+	uint16_t	sadb_x_ipsecrequest_proto;
+	uint8_t		sadb_x_ipsecrequest_mode;
+	uint8_t		sadb_x_ipsecrequest_level;
+	uint16_t	sadb_x_ipsecrequest_reserved1;
+	uint32_t	sadb_x_ipsecrequest_reqid;
+	uint32_t	sadb_x_ipsecrequest_reserved2;
+} __attribute__((packed));
+/* sizeof(struct sadb_x_ipsecrequest) == 16 */
+
+/* This defines the TYPE of Nat Traversal in use.  Currently only one
+ * type of NAT-T is supported, draft-ietf-ipsec-udp-encaps-06
+ */
+struct sadb_x_nat_t_type {
+	uint16_t	sadb_x_nat_t_type_len;
+	uint16_t	sadb_x_nat_t_type_exttype;
+	uint8_t		sadb_x_nat_t_type_type;
+	uint8_t		sadb_x_nat_t_type_reserved[3];
+} __attribute__((packed));
+/* sizeof(struct sadb_x_nat_t_type) == 8 */
+
+/* Pass a NAT Traversal port (Source or Dest port) */
+struct sadb_x_nat_t_port {
+	uint16_t	sadb_x_nat_t_port_len;
+	uint16_t	sadb_x_nat_t_port_exttype;
+	uint16_t	sadb_x_nat_t_port_port;
+	uint16_t	sadb_x_nat_t_port_reserved;
+} __attribute__((packed));
+/* sizeof(struct sadb_x_nat_t_port) == 8 */
+
+/* Message types */
+#define SADB_RESERVED		0
+#define SADB_GETSPI		1
+#define SADB_UPDATE		2
+#define SADB_ADD		3
+#define SADB_DELETE		4
+#define SADB_GET		5
+#define SADB_ACQUIRE		6
+#define SADB_REGISTER		7
+#define SADB_EXPIRE		8
+#define SADB_FLUSH		9
+#define SADB_DUMP		10
+#define SADB_X_PROMISC		11
+#define SADB_X_PCHANGE		12
+#define SADB_X_SPDUPDATE	13
+#define SADB_X_SPDADD		14
+#define SADB_X_SPDDELETE	15
+#define SADB_X_SPDGET		16
+#define SADB_X_SPDACQUIRE	17
+#define SADB_X_SPDDUMP		18
+#define SADB_X_SPDFLUSH		19
+#define SADB_X_SPDSETIDX	20
+#define SADB_X_SPDEXPIRE	21
+#define SADB_X_SPDDELETE2	22
+#define SADB_X_NAT_T_NEW_MAPPING	23
+#define SADB_MAX		23
+
+/* Security Association flags */
+#define SADB_SAFLAGS_PFS	1
+#define SADB_SAFLAGS_NOECN	0x80000000
+
+/* Security Association states */
+#define SADB_SASTATE_LARVAL	0
+#define SADB_SASTATE_MATURE	1
+#define SADB_SASTATE_DYING	2
+#define SADB_SASTATE_DEAD	3
+#define SADB_SASTATE_MAX	3
+
+/* Security Association types */
+#define SADB_SATYPE_UNSPEC	0
+#define SADB_SATYPE_AH		2
+#define SADB_SATYPE_ESP		3
+#define SADB_SATYPE_RSVP	5
+#define SADB_SATYPE_OSPFV2	6
+#define SADB_SATYPE_RIPV2	7
+#define SADB_SATYPE_MIP		8
+#define SADB_X_SATYPE_IPCOMP	9
+#define SADB_SATYPE_MAX		9
+
+/* Authentication algorithms */
+#define SADB_AALG_NONE			0
+#define SADB_AALG_MD5HMAC		2
+#define SADB_AALG_SHA1HMAC		3
+#define SADB_X_AALG_SHA2_256HMAC	5
+#define SADB_X_AALG_SHA2_384HMAC	6
+#define SADB_X_AALG_SHA2_512HMAC	7
+#define SADB_X_AALG_RIPEMD160HMAC	8
+#define SADB_X_AALG_NULL		251	/* kame */
+#define SADB_AALG_MAX			251
+
+/* Encryption algorithms */
+#define SADB_EALG_NONE			0
+#define SADB_EALG_DESCBC		2
+#define SADB_EALG_3DESCBC		3
+#define SADB_X_EALG_CASTCBC		6
+#define SADB_X_EALG_BLOWFISHCBC		7
+#define SADB_EALG_NULL			11
+#define SADB_X_EALG_AESCBC		12
+#define SADB_EALG_MAX                   253 /* last EALG */
+/* private allocations should use 249-255 (RFC2407) */
+#define SADB_X_EALG_SERPENTCBC  252     /* draft-ietf-ipsec-ciph-aes-cbc-00 */
+#define SADB_X_EALG_TWOFISHCBC  253     /* draft-ietf-ipsec-ciph-aes-cbc-00 */
+
+/* Compression algorithms */
+#define SADB_X_CALG_NONE		0
+#define SADB_X_CALG_OUI			1
+#define SADB_X_CALG_DEFLATE		2
+#define SADB_X_CALG_LZS			3
+#define SADB_X_CALG_LZJH		4
+#define SADB_X_CALG_MAX			4
+
+/* Extension Header values */
+#define SADB_EXT_RESERVED		0
+#define SADB_EXT_SA			1
+#define SADB_EXT_LIFETIME_CURRENT	2
+#define SADB_EXT_LIFETIME_HARD		3
+#define SADB_EXT_LIFETIME_SOFT		4
+#define SADB_EXT_ADDRESS_SRC		5
+#define SADB_EXT_ADDRESS_DST		6
+#define SADB_EXT_ADDRESS_PROXY		7
+#define SADB_EXT_KEY_AUTH		8
+#define SADB_EXT_KEY_ENCRYPT		9
+#define SADB_EXT_IDENTITY_SRC		10
+#define SADB_EXT_IDENTITY_DST		11
+#define SADB_EXT_SENSITIVITY		12
+#define SADB_EXT_PROPOSAL		13
+#define SADB_EXT_SUPPORTED_AUTH		14
+#define SADB_EXT_SUPPORTED_ENCRYPT	15
+#define SADB_EXT_SPIRANGE		16
+#define SADB_X_EXT_KMPRIVATE		17
+#define SADB_X_EXT_POLICY		18
+#define SADB_X_EXT_SA2			19
+/* The next four entries are for setting up NAT Traversal */
+#define SADB_X_EXT_NAT_T_TYPE		20
+#define SADB_X_EXT_NAT_T_SPORT		21
+#define SADB_X_EXT_NAT_T_DPORT		22
+#define SADB_X_EXT_NAT_T_OA		23
+#define SADB_EXT_MAX			23
+
+/* Identity Extension values */
+#define SADB_IDENTTYPE_RESERVED	0
+#define SADB_IDENTTYPE_PREFIX	1
+#define SADB_IDENTTYPE_FQDN	2
+#define SADB_IDENTTYPE_USERFQDN	3
+#define SADB_IDENTTYPE_MAX	3
+
+#endif /* !(_LINUX_PFKEY2_H) */
Index: include/linux/ppp_defs.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/linux/ppp_defs.h,v
retrieving revision 1.1.1.13
retrieving revision 1.1.1.13.2.1
diff -u -r1.1.1.13 -r1.1.1.13.2.1
--- a/include/linux/ppp_defs.h	13 Mar 2000 05:12:37 -0000	1.1.1.13
+++ b/include/linux/ppp_defs.h	16 Apr 2004 13:16:19 -0000	1.1.1.13.2.1
@@ -74,12 +74,15 @@
 #define PPP_IPV6	0x57	/* Internet Protocol Version 6 */
 #define PPP_COMPFRAG	0xfb	/* fragment compressed below bundle */
 #define PPP_COMP	0xfd	/* compressed packet */
+#define PPP_MPLS_UC	0x0281	/* Multi Protocol Label Switching - Unicast */
+#define PPP_MPLS_MC	0x0283	/* Multi Protocol Label Switching - Multicast */
 #define PPP_IPCP	0x8021	/* IP Control Protocol */
 #define PPP_ATCP	0x8029	/* AppleTalk Control Protocol */
 #define PPP_IPXCP	0x802b	/* IPX Control Protocol */
 #define PPP_IPV6CP	0x8057	/* IPv6 Control Protocol */
 #define PPP_CCPFRAG	0x80fb	/* CCP at link level (below MP bundle) */
 #define PPP_CCP		0x80fd	/* Compression Control Protocol */
+#define PPP_MPLSCP	0x8281	/* MPLS Control Protocol (RFC 3032) */
 #define PPP_LCP		0xc021	/* Link Control Protocol */
 #define PPP_PAP		0xc023	/* Password Authentication Protocol */
 #define PPP_LQR		0xc025	/* Link Quality Report protocol */
Index: include/linux/rtnetlink.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/linux/rtnetlink.h,v
retrieving revision 1.1.1.21
retrieving revision 1.1.1.21.2.1
diff -u -r1.1.1.21 -r1.1.1.21.2.1
--- a/include/linux/rtnetlink.h	18 Feb 2004 13:36:32 -0000	1.1.1.21
+++ b/include/linux/rtnetlink.h	16 Apr 2004 13:16:19 -0000	1.1.1.21.2.1
@@ -200,10 +200,11 @@
 	RTA_MULTIPATH,
 	RTA_PROTOINFO,
 	RTA_FLOW,
-	RTA_CACHEINFO
+	RTA_CACHEINFO,
+	RTA_SESSION,
 };
 
-#define RTA_MAX RTA_CACHEINFO
+#define RTA_MAX RTA_SESSION
 
 #define RTM_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct rtmsg))))
 #define RTM_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct rtmsg))
@@ -282,10 +283,39 @@
 #define RTAX_ADVMSS RTAX_ADVMSS
 	RTAX_REORDERING,
 #define RTAX_REORDERING RTAX_REORDERING
-};
-
-#define RTAX_MAX RTAX_REORDERING
+	RTAX_HOPLIMIT,
+#define RTAX_HOPLIMIT RTAX_HOPLIMIT
+	RTAX_INITCWND,
+#define RTAX_INITCWND RTAX_INITCWND
+	RTAX_FEATURES,
+#define RTAX_FEATURES RTAX_FEATURES
+};
+
+#define RTAX_MAX RTAX_FEATURES
+
+#define RTAX_FEATURE_ECN	0x00000001
+#define RTAX_FEATURE_SACK	0x00000002
+#define RTAX_FEATURE_TIMESTAMP	0x00000004
+
+struct rta_session
+{
+	__u8	proto;
+
+	union {
+		struct {
+			__u16	sport;
+			__u16	dport;
+		} ports;
+
+		struct {
+			__u8	type;
+			__u8	code;
+			__u16	ident;
+		} icmpt;
 
+		__u32		spi;
+	} u;
+};
 
 
 /*********************************************************
@@ -317,6 +347,7 @@
 /* ifa_flags */
 
 #define IFA_F_SECONDARY		0x01
+#define IFA_F_TEMPORARY		IFA_F_SECONDARY
 
 #define IFA_F_DEPRECATED	0x20
 #define IFA_F_TENTATIVE		0x40
@@ -575,7 +606,7 @@
 extern struct rtnetlink_link * rtnetlink_links[NPROTO];
 extern int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb);
 extern int rtnetlink_send(struct sk_buff *skb, u32 pid, u32 group, int echo);
-extern int rtnetlink_put_metrics(struct sk_buff *skb, unsigned *metrics);
+extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics);
 
 extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data);
 
Index: include/linux/skbuff.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/linux/skbuff.h,v
retrieving revision 1.1.1.26
retrieving revision 1.1.1.26.2.1
diff -u -r1.1.1.26 -r1.1.1.26.2.1
--- a/include/linux/skbuff.h	14 Apr 2004 13:05:40 -0000	1.1.1.26
+++ b/include/linux/skbuff.h	16 Apr 2004 13:16:19 -0000	1.1.1.26.2.1
@@ -148,6 +148,7 @@
 		struct icmphdr	*icmph;
 		struct igmphdr	*igmph;
 		struct iphdr	*ipiph;
+		struct ipv6hdr	*ipv6h;
 		struct spxhdr	*spxh;
 		unsigned char	*raw;
 	} h;
@@ -169,7 +170,8 @@
 	  	unsigned char 	*raw;
 	} mac;
 
-	struct  dst_entry *dst;
+	struct  dst_entry	*dst;
+	struct	sec_path	*sp;
 
 	/* 
 	 * This is the control buffer. It is free to use for every
@@ -182,7 +184,7 @@
 	unsigned int 	len;			/* Length of actual data			*/
  	unsigned int 	data_len;
 	unsigned int	csum;			/* Checksum 					*/
-	unsigned char 	__unused,		/* Dead field, may be reused			*/
+	unsigned char 	local_df,
 			cloned, 		/* head may be cloned (check refcnt to be sure). */
   			pkt_type,		/* Packet class					*/
   			ip_summed;		/* Driver fed us an IP checksum			*/
@@ -758,6 +760,24 @@
 	return skb->len - skb->data_len;
 }
 
+static inline int skb_pagelen(const struct sk_buff *skb)	/* skb_headlen() + sizes of all frags[] */
+{
+	int i, len = 0;
+
+	for (i = (int)skb_shinfo(skb)->nr_frags - 1; i >= 0; i--)	/* last frag down to frag 0 */
+		len += skb_shinfo(skb)->frags[i].size;
+	return len + skb_headlen(skb);	/* only frags[] is summed; frag_list is not consulted */
+}
+
+static inline void skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size)
+{
+	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];	/* slot i of the frag array */
+	frag->page = page;
+	frag->page_offset = off;
+	frag->size = size;
+	skb_shinfo(skb)->nr_frags = i+1;	/* count is set to i+1 unconditionally */
+}
+
 #define SKB_PAGE_ASSERT(skb) do { if (skb_shinfo(skb)->nr_frags) out_of_line_bug(); } while (0)
 #define SKB_FRAG_ASSERT(skb) do { if (skb_shinfo(skb)->frag_list) out_of_line_bug(); } while (0)
 #define SKB_LINEAR_ASSERT(skb) do { if (skb_is_nonlinear(skb)) out_of_line_bug(); } while (0)
Index: include/linux/sysctl.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/linux/sysctl.h,v
retrieving revision 1.1.1.26
retrieving revision 1.1.1.26.2.1
diff -u -r1.1.1.26 -r1.1.1.26.2.1
--- a/include/linux/sysctl.h	14 Apr 2004 13:05:40 -0000	1.1.1.26
+++ b/include/linux/sysctl.h	16 Apr 2004 13:16:19 -0000	1.1.1.26.2.1
@@ -361,6 +361,8 @@
 	NET_IPV4_CONF_TAG=12,
 	NET_IPV4_CONF_ARPFILTER=13,
 	NET_IPV4_CONF_MEDIUM_ID=14,
+	NET_IPV4_CONF_NOXFRM=15,
+	NET_IPV4_CONF_NOPOLICY=16,
 	NET_IPV4_CONF_FORCE_IGMP_VERSION=17,
 	NET_IPV4_CONF_ARP_ANNOUNCE=18,
 	NET_IPV4_CONF_ARP_IGNORE=19,
@@ -417,7 +419,12 @@
 	NET_IPV6_DAD_TRANSMITS=7,
 	NET_IPV6_RTR_SOLICITS=8,
 	NET_IPV6_RTR_SOLICIT_INTERVAL=9,
-	NET_IPV6_RTR_SOLICIT_DELAY=10
+	NET_IPV6_RTR_SOLICIT_DELAY=10,
+	NET_IPV6_USE_TEMPADDR=11,
+	NET_IPV6_TEMP_VALID_LFT=12,
+	NET_IPV6_TEMP_PREFERED_LFT=13,
+	NET_IPV6_REGEN_MAX_RETRY=14,
+	NET_IPV6_MAX_DESYNC_FACTOR=15
 };
 
 /* /proc/sys/net/ipv6/icmp */
Index: include/linux/timer.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/linux/timer.h,v
retrieving revision 1.1.1.19
retrieving revision 1.1.1.19.2.1
diff -u -r1.1.1.19 -r1.1.1.19.2.1
--- a/include/linux/timer.h	22 Nov 2001 19:46:19 -0000	1.1.1.19
+++ b/include/linux/timer.h	16 Apr 2004 13:16:19 -0000	1.1.1.19.2.1
@@ -3,6 +3,7 @@
 
 #include <linux/config.h>
 #include <linux/list.h>
+#include <linux/stddef.h>
 
 /*
  * In Linux 2.4, static timers have been removed from the kernel.
Index: include/linux/udp.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/linux/udp.h,v
retrieving revision 1.1.1.14
retrieving revision 1.1.1.14.2.1
diff -u -r1.1.1.14 -r1.1.1.14.2.1
--- a/include/linux/udp.h	7 Sep 1997 21:00:24 -0000	1.1.1.14
+++ b/include/linux/udp.h	16 Apr 2004 13:16:19 -0000	1.1.1.14.2.1
@@ -17,6 +17,7 @@
 #ifndef _LINUX_UDP_H
 #define _LINUX_UDP_H
 
+#include <linux/types.h>
 
 struct udphdr {
 	__u16	source;
@@ -25,5 +26,11 @@
 	__u16	check;
 };
 
+/* UDP socket options */
+#define UDP_CORK	1	/* Never send partially complete segments */
+#define UDP_ENCAP	100	/* Set the socket to accept encapsulated packets */
+
+/* UDP encapsulation types */
+#define UDP_ENCAP_ESPINUDP	2 /* draft-ietf-ipsec-udp-encaps-06 */
 
 #endif	/* _LINUX_UDP_H */
Index: include/linux/xfrm.h
===================================================================
RCS file: include/linux/xfrm.h
diff -N include/linux/xfrm.h
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ b/include/linux/xfrm.h	16 Apr 2004 13:16:19 -0000	1.6.18.1
@@ -0,0 +1,233 @@
+#ifndef _LINUX_XFRM_H
+#define _LINUX_XFRM_H
+
+#include <linux/types.h>
+
+/* None of the structures in this file may change size, as they are
+ * passed into the kernel from userspace via netlink sockets.
+ */
+
+/* Structure to encapsulate addresses. I do not want to use
+ * "standard" structure. My apologies.
+ */
+typedef union
+{
+	__u32		a4;
+	__u32		a6[4];
+} xfrm_address_t;
+
+/* Ident of a specific xfrm_state. It is used on input to lookup
+ * the state by (spi,daddr,ah/esp) or to store information about
+ * spi, protocol and tunnel address on output.
+ */
+struct xfrm_id
+{
+	xfrm_address_t	daddr;
+	__u32		spi;
+	__u8		proto;
+};
+
+/* Selector, used as selector both on policy rules (SPD) and SAs. */
+
+struct xfrm_selector
+{
+	xfrm_address_t	daddr;
+	xfrm_address_t	saddr;
+	__u16	dport;
+	__u16	dport_mask;
+	__u16	sport;
+	__u16	sport_mask;
+	__u16	family;
+	__u8	prefixlen_d;
+	__u8	prefixlen_s;
+	__u8	proto;
+	int	ifindex;
+	uid_t	user;
+};
+
+#define XFRM_INF (~(__u64)0)
+
+struct xfrm_lifetime_cfg
+{
+	__u64	soft_byte_limit;
+	__u64	hard_byte_limit;
+	__u64	soft_packet_limit;
+	__u64	hard_packet_limit;
+	__u64	soft_add_expires_seconds;
+	__u64	hard_add_expires_seconds;
+	__u64	soft_use_expires_seconds;
+	__u64	hard_use_expires_seconds;
+};
+
+struct xfrm_lifetime_cur
+{
+	__u64	bytes;
+	__u64	packets;
+	__u64	add_time;
+	__u64	use_time;
+};
+
+struct xfrm_replay_state
+{
+	__u32	oseq;
+	__u32	seq;
+	__u32	bitmap;
+};
+
+struct xfrm_algo {
+	char	alg_name[64];
+	int	alg_key_len;    /* in bits */
+	char	alg_key[0];
+};
+
+struct xfrm_stats {
+	__u32	replay_window;
+	__u32	replay;
+	__u32	integrity_failed;
+};
+
+enum
+{
+	XFRM_POLICY_IN	= 0,
+	XFRM_POLICY_OUT	= 1,
+	XFRM_POLICY_FWD	= 2,
+	XFRM_POLICY_MAX	= 3
+};
+
+enum
+{
+	XFRM_SHARE_ANY,		/* No limitations */
+	XFRM_SHARE_SESSION,	/* For this session only */
+	XFRM_SHARE_USER,	/* For this user only */
+	XFRM_SHARE_UNIQUE	/* Use once */
+};
+
+/* Netlink configuration messages.  */
+#define XFRM_MSG_BASE		0x10
+
+#define XFRM_MSG_NEWSA		(XFRM_MSG_BASE + 0)
+#define XFRM_MSG_DELSA		(XFRM_MSG_BASE + 1)
+#define XFRM_MSG_GETSA		(XFRM_MSG_BASE + 2)
+
+#define XFRM_MSG_NEWPOLICY	(XFRM_MSG_BASE + 3)
+#define XFRM_MSG_DELPOLICY	(XFRM_MSG_BASE + 4)
+#define XFRM_MSG_GETPOLICY	(XFRM_MSG_BASE + 5)
+
+#define XFRM_MSG_ALLOCSPI	(XFRM_MSG_BASE + 6)
+#define XFRM_MSG_ACQUIRE	(XFRM_MSG_BASE + 7)
+#define XFRM_MSG_EXPIRE		(XFRM_MSG_BASE + 8)
+
+#define XFRM_MSG_UPDPOLICY	(XFRM_MSG_BASE + 9)
+#define XFRM_MSG_UPDSA		(XFRM_MSG_BASE + 10)
+
+#define XFRM_MSG_POLEXPIRE	(XFRM_MSG_BASE + 11)
+
+#define XFRM_MSG_MAX		(XFRM_MSG_POLEXPIRE+1)
+
+struct xfrm_user_tmpl {
+	struct xfrm_id		id;
+	__u16			family;
+	xfrm_address_t		saddr;
+	__u32			reqid;
+	__u8			mode;
+	__u8			share;
+	__u8			optional;
+	__u32			aalgos;
+	__u32			ealgos;
+	__u32			calgos;
+};
+
+struct xfrm_encap_tmpl {
+	__u16		encap_type;
+	__u16		encap_sport;
+	__u16		encap_dport;
+	xfrm_address_t	encap_oa;
+};
+
+/* Netlink message attributes.  */
+enum xfrm_attr_type_t {
+	XFRMA_UNSPEC,
+	XFRMA_ALG_AUTH,		/* struct xfrm_algo */
+	XFRMA_ALG_CRYPT,	/* struct xfrm_algo */
+	XFRMA_ALG_COMP,		/* struct xfrm_algo */
+	XFRMA_ENCAP,		/* struct xfrm_algo + struct xfrm_encap_tmpl */
+	XFRMA_TMPL,		/* 1 or more struct xfrm_user_tmpl */
+
+#define XFRMA_MAX XFRMA_TMPL
+};
+
+struct xfrm_usersa_info {
+	struct xfrm_selector		sel;
+	struct xfrm_id			id;
+	xfrm_address_t			saddr;
+	struct xfrm_lifetime_cfg	lft;
+	struct xfrm_lifetime_cur	curlft;
+	struct xfrm_stats		stats;
+	__u32				seq;
+	__u32				reqid;
+	__u16				family;
+	__u8				mode; /* 0=transport,1=tunnel */
+	__u8				replay_window;
+	__u8				flags;
+#define XFRM_STATE_NOECN	1
+};
+
+struct xfrm_usersa_id {
+	xfrm_address_t			daddr;
+	__u32				spi;
+	__u16				family;
+	__u8				proto;
+};
+
+struct xfrm_userspi_info {
+	struct xfrm_usersa_info		info;
+	__u32				min;
+	__u32				max;
+};
+
+struct xfrm_userpolicy_info {
+	struct xfrm_selector		sel;
+	struct xfrm_lifetime_cfg	lft;
+	struct xfrm_lifetime_cur	curlft;
+	__u32				priority;
+	__u32				index;
+	__u8				dir;
+	__u8				action;
+#define XFRM_POLICY_ALLOW	0
+#define XFRM_POLICY_BLOCK	1
+	__u8				flags;
+#define XFRM_POLICY_LOCALOK	1	/* Allow user to override global policy */
+	__u8				share;
+};
+
+struct xfrm_userpolicy_id {
+	struct xfrm_selector		sel;
+	__u32				index;
+	__u8				dir;
+};
+
+struct xfrm_user_acquire {
+	struct xfrm_id			id;
+	xfrm_address_t			saddr;
+	struct xfrm_selector		sel;
+	struct xfrm_userpolicy_info	policy;
+	__u32				aalgos;
+	__u32				ealgos;
+	__u32				calgos;
+	__u32				seq;
+};
+
+struct xfrm_user_expire {
+	struct xfrm_usersa_info		state;
+	__u8				hard;
+};
+
+struct xfrm_user_polexpire {
+	struct xfrm_userpolicy_info	pol;
+	__u8				hard;
+};
+
+#define XFRMGRP_ACQUIRE		1
+#define XFRMGRP_EXPIRE		2
+
+#endif /* _LINUX_XFRM_H */
Index: include/net/addrconf.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/net/addrconf.h,v
retrieving revision 1.1.1.17
retrieving revision 1.1.1.17.2.1
diff -u -r1.1.1.17 -r1.1.1.17.2.1
--- a/include/net/addrconf.h	25 Aug 2003 11:44:44 -0000	1.1.1.17
+++ b/include/net/addrconf.h	16 Apr 2004 13:16:19 -0000	1.1.1.17.2.1
@@ -6,6 +6,13 @@
 #define MAX_RTR_SOLICITATIONS		3
 #define RTR_SOLICITATION_INTERVAL	(4*HZ)
 
+#define MIN_VALID_LIFETIME		(2*3600)	/* 2 hours */
+
+#define TEMP_VALID_LIFETIME		(7*86400)
+#define TEMP_PREFERRED_LIFETIME		(86400)
+#define REGEN_MAX_RETRY			(5)
+#define MAX_DESYNC_FACTOR		(600)
+
 #define ADDR_CHECK_FREQUENCY		(120*HZ)
 
 struct prefix_info {
Index: include/net/ah.h
===================================================================
RCS file: include/net/ah.h
diff -N include/net/ah.h
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ b/include/net/ah.h	16 Apr 2004 13:16:19 -0000	1.3.2.1
@@ -0,0 +1,35 @@
+#ifndef _NET_AH_H
+#define _NET_AH_H
+
+#include <net/xfrm.h>
+
+/* This is the maximum truncated ICV length that we know of. */
+#define MAX_AH_AUTH_LEN	12
+
+struct ah_data
+{
+	u8			*key;
+	int			key_len;
+	u8			*work_icv;
+	int			icv_full_len;
+	int			icv_trunc_len;
+
+	void			(*icv)(struct ah_data*,
+	                               struct sk_buff *skb, u8 *icv);
+
+	struct crypto_tfm	*tfm;
+};
+
+static inline void
+ah_hmac_digest(struct ah_data *ahp, struct sk_buff *skb, u8 *auth_data)
+{
+	struct crypto_tfm *tfm = ahp->tfm;
+
+	memset(auth_data, 0, ahp->icv_trunc_len);	/* zero first; presumably auth_data lies inside skb -- confirm */
+	crypto_hmac_init(tfm, ahp->key, &ahp->key_len);
+	skb_icv_walk(skb, tfm, 0, skb->len, crypto_hmac_update);	/* presumably hashes skb bytes [0, skb->len) -- confirm */
+	crypto_hmac_final(tfm, ahp->key, &ahp->key_len, ahp->work_icv);
+	memcpy(auth_data, ahp->work_icv, ahp->icv_trunc_len);	/* only icv_trunc_len bytes of work_icv are copied out */
+}
+
+#endif
Index: include/net/dn_fib.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/net/dn_fib.h,v
retrieving revision 1.1.1.15
retrieving revision 1.1.1.15.2.1
diff -u -r1.1.1.15 -r1.1.1.15.2.1
--- a/include/net/dn_fib.h	21 Dec 2001 17:42:04 -0000	1.1.1.15
+++ b/include/net/dn_fib.h	16 Apr 2004 13:16:19 -0000	1.1.1.15.2.1
@@ -7,6 +7,9 @@
 
 #include <linux/rtnetlink.h>
 
+/* WARNING: The ordering of these elements must match ordering
+ *          of RTA_* rtnetlink attribute numbers.
+ */
 struct dn_kern_rta
 {
         void            *rta_dst;
@@ -19,8 +22,9 @@
         struct rtattr   *rta_mx;
         struct rtattr   *rta_mp;
         unsigned char   *rta_protoinfo;
-        unsigned char   *rta_flow;
+        u32             *rta_flow;
         struct rta_cacheinfo *rta_ci;
+	struct rta_session *rta_sess;
 };
 
 struct dn_fib_key {
Index: include/net/dn_route.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/net/dn_route.h,v
retrieving revision 1.1.1.14
retrieving revision 1.1.1.14.2.1
diff -u -r1.1.1.14 -r1.1.1.14.2.1
--- a/include/net/dn_route.h	11 Dec 2000 21:33:56 -0000	1.1.1.14
+++ b/include/net/dn_route.h	16 Apr 2004 13:16:19 -0000	1.1.1.14.2.1
@@ -122,7 +122,7 @@
 	if ((dst = sk->dst_cache) && !dst->obsolete) {
 try_again:
 		skb->dst = dst_clone(dst);
-		dst->output(skb);
+		dst_output(skb);
 		return;
 	}
 
Index: include/net/dst.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/net/dst.h,v
retrieving revision 1.1.1.18
retrieving revision 1.1.1.18.2.1
diff -u -r1.1.1.18 -r1.1.1.18.2.1
--- a/include/net/dst.h	25 Aug 2003 11:44:44 -0000	1.1.1.18
+++ b/include/net/dst.h	16 Apr 2004 13:16:19 -0000	1.1.1.18.2.1
@@ -9,6 +9,8 @@
 #define _NET_DST_H
 
 #include <linux/config.h>
+#include <linux/rtnetlink.h>
+#include <linux/netdevice.h>
 #include <net/neighbour.h>
 
 /*
@@ -22,6 +24,13 @@
 #define DST_GC_INC	(HZ/2)
 #define DST_GC_MAX	(120*HZ)
 
+/* Each dst_entry has a reference count and sits in some parent list(s).
+ * When it is removed from its parent list, it is "freed" (dst_free).
+ * After this it enters the dead state (dst->obsolete > 0) and, if its refcnt
+ * is zero, it can be destroyed immediately; otherwise it is added
+ * to the gc list and the garbage collector periodically checks the refcnt.
+ */
+
 struct sk_buff;
 
 struct dst_entry
@@ -29,22 +38,22 @@
 	struct dst_entry        *next;
 	atomic_t		__refcnt;	/* client references	*/
 	int			__use;
+	struct dst_entry	*child;
 	struct net_device       *dev;
 	int			obsolete;
 	int			flags;
 #define DST_HOST		1
+#define DST_NOXFRM		2
+#define DST_NOPOLICY		4
+#define DST_NOHASH		8
 	unsigned long		lastuse;
 	unsigned long		expires;
 
-	unsigned		mxlock;
-	unsigned		pmtu;
-	unsigned		window;
-	unsigned		rtt;
-	unsigned		rttvar;
-	unsigned		ssthresh;
-	unsigned		cwnd;
-	unsigned		advmss;
-	unsigned		reordering;
+	unsigned short		header_len;	/* more space at head required */
+	unsigned short		trailer_len;	/* space to reserve at tail */
+
+	u32			metrics[RTAX_MAX];
+	struct dst_entry	*path;
 
 	unsigned long		rate_last;	/* rate limiting for ICMP */
 	unsigned long		rate_tokens;
@@ -53,6 +62,7 @@
 
 	struct neighbour	*neighbour;
 	struct hh_cache		*hh;
+	struct xfrm_state	*xfrm;
 
 	int			(*input)(struct sk_buff*);
 	int			(*output)(struct sk_buff*);
@@ -75,11 +85,11 @@
 
 	int			(*gc)(void);
 	struct dst_entry *	(*check)(struct dst_entry *, __u32 cookie);
-	struct dst_entry *	(*reroute)(struct dst_entry *,
-					   struct sk_buff *);
 	void			(*destroy)(struct dst_entry *);
 	struct dst_entry *	(*negative_advice)(struct dst_entry *);
 	void			(*link_failure)(struct sk_buff *);
+	void			(*update_pmtu)(struct dst_entry *dst, u32 mtu);
+	int			(*get_mss)(struct dst_entry *dst, u32 mtu);
 	int			entry_size;
 
 	atomic_t		entries;
@@ -88,6 +98,33 @@
 
 #ifdef __KERNEL__
 
+static inline u32
+dst_metric(struct dst_entry *dst, int metric)
+{
+	return dst->metrics[metric-1];
+}
+
+static inline u32
+dst_path_metric(struct dst_entry *dst, int metric)
+{
+	return dst->path->metrics[metric-1];
+}
+
+static inline u32
+dst_pmtu(struct dst_entry *dst)
+{
+	u32 mtu = dst_path_metric(dst, RTAX_MTU);
+	/* Yes, _exactly_. This is paranoia. */
+	barrier();
+	return mtu;
+}
+
+static inline int
+dst_metric_locked(struct dst_entry *dst, int metric)
+{
+	return dst_metric(dst, RTAX_LOCK) & (1<<metric);
+}
+
 static inline void dst_hold(struct dst_entry * dst)
 {
 	atomic_inc(&dst->__refcnt);
@@ -104,22 +141,40 @@
 static inline
 void dst_release(struct dst_entry * dst)
 {
-	if (dst)
+	if (dst) {
+		if (atomic_read(&dst->__refcnt) < 1) {
+			printk("BUG: dst underflow %d: %p\n",
+			       atomic_read(&dst->__refcnt),
+			       current_text_addr());
+		}
 		atomic_dec(&dst->__refcnt);
+	}
+}
+
+/* Children define the path of the packet through the
+ * Linux networking stack.  Thus, destinations are stackable.
+ */
+
+static inline struct dst_entry *dst_pop(struct dst_entry *dst)
+{
+	struct dst_entry *child = dst_clone(dst->child);
+
+	dst_release(dst);
+	return child;
 }
 
 extern void * dst_alloc(struct dst_ops * ops);
 extern void __dst_free(struct dst_entry * dst);
-extern void dst_destroy(struct dst_entry * dst);
+extern struct dst_entry *dst_destroy(struct dst_entry * dst);
 
-static inline
-void dst_free(struct dst_entry * dst)
+static inline void dst_free(struct dst_entry * dst)
 {
 	if (dst->obsolete > 1)
 		return;
 	if (!atomic_read(&dst->__refcnt)) {
-		dst_destroy(dst);
-		return;
+		dst = dst_destroy(dst);
+		if (!dst)
+			return;
 	}
 	__dst_free(dst);
 }
@@ -155,8 +210,50 @@
 		dst->expires = expires;
 }
 
+/* Output packet to network from transport.  */
+static inline int dst_output(struct sk_buff *skb)
+{
+	int err;
+
+	for (;;) {
+		err = skb->dst->output(skb);
+
+		if (likely(err == 0))
+			return err;
+		if (unlikely(err != NET_XMIT_BYPASS))
+			return err;
+	}
+}
+
+/* Input packet from network to transport.  */
+static inline int dst_input(struct sk_buff *skb)
+{
+	int err;
+
+	for (;;) {
+		err = skb->dst->input(skb);
+
+		if (likely(err == 0))
+			return err;
+		/* Oh, Jamal... Seems, I will not forgive you this mess. :-) */
+		if (unlikely(err != NET_XMIT_BYPASS))
+			return err;
+	}
+}
+
 extern void		dst_init(void);
 
+struct flowi;
+#ifndef CONFIG_XFRM
+static inline int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
+		       struct sock *sk, int flags)
+{
+	return 0;
+} 
+#else
+extern int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
+		       struct sock *sk, int flags);
+#endif
 #endif
 
 #endif /* _NET_DST_H */
Index: include/net/esp.h
===================================================================
RCS file: include/net/esp.h
diff -N include/net/esp.h
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ b/include/net/esp.h	16 Apr 2004 13:16:19 -0000	1.2.18.1
@@ -0,0 +1,54 @@
+#ifndef _NET_ESP_H
+#define _NET_ESP_H
+
+#include <net/xfrm.h>
+
+struct esp_data
+{
+	/* Confidentiality */
+	struct {
+		u8			*key;		/* Key */
+		int			key_len;	/* Key length */
+		u8			*ivec;		/* ivec buffer */
+		/* ivlen is the offset from enc_data at which the encrypted
+		 * data starts.  It is logically distinct from
+		 * crypto_tfm_alg_ivsize(tfm).  We assume that it is either
+		 * zero (no ivec), or >= crypto_tfm_alg_ivsize(tfm). */
+		int			ivlen;
+		int			padlen;		/* 0..255 */
+		struct crypto_tfm	*tfm;		/* crypto handle */
+	} conf;
+
+	/* Integrity. It is active when icv_full_len != 0 */
+	struct {
+		u8			*key;		/* Key */
+		int			key_len;	/* Length of the key */
+		u8			*work_icv;
+		int			icv_full_len;
+		int			icv_trunc_len;
+		void			(*icv)(struct esp_data*,
+		                               struct sk_buff *skb,
+		                               int offset, int len, u8 *icv);
+		struct crypto_tfm	*tfm;
+	} auth;
+};
+
+extern int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len);
+extern int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer);
+extern void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len);
+
+static inline void
+esp_hmac_digest(struct esp_data *esp, struct sk_buff *skb, int offset,
+                int len, u8 *auth_data)
+{
+	struct crypto_tfm *tfm = esp->auth.tfm;
+	char *icv = esp->auth.work_icv;
+
+	memset(auth_data, 0, esp->auth.icv_trunc_len);
+	crypto_hmac_init(tfm, esp->auth.key, &esp->auth.key_len);
+	skb_icv_walk(skb, tfm, offset, len, crypto_hmac_update);
+	crypto_hmac_final(tfm, esp->auth.key, &esp->auth.key_len, icv);
+	memcpy(auth_data, icv, esp->auth.icv_trunc_len);
+}
+
+#endif
Index: include/net/flow.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/net/flow.h,v
retrieving revision 1.1.1.16
retrieving revision 1.1.1.16.2.1
diff -u -r1.1.1.16 -r1.1.1.16.2.1
--- a/include/net/flow.h	23 Apr 1999 02:45:19 -0000	1.1.1.16
+++ b/include/net/flow.h	16 Apr 2004 13:16:19 -0000	1.1.1.16.2.1
@@ -1,24 +1,31 @@
 /*
  *
- *	Flow based forwarding rules (usage: firewalling, etc)
+ *	Generic internet FLOW.
  *
  */
 
 #ifndef _NET_FLOW_H
 #define _NET_FLOW_H
 
+#include <linux/in6.h>
+#include <asm/atomic.h>
+
 struct flowi {
-	int	proto;		/*	{TCP, UDP, ICMP}	*/
+	int	oif;
+	int	iif;
 
 	union {
 		struct {
 			__u32			daddr;
 			__u32			saddr;
+			__u32			fwmark;
+			__u8			tos;
+			__u8			scope;
 		} ip4_u;
 		
 		struct {
-			struct in6_addr *	daddr;
-			struct in6_addr *	saddr;
+			struct in6_addr		daddr;
+			struct in6_addr		saddr;
 			__u32			flowlabel;
 		} ip6_u;
 	} nl_u;
@@ -27,9 +34,12 @@
 #define fl6_flowlabel	nl_u.ip6_u.flowlabel
 #define fl4_dst		nl_u.ip4_u.daddr
 #define fl4_src		nl_u.ip4_u.saddr
+#define fl4_fwmark	nl_u.ip4_u.fwmark
+#define fl4_tos		nl_u.ip4_u.tos
+#define fl4_scope	nl_u.ip4_u.scope
 
-	int	oif;
-
+	__u8	proto;
+	__u8	flags;
 	union {
 		struct {
 			__u16	sport;
@@ -41,61 +51,27 @@
 			__u8	code;
 		} icmpt;
 
-		unsigned long	data;
+		__u32		spi;
 	} uli_u;
-};
-
-#define FLOWR_NODECISION	0	/* rule not appliable to flow	*/
-#define FLOWR_SELECT		1	/* flow must follow this rule	*/
-#define FLOWR_CLEAR		2	/* priority level clears flow	*/
-#define FLOWR_ERROR		3
-
-struct fl_acc_args {
-	int	type;
-
-
-#define FL_ARG_FORWARD	1
-#define FL_ARG_ORIGIN	2
-
-	union {
-		struct sk_buff		*skb;
-		struct {
-			struct sock	*sk;
-			struct flowi	*flow;
-		} fl_o;
-	} fl_u;
-};
-
-
-struct pkt_filter {
-	atomic_t		refcnt;
-	unsigned int		offset;
-	__u32			value;
-	__u32			mask;
-	struct pkt_filter	*next;
-};
-
-#define FLR_INPUT		1
-#define FLR_OUTPUT		2
-
-struct flow_filter {
-	int				type;
-	union {
-		struct pkt_filter	*filter;
-		struct sock		*sk;
-	} u;
-};
-
-struct flow_rule {
-	struct flow_rule_ops		*ops;
-	unsigned char			private[0];
-};
-
-struct flow_rule_ops {
-	int			(*accept)(struct rt6_info *rt,
-					  struct rt6_info *rule,
-					  struct fl_acc_args *args,
-					  struct rt6_info **nrt);
-};
+#define fl_ip_sport	uli_u.ports.sport
+#define fl_ip_dport	uli_u.ports.dport
+#define fl_icmp_type	uli_u.icmpt.type
+#define fl_icmp_code	uli_u.icmpt.code
+#define fl_ipsec_spi	uli_u.spi
+
+	u32 __pad;
+} __attribute__((__aligned__(BITS_PER_LONG/8)));
+
+#define FLOW_DIR_IN	0
+#define FLOW_DIR_OUT	1
+#define FLOW_DIR_FWD	2
+
+typedef void (*flow_resolve_t)(struct flowi *key, u16 family, u8 dir,
+			       void **objp, atomic_t **obj_refp);
+
+extern void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
+			       flow_resolve_t resolver);
+extern void flow_cache_flush(void);
+extern atomic_t flow_cache_genid;
 
 #endif
Index: include/net/if_inet6.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/net/if_inet6.h,v
retrieving revision 1.1.1.14
retrieving revision 1.1.1.14.2.1
diff -u -r1.1.1.14 -r1.1.1.14.2.1
--- a/include/net/if_inet6.h	28 Nov 2003 18:26:21 -0000	1.1.1.14
+++ b/include/net/if_inet6.h	16 Apr 2004 13:16:19 -0000	1.1.1.14.2.1
@@ -47,6 +47,12 @@
 	struct inet6_ifaddr	*lst_next;      /* next addr in addr_lst */
 	struct inet6_ifaddr	*if_next;       /* next addr in inet6_dev */
 
+#ifdef CONFIG_IPV6_PRIVACY
+	struct inet6_ifaddr	*tmp_next;	/* next addr in tempaddr_lst */
+	struct inet6_ifaddr	*ifpub;
+	int			regen_count;
+#endif
+
 	int			dead;
 };
 
@@ -151,6 +157,15 @@
 	__u32			if_flags;
 	int			dead;
 
+#ifdef CONFIG_IPV6_PRIVACY
+	u8			rndid[8];
+	u8			entropy[8];
+	struct timer_list	regen_timer;
+	struct inet6_ifaddr	*tempaddr_list;
+	__u8			work_eui64[8];
+	__u8			work_digest[16];
+#endif
+
 	struct neigh_parms	*nd_parms;
 	struct inet6_dev	*next;
 	struct ipv6_devconf	cnf;
Index: include/net/inet_ecn.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/net/inet_ecn.h,v
retrieving revision 1.1.1.12
retrieving revision 1.1.1.12.2.1
diff -u -r1.1.1.12 -r1.1.1.12.2.1
--- a/include/net/inet_ecn.h	30 Oct 2001 23:08:12 -0000	1.1.1.12
+++ b/include/net/inet_ecn.h	16 Apr 2004 13:16:19 -0000	1.1.1.12.2.1
@@ -1,6 +1,8 @@
 #ifndef _INET_ECN_H_
 #define _INET_ECN_H_
 
+#include <linux/ip.h>
+
 static inline int INET_ECN_is_ce(__u8 dsfield)
 {
 	return (dsfield&3) == 3;
@@ -44,6 +46,11 @@
 	iph->tos |= 1;
 }
 
+static inline void IP_ECN_clear(struct iphdr *iph)
+{
+	iph->tos &= ~3;
+}
+
 struct ipv6hdr;
 
 static inline void IP6_ECN_set_ce(struct ipv6hdr *iph)
@@ -51,6 +58,11 @@
 	*(u32*)iph |= htonl(1<<20);
 }
 
+static inline void IP6_ECN_clear(struct ipv6hdr *iph)
+{
+	*(u32*)iph &= ~htonl(3<<20);
+}
+
 #define ip6_get_dsfield(iph) ((ntohs(*(u16*)(iph)) >> 4) & 0xFF)
 
 #endif
Index: include/net/ip.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/net/ip.h,v
retrieving revision 1.1.1.17
retrieving revision 1.1.1.17.2.1
diff -u -r1.1.1.17 -r1.1.1.17.2.1
--- a/include/net/ip.h	28 Nov 2003 18:26:21 -0000	1.1.1.17
+++ b/include/net/ip.h	16 Apr 2004 13:16:19 -0000	1.1.1.17.2.1
@@ -29,6 +29,7 @@
 #include <linux/netdevice.h>
 #include <linux/inetdevice.h>
 #include <linux/in_route.h>
+#include <linux/sysctl.h>
 #include <net/route.h>
 #include <net/arp.h>
 
@@ -46,6 +47,7 @@
 #define IPSKB_MASQUERADED	1
 #define IPSKB_TRANSLATED	2
 #define IPSKB_FORWARDED		4
+#define IPSKB_XFRM_TUNNEL_SIZE	8
 };
 
 struct ipcm_cookie
@@ -98,16 +100,19 @@
 extern void		ip_send_check(struct iphdr *ip);
 extern int		ip_queue_xmit(struct sk_buff *skb, int ipfragok);
 extern void		ip_init(void);
-extern int		ip_build_xmit(struct sock *sk,
-				      int getfrag (const void *,
-						   char *,
-						   unsigned int,
-						   unsigned int),
-				      const void *frag,
-				      unsigned length,
-				      struct ipcm_cookie *ipc,
-				      struct rtable *rt,
-				      int flags);
+extern int		ip_append_data(struct sock *sk,
+				       int getfrag(void *from, char *to, int offset, int len,
+						   int odd, struct sk_buff *skb),
+				void *from, int len, int protolen,
+				struct ipcm_cookie *ipc,
+				struct rtable *rt,
+				unsigned int flags);
+extern int		ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb);
+extern ssize_t		ip_append_page(struct sock *sk, struct page *page,
+				int offset, size_t size, int flags);
+extern int		ip_push_pending_frames(struct sock *sk);
+extern void		ip_flush_pending_frames(struct sock *sk);
+
 
 /*
  *	Map a multicast IP onto multicast MAC for type Token Ring.
@@ -127,8 +132,7 @@
 }
 
 struct ip_reply_arg {
-	struct iovec iov[2];   
-	int          n_iov;    /* redundant */
+	struct iovec iov[1];   
 	u32 	     csum; 
 	int	     csumoffset; /* u16 offset of csum in iov[0].iov_base */
 				 /* -1 if not needed */ 
@@ -160,14 +164,6 @@
 extern int sysctl_ip_default_ttl;
 
 #ifdef CONFIG_INET
-static inline int ip_send(struct sk_buff *skb)
-{
-	if (skb->len > skb->dst->pmtu)
-		return ip_fragment(skb, ip_finish_output);
-	else
-		return ip_finish_output(skb);
-}
-
 /* The function in 2.2 was invalid, producing wrong result for
  * check=0xFEFF. It was noticed by Arthur Skawina _year_ ago. --ANK(000625) */
 static inline
@@ -184,7 +180,7 @@
 {
 	return (sk->protinfo.af_inet.pmtudisc == IP_PMTUDISC_DO ||
 		(sk->protinfo.af_inet.pmtudisc == IP_PMTUDISC_WANT &&
-		 !(dst->mxlock&(1<<RTAX_MTU))));
+		 !(dst_metric(dst, RTAX_LOCK)&(1<<RTAX_MTU))));
 }
 
 extern void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst);
@@ -267,4 +263,15 @@
 extern void	ip_local_error(struct sock *sk, int err, u32 daddr, u16 dport,
 			       u32 info);
 
+/* sysctl helpers - any sysctl which holds a value that ends up being
+ * fed into the routing cache should use these handlers.
+ */
+int ipv4_doint_and_flush(ctl_table *ctl, int write,
+			 struct file* filp, void *buffer,
+			 size_t *lenp);
+int ipv4_doint_and_flush_strategy(ctl_table *table, int *name, int nlen,
+				  void *oldval, size_t *oldlenp,
+				  void *newval, size_t newlen, 
+				  void **context);
+
 #endif	/* _IP_H */
Index: include/net/ip6_fib.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/net/ip6_fib.h,v
retrieving revision 1.1.1.16
retrieving revision 1.1.1.16.2.1
diff -u -r1.1.1.16 -r1.1.1.16.2.1
--- a/include/net/ip6_fib.h	25 Aug 2003 11:44:44 -0000	1.1.1.16
+++ b/include/net/ip6_fib.h	16 Apr 2004 13:16:19 -0000	1.1.1.16.2.1
@@ -67,17 +67,8 @@
 	
 	u32				rt6i_flags;
 	u32				rt6i_metric;
-	u8				rt6i_hoplimit;
 	atomic_t			rt6i_ref;
 
-	union {
-		struct flow_rule	*rt6iu_flowr;
-		struct flow_filter	*rt6iu_filter;
-	} flow_u;
-
-#define rt6i_flowr			flow_u.rt6iu_flowr
-#define rt6i_filter			flow_u.rt6iu_filter
-
 	struct rt6key			rt6i_dst;
 	struct rt6key			rt6i_src;
 
@@ -171,10 +162,12 @@
 
 extern int			fib6_add(struct fib6_node *root,
 					 struct rt6_info *rt,
-					 struct nlmsghdr *nlh);
+					 struct nlmsghdr *nlh,
+					 void *rtattr);
 
 extern int			fib6_del(struct rt6_info *rt,
-					 struct nlmsghdr *nlh);
+					 struct nlmsghdr *nlh,
+					 void *rtattr);
 
 extern void			inet6_rt_notify(int event, struct rt6_info *rt,
 						struct nlmsghdr *nlh);
Index: include/net/ip6_fw.h
===================================================================
RCS file: include/net/ip6_fw.h
diff -N include/net/ip6_fw.h
--- a/include/net/ip6_fw.h	27 Mar 1997 22:40:11 -0000	1.1.1.3
+++ /dev/null	1 Jan 1970 00:00:00 -0000
@@ -1,54 +0,0 @@
-#ifndef __NET_IP6_FW_H
-#define __NET_IP6_FW_H
-
-#define IP6_FW_LISTHEAD		0x1000
-#define IP6_FW_ACCEPT		0x0001
-#define IP6_FW_REJECT		0x0002
-
-#define IP6_FW_DEBUG	2
-
-#define IP6_FW_MSG_ADD		1
-#define IP6_FW_MSG_DEL		2
-#define IP6_FW_MSG_REPORT	3
-
-
-/*
- *	Fast "hack" user interface
- */
-struct ip6_fw_msg {
-	struct in6_addr		dst;
-	struct in6_addr		src;
-	int			dst_len;
-	int			src_len;
-	int			action;
-	int			policy;
-	int			proto;
-	union {
-		struct {
-			__u16	sport;
-			__u16	dport;
-		} transp;
-
-		unsigned long	data;
-
-		int		icmp_type;
-	} u;
-
-	int			msg_len;
-};
-
-#ifdef __KERNEL__
-
-#include <net/flow.h>
-
-struct ip6_fw_rule {
-	struct flow_rule	flowr;
-	struct ip6_fw_rule	*next;
-	struct ip6_fw_rule	*prev;
-	struct flowi		info;
-	unsigned long		policy;
-};
-
-#endif
-
-#endif
Index: include/net/ip6_route.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/net/ip6_route.h,v
retrieving revision 1.1.1.17
retrieving revision 1.1.1.17.2.1
diff -u -r1.1.1.17 -r1.1.1.17.2.1
--- a/include/net/ip6_route.h	13 Jun 2003 14:51:39 -0000	1.1.1.17
+++ b/include/net/ip6_route.h	16 Apr 2004 13:16:19 -0000	1.1.1.17.2.1
@@ -39,12 +39,15 @@
 extern int			ipv6_route_ioctl(unsigned int cmd, void *arg);
 
 extern int			ip6_route_add(struct in6_rtmsg *rtmsg,
-					      struct nlmsghdr *);
+					      struct nlmsghdr *,
+					      void *rtattr);
 extern int			ip6_del_rt(struct rt6_info *,
-					   struct nlmsghdr *);
+					   struct nlmsghdr *,
+					   void *rtattr);
 
 extern int			ip6_rt_addr_add(struct in6_addr *addr,
-						struct net_device *dev);
+						struct net_device *dev,
+						int anycast);
 
 extern int			ip6_rt_addr_del(struct in6_addr *addr,
 						struct net_device *dev);
@@ -60,6 +63,12 @@
 					    struct in6_addr *saddr,
 					    int oif, int flags);
 
+extern struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
+					 struct neighbour *neigh,
+					 int (*output)(struct sk_buff *));
+extern int ndisc_dst_gc(int *more);
+extern void fib6_force_start_gc(void);
+
 /*
  *	support functions for ND
  *
@@ -111,5 +120,12 @@
 	write_unlock(&sk->dst_lock);
 }
 
+static inline int ipv6_unicast_destination(struct sk_buff *skb)
+{
+	struct rt6_info *rt = (struct rt6_info *) skb->dst;
+
+	return rt->rt6i_flags & RTF_LOCAL;
+}
+
 #endif
 #endif
Index: include/net/ip6_tunnel.h
===================================================================
RCS file: include/net/ip6_tunnel.h
diff -N include/net/ip6_tunnel.h
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ b/include/net/ip6_tunnel.h	16 Apr 2004 13:16:19 -0000	1.2.18.1
@@ -0,0 +1,44 @@
+/*
+ * $Id$
+ */
+
+#ifndef _NET_IP6_TUNNEL_H
+#define _NET_IP6_TUNNEL_H
+
+#include <linux/ipv6.h>
+#include <linux/netdevice.h>
+#include <linux/ip6_tunnel.h>
+
+/* capable of sending packets */
+#define IP6_TNL_F_CAP_XMIT 0x10000
+/* capable of receiving packets */
+#define IP6_TNL_F_CAP_RCV 0x20000
+
+#define IP6_TNL_MAX 128
+
+/* IPv6 tunnel */
+
+struct ip6_tnl {
+	struct ip6_tnl *next;	/* next tunnel in list */
+	struct net_device *dev;	/* virtual device associated with tunnel */
+	struct net_device_stats stat;	/* statistics for tunnel device */
+	int recursion;		/* depth of hard_start_xmit recursion */
+	struct ip6_tnl_parm parms;	/* tunnel configuration parameters */
+	struct flowi fl;	/* flowi template for xmit */
+};
+
+/* Tunnel encapsulation limit destination sub-option */
+
+struct ipv6_tlv_tnl_enc_lim {
+	__u8 type;		/* type-code for option         */
+	__u8 length;		/* option length                */
+	__u8 encap_limit;	/* tunnel encapsulation limit   */
+} __attribute__ ((packed));
+
+#ifdef __KERNEL__
+#ifdef CONFIG_IPV6_TUNNEL
+extern int __init ip6_tunnel_init(void);
+extern void ip6_tunnel_cleanup(void);
+#endif
+#endif
+#endif
Index: include/net/ip_fib.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/net/ip_fib.h,v
retrieving revision 1.1.1.16
retrieving revision 1.1.1.16.2.1
diff -u -r1.1.1.16 -r1.1.1.16.2.1
--- a/include/net/ip_fib.h	9 Feb 2001 19:34:13 -0000	1.1.1.16
+++ b/include/net/ip_fib.h	16 Apr 2004 13:16:19 -0000	1.1.1.16.2.1
@@ -17,7 +17,11 @@
 #define _NET_IP_FIB_H
 
 #include <linux/config.h>
+#include <net/flow.h>
 
+/* WARNING: The ordering of these elements must match ordering
+ *          of RTA_* rtnetlink attribute numbers.
+ */
 struct kern_rta
 {
 	void		*rta_dst;
@@ -30,8 +34,9 @@
 	struct rtattr	*rta_mx;
 	struct rtattr	*rta_mp;
 	unsigned char	*rta_protoinfo;
-	unsigned char	*rta_flow;
+	u32		*rta_flow;
 	struct rta_cacheinfo *rta_ci;
+	struct rta_session *rta_sess;
 };
 
 struct fib_nh
@@ -65,7 +70,7 @@
 	int			fib_protocol;
 	u32			fib_prefsrc;
 	u32			fib_priority;
-	unsigned		fib_metrics[RTAX_MAX];
+	u32			fib_metrics[RTAX_MAX];
 #define fib_mtu fib_metrics[RTAX_MTU-1]
 #define fib_window fib_metrics[RTAX_WINDOW-1]
 #define fib_rtt fib_metrics[RTAX_RTT-1]
@@ -117,7 +122,7 @@
 {
 	unsigned char	tb_id;
 	unsigned	tb_stamp;
-	int		(*tb_lookup)(struct fib_table *tb, const struct rt_key *key, struct fib_result *res);
+	int		(*tb_lookup)(struct fib_table *tb, const struct flowi *flp, struct fib_result *res);
 	int		(*tb_insert)(struct fib_table *table, struct rtmsg *r,
 				     struct kern_rta *rta, struct nlmsghdr *n,
 				     struct netlink_skb_parms *req);
@@ -130,7 +135,7 @@
 	int		(*tb_get_info)(struct fib_table *table, char *buf,
 				       int first, int count);
 	void		(*tb_select_default)(struct fib_table *table,
-					     const struct rt_key *key, struct fib_result *res);
+					     const struct flowi *flp, struct fib_result *res);
 
 	unsigned char	tb_data[0];
 };
@@ -152,18 +157,18 @@
 	return fib_get_table(id);
 }
 
-static inline int fib_lookup(const struct rt_key *key, struct fib_result *res)
+static inline int fib_lookup(const struct flowi *flp, struct fib_result *res)
 {
-	if (local_table->tb_lookup(local_table, key, res) &&
-	    main_table->tb_lookup(main_table, key, res))
+	if (local_table->tb_lookup(local_table, flp, res) &&
+	    main_table->tb_lookup(main_table, flp, res))
 		return -ENETUNREACH;
 	return 0;
 }
 
-static inline void fib_select_default(const struct rt_key *key, struct fib_result *res)
+static inline void fib_select_default(const struct flowi *flp, struct fib_result *res)
 {
 	if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
-		main_table->tb_select_default(main_table, key, res);
+		main_table->tb_select_default(main_table, flp, res);
 }
 
 #else /* CONFIG_IP_MULTIPLE_TABLES */
@@ -171,7 +176,7 @@
 #define main_table (fib_tables[RT_TABLE_MAIN])
 
 extern struct fib_table * fib_tables[RT_TABLE_MAX+1];
-extern int fib_lookup(const struct rt_key *key, struct fib_result *res);
+extern int fib_lookup(const struct flowi *flp, struct fib_result *res);
 extern struct fib_table *__fib_new_table(int id);
 extern void fib_rule_put(struct fib_rule *r);
 
@@ -191,7 +196,7 @@
 	return fib_tables[id] ? : __fib_new_table(id);
 }
 
-extern void fib_select_default(const struct rt_key *key, struct fib_result *res);
+extern void fib_select_default(const struct flowi *flp, struct fib_result *res);
 
 #endif /* CONFIG_IP_MULTIPLE_TABLES */
 
@@ -204,13 +209,13 @@
 extern int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb);
 extern int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
 			       struct net_device *dev, u32 *spec_dst, u32 *itag);
-extern void fib_select_multipath(const struct rt_key *key, struct fib_result *res);
+extern void fib_select_multipath(const struct flowi *flp, struct fib_result *res);
 
 /* Exported by fib_semantics.c */
 extern int 		ip_fib_check_default(u32 gw, struct net_device *dev);
 extern void		fib_release_info(struct fib_info *);
 extern int		fib_semantic_match(int type, struct fib_info *,
-					   const struct rt_key *, struct fib_result*);
+					   const struct flowi *, struct fib_result*);
 extern struct fib_info	*fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
 					 const struct nlmsghdr *, int *err);
 extern int fib_nh_match(struct rtmsg *r, struct nlmsghdr *, struct kern_rta *rta, struct fib_info *fi);
Index: include/net/ip_vs.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/net/ip_vs.h,v
retrieving revision 1.1.1.7
retrieving revision 1.1.1.7.2.1
diff -u -r1.1.1.7 -r1.1.1.7.2.1
--- a/include/net/ip_vs.h	14 Apr 2004 13:05:40 -0000	1.1.1.7
+++ b/include/net/ip_vs.h	16 Apr 2004 13:16:19 -0000	1.1.1.7.2.1
@@ -279,6 +279,13 @@
 #define LeaveFunction(level)   do {} while (0)
 #endif
 
+#define IP_VS_XMIT(skb, rt)				\
+do {							\
+	skb->nfcache |= NFC_IPVS_PROPERTY;		\
+	NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, (skb), NULL,	\
+		(rt)->u.dst.dev, dst_output);		\
+} while (0)
+
 
 /*
  *      The port number of FTP service (in network order).
@@ -859,7 +866,16 @@
 		spin_lock(&dest->dst_lock);
 		if (!(rt = (struct rtable *)
 		      __ip_vs_dst_check(dest, rtos, 0))) {
-			if (ip_route_output(&rt, dest->addr, 0, rtos, 0)) {
+			struct flowi fl = {
+				.oif = 0,
+				.nl_u = {
+					.ip4_u = {
+						.daddr = dest->addr,
+						.saddr = 0,
+						.tos = rtos, } },
+			};
+
+			if (ip_route_output_key(&rt, &fl)) {
 				spin_unlock(&dest->dst_lock);
 				IP_VS_DBG_RL("ip_route_output error, "
 					     "dest: %u.%u.%u.%u\n",
@@ -873,7 +889,16 @@
 		}
 		spin_unlock(&dest->dst_lock);
 	} else {
-		if (ip_route_output(&rt, cp->daddr, 0, rtos, 0)) {
+		struct flowi fl = {
+			.oif = 0,
+			.nl_u = {
+				.ip4_u = {
+					.daddr = cp->daddr,
+					.saddr = 0,
+					.tos = rtos, } },
+		};
+
+		if (ip_route_output_key(&rt, &fl)) {
 			IP_VS_DBG_RL("ip_route_output error, dest: "
 				     "%u.%u.%u.%u\n", NIPQUAD(cp->daddr));
 			return NULL;
Index: include/net/ipcomp.h
===================================================================
RCS file: include/net/ipcomp.h
diff -N include/net/ipcomp.h
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ b/include/net/ipcomp.h	16 Apr 2004 13:16:19 -0000	1.2.18.1
@@ -0,0 +1,12 @@
+#ifndef _NET_IPCOMP_H
+#define _NET_IPCOMP_H
+
+#define IPCOMP_SCRATCH_SIZE     65400
+
+struct ipcomp_data {
+	u16 threshold;
+	u8 *scratch;
+	struct crypto_tfm *tfm;
+};
+
+#endif
Index: include/net/ipip.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/net/ipip.h,v
retrieving revision 1.1.1.15
retrieving revision 1.1.1.15.2.1
diff -u -r1.1.1.15 -r1.1.1.15.2.1
--- a/include/net/ipip.h	12 Apr 2001 19:11:39 -0000	1.1.1.15
+++ b/include/net/ipip.h	16 Apr 2004 13:16:19 -0000	1.1.1.15.2.1
@@ -34,7 +34,7 @@
 	ip_select_ident(iph, &rt->u.dst, NULL);				\
 	ip_send_check(iph);						\
 									\
-	err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev, do_ip_send); \
+	err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev, dst_output);\
 	if (err == NET_XMIT_SUCCESS || err == NET_XMIT_CN) {		\
 		stats->tx_bytes += pkt_len;				\
 		stats->tx_packets++;					\
Index: include/net/ipv6.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/net/ipv6.h,v
retrieving revision 1.1.1.20
retrieving revision 1.1.1.20.2.1
diff -u -r1.1.1.20 -r1.1.1.20.2.1
--- a/include/net/ipv6.h	14 Apr 2004 13:05:40 -0000	1.1.1.20
+++ b/include/net/ipv6.h	16 Apr 2004 13:16:19 -0000	1.1.1.20.2.1
@@ -22,6 +22,8 @@
 
 #define SIN6_LEN_RFC2133	24
 
+#define IPV6_MAXPLEN		65535
+
 /*
  *	NextHeader field of IPv6 header
  */
@@ -48,7 +50,7 @@
 /*
  *	Addr type
  *	
- *	type	-	unicast | multicast | anycast
+ *	type	-	unicast | multicast
  *	scope	-	local	| site	    | global
  *	v4	-	compat
  *	v4mapped
@@ -60,7 +62,6 @@
 
 #define IPV6_ADDR_UNICAST      	0x0001U	
 #define IPV6_ADDR_MULTICAST    	0x0002U	
-#define IPV6_ADDR_ANYCAST	0x0004U
 
 #define IPV6_ADDR_LOOPBACK	0x0010U
 #define IPV6_ADDR_LINKLOCAL	0x0020U
@@ -98,6 +99,8 @@
 	__u32		identification;
 };
 
+#define	IP6_MF	0x0001
+
 #ifdef __KERNEL__
 
 #include <net/sock.h>
@@ -199,12 +202,8 @@
 
 extern int			ip6_call_ra_chain(struct sk_buff *skb, int sel);
 
-extern int			ipv6_reassembly(struct sk_buff **skb, int);
-
 extern int			ipv6_parse_hopopts(struct sk_buff *skb, int);
 
-extern int			ipv6_parse_exthdrs(struct sk_buff **skb, int);
-
 extern struct ipv6_txoptions *  ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt);
 
 extern int ip6_frag_nqueues;
@@ -239,6 +238,23 @@
 	memcpy((void *) a1, (const void *) a2, sizeof(struct in6_addr));
 }
 
+static inline void ipv6_addr_prefix(struct in6_addr *pfx, 
+				    const struct in6_addr *addr,
+				    int plen)
+{
+	/* caller must guarantee 0 <= plen <= 128 */
+	int o = plen >> 3,
+	    b = plen & 0x7;
+
+	memcpy(pfx->s6_addr, addr, o);
+	if (b != 0) {
+		pfx->s6_addr[o] = addr->s6_addr[o] & (0xff00 >> b);
+		o++;
+	}
+	if (o < 16)
+		memset(pfx->s6_addr + o, 0, 16 - o);
+}
+
 #ifndef __HAVE_ARCH_ADDR_SET
 static inline void ipv6_addr_set(struct in6_addr *addr, 
 				     __u32 w1, __u32 w2,
@@ -291,6 +307,26 @@
 					       unsigned length,
 					       struct ipv6_txoptions *opt,
 					       int hlimit, int flags);
+extern int			ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr);
+
+extern int			ip6_append_data(struct sock *sk,
+						int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb),
+		    				void *from,
+						int length,
+						int transhdrlen,
+		      				int hlimit,
+						struct ipv6_txoptions *opt,
+						struct flowi *fl,
+						struct rt6_info *rt,
+						unsigned int flags);
+
+extern int			ip6_push_pending_frames(struct sock *sk);
+
+extern void			ip6_flush_pending_frames(struct sock *sk);
+
+extern int			ip6_dst_lookup(struct sock *sk,
+					       struct dst_entry **dst,
+					       struct flowi *fl);
 
 /*
  *	skb processing functions
Index: include/net/ndisc.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/net/ndisc.h,v
retrieving revision 1.1.1.15
retrieving revision 1.1.1.15.2.1
diff -u -r1.1.1.15 -r1.1.1.15.2.1
--- a/include/net/ndisc.h	28 Nov 2002 23:53:15 -0000	1.1.1.15
+++ b/include/net/ndisc.h	16 Apr 2004 13:16:19 -0000	1.1.1.15.2.1
@@ -56,20 +56,6 @@
 	__u8		nd_opt_len;
 } __attribute__((__packed__));
 
-struct ndisc_options {
-	struct nd_opt_hdr *nd_opt_array[7];
-	struct nd_opt_hdr *nd_opt_piend;
-};
-
-#define nd_opts_src_lladdr	nd_opt_array[ND_OPT_SOURCE_LL_ADDR]
-#define nd_opts_tgt_lladdr	nd_opt_array[ND_OPT_TARGET_LL_ADDR]
-#define nd_opts_pi		nd_opt_array[ND_OPT_PREFIX_INFO]
-#define nd_opts_pi_end		nd_opt_piend
-#define nd_opts_rh		nd_opt_array[ND_OPT_REDIRECT_HDR]
-#define nd_opts_mtu		nd_opt_array[ND_OPT_MTU]
-
-extern struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur, struct nd_opt_hdr *end);
-extern struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, struct ndisc_options *ndopts);
 
 extern int			ndisc_init(struct net_proto_family *ops);
 
Index: include/net/protocol.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/net/protocol.h,v
retrieving revision 1.1.1.16
retrieving revision 1.1.1.16.2.1
diff -u -r1.1.1.16 -r1.1.1.16.2.1
--- a/include/net/protocol.h	22 Nov 2001 19:47:11 -0000	1.1.1.16
+++ b/include/net/protocol.h	16 Apr 2004 13:16:19 -0000	1.1.1.16.2.1
@@ -30,7 +30,7 @@
 #include <linux/ipv6.h>
 #endif
 
-#define MAX_INET_PROTOS	32		/* Must be a power of 2		*/
+#define MAX_INET_PROTOS	256		/* Must be a power of 2		*/
 
 
 /* This is used to register protocols. */
@@ -38,29 +38,23 @@
 {
 	int			(*handler)(struct sk_buff *skb);
 	void			(*err_handler)(struct sk_buff *skb, u32 info);
-	struct inet_protocol	*next;
-	unsigned char		protocol;
-	unsigned char		copy:1;
-	void			*data;
-	const char		*name;
+	int			no_policy;
 };
 
 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
 struct inet6_protocol 
 {
-	int	(*handler)(struct sk_buff *skb);
+	int	(*handler)(struct sk_buff **skb, unsigned int *nhoffp);
 
 	void	(*err_handler)(struct sk_buff *skb,
 			       struct inet6_skb_parm *opt,
 			       int type, int code, int offset,
 			       __u32 info);
-	struct inet6_protocol *next;
-	unsigned char	protocol;
-	unsigned char	copy:1;
-	void		*data;
-	const char	*name;
+	unsigned int	flags;	/* INET6_PROTO_xxx */
 };
 
+#define INET6_PROTO_NOPOLICY	0x1
+#define INET6_PROTO_FINAL	0x2
 #endif
 
 /* This is used to register socket interfaces for IP protocols.  */
@@ -93,14 +87,14 @@
 extern struct list_head inetsw6[SOCK_MAX];
 #endif
 
-extern void	inet_add_protocol(struct inet_protocol *prot);
-extern int	inet_del_protocol(struct inet_protocol *prot);
+extern int	inet_add_protocol(struct inet_protocol *prot, unsigned char num);
+extern int	inet_del_protocol(struct inet_protocol *prot, unsigned char num);
 extern void	inet_register_protosw(struct inet_protosw *p);
 extern void	inet_unregister_protosw(struct inet_protosw *p);
 
 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
-extern void	inet6_add_protocol(struct inet6_protocol *prot);
-extern int	inet6_del_protocol(struct inet6_protocol *prot);
+extern int	inet6_add_protocol(struct inet6_protocol *prot, unsigned char num);
+extern int	inet6_del_protocol(struct inet6_protocol *prot, unsigned char num);
 extern void	inet6_register_protosw(struct inet_protosw *p);
 extern void	inet6_unregister_protosw(struct inet_protosw *p);
 #endif
Index: include/net/raw.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/net/raw.h,v
retrieving revision 1.1.1.14
retrieving revision 1.1.1.14.2.1
diff -u -r1.1.1.14 -r1.1.1.14.2.1
--- a/include/net/raw.h	12 Apr 2001 19:11:39 -0000	1.1.1.14
+++ b/include/net/raw.h	16 Apr 2004 13:16:19 -0000	1.1.1.14.2.1
@@ -37,6 +37,6 @@
 				    unsigned long raddr, unsigned long laddr,
 				    int dif);
 
-extern struct sock *raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash);
+extern void raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash);
 
 #endif	/* _RAW_H */
Index: include/net/rawv6.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/net/rawv6.h,v
retrieving revision 1.1.1.14
retrieving revision 1.1.1.14.2.1
diff -u -r1.1.1.14 -r1.1.1.14.2.1
--- a/include/net/rawv6.h	12 Apr 2001 19:11:39 -0000	1.1.1.14
+++ b/include/net/rawv6.h	16 Apr 2004 13:16:19 -0000	1.1.1.14.2.1
@@ -7,9 +7,7 @@
 extern struct sock *raw_v6_htable[RAWV6_HTABLE_SIZE];
 extern rwlock_t raw_v6_lock;
 
-extern struct sock * ipv6_raw_deliver(struct sk_buff *skb,
-				      int nexthdr);
-
+extern void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr);
 
 extern struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num,
 				    struct in6_addr *loc_addr, struct in6_addr *rmt_addr);
Index: include/net/route.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/net/route.h,v
retrieving revision 1.1.1.21
retrieving revision 1.1.1.21.2.1
diff -u -r1.1.1.21 -r1.1.1.21.2.1
--- a/include/net/route.h	25 Aug 2003 11:44:44 -0000	1.1.1.21
+++ b/include/net/route.h	16 Apr 2004 13:16:19 -0000	1.1.1.21.2.1
@@ -27,6 +27,7 @@
 #include <linux/config.h>
 #include <net/dst.h>
 #include <net/inetpeer.h>
+#include <net/flow.h>
 #include <linux/in_route.h>
 #include <linux/rtnetlink.h>
 #include <linux/route.h>
@@ -45,19 +46,6 @@
 
 #define RT_CONN_FLAGS(sk)   (RT_TOS(sk->protinfo.af_inet.tos) | sk->localroute)
 
-struct rt_key
-{
-	__u32			dst;
-	__u32			src;
-	int			iif;
-	int			oif;
-#ifdef CONFIG_IP_ROUTE_FWMARK
-	__u32			fwmark;
-#endif
-	__u8			tos;
-	__u8			scope;
-};
-
 struct inet_peer;
 struct rtable
 {
@@ -78,7 +66,7 @@
 	__u32			rt_gateway;
 
 	/* Cache lookup keys */
-	struct rt_key		key;
+	struct flowi		fl;
 
 	/* Miscellaneous cached information */
 	__u32			rt_spec_dst; /* RFC1122 specific destination */
@@ -126,10 +114,11 @@
 				       u32 src, u8 tos, struct net_device *dev);
 extern void		ip_rt_advice(struct rtable **rp, int advice);
 extern void		rt_cache_flush(int how);
-extern int		ip_route_output_key(struct rtable **, const struct rt_key *key);
+extern int		__ip_route_output_key(struct rtable **, const struct flowi *flp);
+extern int		ip_route_output_key(struct rtable **, struct flowi *flp);
+extern int		ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags);
 extern int		ip_route_input(struct sk_buff*, u32 dst, u32 src, u8 tos, struct net_device *devin);
 extern unsigned short	ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu);
-extern void		ip_rt_update_pmtu(struct dst_entry *dst, unsigned mtu);
 extern void		ip_rt_send_redirect(struct sk_buff *skb);
 
 extern unsigned		inet_addr_type(u32 addr);
@@ -138,16 +127,6 @@
 extern void		ip_rt_get_source(u8 *src, struct rtable *rt);
 extern int		ip_rt_dump(struct sk_buff *skb,  struct netlink_callback *cb);
 
-/* Deprecated: use ip_route_output_key directly */
-static inline int ip_route_output(struct rtable **rp,
-				      u32 daddr, u32 saddr, u32 tos, int oif)
-{
-	struct rt_key key = { dst:daddr, src:saddr, oif:oif, tos:tos };
-
-	return ip_route_output_key(rp, &key);
-}
-
-
 static inline void ip_rt_put(struct rtable * rt)
 {
 	if (rt)
@@ -163,17 +142,47 @@
 	return ip_tos2prio[IPTOS_TOS(tos)>>1];
 }
 
-static inline int ip_route_connect(struct rtable **rp, u32 dst, u32 src, u32 tos, int oif)
-{
+static inline int ip_route_connect(struct rtable **rp, u32 dst,
+				   u32 src, u32 tos, int oif, u8 protocol,
+				   u16 sport, u16 dport, struct sock *sk)
+{
+	struct flowi fl = { .oif = oif,
+			    .nl_u = { .ip4_u = { .daddr = dst,
+						 .saddr = src,
+						 .tos   = tos } },
+			    .proto = protocol,
+			    .uli_u = { .ports =
+				       { .sport = sport,
+					 .dport = dport } } };
+
 	int err;
-	err = ip_route_output(rp, dst, src, tos, oif);
-	if (err || (dst && src))
-		return err;
-	dst = (*rp)->rt_dst;
-	src = (*rp)->rt_src;
-	ip_rt_put(*rp);
-	*rp = NULL;
-	return ip_route_output(rp, dst, src, tos, oif);
+	if (!dst || !src) {
+		err = __ip_route_output_key(rp, &fl);
+		if (err)
+			return err;
+		fl.fl4_dst = (*rp)->rt_dst;
+		fl.fl4_src = (*rp)->rt_src;
+		ip_rt_put(*rp);
+		*rp = NULL;
+	}
+	return ip_route_output_flow(rp, &fl, sk, 0);
+}
+
+static inline int ip_route_newports(struct rtable **rp, u16 sport, u16 dport,
+				    struct sock *sk)
+{
+	if (sport != (*rp)->fl.fl_ip_sport ||
+	    dport != (*rp)->fl.fl_ip_dport) {
+		struct flowi fl;
+
+		memcpy(&fl, &(*rp)->fl, sizeof(fl));
+		fl.fl_ip_sport = sport;
+		fl.fl_ip_dport = dport;
+		ip_rt_put(*rp);
+		*rp = NULL;
+		return ip_route_output_flow(rp, &fl, sk, 0);
+	}
+	return 0;
 }
 
 extern void rt_bind_peer(struct rtable *rt, int create);
Index: include/net/sock.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/net/sock.h,v
retrieving revision 1.1.1.27
retrieving revision 1.1.1.27.2.1
diff -u -r1.1.1.27 -r1.1.1.27.2.1
--- a/include/net/sock.h	14 Apr 2004 13:05:40 -0000	1.1.1.27
+++ b/include/net/sock.h	16 Apr 2004 13:16:19 -0000	1.1.1.27.2.1
@@ -45,6 +45,8 @@
 #include <net/if_inet6.h>	/* struct ipv6_mc_socklist */
 #endif
 
+#include <net/flow.h>
+
 #if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE)
 #include <linux/icmp.h>
 #endif
@@ -184,6 +186,12 @@
 
 	struct ipv6_txoptions	*opt;
 	struct sk_buff		*pktoptions;
+	struct {
+		struct ipv6_txoptions *opt;
+		struct rt6_info	*rt;
+		struct flowi fl;
+		int hop_limit;
+	} cork;
 };
 
 struct raw6_opt {
@@ -210,7 +218,7 @@
 #if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE)
 struct inet_opt
 {
-	int			ttl;			/* TTL setting */
+	int			uc_ttl;			/* Unicast TTL */
 	int			tos;			/* TOS */
 	unsigned	   	cmsg_flags;
 	struct ip_options	*opt;
@@ -224,7 +232,24 @@
 	int			mc_index;		/* Multicast device index */
 	__u32			mc_addr;
 	struct ip_mc_socklist	*mc_list;		/* Group array */
+	struct page		*sndmsg_page;	/* Cached page for sendmsg */
+	u32			sndmsg_off;	/* Cached offset for sendmsg */
+	/*
+	 * Following members are used to retain the information to build
+	 * an ip header on each ip fragmentation while the socket is corked.
+	 */
+	struct {
+		unsigned int		flags;
+		unsigned int		fragsize;
+		struct ip_options	*opt;
+		struct rtable		*rt;
+		int			length; /* Total length of all frames */
+		u32			addr;
+	} cork;
 };
+
+#define IPCORK_OPT	1	/* ip-options has been held in ipcork.opt */
+
 #endif
 
 #if defined(CONFIG_PPPOE) || defined (CONFIG_PPPOE_MODULE)
@@ -250,6 +275,14 @@
 #define pppoe_relay	proto.pppoe.relay
 #endif
 
+#if defined(CONFIG_NET_KEY) || defined(CONFIG_NET_KEY_MODULE)
+struct pfkey_opt {
+	int	registered;
+	int	promisc;
+};
+#define pfkey_sk(__sk) ((__sk)->protinfo.pf_key)
+#endif
+
 /* This defines a selective acknowledgement block. */
 struct tcp_sack_block {
 	__u32	start_seq;
@@ -307,6 +340,7 @@
 	__u16	mss_cache;	/* Cached effective mss, not including SACKS */
 	__u16	mss_clamp;	/* Maximal mss, negotiated at connection setup */
 	__u16	ext_header_len;	/* Network protocol overhead (IP/IPv6 options) */
+	__u16	ext2_header_len;/* Options depending on route */
 	__u8	ca_state;	/* State of fast-retransmit machine 	*/
 	__u8	retransmits;	/* Number of unrecovered RTO timeouts.	*/
 
@@ -347,8 +381,6 @@
 
 	struct tcp_func		*af_specific;	/* Operations which are AF_INET{4,6} specific	*/
 	struct sk_buff		*send_head;	/* Front of stuff to transmit			*/
-	struct page		*sndmsg_page;	/* Cached page for sendmsg			*/
-	u32			sndmsg_off;	/* Cached offset for sendmsg			*/
 
  	__u32	rcv_wnd;	/* Current receiver window		*/
 	__u32	rcv_wup;	/* rcv_nxt on last window update sent	*/
@@ -447,6 +479,20 @@
         } westwood;
 };
 
+struct udp_opt {
+	int		pending;	/* Any pending frames ? */
+	unsigned int	corkflag;	/* Cork is required */
+	__u16		encap_type;	/* Is this an Encapsulation socket? */
+	/*
+	 * Following members retain the information to create a UDP header
+	 * when the socket is uncorked.
+	 */
+	u32		saddr;		/* source address */
+	u32		daddr;		/* destination address */
+	__u16		sport;		/* source port */
+	__u16		dport;		/* destination port */
+	__u16		len;		/* total length of pending frames */
+};
  	
 /*
  * This structure really needs to be cleaned up.
@@ -542,6 +588,7 @@
 	wait_queue_head_t	*sleep;		/* Sock wait queue			*/
 	struct dst_entry	*dst_cache;	/* Destination cache			*/
 	rwlock_t		dst_lock;
+	struct xfrm_policy	*policy[2];
 	atomic_t		rmem_alloc;	/* Receive queue bytes committed	*/
 	struct sk_buff_head	receive_queue;	/* Incoming packets			*/
 	atomic_t		wmem_alloc;	/* Transmit queue bytes committed	*/
@@ -598,10 +645,12 @@
 	union {
 		struct ipv6_pinfo	af_inet6;
 	} net_pinfo;
+#define inet6_sk(sk)	(&(sk)->net_pinfo.af_inet6)
 #endif
 
 	union {
 		struct tcp_opt		af_tcp;
+		struct udp_opt		af_udp;
 #if defined(CONFIG_IP_SCTP) || defined (CONFIG_IP_SCTP_MODULE)
 		struct sctp_opt		af_sctp;
 #endif
@@ -616,6 +665,10 @@
 #endif /* CONFIG_SPX */
 
 	} tp_pinfo;
+#define tcp_sk(sk)		(&(sk)->tp_pinfo.af_tcp)
+#define udp_sk(sk) 		(&(sk)->tp_pinfo.af_udp)
+#define raw_sk(sk)		(&(sk)->tp_pinfo.tp_raw4)
+#define raw6_sk(sk)		(&(sk)->tp_pinfo.tp_raw)
 
 	int			err, err_soft;	/* Soft holds errors that don't
 						   cause failure but are the cause
@@ -686,8 +739,11 @@
 #if defined(CONFIG_WAN_ROUTER) || defined(CONFIG_WAN_ROUTER_MODULE)
                struct wanpipe_opt      *af_wanpipe;
 #endif
+#if defined(CONFIG_NET_KEY) || defined(CONFIG_NET_KEY_MODULE)
+		struct pfkey_opt	*pf_key;
+#endif
 	} protinfo;  		
-
+#define inet_sk(sk)	(&(sk)->protinfo.af_inet)
 
 	/* This part is used for the timeout functions. */
 	struct timer_list	timer;		/* This is the sock cleanup timer. */
@@ -751,6 +807,8 @@
 	int			(*recvmsg)(struct sock *sk, struct msghdr *msg,
 					int len, int noblock, int flags, 
 					int *addr_len);
+	int			(*sendpage)(struct sock *sk, struct page *page,
+					int offset, size_t size, int flags);
 	int			(*bind)(struct sock *sk, 
 					struct sockaddr *uaddr, int addr_len);
 
Index: include/net/tcp.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/net/tcp.h,v
retrieving revision 1.1.1.24
retrieving revision 1.1.1.24.2.1
diff -u -r1.1.1.24 -r1.1.1.24.2.1
--- a/include/net/tcp.h	14 Apr 2004 13:05:40 -0000	1.1.1.24
+++ b/include/net/tcp.h	16 Apr 2004 13:16:19 -0000	1.1.1.24.2.1
@@ -546,13 +546,6 @@
 /*
  *	Pointers to address related TCP functions
  *	(i.e. things that depend on the address family)
- *
- * 	BUGGG_FUTURE: all the idea behind this struct is wrong.
- *	It mixes socket frontend with transport function.
- *	With port sharing between IPv6/v4 it gives the only advantage,
- *	only poor IPv6 needs to permanently recheck, that it
- *	is still IPv6 8)8) It must be cleaned up as soon as possible.
- *						--ANK (980802)
  */
 
 struct tcp_func {
@@ -909,9 +902,12 @@
 	struct dst_entry *dst = __sk_dst_get(sk);
 	int mss_now = tp->mss_cache; 
 
-	if (dst && dst->pmtu != tp->pmtu_cookie)
-		mss_now = tcp_sync_mss(sk, dst->pmtu);
-
+	if (dst) {
+		u32 mtu = dst_pmtu(dst);
+		if (mtu != tp->pmtu_cookie ||
+		    tp->ext2_header_len != dst->header_len)
+			mss_now = tcp_sync_mss(sk, mtu);
+	}
 	if (tp->eff_sacks)
 		mss_now -= (TCPOLEN_SACK_BASE_ALIGNED +
 			    (tp->eff_sacks * TCPOLEN_SACK_PERBLOCK));
@@ -1152,7 +1148,7 @@
 	}
 }
 
-extern __u32 tcp_init_cwnd(struct tcp_opt *tp);
+extern __u32 tcp_init_cwnd(struct tcp_opt *tp, struct dst_entry *dst);
 
 /* Slow start with delack produces 3 packets of burst, so that
  * it is safe "de facto".
Index: include/net/transp_v6.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/net/transp_v6.h,v
retrieving revision 1.1.1.14
retrieving revision 1.1.1.14.2.1
diff -u -r1.1.1.14 -r1.1.1.14.2.1
--- a/include/net/transp_v6.h	13 Jun 2003 14:51:39 -0000	1.1.1.14
+++ b/include/net/transp_v6.h	16 Apr 2004 13:16:19 -0000	1.1.1.14.2.1
@@ -17,6 +17,13 @@
 
 extern void				ipv6_frag_init(void);
 
+/* extension headers */
+extern void				ipv6_rthdr_init(void);
+extern void				ipv6_frag_init(void);
+extern void				ipv6_nodata_init(void);
+extern void				ipv6_destopt_init(void);
+
+/* transport protocols */
 extern void				rawv6_init(void);
 extern void				udpv6_init(void);
 extern void				tcpv6_init(void);
Index: include/net/xfrm.h
===================================================================
RCS file: include/net/xfrm.h
diff -N include/net/xfrm.h
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ b/include/net/xfrm.h	16 Apr 2004 13:16:19 -0000	1.9.8.1
@@ -0,0 +1,893 @@
+#ifndef _NET_XFRM_H
+#define _NET_XFRM_H
+
+#include <linux/xfrm.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/crypto.h>
+#include <linux/pfkeyv2.h>
+#include <linux/in6.h>
+
+#include <net/sock.h>
+#include <net/dst.h>
+#include <net/route.h>
+#include <net/ipv6.h>
+#include <net/ip6_fib.h>
+
+#define XFRM_ALIGN8(len)	(((len) + 7) & ~7)
+
+extern struct semaphore xfrm_cfg_sem;
+
+/* Organization of SPD aka "XFRM rules"
+   ------------------------------------
+
+   Basic objects:
+   - policy rule, struct xfrm_policy (=SPD entry)
+   - bundle of transformations, struct dst_entry == struct xfrm_dst (=SA bundle)
+   - instance of a transformer, struct xfrm_state (=SA)
+   - template to clone xfrm_state, struct xfrm_tmpl
+
+   SPD is plain linear list of xfrm_policy rules, ordered by priority.
+   (To be compatible with existing pfkeyv2 implementations,
+   many rules with priority of 0x7fffffff are allowed to exist and
+   such rules are ordered in an unpredictable way, thanks to bsd folks.)
+
+   Lookup is plain linear search until the first match with selector.
+
+   If "action" is "block", then we prohibit the flow, otherwise:
+   if "xfrms_nr" is zero, the flow passes untransformed. Otherwise,
+   policy entry has list of up to XFRM_MAX_DEPTH transformations,
+   described by templates xfrm_tmpl. Each template is resolved
+   to a complete xfrm_state (see below) and we pack bundle of transformations
+   to a dst_entry returned to requestor.
+
+   dst -. xfrm  .-> xfrm_state #1
+    |---. child .-> dst -. xfrm .-> xfrm_state #2
+                     |---. child .-> dst -. xfrm .-> xfrm_state #3
+                                      |---. child .-> NULL
+
+   Bundles are cached at xfrm_policy struct (field ->bundles).
+
+
+   Resolution of xfrm_tmpl
+   -----------------------
+   Template contains:
+   1. ->mode		Mode: transport or tunnel
+   2. ->id.proto	Protocol: AH/ESP/IPCOMP
+   3. ->id.daddr	Remote tunnel endpoint, ignored for transport mode.
+      Q: allow to resolve security gateway?
+   4. ->id.spi          If not zero, static SPI.
+   5. ->saddr		Local tunnel endpoint, ignored for transport mode.
+   6. ->algos		List of allowed algos. Plain bitmask now.
+      Q: ealgos, aalgos, calgos. What a mess...
+   7. ->share		Sharing mode.
+      Q: how to implement private sharing mode? To add struct sock* to
+      flow id?
+
+   Having this template we search through SAD searching for entries
+   with appropriate mode/proto/algo, permitted by selector.
+   If no appropriate entry found, it is requested from key manager.
+
+   PROBLEMS:
+   Q: How to find all the bundles referring to a physical path for
+      PMTU discovery? Seems, dst should contain list of all parents...
+      and enter to infinite locking hierarchy disaster.
+      No! It is easier, we will not search for them, let them find us.
+      We add genid to each dst plus pointer to genid of raw IP route,
+      pmtu disc will update pmtu on raw IP route and increase its genid.
+      dst_check() will see this for top level and trigger resyncing
+      metrics. Plus, it will be made via sk->dst_cache. Solved.
+ */
+
+/* Full description of state of transformer. */
+struct xfrm_state
+{
+	/* Note: bydst is re-used during gc */
+	struct list_head	bydst;
+	struct list_head	byspi;
+
+	atomic_t		refcnt;
+	spinlock_t		lock;
+
+	struct xfrm_id		id;
+	struct xfrm_selector	sel;
+
+	/* Key manager bits */
+	struct {
+		u8		state;
+		u8		dying;
+		u32		seq;
+	} km;
+
+	/* Parameters of this state. */
+	struct {
+		u32		reqid;
+		u8		mode;
+		u8		replay_window;
+		u8		aalgo, ealgo, calgo;
+		u8		flags;
+		u16		family;
+		xfrm_address_t	saddr;
+		int		header_len;
+		int		trailer_len;
+	} props;
+
+	struct xfrm_lifetime_cfg lft;
+
+	/* Data for transformer */
+	struct xfrm_algo	*aalg;
+	struct xfrm_algo	*ealg;
+	struct xfrm_algo	*calg;
+
+	/* Data for encapsulator */
+	struct xfrm_encap_tmpl	*encap;
+
+	/* IPComp needs an IPIP tunnel for handling uncompressed packets */
+	struct xfrm_state	*tunnel;
+
+	/* If a tunnel, number of users + 1 */
+	atomic_t		tunnel_users;
+
+	/* State for replay detection */
+	struct xfrm_replay_state replay;
+
+	/* Statistics */
+	struct xfrm_stats	stats;
+
+	struct xfrm_lifetime_cur curlft;
+	struct timer_list	timer;
+
+	/* Reference to data common to all the instances of this
+	 * transformer. */
+	struct xfrm_type	*type;
+
+	/* Private data of this transformer, format is opaque,
+	 * interpreted by xfrm_type methods. */
+	void			*data;
+};
+
+enum {
+	XFRM_STATE_VOID,
+	XFRM_STATE_ACQ,
+	XFRM_STATE_VALID,
+	XFRM_STATE_ERROR,
+	XFRM_STATE_EXPIRED,
+	XFRM_STATE_DEAD
+};
+
+struct xfrm_type;
+struct xfrm_dst;
+struct xfrm_policy_afinfo {
+	unsigned short		family;
+	rwlock_t		lock;
+	struct xfrm_type_map	*type_map;
+	struct dst_ops		*dst_ops;
+	void			(*garbage_collect)(void);
+	int			(*dst_lookup)(struct xfrm_dst **dst, struct flowi *fl);
+	struct dst_entry	*(*find_bundle)(struct flowi *fl, struct xfrm_policy *policy);
+	int			(*bundle_create)(struct xfrm_policy *policy, 
+						 struct xfrm_state **xfrm, 
+						 int nx,
+						 struct flowi *fl, 
+						 struct dst_entry **dst_p);
+	void			(*decode_session)(struct sk_buff *skb,
+						  struct flowi *fl);
+};
+
+extern int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo);
+extern int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo);
+extern struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
+extern void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
+
+#define XFRM_ACQ_EXPIRES	30
+
+struct xfrm_tmpl;
+struct xfrm_state_afinfo {
+	unsigned short		family;
+	rwlock_t		lock;
+	struct list_head	*state_bydst;
+	struct list_head	*state_byspi;
+	void			(*init_tempsel)(struct xfrm_state *x, struct flowi *fl,
+						struct xfrm_tmpl *tmpl,
+						xfrm_address_t *daddr, xfrm_address_t *saddr);
+	struct xfrm_state	*(*state_lookup)(xfrm_address_t *daddr, u32 spi, u8 proto);
+	struct xfrm_state	*(*find_acq)(u8 mode, u32 reqid, u8 proto, 
+					     xfrm_address_t *daddr, xfrm_address_t *saddr, 
+					     int create);
+};
+
+extern int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo);
+extern int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo);
+extern struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
+extern void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
+
+extern void xfrm_state_delete_tunnel(struct xfrm_state *x);
+
+struct xfrm_decap_state;
+struct xfrm_type
+{
+	char			*description;
+	struct module		*owner;
+	__u8			proto;
+
+	int			(*init_state)(struct xfrm_state *x, void *args);
+	void			(*destructor)(struct xfrm_state *);
+	int			(*input)(struct xfrm_state *, struct xfrm_decap_state *, struct sk_buff *skb);
+	int			(*post_input)(struct xfrm_state *, struct xfrm_decap_state *, struct sk_buff *skb);
+	int			(*output)(struct sk_buff *skb);
+	/* Estimate maximal size of result of transformation of a dgram */
+	u32			(*get_max_size)(struct xfrm_state *, int size);
+};
+
+struct xfrm_type_map {
+	rwlock_t		lock;
+	struct xfrm_type	*map[256];
+};
+
+extern int xfrm_register_type(struct xfrm_type *type, unsigned short family);
+extern int xfrm_unregister_type(struct xfrm_type *type, unsigned short family);
+extern struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family);
+extern void xfrm_put_type(struct xfrm_type *type);
+
+struct xfrm_tmpl
+{
+/* id in template is interpreted as:
+ * daddr - destination of tunnel, may be zero for transport mode.
+ * spi   - zero to acquire spi. Not zero if spi is static, then
+ *	   daddr must be fixed too.
+ * proto - AH/ESP/IPCOMP
+ */
+	struct xfrm_id		id;
+
+/* Source address of tunnel. Ignored, if it is not a tunnel. */
+	xfrm_address_t		saddr;
+
+	__u32			reqid;
+
+/* Mode: transport/tunnel */
+	__u8			mode;
+
+/* Sharing mode: unique, this session only, this user only etc. */
+	__u8			share;
+
+/* May skip this transformation if no SA is found */
+	__u8			optional;
+
+/* Bit mask of algos allowed for acquisition */
+	__u32			aalgos;
+	__u32			ealgos;
+	__u32			calgos;
+};
+
+#define XFRM_MAX_DEPTH		4
+
+struct xfrm_policy
+{
+	struct xfrm_policy	*next;
+	struct list_head	list;
+
+	/* This lock only affects elements except for entry. */
+	rwlock_t		lock;
+	atomic_t		refcnt;
+	struct timer_list	timer;
+
+	u32			priority;
+	u32			index;
+	struct xfrm_selector	selector;
+	struct xfrm_lifetime_cfg lft;
+	struct xfrm_lifetime_cur curlft;
+	struct dst_entry       *bundles;
+	__u16			family;
+	__u8			action;
+	__u8			flags;
+	__u8			dead;
+	__u8			xfrm_nr;
+	struct xfrm_tmpl       	xfrm_vec[XFRM_MAX_DEPTH];
+};
+
+#define XFRM_KM_TIMEOUT		30
+
+struct xfrm_mgr
+{
+	struct list_head	list;
+	char			*id;
+	int			(*notify)(struct xfrm_state *x, int event);
+	int			(*acquire)(struct xfrm_state *x, struct xfrm_tmpl *, struct xfrm_policy *xp, int dir);
+	struct xfrm_policy	*(*compile_policy)(u16 family, int opt, u8 *data, int len, int *dir);
+	int			(*new_mapping)(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport);
+	int			(*notify_policy)(struct xfrm_policy *x, int dir, int event);
+};
+
+extern int xfrm_register_km(struct xfrm_mgr *km);
+extern int xfrm_unregister_km(struct xfrm_mgr *km);
+
+
+#define XFRM_FLOWCACHE_HASH_SIZE	1024
+
+static inline u32 __flow_hash4(struct flowi *fl)
+{
+	u32 hash = fl->fl4_src ^ fl->fl_ip_sport;
+
+	hash = ((hash & 0xF0F0F0F0) >> 4) | ((hash & 0x0F0F0F0F) << 4);
+
+	hash ^= fl->fl4_dst ^ fl->fl_ip_dport;
+	hash ^= (hash >> 10);
+	hash ^= (hash >> 20);
+	return hash & (XFRM_FLOWCACHE_HASH_SIZE-1);
+}
+
+static inline u32 __flow_hash6(struct flowi *fl)
+{
+	u32 hash = fl->fl6_src.s6_addr32[2] ^
+		   fl->fl6_src.s6_addr32[3] ^ 
+		   fl->fl_ip_sport;
+
+	hash = ((hash & 0xF0F0F0F0) >> 4) | ((hash & 0x0F0F0F0F) << 4);
+
+	hash ^= fl->fl6_dst.s6_addr32[2] ^
+		fl->fl6_dst.s6_addr32[3] ^ 
+		fl->fl_ip_dport;
+	hash ^= (hash >> 10);
+	hash ^= (hash >> 20);
+	return hash & (XFRM_FLOWCACHE_HASH_SIZE-1);
+}
+
+static inline u32 flow_hash(struct flowi *fl, unsigned short family)
+{
+	switch (family) {
+	case AF_INET:
+		return __flow_hash4(fl);
+	case AF_INET6:
+		return __flow_hash6(fl);
+	}
+	return 0;	/*XXX*/
+}
+
+extern struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2];
+
+static inline void xfrm_pol_hold(struct xfrm_policy *policy)
+{
+	if (likely(policy != NULL))
+		atomic_inc(&policy->refcnt);
+}
+
+extern void __xfrm_policy_destroy(struct xfrm_policy *policy);
+
+static inline void xfrm_pol_put(struct xfrm_policy *policy)
+{
+	if (atomic_dec_and_test(&policy->refcnt))
+		__xfrm_policy_destroy(policy);
+}
+
+#define XFRM_DST_HSIZE		1024
+
+static __inline__
+unsigned __xfrm4_dst_hash(xfrm_address_t *addr)
+{
+	unsigned h;
+	h = ntohl(addr->a4);
+	h = (h ^ (h>>16)) % XFRM_DST_HSIZE;
+	return h;
+}
+
+static __inline__
+unsigned __xfrm6_dst_hash(xfrm_address_t *addr)
+{
+	unsigned h;
+	h = ntohl(addr->a6[2]^addr->a6[3]);
+	h = (h ^ (h>>16)) % XFRM_DST_HSIZE;
+	return h;
+}
+
+static __inline__
+unsigned xfrm_dst_hash(xfrm_address_t *addr, unsigned short family)
+{
+	switch (family) {
+	case AF_INET:
+		return __xfrm4_dst_hash(addr);
+	case AF_INET6:
+		return __xfrm6_dst_hash(addr);
+	}
+	return 0;
+}
+
+static __inline__
+unsigned __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto)
+{
+	unsigned h;
+	h = ntohl(addr->a4^spi^proto);
+	h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE;
+	return h;
+}
+
+static __inline__
+unsigned __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto)
+{
+	unsigned h;
+	h = ntohl(addr->a6[2]^addr->a6[3]^spi^proto);
+	h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE;
+	return h;
+}
+
+static __inline__
+unsigned xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family)
+{
+	switch (family) {
+	case AF_INET:
+		return __xfrm4_spi_hash(addr, spi, proto);
+	case AF_INET6:
+		return __xfrm6_spi_hash(addr, spi, proto);
+	}
+	return 0;	/*XXX*/
+}
+
+extern void __xfrm_state_destroy(struct xfrm_state *);
+
+static inline void xfrm_state_put(struct xfrm_state *x)
+{
+	if (atomic_dec_and_test(&x->refcnt))
+		__xfrm_state_destroy(x);
+}
+
+static inline void xfrm_state_hold(struct xfrm_state *x)
+{
+	atomic_inc(&x->refcnt);
+}
+
+static __inline__ int addr_match(void *token1, void *token2, int prefixlen)
+{
+	__u32 *a1 = token1;
+	__u32 *a2 = token2;
+	int pdw;
+	int pbi;
+
+	pdw = prefixlen >> 5;	  /* num of whole __u32 in prefix */
+	pbi = prefixlen &  0x1f;  /* num of bits in incomplete u32 in prefix */
+
+	if (pdw)
+		if (memcmp(a1, a2, pdw << 2))
+			return 0;
+
+	if (pbi) {
+		__u32 mask;
+
+		mask = htonl((0xffffffff) << (32 - pbi));
+
+		if ((a1[pdw] ^ a2[pdw]) & mask)
+			return 0;
+	}
+
+	return 1;
+}
+
+static inline int
+__xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl)
+{
+	return  addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d) &&
+		addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) &&
+		!((fl->fl_ip_dport^sel->dport)&sel->dport_mask) &&
+		!((fl->fl_ip_sport^sel->sport)&sel->sport_mask) &&
+		(fl->proto == sel->proto || !sel->proto) &&
+		(fl->oif == sel->ifindex || !sel->ifindex);
+}
+
+static inline int
+__xfrm6_selector_match(struct xfrm_selector *sel, struct flowi *fl)
+{
+	return  addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) &&
+		addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) &&
+		!((fl->fl_ip_dport^sel->dport)&sel->dport_mask) &&
+		!((fl->fl_ip_sport^sel->sport)&sel->sport_mask) &&
+		(fl->proto == sel->proto || !sel->proto) &&
+		(fl->oif == sel->ifindex || !sel->ifindex);
+}
+
+static inline int
+xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl,
+		    unsigned short family)
+{
+	switch (family) {
+	case AF_INET:
+		return __xfrm4_selector_match(sel, fl);
+	case AF_INET6:
+		return __xfrm6_selector_match(sel, fl);
+	}
+	return 0;
+}
+
+/* placeholder until xfrm6_tunnel.c is written */
+static inline int xfrm6_tunnel_check_size(struct sk_buff *skb)
+{ return 0; }
+
+/* A struct encoding bundle of transformations to apply to some set of flow.
+ *
+ * dst->child points to the next element of bundle.
+ * dst->xfrm  points to an instance of transformer.
+ *
+ * Due to unfortunate limitations of current routing cache, which we
+ * have no time to fix, it mirrors struct rtable and is bound to the same
+ * routing key, including saddr,daddr. However, we can have many of
+ * bundles differing by session id. All the bundles grow from a parent
+ * policy rule.
+ */
+struct xfrm_dst
+{
+	union {
+		struct xfrm_dst		*next;
+		struct dst_entry	dst;
+		struct rtable		rt;
+		struct rt6_info		rt6;
+	} u;
+};
+
+/* Decapsulation state, used by the input to store data during
+ * decapsulation procedure, to be used later (during the policy
+ * check).
+ */
+struct xfrm_decap_state {
+	char	decap_data[20];
+	__u16	decap_type;
+};   
+
+struct sec_decap_state {
+	struct xfrm_state	*xvec;
+	struct xfrm_decap_state decap;
+};
+
+struct sec_path
+{
+	atomic_t		refcnt;
+	int			len;
+	struct sec_decap_state	x[XFRM_MAX_DEPTH];
+};
+
+static inline struct sec_path *
+secpath_get(struct sec_path *sp)
+{
+	if (sp)
+		atomic_inc(&sp->refcnt);
+	return sp;
+}
+
+extern void __secpath_destroy(struct sec_path *sp);
+
+static inline void
+secpath_put(struct sec_path *sp)
+{
+	if (sp && atomic_dec_and_test(&sp->refcnt))
+		__secpath_destroy(sp);
+}
+
+extern struct sec_path *secpath_dup(struct sec_path *src);
+
+static inline void
+secpath_reset(struct sk_buff *skb)
+{
+#ifdef CONFIG_XFRM
+	secpath_put(skb->sp);
+	skb->sp = NULL;
+#endif
+}
+
+static inline int
+__xfrm4_state_addr_cmp(struct xfrm_tmpl *tmpl, struct xfrm_state *x)
+{
+	return	(tmpl->saddr.a4 &&
+		 tmpl->saddr.a4 != x->props.saddr.a4);
+}
+
+static inline int
+__xfrm6_state_addr_cmp(struct xfrm_tmpl *tmpl, struct xfrm_state *x)
+{
+	return	(!ipv6_addr_any((struct in6_addr*)&tmpl->saddr) &&
+		 ipv6_addr_cmp((struct in6_addr *)&tmpl->saddr, (struct in6_addr*)&x->props.saddr));
+}
+
+static inline int
+xfrm_state_addr_cmp(struct xfrm_tmpl *tmpl, struct xfrm_state *x, unsigned short family)
+{
+	switch (family) {
+	case AF_INET:
+		return __xfrm4_state_addr_cmp(tmpl, x);
+	case AF_INET6:
+		return __xfrm6_state_addr_cmp(tmpl, x);
+	}
+	return !0;
+}
+
+#ifdef CONFIG_XFRM
+
+extern int __xfrm_policy_check(struct sock *, int dir, struct sk_buff *skb, unsigned short family);
+
+static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family)
+{
+	/* A socket carrying its own XFRM_POLICY_IN policy always takes the full check. */
+	if (sk && sk->policy[XFRM_POLICY_IN])
+		return __xfrm_policy_check(sk, dir, skb, family);
+	if (!xfrm_policy_list[dir] || (skb->dst->flags & DST_NOPOLICY))
+		return 1;
+	return __xfrm_policy_check(sk, dir, skb, family) != 0;
+}
+
+static inline int xfrm4_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
+{
+	return xfrm_policy_check(sk, dir, skb, AF_INET);
+}
+
+static inline int xfrm6_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
+{
+	return xfrm_policy_check(sk, dir, skb, AF_INET6);
+}
+
+
+extern int __xfrm_route_forward(struct sk_buff *skb, unsigned short family);
+
+static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family)
+{
+	if (!xfrm_policy_list[XFRM_POLICY_OUT] || (skb->dst->flags & DST_NOXFRM))
+		return 1;	/* empty XFRM_POLICY_OUT list or DST_NOXFRM set: skip the slow path */
+	return __xfrm_route_forward(skb, family) != 0;
+}
+
+static inline int xfrm4_route_forward(struct sk_buff *skb)
+{
+	return xfrm_route_forward(skb, AF_INET);
+}
+
+static inline int xfrm6_route_forward(struct sk_buff *skb)
+{
+	return xfrm_route_forward(skb, AF_INET6);
+}
+
+extern int __xfrm_sk_clone_policy(struct sock *sk);
+
+static inline int xfrm_sk_clone_policy(struct sock *sk)
+{
+	if (unlikely(sk->policy[0] || sk->policy[1]))
+		return __xfrm_sk_clone_policy(sk);
+	return 0;
+}
+
+extern void xfrm_policy_delete(struct xfrm_policy *pol, int dir);
+
+static inline void xfrm_sk_free_policy(struct sock *sk)
+{
+	int i;
+
+	/* Slot i of sk->policy[] is deleted with direction XFRM_POLICY_MAX + i. */
+	for (i = 0; i < 2; i++)
+		if (unlikely(sk->policy[i] != NULL)) {
+			xfrm_policy_delete(sk->policy[i], XFRM_POLICY_MAX + i);
+			sk->policy[i] = NULL;
+		}
+}
+
+#else
+
+static inline void xfrm_sk_free_policy(struct sock *sk) {}
+static inline int xfrm_sk_clone_policy(struct sock *sk) { return 0; }
+static inline int xfrm6_route_forward(struct sk_buff *skb) { return 1; }  
+static inline int xfrm4_route_forward(struct sk_buff *skb) { return 1; } 
+static inline int xfrm6_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
+{ 
+	return 1; 
+} 
+static inline int xfrm4_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
+{
+	return 1;
+}
+static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family)
+{
+	return 1;
+}
+#endif
+
+static __inline__
+xfrm_address_t *xfrm_flowi_daddr(struct flowi *fl, unsigned short family)
+{
+	/* Destination address of fl viewed as xfrm_address_t; NULL for other families. */
+	if (family == AF_INET)
+		return (xfrm_address_t *)&fl->fl4_dst;
+	if (family == AF_INET6)
+		return (xfrm_address_t *)&fl->fl6_dst;
+
+	return NULL;
+}
+
+static __inline__
+xfrm_address_t *xfrm_flowi_saddr(struct flowi *fl, unsigned short family)
+{
+	/* Source address of fl viewed as xfrm_address_t; NULL for other families. */
+	if (family == AF_INET)
+		return (xfrm_address_t *)&fl->fl4_src;
+	if (family == AF_INET6)
+		return (xfrm_address_t *)&fl->fl6_src;
+
+	return NULL;
+}
+
+static __inline__ int
+__xfrm4_state_addr_check(struct xfrm_state *x,
+			 xfrm_address_t *daddr, xfrm_address_t *saddr)
+{
+	/* daddr must be equal; a zero saddr on either side matches anything. */
+	if (daddr->a4 != x->id.daddr.a4)
+		return 0;
+	return saddr->a4 == x->props.saddr.a4 || !saddr->a4 || !x->props.saddr.a4;
+}
+
+static __inline__ int
+__xfrm6_state_addr_check(struct xfrm_state *x,
+			 xfrm_address_t *daddr, xfrm_address_t *saddr)
+{
+	struct in6_addr *src = (struct in6_addr *)saddr;
+	struct in6_addr *xsrc = (struct in6_addr *)&x->props.saddr;
+
+	if (ipv6_addr_cmp((struct in6_addr *)daddr, (struct in6_addr *)&x->id.daddr))
+		return 0;	/* destination has to be equal */
+	return !ipv6_addr_cmp(src, xsrc) || ipv6_addr_any(src) || ipv6_addr_any(xsrc);
+}
+
+static __inline__ int
+xfrm_state_addr_check(struct xfrm_state *x,
+		      xfrm_address_t *daddr, xfrm_address_t *saddr,
+		      unsigned short family)
+{
+	/* Per-family address match; an unrecognised family never matches. */
+	if (family == AF_INET)
+		return __xfrm4_state_addr_check(x, daddr, saddr);
+	if (family == AF_INET6)
+		return __xfrm6_state_addr_check(x, daddr, saddr);
+
+	return 0;
+}
+
+static inline int xfrm_state_kern(struct xfrm_state *x)
+{
+	return atomic_read(&x->tunnel_users);
+}
+
+/*
+ * xfrm algorithm information
+ */
+struct xfrm_algo_auth_info {
+	u16 icv_truncbits;
+	u16 icv_fullbits;
+};
+
+struct xfrm_algo_encr_info {
+	u16 blockbits;
+	u16 defkeybits;
+};
+
+struct xfrm_algo_comp_info {
+	u16 threshold;
+};
+
+struct xfrm_algo_desc {
+	char *name;
+	u8 available:1;
+	union {
+		struct xfrm_algo_auth_info auth;
+		struct xfrm_algo_encr_info encr;
+		struct xfrm_algo_comp_info comp;
+	} uinfo;
+	struct sadb_alg desc;
+};
+
+/* XFRM tunnel handlers.  */
+struct xfrm_tunnel {
+	int (*handler)(struct sk_buff *skb);
+	void (*err_handler)(struct sk_buff *skb, void *info);
+};
+
+extern void xfrm_init(void);
+extern void xfrm4_init(void);
+extern void xfrm4_fini(void);
+extern void xfrm6_init(void);
+extern void xfrm6_fini(void);
+extern void xfrm_state_init(void);
+extern void xfrm4_state_init(void);
+extern void xfrm4_state_fini(void);
+extern void xfrm6_state_init(void);
+extern void xfrm6_state_fini(void);
+
+extern int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), void *);
+extern struct xfrm_state *xfrm_state_alloc(void);
+extern struct xfrm_state *xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, 
+					  struct flowi *fl, struct xfrm_tmpl *tmpl,
+					  struct xfrm_policy *pol, int *err,
+					  unsigned short family);
+extern int xfrm_state_check_expire(struct xfrm_state *x);
+extern void xfrm_state_insert(struct xfrm_state *x);
+extern int xfrm_state_add(struct xfrm_state *x);
+extern int xfrm_state_update(struct xfrm_state *x);
+extern int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb);
+extern struct xfrm_state *xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family);
+extern struct xfrm_state *xfrm_find_acq_byseq(u32 seq);
+extern void xfrm_state_delete(struct xfrm_state *x);
+extern void xfrm_state_flush(u8 proto);
+extern int xfrm_replay_check(struct xfrm_state *x, u32 seq);
+extern void xfrm_replay_advance(struct xfrm_state *x, u32 seq);
+extern int xfrm_check_selectors(struct xfrm_state **x, int n, struct flowi *fl);
+extern int xfrm_check_output(struct xfrm_state *x, struct sk_buff *skb, unsigned short family);
+extern int xfrm4_rcv(struct sk_buff *skb);
+extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler);
+extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler);
+extern int xfrm4_tunnel_check_size(struct sk_buff *skb);
+extern int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp);
+
+#ifdef CONFIG_XFRM
+extern int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type);
+extern int xfrm_user_policy(struct sock *sk, int optname, u8 *optval, int optlen);
+extern int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, unsigned short family);
+#else
+static inline int xfrm_user_policy(struct sock *sk, int optname, u8 *optval, int optlen)
+{
+ 	return -ENOPROTOOPT;
+} 
+
+static inline int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
+{
+	/* Stub for builds without CONFIG_XFRM: should not happen, so just free the skb. */
+	kfree_skb(skb);
+	return 0;
+}
+static inline int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, unsigned short family)
+{
+	return -EINVAL;
+} 
+#endif
+
+void xfrm_policy_init(void);
+void xfrm4_policy_init(void);
+void xfrm6_policy_init(void);
+struct xfrm_policy *xfrm_policy_alloc(int gfp);
+extern int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), void *);
+int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl);
+struct xfrm_policy *xfrm_policy_bysel(int dir, struct xfrm_selector *sel,
+				      int delete);
+struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete);
+void xfrm_policy_flush(void);
+u32 xfrm_get_acqseq(void);
+void xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi);
+struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
+				  xfrm_address_t *daddr, xfrm_address_t *saddr, 
+				  int create, unsigned short family);
+extern void xfrm_policy_flush(void);
+extern void xfrm_policy_kill(struct xfrm_policy *);
+extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol);
+extern struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl);
+extern int xfrm_flush_bundles(void);
+
+extern wait_queue_head_t km_waitq;
+extern void km_state_expired(struct xfrm_state *x, int hard);
+extern int km_query(struct xfrm_state *x, struct xfrm_tmpl *, struct xfrm_policy *pol);
+extern int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport);
+extern void km_policy_expired(struct xfrm_policy *pol, int dir, int hard);
+
+extern void xfrm_input_init(void);
+extern int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq);
+
+extern void xfrm_probe_algs(void);
+extern int xfrm_count_auth_supported(void);
+extern int xfrm_count_enc_supported(void);
+extern struct xfrm_algo_desc *xfrm_aalg_get_byidx(unsigned int idx);
+extern struct xfrm_algo_desc *xfrm_ealg_get_byidx(unsigned int idx);
+extern struct xfrm_algo_desc *xfrm_calg_get_byidx(unsigned int idx);
+extern struct xfrm_algo_desc *xfrm_aalg_get_byid(int alg_id);
+extern struct xfrm_algo_desc *xfrm_ealg_get_byid(int alg_id);
+extern struct xfrm_algo_desc *xfrm_calg_get_byid(int alg_id);
+extern struct xfrm_algo_desc *xfrm_aalg_get_byname(char *name);
+extern struct xfrm_algo_desc *xfrm_ealg_get_byname(char *name);
+extern struct xfrm_algo_desc *xfrm_calg_get_byname(char *name);
+
+struct crypto_tfm;
+typedef void (icv_update_fn_t)(struct crypto_tfm *, struct scatterlist *, unsigned int);
+
+extern void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm,
+			 int offset, int len, icv_update_fn_t icv_update);
+
+#endif	/* _NET_XFRM_H */
Index: include/net/sctp/compat.h
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/include/net/sctp/compat.h,v
retrieving revision 1.1.1.7
retrieving revision 1.1.1.7.2.1
diff -u -r1.1.1.7 -r1.1.1.7.2.1
--- a/include/net/sctp/compat.h	14 Apr 2004 13:05:40 -0000	1.1.1.7
+++ b/include/net/sctp/compat.h	16 Apr 2004 13:16:19 -0000	1.1.1.7.2.1
@@ -55,14 +55,10 @@
 	extern type name[]
 #define SNMP_DEC_STATS(mib, field) ((mib)[2*smp_processor_id()+!in_softirq()].field--)
 
-#define inet_sk(__sk) (&(((struct sock *)__sk)->protinfo.af_inet))
-#define inet6_sk(__sk) (&(((struct sock *)__sk)->net_pinfo.af_inet6))
-
 #define virt_addr_valid(x)	VALID_PAGE(virt_to_page((x)))
 #define sock_owned_by_user(sk)  ((sk)->lock.users)
 #define sk_set_owner(x, y)
 #define __unsafe(x) MOD_INC_USE_COUNT
-#define dst_pmtu(x) ((x)->pmtu)
 
 #define sk_family family
 #define sk_state state
Index: net/Config.in
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/Config.in,v
retrieving revision 1.1.1.22
retrieving revision 1.1.1.22.2.1
diff -u -r1.1.1.22 -r1.1.1.22.2.1
--- a/net/Config.in	28 Nov 2003 18:26:21 -0000	1.1.1.22
+++ b/net/Config.in	16 Apr 2004 13:16:19 -0000	1.1.1.22.2.1
@@ -16,6 +16,7 @@
 fi
 bool 'Socket Filtering'  CONFIG_FILTER
 tristate 'Unix domain sockets' CONFIG_UNIX
+tristate 'PF_KEY sockets' CONFIG_NET_KEY
 bool 'TCP/IP networking' CONFIG_INET
 if [ "$CONFIG_INET" = "y" ]; then
    source net/ipv4/Config.in
@@ -26,6 +27,29 @@
 	 source net/ipv6/Config.in
       fi
    fi
+   if [ "$CONFIG_NET_KEY" != "n" -o \
+	"$CONFIG_NET_IPIP" != "n" -o \
+	"$CONFIG_NET_IPGRE" != "n" -o \
+	"$CONFIG_INET_AH" != "n" -o \
+	"$CONFIG_INET_ESP" != "n" -o \
+	"$CONFIG_INET_IPCOMP" != "n" ]; then
+      define_bool CONFIG_XFRM y
+   else
+      if [ "$CONFIG_IPV6" != "n" ]; then
+	 if [ "$CONFIG_INET6_AH" != "n" -o \
+	      "$CONFIG_INET6_ESP" != "n" -o \
+	      "$CONFIG_INET6_IPCOMP" != "n" ]; then
+	    define_bool CONFIG_XFRM y
+	 else
+	    bool '  XFRM support' CONFIG_XFRM
+	 fi
+      else
+	 bool '  XFRM support' CONFIG_XFRM
+      fi
+   fi
+   if [ "$CONFIG_XFRM" = "y" ]; then
+      source net/xfrm/Config.in
+   fi
    if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
       source net/khttpd/Config.in
    fi
Index: net/Makefile
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/Makefile,v
retrieving revision 1.1.1.22
retrieving revision 1.1.1.22.2.1
diff -u -r1.1.1.22 -r1.1.1.22.2.1
--- a/net/Makefile	14 Apr 2004 13:05:41 -0000	1.1.1.22
+++ b/net/Makefile	16 Apr 2004 13:16:19 -0000	1.1.1.22.2.1
@@ -7,28 +7,23 @@
 
 O_TARGET :=	network.o
 
-mod-subdirs :=	ipv4/netfilter ipv6/netfilter ipx irda bluetooth atm netlink sched core sctp
+mod-subdirs :=	ipv4/netfilter ipv6 ipx irda bluetooth atm netlink sched core sctp xfrm
 export-objs :=	netsyms.o
 
 subdir-y :=	core ethernet
-subdir-m :=	ipv4 # hum?
+subdir-m :=	ipv4 xfrm # hum?
 
 
 subdir-$(CONFIG_NET)		+= 802 sched netlink
 subdir-$(CONFIG_IPV6)		+= ipv6
-subdir-$(CONFIG_INET)		+= ipv4
+subdir-$(CONFIG_INET)		+= ipv4 xfrm
 subdir-$(CONFIG_NETFILTER)	+= ipv4/netfilter
 subdir-$(CONFIG_UNIX)		+= unix
 subdir-$(CONFIG_IP_SCTP)	+= sctp
 
-ifneq ($(CONFIG_IPV6),n)
-ifneq ($(CONFIG_IPV6),)
-subdir-$(CONFIG_NETFILTER)	+= ipv6/netfilter
-endif
-endif
-
 subdir-$(CONFIG_KHTTPD)		+= khttpd
 subdir-$(CONFIG_PACKET)		+= packet
+subdir-$(CONFIG_NET_KEY)	+= key
 subdir-$(CONFIG_NET_SCHED)	+= sched
 subdir-$(CONFIG_BRIDGE)		+= bridge
 subdir-$(CONFIG_IPX)		+= ipx
Index: net/netsyms.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/netsyms.c,v
retrieving revision 1.1.1.32
retrieving revision 1.1.1.32.2.1
diff -u -r1.1.1.32 -r1.1.1.32.2.1
--- a/net/netsyms.c	14 Apr 2004 13:05:41 -0000	1.1.1.32
+++ b/net/netsyms.c	16 Apr 2004 13:16:19 -0000	1.1.1.32.2.1
@@ -58,6 +58,12 @@
 #include <linux/inet.h>
 #include <linux/mroute.h>
 #include <linux/igmp.h>
+#if defined(CONFIG_INET_AH) || defined(CONFIG_INET_AH_MODULE) || defined(CONFIG_INET6_AH) || defined(CONFIG_INET6_AH_MODULE)
+#include <net/ah.h>
+#endif
+#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE)
+#include <net/esp.h>
+#endif
 
 extern struct net_proto_family inet_family_ops;
 
@@ -188,6 +194,7 @@
 #endif
 #ifdef CONFIG_SYSCTL
 EXPORT_SYMBOL(neigh_sysctl_register);
+EXPORT_SYMBOL(neigh_sysctl_unregister);
 #endif
 EXPORT_SYMBOL(pneigh_lookup);
 EXPORT_SYMBOL(pneigh_enqueue);
@@ -284,6 +291,7 @@
 EXPORT_SYMBOL(inetdev_by_index);
 EXPORT_SYMBOL(in_dev_finish_destroy);
 EXPORT_SYMBOL(ip_defrag);
+EXPORT_SYMBOL(inet_peer_idlock);
 
 /* Route manipulation */
 EXPORT_SYMBOL(ip_rt_ioctl);
@@ -299,6 +307,14 @@
 EXPORT_SYMBOL(dlci_ioctl_hook);
 #endif
 
+#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE)
+EXPORT_SYMBOL_GPL(skb_cow_data);
+EXPORT_SYMBOL_GPL(pskb_put);
+EXPORT_SYMBOL_GPL(skb_to_sgvec);
+#endif
+
+EXPORT_SYMBOL(flow_cache_lookup);
+EXPORT_SYMBOL(flow_cache_genid);
 
 #if defined (CONFIG_IPV6_MODULE) || defined (CONFIG_KHTTPD) || defined (CONFIG_KHTTPD_MODULE) || defined (CONFIG_IP_SCTP_MODULE)
 /* inet functions common to v4 and v6 */
@@ -412,8 +428,9 @@
 EXPORT_SYMBOL(secure_ipv6_id);
 #endif
 
-#endif
+EXPORT_SYMBOL(ip_generic_getfrag);
 
+#endif
 EXPORT_SYMBOL(tcp_read_sock);
 
 #ifdef CONFIG_IP_SCTP_MODULE
@@ -490,6 +507,7 @@
 EXPORT_SYMBOL(loopback_dev);
 EXPORT_SYMBOL(register_netdevice);
 EXPORT_SYMBOL(unregister_netdevice);
+EXPORT_SYMBOL(synchronize_net);
 EXPORT_SYMBOL(netdev_state_change);
 EXPORT_SYMBOL(dev_new_index);
 EXPORT_SYMBOL(dev_get_by_flags);
Index: net/atm/clip.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/atm/clip.c,v
retrieving revision 1.1.1.17
retrieving revision 1.1.1.17.2.1
diff -u -r1.1.1.17 -r1.1.1.17.2.1
--- a/net/atm/clip.c	18 Feb 2004 13:36:32 -0000	1.1.1.17
+++ b/net/atm/clip.c	16 Apr 2004 13:16:19 -0000	1.1.1.17.2.1
@@ -521,6 +521,7 @@
 	struct atmarp_entry *entry;
 	int error;
 	struct clip_vcc *clip_vcc;
+	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip, .tos = 1 } } };
 	struct rtable *rt;
 
 	if (vcc->push != clip_push) {
@@ -537,7 +538,7 @@
 		unlink_clip_vcc(clip_vcc);
 		return 0;
 	}
-	error = ip_route_output(&rt,ip,0,1,0);
+	error = ip_route_output_key(&rt,&fl);
 	if (error) return error;
 	neigh = __neigh_lookup(&clip_tbl,&ip,rt->u.dst.dev,1);
 	ip_rt_put(rt);
Index: net/core/Makefile
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/core/Makefile,v
retrieving revision 1.1.1.18
retrieving revision 1.1.1.18.2.1
diff -u -r1.1.1.18 -r1.1.1.18.2.1
--- a/net/core/Makefile	28 Nov 2003 18:26:21 -0000	1.1.1.18
+++ b/net/core/Makefile	16 Apr 2004 13:16:19 -0000	1.1.1.18.2.1
@@ -21,8 +21,8 @@
 
 obj-$(CONFIG_FILTER) += filter.o
 
-obj-$(CONFIG_NET) +=	dev.o ethtool.o dev_mcast.o dst.o neighbour.o \
-			rtnetlink.o utils.o
+obj-$(CONFIG_NET) +=	flow.o dev.o ethtool.o dev_mcast.o dst.o \
+			neighbour.o rtnetlink.o utils.o
 
 obj-$(CONFIG_NETFILTER) += netfilter.o
 obj-$(CONFIG_NET_DIVERT) += dv.o
Index: net/core/dev.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/core/dev.c,v
retrieving revision 1.1.1.30
retrieving revision 1.1.1.30.2.1
diff -u -r1.1.1.30 -r1.1.1.30.2.1
--- a/net/core/dev.c	14 Apr 2004 13:05:41 -0000	1.1.1.30
+++ b/net/core/dev.c	16 Apr 2004 13:16:19 -0000	1.1.1.30.2.1
@@ -912,6 +912,13 @@
 	return notifier_chain_register(&netdev_chain, nb);
 }
 
+/* Synchronize with packet receive processing (write-lock BR_NETPROTO_LOCK, then drop it). */
+void synchronize_net(void) 
+{
+	br_write_lock_bh(BR_NETPROTO_LOCK);
+	br_write_unlock_bh(BR_NETPROTO_LOCK);	/* nothing in between: the lock/unlock pair is the point */
+}
+
 /**
  *	unregister_netdevice_notifier - unregister a network notifier block
  *	@nb: notifier
Index: net/core/dst.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/core/dst.c,v
retrieving revision 1.1.1.15
retrieving revision 1.1.1.15.2.1
diff -u -r1.1.1.15 -r1.1.1.15.2.1
--- a/net/core/dst.c	25 Aug 2003 11:44:44 -0000	1.1.1.15
+++ b/net/core/dst.c	16 Apr 2004 13:16:20 -0000	1.1.1.15.2.1
@@ -36,11 +36,11 @@
 static unsigned long dst_gc_timer_expires;
 static unsigned long dst_gc_timer_inc = DST_GC_MAX;
 static void dst_run_gc(unsigned long);
+static void ___dst_free(struct dst_entry * dst);
 
 static struct timer_list dst_gc_timer =
 	{ data: DST_GC_MIN, function: dst_run_gc };
 
-
 static void dst_run_gc(unsigned long dummy)
 {
 	int    delayed = 0;
@@ -61,7 +61,25 @@
 			continue;
 		}
 		*dstp = dst->next;
-		dst_destroy(dst);
+
+		dst = dst_destroy(dst);
+		if (dst) {
+			/* NOHASH and still referenced. Unless it is already
+			 * on gc list, invalidate it and add to gc list.
+			 *
+			 * Note: this is temporary. Actually, NOHASH dst's
+			 * must be obsoleted when parent is obsoleted.
+			 * But we do not have state "obsoleted, but
+			 * referenced by parent", so it is right.
+			 */
+			if (dst->obsolete > 1)
+				continue;
+
+			___dst_free(dst);
+			dst->next = *dstp;
+			*dstp = dst;
+			dstp = &dst->next;
+		}
 	}
 	if (!dst_garbage_list) {
 		dst_gc_timer_inc = DST_GC_MAX;
@@ -108,6 +126,7 @@
 	atomic_set(&dst->__refcnt, 0);
 	dst->ops = ops;
 	dst->lastuse = jiffies;
+	dst->path = dst;
 	dst->input = dst_discard;
 	dst->output = dst_blackhole;
 #if RT_CACHE_DEBUG >= 2 
@@ -117,10 +136,8 @@
 	return dst;
 }
 
-void __dst_free(struct dst_entry * dst)
+static void ___dst_free(struct dst_entry * dst)
 {
-	spin_lock_bh(&dst_lock);
-
 	/* The first case (dev==NULL) is required, when
 	   protocol module is unloaded.
 	 */
@@ -129,6 +146,12 @@
 		dst->output = dst_blackhole;
 	}
 	dst->obsolete = 2;
+}
+
+void __dst_free(struct dst_entry * dst)
+{
+	spin_lock_bh(&dst_lock);
+	___dst_free(dst);
 	dst->next = dst_garbage_list;
 	dst_garbage_list = dst;
 	if (dst_gc_timer_inc > DST_GC_INC) {
@@ -136,14 +159,19 @@
 		dst_gc_timer_expires = DST_GC_MIN;
 		mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires);
 	}
-
 	spin_unlock_bh(&dst_lock);
 }
 
-void dst_destroy(struct dst_entry * dst)
+struct dst_entry *dst_destroy(struct dst_entry * dst)
 {
-	struct neighbour *neigh = dst->neighbour;
-	struct hh_cache *hh = dst->hh;
+	struct dst_entry *child;
+	struct neighbour *neigh;
+	struct hh_cache *hh;
+
+again:
+	neigh = dst->neighbour;
+	hh = dst->hh;
+	child = dst->child;
 
 	dst->hh = NULL;
 	if (hh && atomic_dec_and_test(&hh->hh_refcnt))
@@ -164,6 +192,21 @@
 	atomic_dec(&dst_total);
 #endif
 	kmem_cache_free(dst->ops->kmem_cachep, dst);
+
+	dst = child;
+	if (dst) {
+		if (atomic_dec_and_test(&dst->__refcnt)) {
+			/* We were real parent of this dst, so kill child. */
+			if (dst->flags&DST_NOHASH)
+				goto again;
+		} else {
+			/* Child is still referenced, return it for freeing. */
+			if (dst->flags&DST_NOHASH)
+				return dst;
+			/* Child is still in his hash table */
+		}
+	}
+	return NULL;
 }
 
 static int dst_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
Index: net/core/flow.c
===================================================================
RCS file: net/core/flow.c
diff -N net/core/flow.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ b/net/core/flow.c	16 Apr 2004 13:16:20 -0000	1.5.18.1
@@ -0,0 +1,322 @@
+/* flow.c: Generic flow cache.
+ *
+ * Copyright (C) 2003 Alexey N. Kuznetsov (kuznet@ms2.inr.ac.ru)
+ * Copyright (C) 2003 David S. Miller (davem@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/list.h>
+#include <linux/jhash.h>
+#include <linux/interrupt.h>
+#include <linux/mm.h>
+#include <linux/random.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/completion.h>
+#include <net/flow.h>
+#include <asm/atomic.h>
+#include <asm/semaphore.h>
+
+struct flow_cache_entry {
+	struct flow_cache_entry	*next;		/* next entry in the same hash chain */
+	u16			family;		/* compared against the lookup's family */
+	u8			dir;		/* compared against the lookup's dir */
+	struct flowi		key;		/* copy of the flow key, matched by flow_key_compare() */
+	u32			genid;		/* flow_cache_genid value when object was resolved */
+	void			*object;	/* resolver result, or NULL if nothing is cached */
+	atomic_t		*object_ref;	/* object's refcount; only touched while object != NULL */
+};
+
+atomic_t flow_cache_genid = ATOMIC_INIT(0);
+
+static u32 flow_hash_shift;
+#define flow_hash_size	(1 << flow_hash_shift)
+static struct flow_cache_entry **flow_table;
+static kmem_cache_t *flow_cachep;
+
+static int flow_lwm, flow_hwm;
+
+struct flow_percpu_info {
+	int hash_rnd_recalc;
+	u32 hash_rnd;
+	int count;
+} ____cacheline_aligned;
+static struct flow_percpu_info flow_hash_info[NR_CPUS];
+
+#define flow_hash_rnd_recalc(cpu)	(flow_hash_info[cpu].hash_rnd_recalc)
+#define flow_hash_rnd(cpu)		(flow_hash_info[cpu].hash_rnd)
+#define flow_count(cpu)			(flow_hash_info[cpu].count)
+
+static struct timer_list flow_hash_rnd_timer;
+
+#define FLOW_HASH_RND_PERIOD	(10 * 60 * HZ)
+
+struct flow_flush_info {
+	atomic_t cpuleft;
+	struct completion completion;
+};
+static struct tasklet_struct flow_flush_tasklets[NR_CPUS];
+static DECLARE_MUTEX(flow_flush_sem);
+
+static void flow_cache_new_hashrnd(unsigned long arg)
+{
+	int i;
+
+	for (i = 0; i < NR_CPUS; i++)
+		flow_hash_rnd_recalc(i) = 1;
+	/* Timer handler: flag every CPU for a new hash seed, then re-arm the timer. */
+	flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
+	add_timer(&flow_hash_rnd_timer);
+}
+
+static void __flow_cache_shrink(int cpu, int shrink_to)
+{
+	struct flow_cache_entry *fle, **flp;
+	int i;
+
+	for (i = 0; i < flow_hash_size; i++) {
+		int k = 0;
+		/* Skip over the first shrink_to entries of this CPU's chain i ... */
+		flp = &flow_table[cpu*flow_hash_size+i];
+		while ((fle = *flp) != NULL && k < shrink_to) {
+			k++;
+			flp = &fle->next;
+		}
+		while ((fle = *flp) != NULL) {	/* ... then unlink and free the rest */
+			*flp = fle->next;
+			if (fle->object)
+				atomic_dec(fle->object_ref);	/* drop the entry's reference */
+			kmem_cache_free(flow_cachep, fle);
+			flow_count(cpu)--;
+		}
+	}
+}
+
+static void flow_cache_shrink(int cpu)
+{
+	/* Per-chain budget: the low-water mark divided over all buckets. */
+	int per_chain = flow_lwm / flow_hash_size;
+	__flow_cache_shrink(cpu, per_chain);
+}
+
+static void flow_new_hash_rnd(int cpu)
+{
+	get_random_bytes(&flow_hash_rnd(cpu), sizeof(u32));
+	flow_hash_rnd_recalc(cpu) = 0;
+	/* Existing entries were placed with the old seed; shrinking to 0 frees them all. */
+	__flow_cache_shrink(cpu, 0);
+}
+
+static u32 flow_hash_code(struct flowi *key, int cpu)
+{
+	/* Hash the key as an array of u32 words with this CPU's seed, then mask to a bucket. */
+	u32 nwords = sizeof(*key) / sizeof(u32);
+	u32 h = jhash2((u32 *) key, nwords, flow_hash_rnd(cpu));
+	return h & (flow_hash_size - 1);
+}
+
+#if (BITS_PER_LONG == 64)
+typedef u64 flow_compare_t;
+#else
+typedef u32 flow_compare_t;
+#endif
+
+extern void flowi_is_missized(void);
+
+/* I hear what you're saying, use memcmp.  But memcmp cannot make
+ * important assumptions that we can here, such as alignment and
+ * constant size.
+ */
+static int flow_key_compare(struct flowi *key1, struct flowi *key2)
+{
+	const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t);
+	flow_compare_t *lhs = (flow_compare_t *) key1;
+	flow_compare_t *rhs = (flow_compare_t *) key2;
+	int idx = 0;
+
+	/* NOTE(review): presumably left undefined so that a struct flowi whose
+	 * size is not a multiple of the compare word fails at link time. */
+	if (sizeof(struct flowi) % sizeof(flow_compare_t))
+		flowi_is_missized();
+
+	/* Word-wise compare: 0 when the keys are equal, 1 at the first difference. */
+	do {
+		if (lhs[idx] != rhs[idx])
+			return 1;
+	} while (++idx < n_elem);
+
+	return 0;
+}
+
+void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
+			flow_resolve_t resolver)
+{
+	struct flow_cache_entry *fle, **head;
+	unsigned int hash;
+	int cpu;
+	/* Per-CPU cache lookup; on a miss or stale entry, calls resolver and caches its result. */
+	local_bh_disable();	/* stays disabled until just before each return */
+	cpu = smp_processor_id();
+	if (flow_hash_rnd_recalc(cpu))
+		flow_new_hash_rnd(cpu);
+	hash = flow_hash_code(key, cpu);
+	/* Each CPU indexes its own run of flow_hash_size buckets inside flow_table. */
+	head = &flow_table[(cpu << flow_hash_shift) + hash];
+	for (fle = *head; fle; fle = fle->next) {
+		if (fle->family == family &&
+		    fle->dir == dir &&
+		    flow_key_compare(key, &fle->key) == 0) {
+			if (fle->genid == atomic_read(&flow_cache_genid)) {
+				void *ret = fle->object;
+
+				if (ret)
+					atomic_inc(fle->object_ref);	/* extra reference for the returned pointer */
+				local_bh_enable();
+
+				return ret;
+			}
+			break;	/* matching but stale genid: re-resolve into this entry below */
+		}
+	}
+	/* No matching entry: allocate one, shrinking first if this CPU is above flow_hwm. */
+	if (!fle) {
+		if (flow_count(cpu) > flow_hwm)
+			flow_cache_shrink(cpu);
+
+		fle = kmem_cache_alloc(flow_cachep, SLAB_ATOMIC);
+		if (fle) {
+			fle->next = *head;
+			*head = fle;
+			fle->family = family;
+			fle->dir = dir;
+			memcpy(&fle->key, key, sizeof(*key));
+			fle->object = NULL;
+			flow_count(cpu)++;
+		}
+	}
+	/* Resolve in every remaining case; fle is NULL here only if the allocation failed. */
+	{
+		void *obj;
+		atomic_t *obj_ref;
+
+		resolver(key, family, dir, &obj, &obj_ref);
+
+		if (fle) {
+			fle->genid = atomic_read(&flow_cache_genid);
+
+			if (fle->object)
+				atomic_dec(fle->object_ref);	/* release the previously cached object */
+
+			fle->object = obj;
+			fle->object_ref = obj_ref;
+			if (obj)
+				atomic_inc(fle->object_ref);	/* reference held by the cache entry */
+		}
+		local_bh_enable();
+		/* NOTE(review): presumably resolver already took the caller's reference on obj -- confirm. */
+		return obj;
+	}
+}
+
+static void flow_cache_flush_tasklet(unsigned long data)
+{
+	struct flow_flush_info *info = (void *)data;
+	int i;
+	int cpu;
+	/* Walk the current CPU's buckets and release objects cached under an older genid. */
+	cpu = smp_processor_id();
+	for (i = 0; i < flow_hash_size; i++) {
+		struct flow_cache_entry *fle;
+
+		fle = flow_table[(cpu << flow_hash_shift) + i];
+		for (; fle; fle = fle->next) {
+			unsigned genid = atomic_read(&flow_cache_genid);
+
+			if (!fle->object || fle->genid == genid)
+				continue;
+			/* Stale: drop the cached object and its reference, keep the entry itself. */
+			fle->object = NULL;
+			atomic_dec(fle->object_ref);
+		}
+	}
+	/* Whichever CPU brings cpuleft to zero completes the waiter in flow_cache_flush(). */
+	if (atomic_dec_and_test(&info->cpuleft))
+		complete(&info->completion);
+}
+
+static void flow_cache_flush_per_cpu(void *data)
+{
+	struct flow_flush_info *info = data;
+	struct tasklet_struct *t = &flow_flush_tasklets[smp_processor_id()];
+
+	/* Hand the flush to the current CPU's own tasklet slot; info is
+	 * passed through unchanged as the tasklet argument.
+	 */
+	tasklet_init(t, flow_cache_flush_tasklet, (unsigned long)info);
+	tasklet_schedule(t);
+}
+
+void flow_cache_flush(void)
+{
+	struct flow_flush_info info;
+
+	atomic_set(&info.cpuleft, smp_num_cpus);
+	init_completion(&info.completion);
+	/* Serialize flushes: each run re-initializes the shared flow_flush_tasklets[] slots. */
+	down(&flow_flush_sem);
+	/* Queue the flush tasklet via smp_call_function(), then directly on this CPU. */
+	local_bh_disable();
+	smp_call_function(flow_cache_flush_per_cpu, &info, 1, 0);
+	flow_cache_flush_per_cpu(&info);
+	local_bh_enable();
+	/* info lives on this stack, so wait until the last tasklet has signalled completion. */
+	wait_for_completion(&info.completion);
+
+	up(&flow_flush_sem);
+}
+
+static int __init flow_cache_init(void)
+{
+	unsigned long order;
+	int i;
+	/* Init-time setup of the flow cache: slab, water marks, reseed timer, hash table. */
+	flow_cachep = kmem_cache_create("flow_cache",
+					sizeof(struct flow_cache_entry),
+					0, SLAB_HWCACHE_ALIGN,
+					NULL, NULL);
+
+	if (!flow_cachep)
+		panic("NET: failed to allocate flow cache slab\n");
+	/* 1 << 10 buckets per CPU; low/high water marks are 2x and 4x the bucket count. */
+	flow_hash_shift = 10;
+	flow_lwm = 2 * flow_hash_size;
+	flow_hwm = 4 * flow_hash_size;
+	/* Make every CPU draw a hash seed on its first flow_cache_lookup(). */
+	for (i = 0; i < NR_CPUS; i++)
+		flow_hash_rnd_recalc(i) = 1;
+	/* Periodic re-seed; flow_cache_new_hashrnd() re-arms the timer itself. */
+	init_timer(&flow_hash_rnd_timer);
+	flow_hash_rnd_timer.function = flow_cache_new_hashrnd;
+	flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
+	add_timer(&flow_hash_rnd_timer);
+	/* Smallest page order holding flow_hash_size bucket heads for each of NR_CPUS. */
+	for (order = 0;
+	     (PAGE_SIZE << order) <
+		     (NR_CPUS*sizeof(struct flow_cache_entry *)*flow_hash_size);
+	     order++)
+		/* NOTHING */;
+
+	flow_table = (struct flow_cache_entry **)
+		__get_free_pages(GFP_ATOMIC, order);
+
+	if (!flow_table)
+		panic("Failed to allocate flow cache hash table\n");
+
+	memset(flow_table, 0, PAGE_SIZE << order);
+
+	return 0;
+}
+
+module_init(flow_cache_init);
Index: net/core/neighbour.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/core/neighbour.c,v
retrieving revision 1.1.1.19
retrieving revision 1.1.1.19.2.1
diff -u -r1.1.1.19 -r1.1.1.19.2.1
--- a/net/core/neighbour.c	18 Feb 2004 13:36:32 -0000	1.1.1.19
+++ b/net/core/neighbour.c	16 Apr 2004 13:16:20 -0000	1.1.1.19.2.1
@@ -638,7 +638,9 @@
 static __inline__ int neigh_max_probes(struct neighbour *n)
 {
 	struct neigh_parms *p = n->parms;
-	return p->ucast_probes + p->app_probes + p->mcast_probes;
+	return (n->nud_state & NUD_PROBE ?
+		p->ucast_probes :
+		p->ucast_probes + p->app_probes + p->mcast_probes);
 }
 
 
@@ -1117,9 +1119,6 @@
 		if (*p == parms) {
 			*p = parms->next;
 			write_unlock_bh(&tbl->lock);
-#ifdef CONFIG_SYSCTL
-			neigh_sysctl_unregister(parms);
-#endif
 			kfree(parms);
 			return;
 		}
@@ -1184,9 +1183,6 @@
 		}
 	}
 	write_unlock(&neigh_tbl_lock);
-#ifdef CONFIG_SYSCTL
-	neigh_sysctl_unregister(&tbl->parms);
-#endif
 	return 0;
 }
 
Index: net/core/netfilter.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/core/netfilter.c,v
retrieving revision 1.1.1.20
retrieving revision 1.1.1.20.2.1
diff -u -r1.1.1.20 -r1.1.1.20.2.1
--- a/net/core/netfilter.c	25 Aug 2003 11:44:44 -0000	1.1.1.20
+++ b/net/core/netfilter.c	16 Apr 2004 13:16:20 -0000	1.1.1.20.2.1
@@ -563,7 +563,7 @@
 {
 	struct iphdr *iph = (*pskb)->nh.iph;
 	struct rtable *rt;
-	struct rt_key key = {};
+	struct flowi fl = {};
 	struct dst_entry *odst;
 	unsigned int hh_len;
 
@@ -571,14 +571,15 @@
 	 * packets with foreign saddr to be appear on the NF_IP_LOCAL_OUT hook.
 	 */
 	if (inet_addr_type(iph->saddr) == RTN_LOCAL) {
-		key.dst = iph->daddr;
-		key.src = iph->saddr;
-		key.oif = (*pskb)->sk ? (*pskb)->sk->bound_dev_if : 0;
-		key.tos = RT_TOS(iph->tos);
+		fl.nl_u.ip4_u.daddr = iph->daddr;
+		fl.nl_u.ip4_u.saddr = iph->saddr;
+		fl.oif = (*pskb)->sk ? (*pskb)->sk->bound_dev_if : 0;
+		fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
 #ifdef CONFIG_IP_ROUTE_FWMARK
-		key.fwmark = (*pskb)->nfmark;
+		fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark;
 #endif
-		if (ip_route_output_key(&rt, &key) != 0)
+		fl.proto = iph->protocol;
+		if (ip_route_output_key(&rt, &fl) != 0)
 			return -1;
 
 		/* Drop old route. */
@@ -587,8 +588,8 @@
 	} else {
 		/* non-local src, find valid iif to satisfy
 		 * rp-filter when calling ip_route_input. */
-		key.dst = iph->saddr;
-		if (ip_route_output_key(&rt, &key) != 0)
+		fl.nl_u.ip4_u.daddr = iph->saddr;
+		if (ip_route_output_key(&rt, &fl) != 0)
 			return -1;
 
 		odst = (*pskb)->dst;
Index: net/core/rtnetlink.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/core/rtnetlink.c,v
retrieving revision 1.1.1.18
retrieving revision 1.1.1.18.2.1
diff -u -r1.1.1.18 -r1.1.1.18.2.1
--- a/net/core/rtnetlink.c	25 Aug 2003 11:44:44 -0000	1.1.1.18
+++ b/net/core/rtnetlink.c	16 Apr 2004 13:16:20 -0000	1.1.1.18.2.1
@@ -128,7 +128,7 @@
 	return err;
 }
 
-int rtnetlink_put_metrics(struct sk_buff *skb, unsigned *metrics)
+int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
 {
 	struct rtattr *mx = (struct rtattr*)skb->tail;
 	int i;
@@ -136,7 +136,7 @@
 	RTA_PUT(skb, RTA_METRICS, 0, NULL);
 	for (i=0; i<RTAX_MAX; i++) {
 		if (metrics[i])
-			RTA_PUT(skb, i+1, sizeof(unsigned), metrics+i);
+			RTA_PUT(skb, i+1, sizeof(u32), metrics+i);
 	}
 	mx->rta_len = skb->tail - (u8*)mx;
 	if (mx->rta_len == RTA_LENGTH(0))
Index: net/core/skbuff.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/core/skbuff.c,v
retrieving revision 1.1.1.22
retrieving revision 1.1.1.22.2.1
diff -u -r1.1.1.22 -r1.1.1.22.2.1
--- a/net/core/skbuff.c	25 Aug 2003 11:44:44 -0000	1.1.1.22
+++ b/net/core/skbuff.c	16 Apr 2004 13:16:20 -0000	1.1.1.22.2.1
@@ -57,6 +57,7 @@
 #include <net/dst.h>
 #include <net/sock.h>
 #include <net/checksum.h>
+#include <net/xfrm.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -201,6 +202,7 @@
 
 	/* Set up other state */
 	skb->len = 0;
+	skb->local_df = 0;
 	skb->cloned = 0;
 	skb->data_len = 0;
 
@@ -233,6 +235,7 @@
 	skb->dev = NULL;
 	skb->real_dev = NULL;
 	skb->dst = NULL;
+	skb->sp = NULL;
 	memset(skb->cb, 0, sizeof(skb->cb));
 	skb->pkt_type = PACKET_HOST;	/* Default type */
 	skb->ip_summed = 0;
@@ -317,6 +320,9 @@
 	}
 
 	dst_release(skb->dst);
+#ifdef CONFIG_XFRM
+	secpath_put(skb->sp);
+#endif
 	if(skb->destructor) {
 		if (in_irq()) {
 			printk(KERN_WARNING "Warning: kfree_skb on hard IRQ %p\n",
@@ -369,10 +375,15 @@
 	C(mac);
 	C(dst);
 	dst_clone(n->dst);
+	C(sp);
+#ifdef CONFIG_INET
+	secpath_get(n->sp);
+#endif
 	memcpy(n->cb, skb->cb, sizeof(skb->cb));
 	C(len);
 	C(data_len);
 	C(csum);
+	C(local_df);
 	n->cloned = 1;
 	C(pkt_type);
 	C(ip_summed);
@@ -423,11 +434,15 @@
 	new->priority=old->priority;
 	new->protocol=old->protocol;
 	new->dst=dst_clone(old->dst);
+#ifdef CONFIG_INET
+	new->sp=secpath_get(old->sp);
+#endif
 	new->h.raw=old->h.raw+offset;
 	new->nh.raw=old->nh.raw+offset;
 	new->mac.raw=old->mac.raw+offset;
 	memcpy(new->cb, old->cb, sizeof(old->cb));
 	atomic_set(&new->users, 1);
+	new->local_df=old->local_df;
 	new->pkt_type=old->pkt_type;
 	new->stamp=old->stamp;
 	new->destructor = NULL;
Index: net/decnet/dn_nsp_out.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/decnet/dn_nsp_out.c,v
retrieving revision 1.1.1.15
retrieving revision 1.1.1.15.2.1
diff -u -r1.1.1.15 -r1.1.1.15.2.1
--- a/net/decnet/dn_nsp_out.c	22 Jan 2001 21:32:10 -0000	1.1.1.15
+++ b/net/decnet/dn_nsp_out.c	16 Apr 2004 13:16:20 -0000	1.1.1.15.2.1
@@ -593,7 +593,7 @@
 	 * associations.
 	 */
 	skb->dst = dst_clone(dst);
-	skb->dst->output(skb);
+	dst_output(skb);
 }
 
 
Index: net/decnet/dn_route.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/decnet/dn_route.c,v
retrieving revision 1.1.1.21
retrieving revision 1.1.1.21.2.1
diff -u -r1.1.1.21 -r1.1.1.21.2.1
--- a/net/decnet/dn_route.c	28 Nov 2002 23:53:15 -0000	1.1.1.21
+++ b/net/decnet/dn_route.c	16 Apr 2004 13:16:20 -0000	1.1.1.21.2.1
@@ -100,7 +100,6 @@
 
 static int dn_dst_gc(void);
 static struct dst_entry *dn_dst_check(struct dst_entry *, __u32);
-static struct dst_entry *dn_dst_reroute(struct dst_entry *, struct sk_buff *skb);
 static struct dst_entry *dn_dst_negative_advice(struct dst_entry *);
 static void dn_dst_link_failure(struct sk_buff *);
 static int dn_route_input(struct sk_buff *);
@@ -119,7 +118,6 @@
 	gc_thresh:		128,
 	gc:			dn_dst_gc,
 	check:			dn_dst_check,
-	reroute:		dn_dst_reroute,
 	negative_advice:	dn_dst_negative_advice,
 	link_failure:		dn_dst_link_failure,
 	entry_size:		sizeof(struct dn_route),
@@ -202,12 +200,6 @@
 	return NULL;
 }
 
-static struct dst_entry *dn_dst_reroute(struct dst_entry *dst,
-					struct sk_buff *skb)
-{
-	return NULL;
-}
-
 /*
  * This is called through sendmsg() when you specify MSG_TRYHARD
  * and there is already a route in cache.
@@ -396,7 +388,7 @@
 	int err;
 
 	if ((err = dn_route_input(skb)) == 0)
-		return skb->dst->input(skb);
+		return dst_input(skb);
 
 	if (decnet_debug_level & 4) {
 		char *devname = skb->dev ? skb->dev->name : "???";
@@ -1049,10 +1041,12 @@
 	RTA_PUT(skb, RTA_SRC, 2, &rt->rt_saddr);
 	if (rt->u.dst.dev)
 		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex);
-	if (rt->u.dst.window)
-		RTA_PUT(skb, RTAX_WINDOW, sizeof(unsigned), &rt->u.dst.window);
-	if (rt->u.dst.rtt)
-		RTA_PUT(skb, RTAX_RTT, sizeof(unsigned), &rt->u.dst.rtt);
+	if (dst_metric(&rt->u.dst, RTAX_WINDOW))	/* metrics[] slot for RTAX_x is x - 1 */
+		RTA_PUT(skb, RTAX_WINDOW, sizeof(unsigned),
+			&rt->u.dst.metrics[RTAX_WINDOW - 1]);
+	if (dst_metric(&rt->u.dst, RTAX_RTT))
+		RTA_PUT(skb, RTAX_RTT, sizeof(unsigned),
+			&rt->u.dst.metrics[RTAX_RTT - 1]);	/* was [RTAX_RTT]: off by one slot */
 
 	nlh->nlmsg_len = skb->tail - b;
 	return skb->len;
@@ -1208,7 +1202,7 @@
 					dn_addr2asc(dn_ntohs(rt->rt_saddr), buf2),
 					atomic_read(&rt->u.dst.__refcnt),
 					rt->u.dst.__use,
-					(int)rt->u.dst.rtt
+					(int) dst_metric(&rt->u.dst, RTAX_RTT)
 					);
 
 
Index: net/ipv4/Config.in
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/Config.in,v
retrieving revision 1.1.1.17
retrieving revision 1.1.1.17.2.1
diff -u -r1.1.1.17 -r1.1.1.17.2.1
--- a/net/ipv4/Config.in	28 Nov 2003 18:26:21 -0000	1.1.1.17
+++ b/net/ipv4/Config.in	16 Apr 2004 13:16:20 -0000	1.1.1.17.2.1
@@ -40,6 +40,9 @@
 fi
 bool '  IP: TCP Explicit Congestion Notification support' CONFIG_INET_ECN
 bool '  IP: TCP syncookie support (disabled per default)' CONFIG_SYN_COOKIES
+tristate '  IP: AH transformation' CONFIG_INET_AH
+tristate '  IP: ESP transformation' CONFIG_INET_ESP
+tristate '  IP: IPComp transformation' CONFIG_INET_IPCOMP
 if [ "$CONFIG_NETFILTER" != "n" ]; then
    source net/ipv4/netfilter/Config.in
 fi
Index: net/ipv4/Makefile
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/Makefile,v
retrieving revision 1.1.1.18
retrieving revision 1.1.1.18.2.1
diff -u -r1.1.1.18 -r1.1.1.18.2.1
--- a/net/ipv4/Makefile	21 Dec 2001 17:42:05 -0000	1.1.1.18
+++ b/net/ipv4/Makefile	16 Apr 2004 13:16:21 -0000	1.1.1.18.2.1
@@ -24,6 +24,11 @@
 obj-$(CONFIG_NET_IPIP) += ipip.o
 obj-$(CONFIG_NET_IPGRE) += ip_gre.o
 obj-$(CONFIG_SYN_COOKIES) += syncookies.o
+obj-$(CONFIG_INET_AH) += ah4.o
+obj-$(CONFIG_INET_ESP) += esp4.o
+obj-$(CONFIG_INET_IPCOMP) += ipcomp.o
 obj-$(CONFIG_IP_PNP) += ipconfig.o
 
+obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o xfrm4_tunnel.o
+
 include $(TOPDIR)/Rules.make
Index: net/ipv4/af_inet.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/af_inet.c,v
retrieving revision 1.1.1.24
retrieving revision 1.1.1.24.2.1
diff -u -r1.1.1.24 -r1.1.1.24.2.1
--- a/net/ipv4/af_inet.c	28 Nov 2003 18:26:21 -0000	1.1.1.24
+++ b/net/ipv4/af_inet.c	16 Apr 2004 13:16:21 -0000	1.1.1.24.2.1
@@ -89,6 +89,7 @@
 
 #include <linux/smp_lock.h>
 #include <linux/inet.h>
+#include <linux/igmp.h>
 #include <linux/netdevice.h>
 #include <linux/brlock.h>
 #include <net/ip.h>
@@ -103,6 +104,7 @@
 #include <net/icmp.h>
 #include <net/ipip.h>
 #include <net/inet_common.h>
+#include <net/xfrm.h>
 #ifdef CONFIG_IP_MROUTE
 #include <linux/mroute.h>
 #endif
@@ -213,6 +215,8 @@
 
 	sock_orphan(sk);
 
+	xfrm_sk_free_policy(sk);
+
 #ifdef INET_REFCNT_DEBUG
 	if (atomic_read(&sk->refcnt) != 1) {
 		printk(KERN_DEBUG "Destruction inet %p delayed, c=%d\n", sk, atomic_read(&sk->refcnt));
@@ -386,7 +390,7 @@
 
 	sk->backlog_rcv = sk->prot->backlog_rcv;
 
-	sk->protinfo.af_inet.ttl	= sysctl_ip_default_ttl;
+	sk->protinfo.af_inet.uc_ttl	= -1;
 
 	sk->protinfo.af_inet.mc_loop	= 1;
 	sk->protinfo.af_inet.mc_ttl	= 1;
@@ -698,6 +702,27 @@
 	return err;
 }
 
+#ifdef CONFIG_IP_MULTICAST
+static struct inet_protocol igmp_protocol = {	/* added by inet_init() as IPPROTO_IGMP */
+	.handler =	igmp_rcv,
+};
+#endif
+
+static struct inet_protocol tcp_protocol = {	/* added by inet_init() as IPPROTO_TCP */
+	.handler =	tcp_v4_rcv,
+	.err_handler =	tcp_v4_err,
+	.no_policy =	1,	/* NOTE(review): presumably the handler does its own xfrm policy check -- confirm */
+};
+
+static struct inet_protocol udp_protocol = {	/* added by inet_init() as IPPROTO_UDP */
+	.handler =	udp_rcv,
+	.err_handler =	udp_err,
+	.no_policy =	1,
+};
+
+static struct inet_protocol icmp_protocol = {	/* added by inet_init() as IPPROTO_ICMP */
+	.handler =	icmp_rcv,
+};
 
 /*
  *	This does both peername and sockname.
@@ -724,6 +749,7 @@
 		sin->sin_port = sk->sport;
 		sin->sin_addr.s_addr = addr;
 	}
+	memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
 	*uaddr_len = sizeof(*sin);
 	return(0);
 }
@@ -757,6 +783,21 @@
 	return sk->prot->sendmsg(sk, msg, size);
 }
 
+
+ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)	/* sendpage op: dispatch to sk->prot->sendpage when the protocol has one */
+{
+	struct sock *sk = sock->sk;
+
+	/* We may need to bind the socket. */
+	if (!sk->num && inet_autobind(sk))	/* no local port yet and autobind failed */
+		return -EAGAIN;
+
+	if (sk->prot->sendpage)
+		return sk->prot->sendpage(sk, page, offset, size, flags);
+	return sock_no_sendpage(sock, page, offset, size, flags);	/* fallback: the op that was installed before this change */
+}
+
+
 int inet_shutdown(struct socket *sock, int how)
 {
 	struct sock *sk = sock->sk;
@@ -981,7 +1022,7 @@
 	sendmsg:	inet_sendmsg,
 	recvmsg:	inet_recvmsg,
 	mmap:		sock_no_mmap,
-	sendpage:	sock_no_sendpage,
+	sendpage:	inet_sendpage,
 };
 
 struct net_proto_family inet_family_ops = {
@@ -1109,7 +1150,6 @@
 static int __init inet_init(void)
 {
 	struct sk_buff *dummy_skb;
-	struct inet_protocol *p;
 	struct inet_protosw *q;
 	struct list_head *r;
 
@@ -1127,16 +1167,19 @@
   	(void) sock_register(&inet_family_ops);
 
 	/*
-	 *	Add all the protocols. 
+	 *	Add all the base protocols.
 	 */
 
-	printk(KERN_INFO "IP Protocols: ");
-	for (p = inet_protocol_base; p != NULL;) {
-		struct inet_protocol *tmp = (struct inet_protocol *) p->next;
-		inet_add_protocol(p);
-		printk("%s%s",p->name,tmp?", ":"\n");
-		p = tmp;
-	}
+	if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
+		printk(KERN_CRIT "inet_init: Cannot add ICMP protocol\n");
+	if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)
+		printk(KERN_CRIT "inet_init: Cannot add UDP protocol\n");
+	if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)
+		printk(KERN_CRIT "inet_init: Cannot add TCP protocol\n");
+#ifdef CONFIG_IP_MULTICAST
+	if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)
+		printk(KERN_CRIT "inet_init: Cannot add IGMP protocol\n");
+#endif
 
 	/* Register the socket-side information for inet_create. */
 	for(r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)
Index: net/ipv4/ah4.c
===================================================================
RCS file: net/ipv4/ah4.c
diff -N net/ipv4/ah4.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ b/net/ipv4/ah4.c	16 Apr 2004 13:16:21 -0000	1.6.2.1
@@ -0,0 +1,377 @@
+#include <linux/config.h>
+#include <linux/module.h>
+#include <net/inet_ecn.h>
+#include <net/ip.h>
+#include <net/xfrm.h>
+#include <net/ah.h>
+#include <linux/crypto.h>
+#include <linux/pfkeyv2.h>
+#include <net/icmp.h>
+#include <asm/scatterlist.h>
+
+
+/* Clear mutable options in place and find the final destination to
+ * substitute into the IP header for ICV calculation (stored in *daddr for
+ * LSRR/SSRR). Options are assumed pre-validated, so checks are minimal. */
+/* Returns 0 on success, -EINVAL when an option length is inconsistent. */
+static int ip_clear_mutable_options(struct iphdr *iph, u32 *daddr)
+{
+	unsigned char * optptr = (unsigned char*)(iph+1);	/* options start right after the fixed header */
+	int  l = iph->ihl*4 - sizeof(struct iphdr);	/* bytes of options remaining */
+	int  optlen;
+
+	while (l > 0) {
+		switch (*optptr) {	/* single-byte options carry no length field */
+		case IPOPT_END:
+			return 0;
+		case IPOPT_NOOP:
+			l--;
+			optptr++;
+			continue;
+		}
+		optlen = optptr[1];
+		if (optlen<2 || optlen>l)	/* must cover type+len bytes and fit in what is left */
+			return -EINVAL;
+		switch (*optptr) {
+		case IPOPT_SEC:
+		case 0x85:	/* Some "Extended Security" crap. */
+		case 0x86:	/* Another "Commercial Security" crap. */
+		case IPOPT_RA:
+		case 0x80|21:	/* RFC1770 */
+			break;	/* these options are left untouched */
+		case IPOPT_LSRR:
+		case IPOPT_SSRR:
+			if (optlen < 6)
+				return -EINVAL;
+			memcpy(daddr, optptr+optlen-4, 4);	/* last address in the route list */
+			/* Fall through */
+		default:
+			memset(optptr+2, 0, optlen-2);	/* zero the option body, keep type and length */
+		}
+		l -= optlen;
+		optptr += optlen;
+	}
+	return 0;
+}
+
+static int ah_output(struct sk_buff *skb)	/* .output hook of ah_type: insert an AH header; frees skb on error */
+{
+	int err;
+	struct dst_entry *dst = skb->dst;
+	struct xfrm_state *x  = dst->xfrm;
+	struct iphdr *iph, *top_iph;
+	struct ip_auth_hdr *ah;
+	struct ah_data *ahp;
+	union {
+		struct iphdr	iph;
+		char 		buf[60];	/* room for the largest IP header (ihl 15 * 4 bytes) */
+	} tmp_iph;
+
+	if (skb->ip_summed == CHECKSUM_HW && skb_checksum_help(skb) == NULL) {
+		err = -EINVAL;
+		goto error_nolock;
+	}
+
+	spin_lock_bh(&x->lock);	/* held until the replay and lifetime counters are updated */
+	err = xfrm_check_output(x, skb, AF_INET);
+	if (err)
+		goto error;
+
+	iph = skb->nh.iph;
+	if (x->props.mode) {	/* non-zero mode: build a new outer IP header in front (nexthdr = IPIP) */
+		top_iph = (struct iphdr*)skb_push(skb, x->props.header_len);
+		top_iph->ihl = 5;
+		top_iph->version = 4;
+		top_iph->tos = 0;	/* tos, frag_off, ttl and check stay zero while the ICV is computed */
+		top_iph->tot_len = htons(skb->len);
+		if (!(iph->frag_off&htons(IP_DF))) {
+#ifdef NETIF_F_TSO
+			__ip_select_ident(top_iph, dst, 0);
+#else
+			__ip_select_ident(top_iph, dst);
+#endif
+		}
+		top_iph->frag_off = 0;
+		top_iph->ttl = 0;
+		top_iph->protocol = IPPROTO_AH;
+		top_iph->check = 0;
+		top_iph->saddr = x->props.saddr.a4;
+		top_iph->daddr = x->id.daddr.a4;
+		ah = (struct ip_auth_hdr*)(top_iph+1);
+		ah->nexthdr = IPPROTO_IPIP;
+	} else {	/* otherwise: keep the original header, AH goes between it and the payload */
+		memcpy(&tmp_iph, skb->data, iph->ihl*4);	/* save the original header, options included */
+		top_iph = (struct iphdr*)skb_push(skb, x->props.header_len);
+		memcpy(top_iph, &tmp_iph, iph->ihl*4);
+		iph = &tmp_iph.iph;	/* from here on iph refers to the saved copy */
+		top_iph->tos = 0;
+		top_iph->tot_len = htons(skb->len);
+		top_iph->frag_off = 0;
+		top_iph->ttl = 0;
+		top_iph->protocol = IPPROTO_AH;
+		top_iph->check = 0;
+		if (top_iph->ihl != 5) {	/* header carries options */
+			err = ip_clear_mutable_options(top_iph, &top_iph->daddr);
+			if (err)
+				goto error;
+		}
+		ah = (struct ip_auth_hdr*)((char*)top_iph+iph->ihl*4);
+		ah->nexthdr = iph->protocol;
+	}
+	ahp = x->data;
+	ah->hdrlen  = (XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + 
+				   ahp->icv_trunc_len) >> 2) - 2;	/* length in 32-bit words, minus 2 */
+
+	ah->reserved = 0;
+	ah->spi = x->id.spi;
+	ah->seq_no = htonl(++x->replay.oseq);
+	ahp->icv(ahp, skb, ah->auth_data);	/* compute the ICV into the AH header */
+	top_iph->tos = iph->tos;	/* restore the fields that were zeroed for the ICV */
+	top_iph->ttl = iph->ttl;
+	if (x->props.mode) {
+		if (x->props.flags & XFRM_STATE_NOECN)
+			IP_ECN_clear(top_iph);
+		top_iph->frag_off = iph->frag_off&~htons(IP_MF|IP_OFFSET);
+		memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));	/* outer header has no options (ihl = 5) */
+	} else {
+		top_iph->frag_off = iph->frag_off;
+		top_iph->daddr = iph->daddr;	/* may have been replaced by ip_clear_mutable_options() */
+		if (iph->ihl != 5)
+			memcpy(top_iph+1, iph+1, iph->ihl*4 - sizeof(struct iphdr));	/* put the original options back */
+	}
+	ip_send_check(top_iph);
+
+	skb->nh.raw = skb->data;
+
+	x->curlft.bytes += skb->len;
+	x->curlft.packets++;
+	spin_unlock_bh(&x->lock);
+	if ((skb->dst = dst_pop(dst)) == NULL) {
+		err = -EHOSTUNREACH;
+		goto error_nolock;
+	}
+	return NET_XMIT_BYPASS;	/* NOTE(review): presumably tells the caller to continue with the popped dst -- confirm */
+
+error:
+	spin_unlock_bh(&x->lock);
+error_nolock:
+	kfree_skb(skb);	/* both error labels end by freeing the skb */
+	return err;
+}
+
+int ah_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)	/* .input hook of ah_type: verify the ICV and strip the AH header; 0 or -EINVAL */
+{
+	int ah_hlen;
+	struct iphdr *iph;
+	struct ip_auth_hdr *ah;
+	struct ah_data *ahp;
+	char work_buf[60];	/* copy of the original IP header (at most 15 * 4 bytes) */
+
+	if (!pskb_may_pull(skb, sizeof(struct ip_auth_hdr)))
+		goto out;
+
+	ah = (struct ip_auth_hdr*)skb->data;
+	ahp = x->data;
+	ah_hlen = (ah->hdrlen + 2) << 2;	/* inverse of the hdrlen computation in ah_output() */
+	
+	if (ah_hlen != XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_full_len) &&
+	    ah_hlen != XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_trunc_len)) 
+		goto out;	/* header length matches neither the full nor the truncated ICV size */
+
+	if (!pskb_may_pull(skb, ah_hlen))
+		goto out;
+
+	/* We are going to _remove_ AH header to keep sockets happy,
+	 * so... Later this can change. */
+	if (skb_cloned(skb) &&
+	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+		goto out;
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	ah = (struct ip_auth_hdr*)skb->data;	/* re-read: the calls above may have moved the data */
+	iph = skb->nh.iph;
+
+	memcpy(work_buf, iph, iph->ihl*4);	/* keep the real header; it is written back after verification */
+
+	iph->ttl = 0;	/* zero the same fields ah_output() zeroes before computing the ICV */
+	iph->tos = 0;
+	iph->frag_off = 0;
+	iph->check = 0;
+	if (iph->ihl != 5) {
+		u32 dummy;	/* receives the LSRR/SSRR final address; its value is not used here */
+		if (ip_clear_mutable_options(iph, &dummy))
+			goto out;
+	}
+        {
+		u8 auth_data[MAX_AH_AUTH_LEN];	/* the ICV as received */
+		
+		memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
+		skb_push(skb, skb->data - skb->nh.raw);	/* move skb->data back to the IP header */
+		ahp->icv(ahp, skb, ah->auth_data);	/* recompute into the header, then compare with the saved copy */
+		if (memcmp(ah->auth_data, auth_data, ahp->icv_trunc_len)) {
+			x->stats.integrity_failed++;
+			goto out;
+		}
+	}
+	((struct iphdr*)work_buf)->protocol = ah->nexthdr;	/* the saved header now names the inner protocol */
+	skb->nh.raw = skb_pull(skb, ah_hlen);	/* new header position: ah_hlen bytes further in */
+	memcpy(skb->nh.raw, work_buf, iph->ihl*4);
+	skb->nh.iph->tot_len = htons(skb->len);
+	skb_pull(skb, skb->nh.iph->ihl*4);
+	skb->h.raw = skb->data;
+
+	return 0;
+
+out:
+	return -EINVAL;	/* every failure, including an ICV mismatch, is reported as -EINVAL */
+}
+
+void ah4_err(struct sk_buff *skb, u32 info)	/* .err_handler of ah4_protocol; only logs, 'info' is unused */
+{
+	struct iphdr *iph = (struct iphdr*)skb->data;
+	struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+(iph->ihl<<2));	/* AH header follows the IP header at skb->data */
+	struct xfrm_state *x;
+
+	if (skb->h.icmph->type != ICMP_DEST_UNREACH ||
+	    skb->h.icmph->code != ICMP_FRAG_NEEDED)
+		return;	/* only "fragmentation needed" errors are handled */
+
+	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET);
+	if (!x)
+		return;
+	printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n",
+	       ntohl(ah->spi), ntohl(iph->daddr));
+	xfrm_state_put(x);	/* drop the reference obtained by the lookup */
+}
+
+static int ah_init_state(struct xfrm_state *x, void *args)	/* .init_state hook: build the ah_data stored in x->data; 'args' is unused */
+{
+	struct ah_data *ahp = NULL;
+	struct xfrm_algo_desc *aalg_desc;
+
+	if (!x->aalg)	/* AH needs an authentication algorithm */
+		goto error;
+
+	/* null auth can use a zero length key */
+	if (x->aalg->alg_key_len > 512)
+		goto error;
+
+	ahp = kmalloc(sizeof(*ahp), GFP_KERNEL);
+	if (ahp == NULL)
+		return -ENOMEM;
+
+	memset(ahp, 0, sizeof(*ahp));	/* so the error path can test work_icv and tfm safely */
+
+	ahp->key = x->aalg->alg_key;	/* points into x->aalg; the key is not copied */
+	ahp->key_len = (x->aalg->alg_key_len+7)/8;	/* apparently bits rounded up to bytes -- confirm */
+	ahp->tfm = crypto_alloc_tfm(x->aalg->alg_name, 0);
+	if (!ahp->tfm)
+		goto error;
+	ahp->icv = ah_hmac_digest;
+	
+	/*
+	 * Lookup the algorithm description maintained by xfrm_algo,
+	 * verify crypto transform properties, and store information
+	 * we need for AH processing.  This lookup cannot fail here
+	 * after a successful crypto_alloc_tfm().
+	 */
+	aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name);
+	BUG_ON(!aalg_desc);
+
+	if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
+	    crypto_tfm_alg_digestsize(ahp->tfm)) {
+		printk(KERN_INFO "AH: %s digestsize %u != %hu\n",
+		       x->aalg->alg_name, crypto_tfm_alg_digestsize(ahp->tfm),
+		       aalg_desc->uinfo.auth.icv_fullbits/8);
+		goto error;
+	}
+	
+	ahp->icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8;
+	ahp->icv_trunc_len = aalg_desc->uinfo.auth.icv_truncbits/8;
+	
+	BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN);	/* ah_input() keeps the ICV in a MAX_AH_AUTH_LEN stack buffer */
+	
+	ahp->work_icv = kmalloc(ahp->icv_full_len, GFP_KERNEL);
+	if (!ahp->work_icv)
+		goto error;	/* NOTE(review): this allocation failure is reported as -EINVAL, not -ENOMEM */
+	
+	x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_trunc_len);
+	if (x->props.mode)
+		x->props.header_len += sizeof(struct iphdr);	/* ah_output() also pushes an outer IP header in this mode */
+	x->data = ahp;
+
+	return 0;
+
+error:
+	if (ahp) {	/* undo whatever was set up before the failure */
+		if (ahp->work_icv)
+			kfree(ahp->work_icv);
+		if (ahp->tfm)
+			crypto_free_tfm(ahp->tfm);
+		kfree(ahp);
+	}
+	return -EINVAL;
+}
+
+static void ah_destroy(struct xfrm_state *x)	/* .destructor hook: release what ah_init_state() allocated */
+{
+	struct ah_data *ahp = x->data;
+
+	if (!ahp)	/* nothing was attached to this state */
+		return;
+
+	if (ahp->work_icv) {
+		kfree(ahp->work_icv);
+		ahp->work_icv = NULL;
+	}
+	if (ahp->tfm) {
+		crypto_free_tfm(ahp->tfm);
+		ahp->tfm = NULL;
+	}
+	kfree(ahp);	/* note: x->data itself is not cleared here */
+}
+
+
+static struct xfrm_type ah_type =	/* registered for AF_INET by ah4_init() */
+{
+	.description	= "AH4",
+	.owner		= THIS_MODULE,
+	.proto	     	= IPPROTO_AH,
+	.init_state	= ah_init_state,
+	.destructor	= ah_destroy,
+	.input		= ah_input,
+	.output		= ah_output
+};
+
+static struct inet_protocol ah4_protocol = {	/* added as IPPROTO_AH by ah4_init() */
+	.handler	=	xfrm4_rcv,
+	.err_handler	=	ah4_err,
+	.no_policy	=	1,
+};
+
+static int __init ah4_init(void)	/* module init: register the xfrm type, then the IP protocol handler */
+{
+	if (xfrm_register_type(&ah_type, AF_INET) < 0) {
+		printk(KERN_INFO "ip ah init: can't add xfrm type\n");
+		return -EAGAIN;
+	}
+	if (inet_add_protocol(&ah4_protocol, IPPROTO_AH) < 0) {
+		printk(KERN_INFO "ip ah init: can't add protocol\n");
+		xfrm_unregister_type(&ah_type, AF_INET);	/* roll back the first registration */
+		return -EAGAIN;
+	}
+	return 0;
+}
+
+static void __exit ah4_fini(void)	/* module exit: undo ah4_init() in reverse order; failures are only logged */
+{
+	if (inet_del_protocol(&ah4_protocol, IPPROTO_AH) < 0)
+		printk(KERN_INFO "ip ah close: can't remove protocol\n");
+	if (xfrm_unregister_type(&ah_type, AF_INET) < 0)
+		printk(KERN_INFO "ip ah close: can't remove xfrm type\n");
+}
+
+module_init(ah4_init);
+module_exit(ah4_fini);
+MODULE_LICENSE("GPL");
Index: net/ipv4/arp.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/arp.c,v
retrieving revision 1.1.1.22
retrieving revision 1.1.1.22.2.1
diff -u -r1.1.1.22 -r1.1.1.22.2.1
--- a/net/ipv4/arp.c	14 Apr 2004 13:05:41 -0000	1.1.1.22
+++ b/net/ipv4/arp.c	16 Apr 2004 13:16:21 -0000	1.1.1.22.2.1
@@ -413,11 +413,13 @@
 
 static int arp_filter(__u32 sip, __u32 tip, struct net_device *dev)
 {
+	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = sip,
+						 .saddr = tip } } };
 	struct rtable *rt;
 	int flag = 0; 
 	/*unsigned long now; */
 
-	if (ip_route_output(&rt, sip, tip, 0, 0) < 0) 
+	if (ip_route_output_key(&rt, &fl) < 0) 
 		return 1;
 	if (rt->u.dst.dev != dev) { 
 		NET_INC_STATS_BH(ArpFilter);
@@ -563,11 +565,11 @@
 	 */
 	
 	skb = alloc_skb(sizeof(struct arphdr)+ 2*(dev->addr_len+4)
-				+ dev->hard_header_len + 15, GFP_ATOMIC);
+				+ LL_RESERVED_SPACE(dev), GFP_ATOMIC);
 	if (skb == NULL)
 		return NULL;
 
-	skb_reserve(skb, (dev->hard_header_len+15)&~15);
+	skb_reserve(skb, LL_RESERVED_SPACE(dev));
 	skb->nh.raw = skb->data;
 	arp = (struct arphdr *) skb_put(skb,sizeof(struct arphdr) + 2*(dev->addr_len+4));
 	skb->dev = dev;
@@ -1016,8 +1018,10 @@
 	if (r->arp_flags & ATF_PERM)
 		r->arp_flags |= ATF_COM;
 	if (dev == NULL) {
+		struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip,
+							 .tos = RTO_ONLINK } } };
 		struct rtable * rt;
-		if ((err = ip_route_output(&rt, ip, 0, RTO_ONLINK, 0)) != 0)
+		if ((err = ip_route_output_key(&rt, &fl)) != 0)
 			return err;
 		dev = rt->u.dst.dev;
 		ip_rt_put(rt);
@@ -1099,8 +1103,10 @@
 	}
 
 	if (dev == NULL) {
+		struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip,
+							 .tos = RTO_ONLINK } } };
 		struct rtable * rt;
-		if ((err = ip_route_output(&rt, ip, 0, RTO_ONLINK, 0)) != 0)
+		if ((err = ip_route_output_key(&rt, &fl)) != 0)
 			return err;
 		dev = rt->u.dst.dev;
 		ip_rt_put(rt);
Index: net/ipv4/devinet.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/devinet.c,v
retrieving revision 1.1.1.24
retrieving revision 1.1.1.24.2.1
diff -u -r1.1.1.24 -r1.1.1.24.2.1
--- a/net/ipv4/devinet.c	14 Apr 2004 13:05:41 -0000	1.1.1.24
+++ b/net/ipv4/devinet.c	16 Apr 2004 13:16:21 -0000	1.1.1.24.2.1
@@ -180,7 +180,9 @@
 	/* in_dev_put following below will kill the in_device */
 	write_unlock_bh(&inetdev_lock);
 
-
+#ifdef CONFIG_SYSCTL
+	neigh_sysctl_unregister(in_dev->arp_parms);
+#endif
 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
 	in_dev_put(in_dev);
 }
@@ -926,6 +928,8 @@
 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
 				inet_insert_ifa(ifa);
 			}
+			in_dev->cnf.no_xfrm = 1;
+			in_dev->cnf.no_policy = 1;
 		}
 		ip_mc_up(in_dev);
 		break;
@@ -1132,6 +1136,62 @@
         return ret;
 }
 
+int ipv4_doint_and_flush(ctl_table *ctl, int write,	/* handler for disable_xfrm / disable_policy: */
+			 struct file* filp, void *buffer,	/* proc_dointvec() plus a route cache flush */
+			 size_t *lenp)	/* when a write changed the stored value */
+{
+	int *valp = ctl->data;
+	int val = *valp;	/* value before the write, for the change test below */
+	int ret = proc_dointvec(ctl, write, filp, buffer, lenp);
+
+	if (write && *valp != val)
+		rt_cache_flush(0);
+
+	return ret;	/* proc_dointvec()'s result is passed through unchanged */
+}
+
+int ipv4_doint_and_flush_strategy(ctl_table *table, int *name, int nlen,	/* listed after ipv4_doint_and_flush in the same table entries */
+				  void *oldval, size_t *oldlenp,
+				  void *newval, size_t newlen, 
+				  void **context)
+{
+	int *valp = table->data;
+	int new;
+
+	if (!newval || !newlen)	/* no new value supplied: nothing to do here */
+		return 0;
+
+	if (newlen != sizeof(int))
+		return -EINVAL;
+
+	if (get_user(new, (int *)newval))
+		return -EFAULT;
+
+	if (new == *valp)	/* unchanged: return before the old-value copy and the flush */
+		return 0;
+
+	if (oldval && oldlenp) {	/* caller asked for the previous value */
+		size_t len;
+
+		if (get_user(len, oldlenp))
+			return -EFAULT;
+
+		if (len) {
+			if (len > table->maxlen)	/* never copy more than the table entry holds */
+				len = table->maxlen;
+			if (copy_to_user(oldval, valp, len))
+				return -EFAULT;
+			if (put_user(len, oldlenp))
+				return -EFAULT;
+		}
+	}
+
+	*valp = new;
+	rt_cache_flush(0);
+	return 1;	/* value stored and cache flushed */
+}
+
+
 static struct devinet_sysctl_table
 {
 	struct ctl_table_header *sysctl_header;
@@ -1190,6 +1250,12 @@
 	{NET_IPV4_CONF_ARP_IGNORE, "arp_ignore",
 	 &ipv4_devconf.arp_ignore, sizeof(int), 0644, NULL,
 	 &proc_dointvec},
+	{NET_IPV4_CONF_NOXFRM, "disable_xfrm",
+	 &ipv4_devconf.no_xfrm, sizeof(int), 0644, NULL,
+	 &ipv4_doint_and_flush, &ipv4_doint_and_flush_strategy,},
+	{NET_IPV4_CONF_NOPOLICY, "disable_policy",
+	 &ipv4_devconf.no_policy, sizeof(int), 0644, NULL,
+	 &ipv4_doint_and_flush, &ipv4_doint_and_flush_strategy},
 	{NET_IPV4_CONF_FORCE_IGMP_VERSION, "force_igmp_version",
 	 &ipv4_devconf.force_igmp_version, sizeof(int), 0644, NULL,
 	 &proc_dointvec},
Index: net/ipv4/esp4.c
===================================================================
RCS file: net/ipv4/esp4.c
diff -N net/ipv4/esp4.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ b/net/ipv4/esp4.c	16 Apr 2004 13:16:21 -0000	1.5.18.1
@@ -0,0 +1,613 @@
+#include <linux/config.h>
+#include <linux/module.h>
+#include <net/inet_ecn.h>
+#include <net/ip.h>
+#include <net/xfrm.h>
+#include <net/esp.h>
+#include <asm/scatterlist.h>
+#include <linux/crypto.h>
+#include <linux/pfkeyv2.h>
+#include <linux/random.h>
+#include <net/icmp.h>
+#include <net/udp.h>
+
+#define MAX_SG_ONSTACK 4
+
+/* decapsulation data for use when post-processing; filled in by esp_input() */
+struct esp_decap_data {
+	xfrm_address_t	saddr;	/* source address of the received packet (iph->saddr) */
+	__u16		sport;	/* source port of the encapsulating UDP header (uh->source) */
+	__u8		proto;	/* 0 when unset, AF_INET once saddr/sport are recorded */
+};
+
+int esp_output(struct sk_buff *skb)	/* pad, encapsulate, encrypt and optionally authenticate skb; frees skb on error */
+{
+	int err;
+	struct dst_entry *dst = skb->dst;
+	struct xfrm_state *x  = dst->xfrm;
+	struct iphdr *iph, *top_iph;
+	struct ip_esp_hdr *esph;
+	struct crypto_tfm *tfm;
+	struct esp_data *esp;
+	struct sk_buff *trailer;
+	struct udphdr *uh = NULL;	/* set only for UDP_ENCAP_ESPINUDP */
+	struct xfrm_encap_tmpl *encap = NULL;
+	int blksize;
+	int clen;
+	int alen;
+	int nfrags;
+	union {
+		struct iphdr	iph;
+		char 		buf[60];	/* room for the largest IP header (ihl 15 * 4 bytes) */
+	} tmp_iph;
+
+	/* First, if the skb is not checksummed, complete checksum. */
+	if (skb->ip_summed == CHECKSUM_HW && skb_checksum_help(skb) == NULL) {
+		err = -EINVAL;
+		goto error_nolock;
+	}
+
+	spin_lock_bh(&x->lock);	/* held until the replay and lifetime counters are updated */
+	err = xfrm_check_output(x, skb, AF_INET);
+	if (err)
+		goto error;
+	err = -ENOMEM;	/* value returned by the 'goto error' paths below */
+
+	/* Strip IP header in transport mode. Save it. */
+	if (!x->props.mode) {
+		iph = skb->nh.iph;
+		memcpy(&tmp_iph, iph, iph->ihl*4);
+		__skb_pull(skb, iph->ihl*4);
+	}
+	/* Now skb is pure payload to encrypt */
+
+	/* Round to block size */
+	clen = skb->len;
+
+	esp = x->data;
+	alen = esp->auth.icv_trunc_len;
+	tfm = esp->conf.tfm;
+	blksize = (crypto_tfm_alg_blocksize(tfm) + 3) & ~3;	/* cipher block size rounded up to a multiple of 4 */
+	clen = (clen + 2 + blksize-1)&~(blksize-1);	/* +2: pad-length and next-header bytes written below */
+	if (esp->conf.padlen)
+		clen = (clen + esp->conf.padlen-1)&~(esp->conf.padlen-1);
+
+	if ((nfrags = skb_cow_data(skb, clen-skb->len+alen, &trailer)) < 0)	/* asks for room for padding, trailer and ICV */
+		goto error;
+
+	/* Fill padding... */
+	do {
+		int i;
+		for (i=0; i<clen-skb->len - 2; i++)
+			*(u8*)(trailer->tail + i) = i+1;	/* pad bytes are 1, 2, 3, ... */
+	} while (0);
+	*(u8*)(trailer->tail + clen-skb->len - 2) = (clen - skb->len)-2;	/* pad-length byte */
+	pskb_put(skb, trailer, clen - skb->len);
+
+	encap = x->encap;
+
+	iph = skb->nh.iph;
+	if (x->props.mode) {	/* non-zero mode: new outer IP header, inner packet marked as IPIP */
+		top_iph = (struct iphdr*)skb_push(skb, x->props.header_len);
+		esph = (struct ip_esp_hdr*)(top_iph+1);
+		if (encap && encap->encap_type) {
+			switch (encap->encap_type) {
+			case UDP_ENCAP_ESPINUDP:
+				uh = (struct udphdr*) esph;	/* UDP header sits between IP and ESP headers */
+				esph = (struct ip_esp_hdr*)(uh+1);
+				top_iph->protocol = IPPROTO_UDP;
+				break;
+			default:
+				printk(KERN_INFO
+				       "esp_output(): Unhandled encap: %u\n",
+				       encap->encap_type);
+				top_iph->protocol = IPPROTO_ESP;
+				break;
+			}
+		} else
+			top_iph->protocol = IPPROTO_ESP;
+		*(u8*)(trailer->tail - 1) = IPPROTO_IPIP;	/* next-header byte, last byte of the trailer */
+		top_iph->ihl = 5;
+		top_iph->version = 4;
+		top_iph->tos = iph->tos;	/* DS disclosed */
+		if (x->props.flags & XFRM_STATE_NOECN)
+			IP_ECN_clear(top_iph);
+		top_iph->tot_len = htons(skb->len + alen);	/* the ICV is appended later, so count it now */
+		top_iph->frag_off = iph->frag_off&htons(IP_DF);
+		if (!(top_iph->frag_off))
+			ip_select_ident(top_iph, dst, 0);
+		top_iph->ttl = iph->ttl;	/* TTL disclosed */
+		top_iph->check = 0;
+		top_iph->saddr = x->props.saddr.a4;
+		top_iph->daddr = x->id.daddr.a4;
+		memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
+	} else {	/* transport mode: put the saved header back in front of the ESP header */
+		esph = (struct ip_esp_hdr*)skb_push(skb, x->props.header_len);
+		top_iph = (struct iphdr*)skb_push(skb, iph->ihl*4);
+		memcpy(top_iph, &tmp_iph, iph->ihl*4);
+		if (encap && encap->encap_type) {
+			switch (encap->encap_type) {
+			case UDP_ENCAP_ESPINUDP:
+				uh = (struct udphdr*) esph;
+				esph = (struct ip_esp_hdr*)(uh+1);
+				top_iph->protocol = IPPROTO_UDP;
+				break;
+			default:
+				printk(KERN_INFO
+				       "esp_output(): Unhandled encap: %u\n",
+				       encap->encap_type);
+				top_iph->protocol = IPPROTO_ESP;
+				break;
+			}
+		} else
+			top_iph->protocol = IPPROTO_ESP;
+		iph = &tmp_iph.iph;	/* from here on iph refers to the saved copy */
+		top_iph->tot_len = htons(skb->len + alen);
+		top_iph->check = 0;
+		top_iph->frag_off = iph->frag_off;
+		*(u8*)(trailer->tail - 1) = iph->protocol;	/* next-header byte: the original protocol */
+	}
+
+	/* this is non-NULL only with UDP Encapsulation */
+	if (encap && uh) {
+		uh->source = encap->encap_sport;
+		uh->dest = encap->encap_dport;
+		uh->len = htons(skb->len + alen - sizeof(struct iphdr));
+		uh->check = 0;
+	}
+
+	esph->spi = x->id.spi;
+	esph->seq_no = htonl(++x->replay.oseq);
+
+	if (esp->conf.ivlen)
+		crypto_cipher_set_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
+
+	do {
+		struct scatterlist sgbuf[nfrags>MAX_SG_ONSTACK ? 0 : nfrags];	/* on-stack list unless more than MAX_SG_ONSTACK entries */
+		struct scatterlist *sg = sgbuf;
+
+		if (unlikely(nfrags > MAX_SG_ONSTACK)) {
+			sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC);
+			if (!sg)
+				goto error;
+		}
+		skb_to_sgvec(skb, sg, esph->enc_data+esp->conf.ivlen-skb->data, clen);	/* region starts after the IV slot */
+		crypto_cipher_encrypt(tfm, sg, sg, clen);	/* in place: source and destination are the same list */
+		if (unlikely(sg != sgbuf))
+			kfree(sg);
+	} while (0);
+
+	if (esp->conf.ivlen) {
+		memcpy(esph->enc_data, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));	/* IV set above goes in front of the ciphertext */
+		crypto_cipher_get_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));	/* read the cipher's IV back into conf.ivec for the next set_iv */
+	}
+
+	if (esp->auth.icv_full_len) {
+		esp->auth.icv(esp, skb, (u8*)esph-skb->data,
+		              sizeof(struct ip_esp_hdr) + esp->conf.ivlen+clen, trailer->tail);	/* over ESP header + IV + ciphertext */
+		pskb_put(skb, trailer, alen);
+	}
+
+	ip_send_check(top_iph);
+
+	skb->nh.raw = skb->data;
+
+	x->curlft.bytes += skb->len;
+	x->curlft.packets++;
+	spin_unlock_bh(&x->lock);
+	if ((skb->dst = dst_pop(dst)) == NULL) {
+		err = -EHOSTUNREACH;
+		goto error_nolock;
+	}
+	return NET_XMIT_BYPASS;	/* NOTE(review): presumably tells the caller to continue with the popped dst -- confirm */
+
+error:
+	spin_unlock_bh(&x->lock);
+error_nolock:
+	kfree_skb(skb);	/* both error labels end by freeing the skb */
+	return err;
+}
+
+/*
+ * Note: detecting truncated vs. non-truncated authentication data is very
+ * expensive, so we only support truncated data, which is the recommended
+ * and common case.
+ */
+int esp_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)	/* verify, decrypt and strip ESP; 0 or -EINVAL */
+{
+	struct iphdr *iph;
+	struct ip_esp_hdr *esph;
+	struct esp_data *esp = x->data;
+	struct sk_buff *trailer;
+	int blksize = crypto_tfm_alg_blocksize(esp->conf.tfm);
+	int alen = esp->auth.icv_trunc_len;
+	int elen = skb->len - sizeof(struct ip_esp_hdr) - esp->conf.ivlen - alen;	/* ciphertext length: minus ESP header, IV and ICV */
+	int nfrags;
+	int encap_len = 0;	/* bytes between IP header and ESP header; non-zero only for ESPinUDP */
+
+	if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr)))
+		goto out;
+
+	if (elen <= 0 || (elen & (blksize-1)))	/* reject empty or non-block-aligned ciphertext */
+		goto out;
+
+	/* If integrity check is required, do this. */
+	if (esp->auth.icv_full_len) {
+		u8 sum[esp->auth.icv_full_len];	/* computed ICV */
+		u8 sum1[alen];	/* ICV copied from the end of the packet */
+		
+		esp->auth.icv(esp, skb, 0, skb->len-alen, sum);
+
+		if (skb_copy_bits(skb, skb->len-alen, sum1, alen))
+			BUG();
+
+		if (unlikely(memcmp(sum, sum1, alen))) {	/* only the first alen bytes are compared */
+			x->stats.integrity_failed++;
+			goto out;
+		}
+	}
+
+	if ((nfrags = skb_cow_data(skb, 0, &trailer)) < 0)
+		goto out;
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	esph = (struct ip_esp_hdr*)skb->data;
+	iph = skb->nh.iph;
+
+	/* Get ivec. This may be wrong; check against other implementations. */
+	if (esp->conf.ivlen)
+		crypto_cipher_set_iv(esp->conf.tfm, esph->enc_data, crypto_tfm_alg_ivsize(esp->conf.tfm));
+
+        {
+		u8 nexthdr[2];	/* [0] = pad length, [1] = next header */
+		struct scatterlist sgbuf[nfrags>MAX_SG_ONSTACK ? 0 : nfrags];	/* on-stack list unless more than MAX_SG_ONSTACK entries */
+		struct scatterlist *sg = sgbuf;
+		u8 workbuf[60];	/* copy of the IP header while it is relocated */
+		int padlen;
+
+		if (unlikely(nfrags > MAX_SG_ONSTACK)) {
+			sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC);
+			if (!sg)
+				goto out;
+		}
+		skb_to_sgvec(skb, sg, sizeof(struct ip_esp_hdr) + esp->conf.ivlen, elen);
+		crypto_cipher_decrypt(esp->conf.tfm, sg, sg, elen);	/* in place: source and destination are the same list */
+		if (unlikely(sg != sgbuf))
+			kfree(sg);
+
+		if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2))	/* the two trailer bytes just before the ICV */
+			BUG();
+
+		padlen = nexthdr[0];
+		if (padlen+2 >= elen)	/* padding plus trailer must leave some payload */
+			goto out;
+
+		/* ... check padding bits here. Silly. :-) */ 
+
+		if (x->encap && decap && decap->decap_type) {
+			struct esp_decap_data *encap_data;
+			struct udphdr *uh = (struct udphdr *) (iph+1);	/* NOTE(review): iph+1 skips only the fixed 20-byte header -- confirm behaviour with IP options */
+
+			encap_data = (struct esp_decap_data *) (decap->decap_data);
+			encap_data->proto = 0;	/* stays 0 unless the ESPinUDP case fills it in */
+
+			switch (decap->decap_type) {
+			case UDP_ENCAP_ESPINUDP:
+
+				if ((void*)uh == (void*)esph) {	/* ESP header directly follows the IP header: no UDP header */
+					printk(KERN_DEBUG
+					       "esp_input(): Got ESP; expecting ESPinUDP\n");
+					break;
+				}
+
+				encap_data->proto = AF_INET;
+				encap_data->saddr.a4 = iph->saddr;
+				encap_data->sport = uh->source;
+				encap_len = (void*)esph - (void*)uh;
+				if (encap_len != sizeof(*uh))
+				  printk(KERN_DEBUG
+					 "esp_input(): UDP -> ESP: too much room: %d\n",
+					 encap_len);
+				break;
+
+			default:
+				printk(KERN_INFO
+			       "esp_input(): processing unknown encap type: %u\n",
+				       decap->decap_type);
+				break;
+			}
+		}
+
+		iph->protocol = nexthdr[1];	/* IP header now names the inner protocol */
+		pskb_trim(skb, skb->len - alen - padlen - 2);	/* drop ICV, padding and the two trailer bytes */
+		memcpy(workbuf, skb->nh.raw, iph->ihl*4);
+		skb->h.raw = skb_pull(skb, sizeof(struct ip_esp_hdr) + esp->conf.ivlen);
+		skb->nh.raw += encap_len + sizeof(struct ip_esp_hdr) + esp->conf.ivlen;	/* move the IP header forward by the stripped bytes */
+		memcpy(skb->nh.raw, workbuf, iph->ihl*4);
+		skb->nh.iph->tot_len = htons(skb->len);
+	}
+
+	return 0;
+
+out:
+	return -EINVAL;	/* every failure, including an ICV mismatch, is reported as -EINVAL */
+}
+
+/*
+ * Post-decapsulation hook for ESP.  For SAs with an encapsulation template it
+ * checks that the packet arrived with the matching encapsulation type and,
+ * for ESP-in-UDP, reports a changed peer address/port to the key manager.
+ */
+int esp_post_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
+{
+	struct xfrm_encap_tmpl *tmpl = x->encap;
+	struct esp_decap_data *seen;
+
+	/* Nothing to do for SAs without an encapsulation template. */
+	if (!tmpl)
+		return 0;
+
+	seen = (struct esp_decap_data *)(decap->decap_data);
+
+	/* first, make sure that the decap type == the encap type */
+	if (tmpl->encap_type != decap->decap_type)
+		return -EINVAL;
+
+	/* Next, if we don't have an encap type, then ignore it */
+	if (!tmpl->encap_type)
+		return 0;
+
+	switch (tmpl->encap_type) {
+	case UDP_ENCAP_ESPINUDP:
+		/*
+		 * 1) If the NAT-T peer's IP or port changed, advertise the
+		 *    change to the keying daemon.  This is an inbound SA, so
+		 *    just compare SRC ports.
+		 */
+		if (seen->proto == AF_INET &&
+		    (seen->saddr.a4 != x->props.saddr.a4 ||
+		     seen->sport != tmpl->encap_sport)) {
+			xfrm_address_t peer;
+
+			peer.a4 = seen->saddr.a4;
+			km_new_mapping(x, &peer, seen->sport);
+
+			/*
+			 * XXX: perhaps add an extra policy check here, to see
+			 * if we should allow or reject a packet from a
+			 * different source address/port.
+			 */
+		}
+
+		/*
+		 * 2) Ignore UDP/TCP checksums in case of NAT-T in Transport
+		 *    Mode, or perform other post-processing fixes as per
+		 *    draft-ietf-ipsec-udp-encaps-06, section 3.1.2.
+		 */
+		if (!x->props.mode)
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+		break;
+	default:
+		printk(KERN_INFO
+		       "esp4_post_input(): Unhandled encap type: %u\n",
+		       tmpl->encap_type);
+		break;
+	}
+
+	return 0;
+}
+
+/* Worst-case size of the ESP packet built for @mtu payload bytes on SA @x. */
+static u32 esp4_get_max_size(struct xfrm_state *x, int mtu)
+{
+	struct esp_data *esp = x->data;
+	u32 blksize = crypto_tfm_alg_blocksize(esp->conf.tfm);
+
+	/* With props.mode set, payload + 2 trailer bytes round up to a block. */
+	if (x->props.mode)
+		mtu = (mtu + 2 + blksize-1)&~(blksize-1);
+	else
+		mtu += 2 + blksize;	/* The worst case. */
+	if (esp->conf.padlen)
+		mtu = (mtu + esp->conf.padlen-1)&~(esp->conf.padlen-1);
+
+	return mtu + x->props.header_len + esp->auth.icv_trunc_len;
+}
+
+/* ICMP error hook for ESP: logs frag-needed errors that match a known SA. */
+void esp4_err(struct sk_buff *skb, u32 info)
+{
+	struct iphdr *iph = (struct iphdr*)skb->data;
+	struct ip_esp_hdr *esph = (struct ip_esp_hdr*)(skb->data+(iph->ihl<<2));
+	struct xfrm_state *x;
+
+	if (skb->h.icmph->type != ICMP_DEST_UNREACH ||
+	    skb->h.icmph->code != ICMP_FRAG_NEEDED)
+		return;
+	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET);
+	if (x) {
+		printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n",
+		       ntohl(esph->spi), ntohl(iph->daddr));
+		xfrm_state_put(x);	/* drop the reference taken by the lookup */
+	}
+}
+
+/*
+ * xfrm_type destructor for ESP (wired up as esp_type.destructor).
+ *
+ * Releases the cipher and digest transforms, the IV buffer and the ICV
+ * scratch buffer hanging off x->data, then the esp_data itself.  A state
+ * whose x->data is NULL is left untouched.
+ */
+void esp_destroy(struct xfrm_state *x)
+{
+	struct esp_data *esp = x->data;
+
+	if (!esp)
+		return;
+
+	/* The transforms are only released when they were actually allocated. */
+	if (esp->conf.tfm)
+		crypto_free_tfm(esp->conf.tfm);
+	if (esp->auth.tfm)
+		crypto_free_tfm(esp->auth.tfm);
+
+	/* kfree() ignores NULL, so buffers that were never allocated are fine. */
+	kfree(esp->conf.ivec);
+	kfree(esp->auth.work_icv);
+	kfree(esp);
+}
+
+/*
+ * xfrm_type init_state hook: build the per-SA ESP context and attach it to
+ * x->data.  Returns 0, -ENOMEM if the context cannot be allocated, else -EINVAL.
+ */
+int esp_init_state(struct xfrm_state *x, void *args)
+{
+	struct esp_data *esp = NULL;
+
+	/* null auth and encryption can have zero length keys */
+	if (x->aalg && x->aalg->alg_key_len > 512)
+		goto error;
+	if (x->ealg == NULL)
+		goto error;
+
+	esp = kmalloc(sizeof(*esp), GFP_KERNEL);
+	if (esp == NULL)
+		return -ENOMEM;
+
+	memset(esp, 0, sizeof(*esp));	/* also leaves conf.padlen at 0 */
+
+	if (x->aalg) {
+		struct xfrm_algo_desc *aalg_desc;
+
+		esp->auth.key = x->aalg->alg_key;
+		esp->auth.key_len = (x->aalg->alg_key_len+7)/8;
+		esp->auth.tfm = crypto_alloc_tfm(x->aalg->alg_name, 0);
+		if (esp->auth.tfm == NULL)
+			goto error;
+		esp->auth.icv = esp_hmac_digest;
+
+		aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name);
+		BUG_ON(!aalg_desc);
+
+		if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
+		    crypto_tfm_alg_digestsize(esp->auth.tfm)) {
+			printk(KERN_INFO "ESP: %s digestsize %u != %hu\n",
+			       x->aalg->alg_name,
+			       crypto_tfm_alg_digestsize(esp->auth.tfm),
+			       aalg_desc->uinfo.auth.icv_fullbits/8);
+			goto error;
+		}
+
+		esp->auth.icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8;
+		esp->auth.icv_trunc_len = aalg_desc->uinfo.auth.icv_truncbits/8;
+
+		esp->auth.work_icv = kmalloc(esp->auth.icv_full_len, GFP_KERNEL);
+		if (!esp->auth.work_icv)
+			goto error;
+	}
+	esp->conf.key = x->ealg->alg_key;
+	esp->conf.key_len = (x->ealg->alg_key_len+7)/8;
+	if (x->props.ealgo == SADB_EALG_NULL)
+		esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_ECB);
+	else
+		esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_CBC);
+	if (esp->conf.tfm == NULL)
+		goto error;
+	esp->conf.ivlen = crypto_tfm_alg_ivsize(esp->conf.tfm);
+	if (esp->conf.ivlen) {
+		esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL);
+		if (!esp->conf.ivec)	/* never hand NULL to get_random_bytes() */
+			goto error;
+		get_random_bytes(esp->conf.ivec, esp->conf.ivlen);
+	}
+	if (crypto_cipher_setkey(esp->conf.tfm, esp->conf.key, esp->conf.key_len))
+		goto error;	/* a rejected key must not yield a usable SA */
+	x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen;
+	if (x->props.mode)
+		x->props.header_len += sizeof(struct iphdr);
+	if (x->encap && x->encap->encap_type) {
+		switch (x->encap->encap_type) {
+		case UDP_ENCAP_ESPINUDP:
+			x->props.header_len += sizeof(struct udphdr);
+			break;
+		default:
+			printk(KERN_INFO
+			       "esp_init_state(): Unhandled encap type: %u\n",
+			       x->encap->encap_type);
+			break;
+		}
+	}
+	x->data = esp;
+	x->props.trailer_len = esp4_get_max_size(x, 0) - x->props.header_len;
+	return 0;
+
+error:
+	if (esp) {
+		if (esp->auth.tfm)
+			crypto_free_tfm(esp->auth.tfm);
+		if (esp->conf.tfm)
+			crypto_free_tfm(esp->conf.tfm);
+		kfree(esp->auth.work_icv);	/* kfree(NULL) is a no-op */
+		kfree(esp->conf.ivec);
+		kfree(esp);
+	}
+	return -EINVAL;
+}
+
+/* xfrm type operations for IPv4 ESP; registered for AF_INET in esp4_init(). */
+static struct xfrm_type esp_type = {
+	.description	= "ESP4",
+	.owner		= THIS_MODULE,
+	.proto		= IPPROTO_ESP,
+	.init_state	= esp_init_state,
+	.destructor	= esp_destroy,
+	.get_max_size	= esp4_get_max_size,
+	.input		= esp_input,
+	.post_input	= esp_post_input,
+	.output		= esp_output,
+};
+
+static struct inet_protocol esp4_protocol = {	/* added for IPPROTO_ESP in esp4_init() */
+	.handler	=	xfrm4_rcv,	/* receive handler for this protocol */
+	.err_handler	=	esp4_err,	/* ICMP error notifications */
+	.no_policy	=	1,
+};
+
+/* Module init: register the ESP xfrm type, then the IPPROTO_ESP handler. */
+int __init esp4_init(void)
+{
+	struct xfrm_decap_state decap;
+
+	/* esp_decap_data must fit in decap_data; presumably fails the link if not. */
+	if (sizeof(struct esp_decap_data) > sizeof(decap.decap_data)) {
+		extern void decap_data_too_small(void);
+		decap_data_too_small();
+	}
+
+	SET_MODULE_OWNER(&esp_type);
+	if (xfrm_register_type(&esp_type, AF_INET) < 0) {
+		printk(KERN_INFO "ip esp init: can't add xfrm type\n");
+		return -EAGAIN;
+	}
+	if (inet_add_protocol(&esp4_protocol, IPPROTO_ESP) < 0) {
+		printk(KERN_INFO "ip esp init: can't add protocol\n");
+		xfrm_unregister_type(&esp_type, AF_INET);	/* undo the step above */
+		return -EAGAIN;
+	}
+	return 0;
+}
+
+static void __exit esp4_fini(void)	/* module exit: undo esp4_init() in reverse order */
+{
+	if (inet_del_protocol(&esp4_protocol, IPPROTO_ESP) < 0)	/* failure is only logged */
+		printk(KERN_INFO "ip esp close: can't remove protocol\n");
+	if (xfrm_unregister_type(&esp_type, AF_INET) < 0)	/* failure is only logged */
+		printk(KERN_INFO "ip esp close: can't remove xfrm type\n");
+}
+
+module_init(esp4_init);	/* module entry point */
+module_exit(esp4_fini);	/* module exit point */
+MODULE_LICENSE("GPL");
Index: net/ipv4/fib_frontend.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/fib_frontend.c,v
retrieving revision 1.1.1.17
retrieving revision 1.1.1.17.2.1
diff -u -r1.1.1.17 -r1.1.1.17.2.1
--- a/net/ipv4/fib_frontend.c	25 Aug 2003 11:44:44 -0000	1.1.1.17
+++ b/net/ipv4/fib_frontend.c	16 Apr 2004 13:16:21 -0000	1.1.1.17.2.1
@@ -144,17 +144,15 @@
 
 struct net_device * ip_dev_find(u32 addr)
 {
-	struct rt_key key;
+	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
 	struct fib_result res;
 	struct net_device *dev = NULL;
 
-	memset(&key, 0, sizeof(key));
-	key.dst = addr;
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 	res.r = NULL;
 #endif
 
-	if (!local_table || local_table->tb_lookup(local_table, &key, &res)) {
+	if (!local_table || local_table->tb_lookup(local_table, &fl, &res)) {
 		return NULL;
 	}
 	if (res.type != RTN_LOCAL)
@@ -170,7 +168,7 @@
 
 unsigned inet_addr_type(u32 addr)
 {
-	struct rt_key		key;
+	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
 	struct fib_result	res;
 	unsigned ret = RTN_BROADCAST;
 
@@ -179,15 +177,13 @@
 	if (MULTICAST(addr))
 		return RTN_MULTICAST;
 
-	memset(&key, 0, sizeof(key));
-	key.dst = addr;
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 	res.r = NULL;
 #endif
 	
 	if (local_table) {
 		ret = RTN_UNICAST;
-		if (local_table->tb_lookup(local_table, &key, &res) == 0) {
+		if (local_table->tb_lookup(local_table, &fl, &res) == 0) {
 			ret = res.type;
 			fib_res_put(&res);
 		}
@@ -207,18 +203,15 @@
 			struct net_device *dev, u32 *spec_dst, u32 *itag)
 {
 	struct in_device *in_dev;
-	struct rt_key key;
+	struct flowi fl = { .nl_u = { .ip4_u =
+				      { .daddr = src,
+					.saddr = dst,
+					.tos = tos } },
+			    .iif = oif };
 	struct fib_result res;
 	int no_addr, rpf;
 	int ret;
 
-	key.dst = src;
-	key.src = dst;
-	key.tos = tos;
-	key.oif = 0;
-	key.iif = oif;
-	key.scope = RT_SCOPE_UNIVERSE;
-
 	no_addr = rpf = 0;
 	read_lock(&inetdev_lock);
 	in_dev = __in_dev_get(dev);
@@ -231,7 +224,7 @@
 	if (in_dev == NULL)
 		goto e_inval;
 
-	if (fib_lookup(&key, &res))
+	if (fib_lookup(&fl, &res))
 		goto last_resort;
 	if (res.type != RTN_UNICAST)
 		goto e_inval_res;
@@ -252,10 +245,10 @@
 		goto last_resort;
 	if (rpf)
 		goto e_inval;
-	key.oif = dev->ifindex;
+	fl.oif = dev->ifindex;
 
 	ret = 0;
-	if (fib_lookup(&key, &res) == 0) {
+	if (fib_lookup(&fl, &res) == 0) {
 		if (res.type == RTN_UNICAST) {
 			*spec_dst = FIB_RES_PREFSRC(res);
 			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
Index: net/ipv4/fib_hash.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/fib_hash.c,v
retrieving revision 1.1.1.14
retrieving revision 1.1.1.14.2.1
diff -u -r1.1.1.14 -r1.1.1.14.2.1
--- a/net/ipv4/fib_hash.c	25 Aug 2003 11:44:44 -0000	1.1.1.14
+++ b/net/ipv4/fib_hash.c	16 Apr 2004 13:16:21 -0000	1.1.1.14.2.1
@@ -290,7 +290,7 @@
 }
 
 static int
-fn_hash_lookup(struct fib_table *tb, const struct rt_key *key, struct fib_result *res)
+fn_hash_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
 {
 	int err;
 	struct fn_zone *fz;
@@ -299,7 +299,7 @@
 	read_lock(&fib_hash_lock);
 	for (fz = t->fn_zone_list; fz; fz = fz->fz_next) {
 		struct fib_node *f;
-		fn_key_t k = fz_key(key->dst, fz);
+		fn_key_t k = fz_key(flp->fl4_dst, fz);
 
 		for (f = fz_chain(k, fz); f; f = f->fn_next) {
 			if (!fn_key_eq(k, f->fn_key)) {
@@ -309,17 +309,17 @@
 					continue;
 			}
 #ifdef CONFIG_IP_ROUTE_TOS
-			if (f->fn_tos && f->fn_tos != key->tos)
+			if (f->fn_tos && f->fn_tos != flp->fl4_tos)
 				continue;
 #endif
 			f->fn_state |= FN_S_ACCESSED;
 
 			if (f->fn_state&FN_S_ZOMBIE)
 				continue;
-			if (f->fn_scope < key->scope)
+			if (f->fn_scope < flp->fl4_scope)
 				continue;
 
-			err = fib_semantic_match(f->fn_type, FIB_INFO(f), key, res);
+			err = fib_semantic_match(f->fn_type, FIB_INFO(f), flp, res);
 			if (err == 0) {
 				res->type = f->fn_type;
 				res->scope = f->fn_scope;
@@ -362,7 +362,7 @@
 }
 
 static void
-fn_hash_select_default(struct fib_table *tb, const struct rt_key *key, struct fib_result *res)
+fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
 {
 	int order, last_idx;
 	struct fib_node *f;
Index: net/ipv4/fib_rules.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/fib_rules.c,v
retrieving revision 1.1.1.15
retrieving revision 1.1.1.15.2.1
diff -u -r1.1.1.15 -r1.1.1.15.2.1
--- a/net/ipv4/fib_rules.c	18 Feb 2004 13:36:32 -0000	1.1.1.15
+++ b/net/ipv4/fib_rules.c	16 Apr 2004 13:16:21 -0000	1.1.1.15.2.1
@@ -307,28 +307,28 @@
 	}
 }
 
-int fib_lookup(const struct rt_key *key, struct fib_result *res)
+int fib_lookup(const struct flowi *flp, struct fib_result *res)
 {
 	int err;
 	struct fib_rule *r, *policy;
 	struct fib_table *tb;
 
-	u32 daddr = key->dst;
-	u32 saddr = key->src;
+	u32 daddr = flp->fl4_dst;
+	u32 saddr = flp->fl4_src;
 
 FRprintk("Lookup: %u.%u.%u.%u <- %u.%u.%u.%u ",
-	NIPQUAD(key->dst), NIPQUAD(key->src));
+	NIPQUAD(flp->fl4_dst), NIPQUAD(flp->fl4_src));
 	read_lock(&fib_rules_lock);
 	for (r = fib_rules; r; r=r->r_next) {
 		if (((saddr^r->r_src) & r->r_srcmask) ||
 		    ((daddr^r->r_dst) & r->r_dstmask) ||
 #ifdef CONFIG_IP_ROUTE_TOS
-		    (r->r_tos && r->r_tos != key->tos) ||
+		    (r->r_tos && r->r_tos != flp->fl4_tos) ||
 #endif
 #ifdef CONFIG_IP_ROUTE_FWMARK
-		    (r->r_fwmark && r->r_fwmark != key->fwmark) ||
+		    (r->r_fwmark && r->r_fwmark != flp->fl4_fwmark) ||
 #endif
-		    (r->r_ifindex && r->r_ifindex != key->iif))
+		    (r->r_ifindex && r->r_ifindex != flp->iif))
 			continue;
 
 FRprintk("tb %d r %d ", r->r_table, r->r_action);
@@ -351,7 +351,7 @@
 
 		if ((tb = fib_get_table(r->r_table)) == NULL)
 			continue;
-		err = tb->tb_lookup(tb, key, res);
+		err = tb->tb_lookup(tb, flp, res);
 		if (err == 0) {
 			res->r = policy;
 			if (policy)
@@ -369,13 +369,13 @@
 	return -ENETUNREACH;
 }
 
-void fib_select_default(const struct rt_key *key, struct fib_result *res)
+void fib_select_default(const struct flowi *flp, struct fib_result *res)
 {
 	if (res->r && res->r->r_action == RTN_UNICAST &&
 	    FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) {
 		struct fib_table *tb;
 		if ((tb = fib_get_table(res->r->r_table)) != NULL)
-			tb->tb_select_default(tb, key, res);
+			tb->tb_select_default(tb, flp, res);
 	}
 }
 
Index: net/ipv4/fib_semantics.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/fib_semantics.c,v
retrieving revision 1.1.1.18
retrieving revision 1.1.1.18.2.1
diff -u -r1.1.1.18 -r1.1.1.18.2.1
--- a/net/ipv4/fib_semantics.c	25 Aug 2003 11:44:44 -0000	1.1.1.18
+++ b/net/ipv4/fib_semantics.c	16 Apr 2004 13:16:21 -0000	1.1.1.18.2.1
@@ -349,7 +349,6 @@
 	int err;
 
 	if (nh->nh_gw) {
-		struct rt_key key;
 		struct fib_result res;
 
 #ifdef CONFIG_IP_ROUTE_PERVASIVE
@@ -372,16 +371,18 @@
 			nh->nh_scope = RT_SCOPE_LINK;
 			return 0;
 		}
-		memset(&key, 0, sizeof(key));
-		key.dst = nh->nh_gw;
-		key.oif = nh->nh_oif;
-		key.scope = r->rtm_scope + 1;
-
-		/* It is not necessary, but requires a bit of thinking */
-		if (key.scope < RT_SCOPE_LINK)
-			key.scope = RT_SCOPE_LINK;
-		if ((err = fib_lookup(&key, &res)) != 0)
-			return err;
+		{
+			struct flowi fl = { .nl_u = { .ip4_u =
+						      { .daddr = nh->nh_gw,
+							.scope = r->rtm_scope + 1 } },
+					    .oif = nh->nh_oif };
+
+			/* It is not necessary, but requires a bit of thinking */
+			if (fl.fl4_scope < RT_SCOPE_LINK)
+				fl.fl4_scope = RT_SCOPE_LINK;
+			if ((err = fib_lookup(&fl, &res)) != 0)
+				return err;
+		}
 		err = -EINVAL;
 		if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
 			goto out;
@@ -578,7 +579,7 @@
 }
 
 int 
-fib_semantic_match(int type, struct fib_info *fi, const struct rt_key *key, struct fib_result *res)
+fib_semantic_match(int type, struct fib_info *fi, const struct flowi *flp, struct fib_result *res)
 {
 	int err = fib_props[type].error;
 
@@ -603,7 +604,7 @@
 			for_nexthops(fi) {
 				if (nh->nh_flags&RTNH_F_DEAD)
 					continue;
-				if (!key->oif || key->oif == nh->nh_oif)
+				if (!flp->oif || flp->oif == nh->nh_oif)
 					break;
 			}
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -949,7 +950,7 @@
    fair weighted route distribution.
  */
 
-void fib_select_multipath(const struct rt_key *key, struct fib_result *res)
+void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
 {
 	struct fib_info *fi = res->fi;
 	int w;
Index: net/ipv4/icmp.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/icmp.c,v
retrieving revision 1.1.1.25
retrieving revision 1.1.1.25.2.1
diff -u -r1.1.1.25 -r1.1.1.25.2.1
--- a/net/ipv4/icmp.c	14 Apr 2004 13:05:41 -0000	1.1.1.25
+++ b/net/ipv4/icmp.c	16 Apr 2004 13:16:21 -0000	1.1.1.25.2.1
@@ -101,7 +101,6 @@
 	int offset;
 	int data_len;
 
-	unsigned int csum;
 	struct {
 		struct icmphdr icmph;
 		__u32	       times[3];
@@ -139,8 +138,6 @@
   { EHOSTUNREACH,	1 }	/*	ICMP_PREC_CUTOFF	*/
 };
 
-extern int sysctl_ip_default_ttl;
-
 /* Control parameters for ECHO replies. */
 int sysctl_icmp_echo_ignore_all;
 int sysctl_icmp_echo_ignore_broadcasts;
@@ -281,37 +278,47 @@
  *	Checksum each fragment, and on the first include the headers and final checksum.
  */
  
-static int icmp_glue_bits(const void *p, char *to, unsigned int offset, unsigned int fraglen)
+int
+icmp_glue_bits(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
 {
-	struct icmp_bxm *icmp_param = (struct icmp_bxm *)p;
-	struct icmphdr *icmph;
+	struct icmp_bxm *icmp_param = (struct icmp_bxm *)from;
 	unsigned int csum;
 
-	if (offset) {
-		icmp_param->csum=skb_copy_and_csum_bits(icmp_param->skb,
-							icmp_param->offset+(offset-icmp_param->head_len), 
-							to, fraglen,icmp_param->csum);
-		return 0;
-	}
+	csum = skb_copy_and_csum_bits(icmp_param->skb,
+				      icmp_param->offset + offset,
+				      to, len, 0);
 
-	/*
-	 *	First fragment includes header. Note that we've done
-	 *	the other fragments first, so that we get the checksum
-	 *	for the whole packet here.
-	 */
-	csum = csum_partial_copy_nocheck((void *)&icmp_param->data,
-		to, icmp_param->head_len,
-		icmp_param->csum);
-	csum=skb_copy_and_csum_bits(icmp_param->skb,
-				    icmp_param->offset, 
-				    to+icmp_param->head_len,
-				    fraglen-icmp_param->head_len,
-				    csum);
-	icmph=(struct icmphdr *)to;
-	icmph->checksum = csum_fold(csum);
+	skb->csum = csum_block_add(skb->csum, csum, odd);
 	return 0;
 }
 
+/* Build the ICMP reply on icmp_socket, finish its checksum and transmit it. */
+static void
+icmp_push_reply(struct icmp_bxm *icmp_param, struct ipcm_cookie *ipc, struct rtable *rt)
+{
+	struct sk_buff *skb;
+
+	if (ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param,
+			   icmp_param->data_len+icmp_param->head_len,
+			   icmp_param->head_len,
+			   ipc, rt, MSG_DONTWAIT) < 0) {
+		ip_flush_pending_frames(icmp_socket->sk);	/* never send a partial reply */
+	} else if ((skb = skb_peek(&icmp_socket->sk->write_queue)) != NULL) {
+		struct icmphdr *icmph = skb->h.icmph;
+		unsigned int csum = 0;
+		struct sk_buff *skb1;
+
+		skb_queue_walk(&icmp_socket->sk->write_queue, skb1)
+			csum = csum_add(csum, skb1->csum);	/* per-skb sums from icmp_glue_bits() */
+		csum = csum_partial_copy_nocheck((void *)&icmp_param->data,
+						 (char*)icmph, icmp_param->head_len,
+						 csum);
+		icmph->checksum = csum_fold(csum);
+		skb->ip_summed = CHECKSUM_NONE;
+		ip_push_pending_frames(icmp_socket->sk);
+	}
+}
+
 /*
  *	Driving logic for building and sending ICMP messages.
  */
@@ -330,11 +337,9 @@
 		return;
 
 	icmp_param->data.icmph.checksum=0;
-	icmp_param->csum=0;
 	icmp_out_count(icmp_param->data.icmph.type);
 
 	sk->protinfo.af_inet.tos = skb->nh.iph->tos;
-	sk->protinfo.af_inet.ttl = sysctl_ip_default_ttl;
 	daddr = ipc.addr = rt->rt_src;
 	ipc.opt = NULL;
 	if (icmp_param->replyopts.optlen) {
@@ -342,14 +347,18 @@
 		if (ipc.opt->srr)
 			daddr = icmp_param->replyopts.faddr;
 	}
-	if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), 0))
-		goto out;
-	if (icmpv4_xrlim_allow(rt, icmp_param->data.icmph.type, 
-			       icmp_param->data.icmph.code)) { 
-		ip_build_xmit(sk, icmp_glue_bits, icmp_param, 
-			      icmp_param->data_len+icmp_param->head_len,
-			      &ipc, rt, MSG_DONTWAIT);
+	{
+		struct flowi fl = { .nl_u = { .ip4_u =
+					      { .daddr = daddr,
+						.saddr = rt->rt_spec_dst,
+						.tos = RT_TOS(skb->nh.iph->tos) } },
+				    .proto = IPPROTO_ICMP };
+		if (ip_route_output_key(&rt, &fl))
+			goto out;
 	}
+	if (icmpv4_xrlim_allow(rt, icmp_param->data.icmph.type, 
+			       icmp_param->data.icmph.code))
+		icmp_push_reply(icmp_param, &ipc, rt);
 	ip_rt_put(rt);
 out:
 	icmp_xmit_unlock();
@@ -446,8 +455,8 @@
 	 *	Restore original addresses if packet has been translated.
 	 */
 	if (rt->rt_flags&RTCF_NAT && IPCB(skb_in)->flags&IPSKB_TRANSLATED) {
-		iph->daddr = rt->key.dst;
-		iph->saddr = rt->key.src;
+		iph->daddr = rt->fl.fl4_dst;
+		iph->saddr = rt->fl.fl4_src;
 	}
 #endif
 
@@ -459,9 +468,14 @@
 		((iph->tos & IPTOS_TOS_MASK) | IPTOS_PREC_INTERNETCONTROL) :
 			iph->tos;
 
-	if (ip_route_output(&rt, iph->saddr, saddr, RT_TOS(tos), 0))
-		goto out;
-
+	{
+		struct flowi fl = { .nl_u = { .ip4_u = { .daddr = iph->saddr,
+							 .saddr = saddr,
+							 .tos = RT_TOS(tos) } },
+				    .proto = IPPROTO_ICMP };
+		if (ip_route_output_key(&rt, &fl))
+			goto out;
+	}
 	if (ip_options_echo(&icmp_param.replyopts, skb_in)) 
 		goto ende;
 
@@ -474,17 +488,20 @@
 	icmp_param.data.icmph.code=code;
 	icmp_param.data.icmph.un.gateway = info;
 	icmp_param.data.icmph.checksum=0;
-	icmp_param.csum=0;
 	icmp_param.skb=skb_in;
 	icmp_param.offset=skb_in->nh.raw - skb_in->data;
 	icmp_out_count(icmp_param.data.icmph.type);
 	icmp_socket->sk->protinfo.af_inet.tos = tos;
-	icmp_socket->sk->protinfo.af_inet.ttl = sysctl_ip_default_ttl;
 	ipc.addr = iph->saddr;
 	ipc.opt = &icmp_param.replyopts;
 	if (icmp_param.replyopts.srr) {
+		struct flowi fl = { .nl_u = { .ip4_u =
+					      { .daddr = icmp_param.replyopts.faddr,
+						.saddr = saddr,
+						.tos = RT_TOS(tos) } },
+				    .proto = IPPROTO_ICMP };
 		ip_rt_put(rt);
-		if (ip_route_output(&rt, icmp_param.replyopts.faddr, saddr, RT_TOS(tos), 0))
+		if (ip_route_output_key(&rt, &fl))
 			goto out;
 	}
 
@@ -493,7 +510,7 @@
 
 	/* RFC says return as much as we can without exceeding 576 bytes. */
 
-	room = rt->u.dst.pmtu;
+	room = dst_pmtu(&rt->u.dst);
 	if (room > 576)
 		room = 576;
 	room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen;
@@ -504,9 +521,7 @@
 		icmp_param.data_len = room;
 	icmp_param.head_len = sizeof(struct icmphdr);
 
-	ip_build_xmit(icmp_socket->sk, icmp_glue_bits, &icmp_param, 
-		icmp_param.data_len+sizeof(struct icmphdr),
-		&ipc, rt, MSG_DONTWAIT);
+	icmp_push_reply(&icmp_param, &ipc, rt);
 
 ende:
 	ip_rt_put(rt);
@@ -645,24 +660,10 @@
 	 *	we are OK.
 	 */
 
-	ipprot = (struct inet_protocol *) inet_protos[hash];
-	while (ipprot) {
-		struct inet_protocol *nextip;
-
-		nextip = (struct inet_protocol *) ipprot->next;
-	
-		/* 
-		 *	Pass it off to everyone who wants it. 
-		 */
+	ipprot = inet_protos[hash];
+	if (ipprot && ipprot->err_handler)
+		ipprot->err_handler(skb, info);
 
-		/* RFC1122: OK. Passes appropriate ICMP errors to the */
-		/* appropriate protocol layer (MUST), as per 3.2.2. */
-
-		if (protocol == ipprot->protocol && ipprot->err_handler)
- 			ipprot->err_handler(skb, info);
-
-		ipprot = nextip;
-  	}
 out:;
 }
 
@@ -991,7 +992,7 @@
 		icmp_socket_cpu(i)->sk->sndbuf =
 			(2 * ((64 * 1024) + sizeof(struct sk_buff)));
 
-		icmp_socket_cpu(i)->sk->protinfo.af_inet.ttl = MAXTTL;
+		icmp_socket_cpu(i)->sk->protinfo.af_inet.uc_ttl = -1;
 		icmp_socket_cpu(i)->sk->protinfo.af_inet.pmtudisc = IP_PMTUDISC_DONT;
 
 		/* Unhash it so that IP input processing does not even
Index: net/ipv4/igmp.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/igmp.c,v
retrieving revision 1.1.1.20
retrieving revision 1.1.1.20.2.1
diff -u -r1.1.1.20 -r1.1.1.20.2.1
--- a/net/ipv4/igmp.c	14 Apr 2004 13:05:41 -0000	1.1.1.20
+++ b/net/ipv4/igmp.c	16 Apr 2004 13:16:22 -0000	1.1.1.20.2.1
@@ -218,15 +218,6 @@
 
 #define IGMP_SIZE (sizeof(struct igmphdr)+sizeof(struct iphdr)+4)
 
-/* Don't just hand NF_HOOK skb->dst->output, in case netfilter hook
-   changes route */
-static inline int
-output_maybe_reroute(struct sk_buff *skb)
-{
-	return skb->dst->output(skb);
-}
-
-
 static int is_in(struct ip_mc_list *pmc, struct ip_sf_list *psf, int type,
 	int gdeleted, int sdeleted)
 {
@@ -283,13 +274,18 @@
 	u32	dst;
 
 	dst = IGMPV3_ALL_MCR;
-	if (ip_route_output(&rt, dst, 0, 0, dev->ifindex))
-		return 0;
+	{
+		struct flowi fl = { .oif = dev->ifindex,
+				    .nl_u = { .ip4_u = { .daddr = dst } },
+				    .proto = IPPROTO_IGMP };
+		if (ip_route_output_key(&rt, &fl))
+			return 0;
+	}
 	if (rt->rt_src == 0) {
 		ip_rt_put(rt);
 		return 0;
 	}
-	skb = alloc_skb(size + dev->hard_header_len + 15, GFP_ATOMIC);
+	skb = alloc_skb(size + LL_RESERVED_SPACE(dev), GFP_ATOMIC);
 	if (skb == NULL) {
 		ip_rt_put(rt);
 		return 0;
@@ -298,7 +294,7 @@
 	skb->dst = &rt->u.dst;
 	skb->dev = dev;
 
-	skb_reserve(skb, (dev->hard_header_len+15)&~15);
+	skb_reserve(skb, LL_RESERVED_SPACE(dev));
 
 	skb->nh.iph = pip =(struct iphdr *)skb_put(skb, sizeof(struct iphdr)+4);
 
@@ -341,7 +337,7 @@
 	pig->csum = ip_compute_csum((void *)skb->h.igmph, igmplen);
 
 	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, skb->dev,
-		       output_maybe_reroute);
+		       dst_output);
 }
 
 static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel)
@@ -623,14 +619,19 @@
 	else
 		dst = group;
 
-	if (ip_route_output(&rt, dst, 0, 0, dev->ifindex))
-		return -1;
+	{
+		struct flowi fl = { .oif = dev->ifindex,
+				    .nl_u = { .ip4_u = { .daddr = dst } },
+				    .proto = IPPROTO_IGMP };
+		if (ip_route_output_key(&rt, &fl))
+			return -1;
+	}
 	if (rt->rt_src == 0) {
 		ip_rt_put(rt);
 		return -1;
 	}
 
-	skb=alloc_skb(IGMP_SIZE+dev->hard_header_len+15, GFP_ATOMIC);
+	skb=alloc_skb(IGMP_SIZE+LL_RESERVED_SPACE(dev), GFP_ATOMIC);
 	if (skb == NULL) {
 		ip_rt_put(rt);
 		return -1;
@@ -638,7 +639,7 @@
 
 	skb->dst = &rt->u.dst;
 
-	skb_reserve(skb, (dev->hard_header_len+15)&~15);
+	skb_reserve(skb, LL_RESERVED_SPACE(dev));
 
 	skb->nh.iph = iph = (struct iphdr *)skb_put(skb, sizeof(struct iphdr)+4);
 
@@ -666,7 +667,7 @@
 	ih->csum=ip_compute_csum((void *)ih, sizeof(struct igmphdr));
 
 	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
-		       output_maybe_reroute);
+		       dst_output);
 }
 
 static void igmp_gq_timer_expire(unsigned long data)
@@ -874,7 +875,7 @@
 	case IGMPV2_HOST_MEMBERSHIP_REPORT:
 	case IGMPV3_HOST_MEMBERSHIP_REPORT:
 		/* Is it our report looped back? */
-		if (((struct rtable*)skb->dst)->key.iif == 0)
+		if (((struct rtable*)skb->dst)->fl.iif == 0)
 			break;
 		igmp_heard_report(in_dev, ih->group);
 		break;
@@ -1283,6 +1284,8 @@
 
 static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr)
 {
+	struct flowi fl = { .nl_u = { .ip4_u =
+				      { .daddr = imr->imr_multiaddr.s_addr } } };
 	struct rtable *rt;
 	struct net_device *dev = NULL;
 	struct in_device *idev = NULL;
@@ -1300,7 +1303,7 @@
 		__dev_put(dev);
 	}
 
-	if (!dev && !ip_route_output(&rt, imr->imr_multiaddr.s_addr, 0, 0, 0)) {
+	if (!dev && !ip_route_output_key(&rt, &fl)) {
 		dev = rt->u.dst.dev;
 		ip_rt_put(rt);
 	}
Index: net/ipv4/ip_forward.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/ip_forward.c,v
retrieving revision 1.1.1.15
retrieving revision 1.1.1.15.2.1
diff -u -r1.1.1.15 -r1.1.1.15.2.1
--- a/net/ipv4/ip_forward.c	12 Apr 2001 19:11:39 -0000	1.1.1.15
+++ b/net/ipv4/ip_forward.c	16 Apr 2004 13:16:22 -0000	1.1.1.15.2.1
@@ -40,6 +40,7 @@
 #include <net/checksum.h>
 #include <linux/route.h>
 #include <net/route.h>
+#include <net/xfrm.h>
 
 static inline int ip_forward_finish(struct sk_buff *skb)
 {
@@ -47,36 +48,20 @@
 
 	IP_INC_STATS_BH(IpForwDatagrams);
 
-	if (opt->optlen == 0) {
-#ifdef CONFIG_NET_FASTROUTE
-		struct rtable *rt = (struct rtable*)skb->dst;
-
-		if (rt->rt_flags&RTCF_FAST && !netdev_fastroute_obstacles) {
-			struct dst_entry *old_dst;
-			unsigned h = ((*(u8*)&rt->key.dst)^(*(u8*)&rt->key.src))&NETDEV_FASTROUTE_HMASK;
-
-			write_lock_irq(&skb->dev->fastpath_lock);
-			old_dst = skb->dev->fastpath[h];
-			skb->dev->fastpath[h] = dst_clone(&rt->u.dst);
-			write_unlock_irq(&skb->dev->fastpath_lock);
-
-			dst_release(old_dst);
-		}
-#endif
-		return (ip_send(skb));
-	}
+	if (unlikely(opt->optlen))
+		ip_forward_options(skb);
 
-	ip_forward_options(skb);
-	return (ip_send(skb));
+	return dst_output(skb);
 }
 
 int ip_forward(struct sk_buff *skb)
 {
-	struct net_device *dev2;	/* Output device */
 	struct iphdr *iph;	/* Our header */
 	struct rtable *rt;	/* Route we use */
 	struct ip_options * opt	= &(IPCB(skb)->opt);
-	unsigned short mtu;
+
+	if (!xfrm4_policy_check(NULL, XFRM_POLICY_FWD, skb))
+		goto drop;
 
 	if (IPCB(skb)->opt.router_alert && ip_call_ra_chain(skb))
 		return NET_RX_SUCCESS;
@@ -93,32 +78,21 @@
 	 */
 
 	iph = skb->nh.iph;
-	rt = (struct rtable*)skb->dst;
 
 	if (iph->ttl <= 1)
                 goto too_many_hops;
 
-	if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
-                goto sr_failed;
-
-	/*
-	 *	Having picked a route we can now send the frame out
-	 *	after asking the firewall permission to do so.
-	 */
+	if (!xfrm4_route_forward(skb))
+		goto drop;
 
-	skb->priority = rt_tos2priority(iph->tos);
-	dev2 = rt->u.dst.dev;
-	mtu = rt->u.dst.pmtu;
+	iph = skb->nh.iph;
+	rt = (struct rtable*)skb->dst;
 
-	/*
-	 *	We now generate an ICMP HOST REDIRECT giving the route
-	 *	we calculated.
-	 */
-	if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr)
-		ip_rt_send_redirect(skb);
+	if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
+		goto sr_failed;
 
 	/* We are about to mangle packet. Copy it! */
-	if (skb_cow(skb, dev2->hard_header_len))
+	if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len))
 		goto drop;
 	iph = skb->nh.iph;
 
@@ -126,30 +100,17 @@
 	ip_decrease_ttl(iph);
 
 	/*
-	 * We now may allocate a new buffer, and copy the datagram into it.
-	 * If the indicated interface is up and running, kick it.
+	 *	We now generate an ICMP HOST REDIRECT giving the route
+	 *	we calculated.
 	 */
+	if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr)
+		ip_rt_send_redirect(skb);
 
-	if (skb->len > mtu && (ntohs(iph->frag_off) & IP_DF))
-		goto frag_needed;
-
-#ifdef CONFIG_IP_ROUTE_NAT
-	if (rt->rt_flags & RTCF_NAT) {
-		if (ip_do_nat(skb)) {
-			kfree_skb(skb);
-			return NET_RX_BAD;
-		}
-	}
-#endif
+	skb->priority = rt_tos2priority(iph->tos);
 
-	return NF_HOOK(PF_INET, NF_IP_FORWARD, skb, skb->dev, dev2,
+	return NF_HOOK(PF_INET, NF_IP_FORWARD, skb, skb->dev, rt->u.dst.dev,
 		       ip_forward_finish);
 
-frag_needed:
-	IP_INC_STATS_BH(IpFragFails);
-	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
-        goto drop;
-
 sr_failed:
         /*
 	 *	Strict routing permits no gatewaying
Index: net/ipv4/ip_gre.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/ip_gre.c,v
retrieving revision 1.1.1.24
retrieving revision 1.1.1.24.2.1
diff -u -r1.1.1.24 -r1.1.1.24.2.1
--- a/net/ipv4/ip_gre.c	28 Nov 2003 18:26:21 -0000	1.1.1.24
+++ b/net/ipv4/ip_gre.c	16 Apr 2004 13:16:22 -0000	1.1.1.24.2.1
@@ -37,6 +37,7 @@
 #include <net/arp.h>
 #include <net/checksum.h>
 #include <net/inet_ecn.h>
+#include <net/xfrm.h>
 
 #ifdef CONFIG_IPV6
 #include <net/ipv6.h>
@@ -410,6 +411,7 @@
 	u16 flags;
 	int grehlen = (iph->ihl<<2) + 4;
 	struct sk_buff *skb2;
+	struct flowi fl;
 	struct rtable *rt;
 
 	if (p[1] != htons(ETH_P_IP))
@@ -486,7 +488,11 @@
 	skb2->nh.raw = skb2->data;
 
 	/* Try to guess incoming interface */
-	if (ip_route_output(&rt, eiph->saddr, 0, RT_TOS(eiph->tos), 0)) {
+	memset(&fl, 0, sizeof(fl));
+	fl.fl4_dst = eiph->saddr;
+	fl.fl4_tos = RT_TOS(eiph->tos);
+	fl.proto = IPPROTO_GRE;
+	if (ip_route_output_key(&rt, &fl)) {
 		kfree_skb(skb2);
 		return;
 	}
@@ -496,7 +502,10 @@
 	if (rt->rt_flags&RTCF_LOCAL) {
 		ip_rt_put(rt);
 		rt = NULL;
-		if (ip_route_output(&rt, eiph->daddr, eiph->saddr, eiph->tos, 0) ||
+		fl.fl4_dst = eiph->daddr;
+		fl.fl4_src = eiph->saddr;
+		fl.fl4_tos = eiph->tos;
+		if (ip_route_output_key(&rt, &fl) ||
 		    rt->u.dst.dev->type != ARPHRD_IPGRE) {
 			ip_rt_put(rt);
 			kfree_skb(skb2);
@@ -513,11 +522,11 @@
 
 	/* change mtu on this route */
 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
-		if (rel_info > skb2->dst->pmtu) {
+		if (rel_info > dst_pmtu(skb2->dst)) {
 			kfree_skb(skb2);
 			return;
 		}
-		skb2->dst->pmtu = rel_info;
+		skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
 		rel_info = htonl(rel_info);
 	} else if (type == ICMP_TIME_EXCEEDED) {
 		struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv;
@@ -606,6 +615,8 @@
 
 	read_lock(&ipgre_lock);
 	if ((tunnel = ipgre_tunnel_lookup(iph->saddr, iph->daddr, key)) != NULL) {
+		secpath_reset(skb);
+
 		skb->mac.raw = skb->nh.raw;
 		skb->nh.raw = __pskb_pull(skb, offset);
 		memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
@@ -617,7 +628,7 @@
 #ifdef CONFIG_NET_IPGRE_BROADCAST
 		if (MULTICAST(iph->daddr)) {
 			/* Looped back packet, drop it! */
-			if (((struct rtable*)skb->dst)->key.iif == 0)
+			if (((struct rtable*)skb->dst)->fl.iif == 0)
 				goto drop;
 			tunnel->stat.multicast++;
 			skb->pkt_type = PACKET_BROADCAST;
@@ -665,12 +676,6 @@
 	return(0);
 }
 
-/* Need this wrapper because NF_HOOK takes the function address */
-static inline int do_ip_send(struct sk_buff *skb)
-{
-	return ip_send(skb);
-}
-
 static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
@@ -747,9 +752,17 @@
 		tos &= ~1;
 	}
 
-	if (ip_route_output(&rt, dst, tiph->saddr, RT_TOS(tos), tunnel->parms.link)) {
-		tunnel->stat.tx_carrier_errors++;
-		goto tx_error;
+	{
+		struct flowi fl = { .oif = tunnel->parms.link,
+				    .nl_u = { .ip4_u =
+					      { .daddr = dst,
+						.saddr = tiph->saddr,
+						.tos = RT_TOS(tos) } },
+				    .proto = IPPROTO_GRE };
+		if (ip_route_output_key(&rt, &fl)) {
+			tunnel->stat.tx_carrier_errors++;
+			goto tx_error;
+		}
 	}
 	tdev = rt->u.dst.dev;
 
@@ -761,14 +774,14 @@
 
 	df = tiph->frag_off;
 	if (df)
-		mtu = rt->u.dst.pmtu - tunnel->hlen;
+		mtu = dst_pmtu(&rt->u.dst) - tunnel->hlen;
 	else
-		mtu = skb->dst ? skb->dst->pmtu : dev->mtu;
+		mtu = skb->dst ? dst_pmtu(skb->dst) : dev->mtu;
 
-	if (skb->protocol == htons(ETH_P_IP)) {
-		if (skb->dst && mtu < skb->dst->pmtu && mtu >= 68)
-			skb->dst->pmtu = mtu;
+	if (skb->dst)
+		skb->dst->ops->update_pmtu(skb->dst, mtu);
 
+	if (skb->protocol == htons(ETH_P_IP)) {
 		df |= (old_iph->frag_off&htons(IP_DF));
 
 		if ((old_iph->frag_off&htons(IP_DF)) &&
@@ -782,11 +795,11 @@
 	else if (skb->protocol == htons(ETH_P_IPV6)) {
 		struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
 
-		if (rt6 && mtu < rt6->u.dst.pmtu && mtu >= IPV6_MIN_MTU) {
+		if (rt6 && mtu < dst_pmtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
 			if ((tunnel->parms.iph.daddr && !MULTICAST(tunnel->parms.iph.daddr)) ||
 			    rt6->rt6i_dst.plen == 128) {
 				rt6->rt6i_flags |= RTF_MODIFIED;
-				skb->dst->pmtu = mtu;
+				skb->dst->metrics[RTAX_MTU-1] = mtu;
 			}
 		}
 
@@ -807,7 +820,7 @@
 			tunnel->err_count = 0;
 	}
 
-	max_headroom = ((tdev->hard_header_len+15)&~15)+ gre_hlen;
+	max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
 
 	if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
 		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
@@ -852,7 +865,7 @@
 			iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
 #endif
 		else
-			iph->ttl = sysctl_ip_default_ttl;
+			iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
 	}
 
 	((u16*)(iph+1))[0] = tunnel->parms.o_flags;
@@ -1102,10 +1115,14 @@
 
 	MOD_INC_USE_COUNT;
 	if (MULTICAST(t->parms.iph.daddr)) {
+		struct flowi fl = { .oif = t->parms.link,
+				    .nl_u = { .ip4_u =
+					      { .daddr = t->parms.iph.daddr,
+						.saddr = t->parms.iph.saddr,
+						.tos = RT_TOS(t->parms.iph.tos) } },
+				    .proto = IPPROTO_GRE };
 		struct rtable *rt;
-		if (ip_route_output(&rt, t->parms.iph.daddr,
-				    t->parms.iph.saddr, RT_TOS(t->parms.iph.tos), 
-				    t->parms.link)) {
+		if (ip_route_output_key(&rt, &fl)) {
 			MOD_DEC_USE_COUNT;
 			return -EADDRNOTAVAIL;
 		}
@@ -1175,8 +1192,14 @@
 	/* Guess output device to choose reasonable mtu and hard_header_len */
 
 	if (iph->daddr) {
+		struct flowi fl = { .oif = tunnel->parms.link,
+				    .nl_u = { .ip4_u =
+					      { .daddr = iph->daddr,
+						.saddr = iph->saddr,
+						.tos = RT_TOS(iph->tos) } },
+				    .proto = IPPROTO_GRE };
 		struct rtable *rt;
-		if (!ip_route_output(&rt, iph->daddr, iph->saddr, RT_TOS(iph->tos), tunnel->parms.link)) {
+		if (!ip_route_output_key(&rt, &fl)) {
 			tdev = rt->u.dst.dev;
 			ip_rt_put(rt);
 		}
@@ -1257,13 +1280,8 @@
 
 
 static struct inet_protocol ipgre_protocol = {
-  ipgre_rcv,             /* GRE handler          */
-  ipgre_err,             /* TUNNEL error control */
-  0,                    /* next                 */
-  IPPROTO_GRE,          /* protocol ID          */
-  0,                    /* copy                 */
-  NULL,                 /* data                 */
-  "GRE"                 /* name                 */
+	.handler	=	ipgre_rcv,	/* GRE handler */
+	.err_handler	=	ipgre_err,	/* TUNNEL error control */
 };
 
 
@@ -1279,9 +1297,13 @@
 {
 	printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
 
+	if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
+		printk(KERN_INFO "ipgre init: can't add protocol\n");
+		return -EAGAIN;
+	}
+
 	ipgre_fb_tunnel_dev.priv = (void*)&ipgre_fb_tunnel;
 	register_netdev(&ipgre_fb_tunnel_dev);
-	inet_add_protocol(&ipgre_protocol);
 	return 0;
 }
 
@@ -1289,7 +1311,7 @@
 
 void cleanup_module(void)
 {
-	if ( inet_del_protocol(&ipgre_protocol) < 0 )
+	if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
 		printk(KERN_INFO "ipgre close: can't remove protocol\n");
 
 	unregister_netdev(&ipgre_fb_tunnel_dev);
Index: net/ipv4/ip_input.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/ip_input.c,v
retrieving revision 1.1.1.20
retrieving revision 1.1.1.20.2.1
diff -u -r1.1.1.20 -r1.1.1.20.2.1
--- a/net/ipv4/ip_input.c	3 Aug 2002 00:39:46 -0000	1.1.1.20
+++ b/net/ipv4/ip_input.c	16 Apr 2004 13:16:22 -0000	1.1.1.20.2.1
@@ -141,6 +141,7 @@
 #include <net/raw.h>
 #include <net/checksum.h>
 #include <linux/netfilter_ipv4.h>
+#include <net/xfrm.h>
 #include <linux/mroute.h>
 #include <linux/netlink.h>
 
@@ -194,34 +195,13 @@
 	return 0;
 }
 
-/* Handle this out of line, it is rare. */
-static int ip_run_ipprot(struct sk_buff *skb, struct iphdr *iph,
-			 struct inet_protocol *ipprot, int force_copy)
-{
-	int ret = 0;
-
-	do {
-		if (ipprot->protocol == iph->protocol) {
-			struct sk_buff *skb2 = skb;
-			if (ipprot->copy || force_copy)
-				skb2 = skb_clone(skb, GFP_ATOMIC);
-			if(skb2 != NULL) {
-				ret = 1;
-				ipprot->handler(skb2);
-			}
-		}
-		ipprot = (struct inet_protocol *) ipprot->next;
-	} while(ipprot != NULL);
-
-	return ret;
-}
-
 static inline int ip_local_deliver_finish(struct sk_buff *skb)
 {
 	int ihl = skb->nh.iph->ihl*4;
 
 #ifdef CONFIG_NETFILTER_DEBUG
 	nf_debug_ip_local_deliver(skb);
+	skb->nf_debug = 0;
 #endif /*CONFIG_NETFILTER_DEBUG*/
 
 	__skb_pull(skb, ihl);
@@ -239,44 +219,40 @@
 	{
 		/* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */
 		int protocol = skb->nh.iph->protocol;
-		int hash = protocol & (MAX_INET_PROTOS - 1);
-		struct sock *raw_sk = raw_v4_htable[hash];
+		int hash;
+		struct sock *raw_sk;
 		struct inet_protocol *ipprot;
-		int flag;
+
+	resubmit:
+		hash = protocol & (MAX_INET_PROTOS - 1);
+		raw_sk = raw_v4_htable[hash];
 
 		/* If there maybe a raw socket we must check - if not we
 		 * don't care less
 		 */
-		if(raw_sk != NULL)
-			raw_sk = raw_v4_input(skb, skb->nh.iph, hash);
+		if (raw_sk)
+			raw_v4_input(skb, skb->nh.iph, hash);
 
-		ipprot = (struct inet_protocol *) inet_protos[hash];
-		flag = 0;
-		if(ipprot != NULL) {
-			if(raw_sk == NULL &&
-			   ipprot->next == NULL &&
-			   ipprot->protocol == protocol) {
-				int ret;
-
-				/* Fast path... */
-				ret = ipprot->handler(skb);
-
-				return ret;
-			} else {
-				flag = ip_run_ipprot(skb, skb->nh.iph, ipprot, (raw_sk != NULL));
-			}
-		}
+		if ((ipprot = inet_protos[hash]) != NULL) {
+			int ret;
 
-		/* All protocols checked.
-		 * If this packet was a broadcast, we may *not* reply to it, since that
-		 * causes (proven, grin) ARP storms and a leakage of memory (i.e. all
-		 * ICMP reply messages get queued up for transmission...)
-		 */
-		if(raw_sk != NULL) {	/* Shift to last raw user */
-			raw_rcv(raw_sk, skb);
-			sock_put(raw_sk);
-		} else if (!flag) {		/* Free and report errors */
-			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0);	
+			if (!ipprot->no_policy &&
+			    !xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+				kfree_skb(skb);
+				return 0;
+			}
+			ret = ipprot->handler(skb);
+			if (ret < 0) {
+				protocol = -ret;
+				goto resubmit;
+			}
+		} else {
+			if (!raw_sk) {
+				if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+					icmp_send(skb, ICMP_DEST_UNREACH,
+						  ICMP_PROT_UNREACH, 0);
+				}
+			}
 			kfree_skb(skb);
 		}
 	}
@@ -364,7 +340,7 @@
 		}
 	}
 
-	return skb->dst->input(skb);
+	return dst_input(skb);
 
 inhdr_error:
 	IP_INC_STATS_BH(IpInHdrErrors);
Index: net/ipv4/ip_nat_dumb.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/ip_nat_dumb.c,v
retrieving revision 1.1.1.15
retrieving revision 1.1.1.15.2.1
diff -u -r1.1.1.15 -r1.1.1.15.2.1
--- a/net/ipv4/ip_nat_dumb.c	12 Apr 2001 19:11:39 -0000	1.1.1.15
+++ b/net/ipv4/ip_nat_dumb.c	16 Apr 2004 13:16:22 -0000	1.1.1.15.2.1
@@ -117,23 +117,23 @@
 			if (rt->rt_flags&RTCF_SNAT) {
 				if (ciph->daddr != osaddr) {
 					struct   fib_result res;
-					struct   rt_key key;
 					unsigned flags = 0;
-
-					key.src = ciph->daddr;
-					key.dst = ciph->saddr;
-					key.iif = skb->dev->ifindex;
-					key.oif = 0;
+					struct flowi fl = {
+						.iif = skb->dev->ifindex,
+						.nl_u =
+						{ .ip4_u =
+						  { .daddr = ciph->saddr,
+						    .saddr = ciph->daddr,
 #ifdef CONFIG_IP_ROUTE_TOS
-					key.tos = RT_TOS(ciph->tos);
-#endif
-#ifdef CONFIG_IP_ROUTE_FWMARK
-					key.fwmark = 0;
+						    .tos = RT_TOS(ciph->tos)
 #endif
+						  } },
+						.proto = ciph->protocol };
+
 					/* Use fib_lookup() until we get our own
 					 * hash table of NATed hosts -- Rani
 				 	 */
-					if (fib_lookup(&key, &res) == 0) {
+					if (fib_lookup(&fl, &res) == 0) {
 						if (res.r) {
 							ciph->daddr = fib_rules_policy(ciph->daddr, &res, &flags);
 							if (ciph->daddr != idaddr)
Index: net/ipv4/ip_output.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/ip_output.c,v
retrieving revision 1.1.1.24
retrieving revision 1.1.1.24.2.1
diff -u -r1.1.1.24 -r1.1.1.24.2.1
--- a/net/ipv4/ip_output.c	28 Nov 2003 18:26:21 -0000	1.1.1.24
+++ b/net/ipv4/ip_output.c	16 Apr 2004 13:16:22 -0000	1.1.1.24.2.1
@@ -15,6 +15,7 @@
  *		Stefan Becker, <stefanb@yello.ping.de>
  *		Jorge Cwik, <jorge@laser.satlink.net>
  *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
+ *		Hirokazu Takahashi, <taka@valinux.co.jp>
  *
  *	See ip_input.c for original log
  *
@@ -38,6 +39,9 @@
  *		Marc Boucher	:	When call_out_firewall returns FW_QUEUE,
  *					silently drop skb instead of failing with -EPERM.
  *		Detlev Wengorz	:	Copy protocol for fragments.
+ *		Hirokazu Takahashi:	HW checksumming for outgoing UDP
+ *					datagrams.
+ *		Hirokazu Takahashi:	sendfile() on UDP works now.
  */
 
 #include <asm/uaccess.h>
@@ -108,16 +112,18 @@
 	return 0;
 }
 
-/* Don't just hand NF_HOOK skb->dst->output, in case netfilter hook
-   changes route */
-static inline int
-output_maybe_reroute(struct sk_buff *skb)
+static inline int ip_select_ttl(struct inet_opt *inet, struct dst_entry *dst)
 {
-	return skb->dst->output(skb);
+	int ttl = inet->uc_ttl;	/* start from the value held in inet->uc_ttl */
+
+	if (ttl < 0)		/* negative: use the route's RTAX_HOPLIMIT metric instead */
+		ttl = dst_metric(dst, RTAX_HOPLIMIT);
+	return ttl;
 }
 
 /* 
  *		Add an ip header to a skbuff and send it out.
+ *
  */
 int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
 			  u32 saddr, u32 daddr, struct ip_options *opt)
@@ -138,7 +144,7 @@
 		iph->frag_off = htons(IP_DF);
 	else
 		iph->frag_off = 0;
-	iph->ttl      = sk->protinfo.af_inet.ttl;
+	iph->ttl      = ip_select_ttl(&sk->protinfo.af_inet, &rt->u.dst);
 	iph->daddr    = rt->rt_dst;
 	iph->saddr    = rt->rt_src;
 	iph->protocol = sk->protocol;
@@ -152,15 +158,34 @@
 	}
 	ip_send_check(iph);
 
+	skb->priority = sk->priority;
+
 	/* Send it out. */
 	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
-		       output_maybe_reroute);
+		       dst_output);
 }
 
 static inline int ip_finish_output2(struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb->dst;
 	struct hh_cache *hh = dst->hh;
+	struct net_device *dev = dst->dev;
+	int hh_len = LL_RESERVED_SPACE(dev);
+
+	/* Be paranoid, rather than too clever. */
+	if (unlikely(skb_headroom(skb) < hh_len && dev->hard_header)) {
+		struct sk_buff *skb2;
+
+		skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
+		if (skb2 == NULL) {
+			kfree_skb(skb);
+			return -ENOMEM;
+		}
+		if (skb->sk)
+			skb_set_owner_w(skb2, skb->sk);
+		kfree_skb(skb);
+		skb = skb2;
+	}
 
 #ifdef CONFIG_NETFILTER_DEBUG
 	nf_debug_ip_finish_output2(skb);
@@ -184,7 +209,7 @@
 	return -EINVAL;
 }
 
-__inline__ int ip_finish_output(struct sk_buff *skb)
+int ip_finish_output(struct sk_buff *skb)
 {
 	struct net_device *dev = skb->dst->dev;
 
@@ -205,10 +230,6 @@
 	 *	If the indicated interface is up and running, send the packet.
 	 */
 	IP_INC_STATS(IpOutRequests);
-#ifdef CONFIG_IP_ROUTE_NAT
-	if (rt->rt_flags & RTCF_NAT)
-		ip_do_nat(skb);
-#endif
 
 	skb->dev = dev;
 	skb->protocol = htons(ETH_P_IP);
@@ -253,90 +274,26 @@
 				newskb->dev, ip_dev_loopback_xmit);
 	}
 
-	return ip_finish_output(skb);
+	if (skb->len > dst_pmtu(&rt->u.dst) || skb_shinfo(skb)->frag_list)
+		return ip_fragment(skb, ip_finish_output);
+	else
+		return ip_finish_output(skb);
 }
 
 int ip_output(struct sk_buff *skb)
 {
-#ifdef CONFIG_IP_ROUTE_NAT
-	struct rtable *rt = (struct rtable*)skb->dst;
-#endif
-
 	IP_INC_STATS(IpOutRequests);
 
-#ifdef CONFIG_IP_ROUTE_NAT
-	if (rt->rt_flags&RTCF_NAT)
-		ip_do_nat(skb);
+	if ((skb->len > dst_pmtu(skb->dst) || skb_shinfo(skb)->frag_list) &&
+#ifdef NETIF_F_TSO
+	    !skb_shinfo(skb)->tso_size
+#else
+	    1
 #endif
-
-	return ip_finish_output(skb);
-}
-
-/* Queues a packet to be sent, and starts the transmitter if necessary.  
- * This routine also needs to put in the total length and compute the 
- * checksum.  We use to do this in two stages, ip_build_header() then
- * this, but that scheme created a mess when routes disappeared etc.
- * So we do it all here, and the TCP send engine has been changed to
- * match. (No more unroutable FIN disasters, etc. wheee...)  This will
- * most likely make other reliable transport layers above IP easier
- * to implement under Linux.
- */
-static inline int ip_queue_xmit2(struct sk_buff *skb)
-{
-	struct sock *sk = skb->sk;
-	struct rtable *rt = (struct rtable *)skb->dst;
-	struct net_device *dev;
-	struct iphdr *iph = skb->nh.iph;
-
-	dev = rt->u.dst.dev;
-
-	/* This can happen when the transport layer has segments queued
-	 * with a cached route, and by the time we get here things are
-	 * re-routed to a device with a different MTU than the original
-	 * device.  Sick, but we must cover it.
-	 */
-	if (skb_headroom(skb) < dev->hard_header_len && dev->hard_header) {
-		struct sk_buff *skb2;
-
-		skb2 = skb_realloc_headroom(skb, (dev->hard_header_len + 15) & ~15);
-		kfree_skb(skb);
-		if (skb2 == NULL)
-			return -ENOMEM;
-		if (sk)
-			skb_set_owner_w(skb2, sk);
-		skb = skb2;
-		iph = skb->nh.iph;
-	}
-
-	if (skb->len > rt->u.dst.pmtu)
-		goto fragment;
-
-	ip_select_ident(iph, &rt->u.dst, sk);
-
-	/* Add an IP checksum. */
-	ip_send_check(iph);
-
-	skb->priority = sk->priority;
-	return skb->dst->output(skb);
-
-fragment:
-	if (ip_dont_fragment(sk, &rt->u.dst)) {
-		/* Reject packet ONLY if TCP might fragment
-		 * it itself, if were careful enough.
-		 */
-		NETDEBUG(printk(KERN_DEBUG "sending pkt_too_big (len[%u] pmtu[%u]) to self\n",
-				skb->len, rt->u.dst.pmtu));
-
-		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
-			  htonl(rt->u.dst.pmtu));
-		kfree_skb(skb);
-		return -EMSGSIZE;
-	}
-	ip_select_ident(iph, &rt->u.dst, sk);
-	if (skb->ip_summed == CHECKSUM_HW &&
-	    (skb = skb_checksum_help(skb)) == NULL)
-		return -ENOMEM;
-	return ip_fragment(skb, skb->dst->output);
+		)
+		return ip_fragment(skb, ip_finish_output);
+	else
+		return ip_finish_output(skb);
 }
 
 int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
@@ -345,6 +302,9 @@
 	struct ip_options *opt = sk->protinfo.af_inet.opt;
 	struct rtable *rt;
 	struct iphdr *iph;
+#ifdef NETIF_F_TSO
+	u32 mtu;
+#endif
 
 	/* Skip all of this if the packet is already routed,
 	 * f.e. by something like SCTP.
@@ -363,14 +323,24 @@
 		if(opt && opt->srr)
 			daddr = opt->faddr;
 
-		/* If this fails, retransmit mechanism of transport layer will
-		 * keep trying until route appears or the connection times itself
-		 * out.
-		 */
-		if (ip_route_output(&rt, daddr, sk->saddr,
-				    RT_CONN_FLAGS(sk),
-				    sk->bound_dev_if))
-			goto no_route;
+		{
+			struct flowi fl = { .oif = sk->bound_dev_if,
+					    .nl_u = { .ip4_u =
+						      { .daddr = daddr,
+							.saddr = sk->saddr,
+							.tos = RT_CONN_FLAGS(sk) } },
+					    .proto = sk->protocol,
+					    .uli_u = { .ports =
+						       { .sport = sk->sport,
+							 .dport = sk->dport } } };
+
+			/* If this fails, the transport layer's retransmit mechanism
+			 * will keep trying until a route appears or the connection
+			 * times itself out.
+			 */
+			if (ip_route_output_flow(&rt, &fl, sk, 0))
+				goto no_route;
+		}
 		__sk_dst_set(sk, &rt->u.dst);
 		sk->route_caps = rt->u.dst.dev->features;
 	}
@@ -388,7 +358,7 @@
 		iph->frag_off = htons(IP_DF);
 	else
 		iph->frag_off = 0;
-	iph->ttl      = sk->protinfo.af_inet.ttl;
+	iph->ttl      = ip_select_ttl(&sk->protinfo.af_inet, &rt->u.dst);
 	iph->protocol = sk->protocol;
 	iph->saddr    = rt->rt_src;
 	iph->daddr    = rt->rt_dst;
@@ -400,8 +370,30 @@
 		ip_options_build(skb, opt, sk->daddr, rt, 0);
 	}
 
+#ifdef NETIF_F_TSO
+	mtu = dst_pmtu(&rt->u.dst);
+ 	if (skb->len > mtu && (sk->route_caps&NETIF_F_TSO)) {
+		unsigned int hlen;
+
+		/* Hack zone: all this must be done by TCP. */
+		hlen = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2));
+		skb_shinfo(skb)->tso_size = mtu - hlen;
+		skb_shinfo(skb)->tso_segs =
+			(skb->len - hlen + skb_shinfo(skb)->tso_size - 1)/
+				skb_shinfo(skb)->tso_size - 1;
+	}
+	ip_select_ident_more(iph, &rt->u.dst, sk, skb_shinfo(skb)->tso_segs);
+#else
+	ip_select_ident(iph, &rt->u.dst, sk);
+#endif
+
+	/* Add an IP checksum. */
+	ip_send_check(iph);
+
+	skb->priority = sk->priority;
+
 	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
-		       ip_queue_xmit2);
+		       dst_output);
 
 no_route:
 	IP_INC_STATS(IpOutNoRoutes);
@@ -409,336 +401,32 @@
 	return -EHOSTUNREACH;
 }
 
-/*
- *	Build and send a packet, with as little as one copy
- *
- *	Doesn't care much about ip options... option length can be
- *	different for fragment at 0 and other fragments.
- *
- *	Note that the fragment at the highest offset is sent first,
- *	so the getfrag routine can fill in the TCP/UDP checksum header
- *	field in the last fragment it sends... actually it also helps
- * 	the reassemblers, they can put most packets in at the head of
- *	the fragment queue, and they know the total size in advance. This
- *	last feature will measurably improve the Linux fragment handler one
- *	day.
- *
- *	The callback has five args, an arbitrary pointer (copy of frag),
- *	the source IP address (may depend on the routing table), the 
- *	destination address (char *), the offset to copy from, and the
- *	length to be copied.
- */
-
-static int ip_build_xmit_slow(struct sock *sk,
-		  int getfrag (const void *,
-			       char *,
-			       unsigned int,	
-			       unsigned int),
-		  const void *frag,
-		  unsigned length,
-		  struct ipcm_cookie *ipc,
-		  struct rtable *rt,
-		  int flags)
-{
-	unsigned int fraglen, maxfraglen, fragheaderlen;
-	int err;
-	int offset, mf;
-	int mtu;
-	u16 id;
-
-	int hh_len = (rt->u.dst.dev->hard_header_len + 15)&~15;
-	int nfrags=0;
-	struct ip_options *opt = ipc->opt;
-	int df = 0;
-
-	mtu = rt->u.dst.pmtu;
-	if (ip_dont_fragment(sk, &rt->u.dst))
-		df = htons(IP_DF);
-
-	length -= sizeof(struct iphdr);
-
-	if (opt) {
-		fragheaderlen = sizeof(struct iphdr) + opt->optlen;
-		maxfraglen = ((mtu-sizeof(struct iphdr)-opt->optlen) & ~7) + fragheaderlen;
-	} else {
-		fragheaderlen = sizeof(struct iphdr);
-
-		/*
-		 *	Fragheaderlen is the size of 'overhead' on each buffer. Now work
-		 *	out the size of the frames to send.
-		 */
-
-		maxfraglen = ((mtu-sizeof(struct iphdr)) & ~7) + fragheaderlen;
-	}
-
-	if (length + fragheaderlen > 0xFFFF) {
-		ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport, mtu);
-		return -EMSGSIZE;
-	}
-
-	/*
-	 *	Start at the end of the frame by handling the remainder.
-	 */
-
-	offset = length - (length % (maxfraglen - fragheaderlen));
-
-	/*
-	 *	Amount of memory to allocate for final fragment.
-	 */
-
-	fraglen = length - offset + fragheaderlen;
-
-	if (length-offset==0) {
-		fraglen = maxfraglen;
-		offset -= maxfraglen-fragheaderlen;
-	}
-
-	/*
-	 *	The last fragment will not have MF (more fragments) set.
-	 */
-
-	mf = 0;
-
-	/*
-	 *	Don't fragment packets for path mtu discovery.
-	 */
-
-	if (offset > 0 && sk->protinfo.af_inet.pmtudisc==IP_PMTUDISC_DO) { 
-		ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport, mtu);
- 		return -EMSGSIZE;
-	}
-	if (flags&MSG_PROBE)
-		goto out;
-
-	/*
-	 *	Begin outputting the bytes.
-	 */
-
-	id = sk->protinfo.af_inet.id++;
-
-	do {
-		char *data;
-		struct sk_buff * skb;
-
-		/*
-		 *	Get the memory we require with some space left for alignment.
-		 */
-		if (!(flags & MSG_DONTWAIT) || nfrags == 0) {
-			skb = sock_alloc_send_skb(sk, fraglen + hh_len + 15,
-						  (flags & MSG_DONTWAIT), &err);
-		} else {
-			/* On a non-blocking write, we check for send buffer
-			 * usage on the first fragment only.
-			 */
-			skb = sock_wmalloc(sk, fraglen + hh_len + 15, 1,
-					   sk->allocation);
-			if (!skb)
-				err = -ENOBUFS;
-		}
-		if (skb == NULL)
-			goto error;
-
-		/*
-		 *	Fill in the control structures
-		 */
-
-		skb->priority = sk->priority;
-		skb->dst = dst_clone(&rt->u.dst);
-		skb_reserve(skb, hh_len);
-
-		/*
-		 *	Find where to start putting bytes.
-		 */
-
-		data = skb_put(skb, fraglen);
-		skb->nh.iph = (struct iphdr *)data;
-
-		/*
-		 *	Only write IP header onto non-raw packets 
-		 */
-
-		{
-			struct iphdr *iph = (struct iphdr *)data;
-
-			iph->version = 4;
-			iph->ihl = 5;
-			if (opt) {
-				iph->ihl += opt->optlen>>2;
-				ip_options_build(skb, opt,
-						 ipc->addr, rt, offset);
-			}
-			iph->tos = sk->protinfo.af_inet.tos;
-			iph->tot_len = htons(fraglen - fragheaderlen + iph->ihl*4);
-			iph->frag_off = htons(offset>>3)|mf|df;
-			iph->id = id;
-			if (!mf) {
-				if (offset || !df) {
-					/* Select an unpredictable ident only
-					 * for packets without DF or having
-					 * been fragmented.
-					 */
-					__ip_select_ident(iph, &rt->u.dst);
-					id = iph->id;
-				}
-
-				/*
-				 *	Any further fragments will have MF set.
-				 */
-				mf = htons(IP_MF);
-			}
-			if (rt->rt_type == RTN_MULTICAST)
-				iph->ttl = sk->protinfo.af_inet.mc_ttl;
-			else
-				iph->ttl = sk->protinfo.af_inet.ttl;
-			iph->protocol = sk->protocol;
-			iph->check = 0;
-			iph->saddr = rt->rt_src;
-			iph->daddr = rt->rt_dst;
-			iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
-			data += iph->ihl*4;
-		}
-
-		/*
-		 *	User data callback
-		 */
-
-		if (getfrag(frag, data, offset, fraglen-fragheaderlen)) {
-			err = -EFAULT;
-			kfree_skb(skb);
-			goto error;
-		}
-
-		offset -= (maxfraglen-fragheaderlen);
-		fraglen = maxfraglen;
-
-		nfrags++;
-
-		err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, 
-			      skb->dst->dev, output_maybe_reroute);
-		if (err) {
-			if (err > 0)
-				err = sk->protinfo.af_inet.recverr ? net_xmit_errno(err) : 0;
-			if (err)
-				goto error;
-		}
-	} while (offset >= 0);
-
-	if (nfrags>1)
-		ip_statistics[smp_processor_id()*2 + !in_softirq()].IpFragCreates += nfrags;
-out:
-	return 0;
-
-error:
-	IP_INC_STATS(IpOutDiscards);
-	if (nfrags>1)
-		ip_statistics[smp_processor_id()*2 + !in_softirq()].IpFragCreates += nfrags;
-	return err; 
-}
-
-/*
- *	Fast path for unfragmented packets.
- */
-int ip_build_xmit(struct sock *sk, 
-		  int getfrag (const void *,
-			       char *,
-			       unsigned int,	
-			       unsigned int),
-		  const void *frag,
-		  unsigned length,
-		  struct ipcm_cookie *ipc,
-		  struct rtable *rt,
-		  int flags)
+static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 {
-	int err;
-	struct sk_buff *skb;
-	int df;
-	struct iphdr *iph;
-
-	/*
-	 *	Try the simple case first. This leaves fragmented frames, and by
-	 *	choice RAW frames within 20 bytes of maximum size(rare) to the long path
-	 */
-
-	if (!sk->protinfo.af_inet.hdrincl) {
-		length += sizeof(struct iphdr);
-
-		/*
-		 * 	Check for slow path.
-		 */
-		if (length > rt->u.dst.pmtu || ipc->opt != NULL)  
-			return ip_build_xmit_slow(sk,getfrag,frag,length,ipc,rt,flags); 
-	} else {
-		if (length > rt->u.dst.dev->mtu) {
-			ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport, rt->u.dst.dev->mtu);
-			return -EMSGSIZE;
-		}
-	}
-	if (flags&MSG_PROBE)
-		goto out;
+	to->pkt_type = from->pkt_type;
+	to->priority = from->priority;
+	to->protocol = from->protocol;
+	to->security = from->security;
+	to->dst = dst_clone(from->dst);
+	to->dev = from->dev;
 
-	/*
-	 *	Do path mtu discovery if needed.
-	 */
-	df = 0;
-	if (ip_dont_fragment(sk, &rt->u.dst))
-		df = htons(IP_DF);
+	/* Copy the flags to each fragment. */
+	IPCB(to)->flags = IPCB(from)->flags;
 
-	/* 
-	 *	Fast path for unfragmented frames without options. 
-	 */ 
-	{
-	int hh_len = (rt->u.dst.dev->hard_header_len + 15)&~15;
-
-	skb = sock_alloc_send_skb(sk, length+hh_len+15,
-				  flags&MSG_DONTWAIT, &err);
-	if(skb==NULL)
-		goto error; 
-	skb_reserve(skb, hh_len);
-	}
-
-	skb->priority = sk->priority;
-	skb->dst = dst_clone(&rt->u.dst);
-
-	skb->nh.iph = iph = (struct iphdr *)skb_put(skb, length);
-
-	if(!sk->protinfo.af_inet.hdrincl) {
-		iph->version=4;
-		iph->ihl=5;
-		iph->tos=sk->protinfo.af_inet.tos;
-		iph->tot_len = htons(length);
-		iph->frag_off = df;
-		iph->ttl=sk->protinfo.af_inet.mc_ttl;
-		ip_select_ident(iph, &rt->u.dst, sk);
-		if (rt->rt_type != RTN_MULTICAST)
-			iph->ttl=sk->protinfo.af_inet.ttl;
-		iph->protocol=sk->protocol;
-		iph->saddr=rt->rt_src;
-		iph->daddr=rt->rt_dst;
-		iph->check=0;
-		iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
-		err = getfrag(frag, ((char *)iph)+iph->ihl*4,0, length-iph->ihl*4);
-	}
-	else
-		err = getfrag(frag, (void *)iph, 0, length);
-
-	if (err)
-		goto error_fault;
-
-	err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
-		      output_maybe_reroute);
-	if (err > 0)
-		err = sk->protinfo.af_inet.recverr ? net_xmit_errno(err) : 0;
-	if (err)
-		goto error;
-out:
-	return 0;
-
-error_fault:
-	err = -EFAULT;
-	kfree_skb(skb);
-error:
-	IP_INC_STATS(IpOutDiscards);
-	return err; 
+#ifdef CONFIG_NET_SCHED
+	to->tc_index = from->tc_index;
+#endif
+#ifdef CONFIG_NETFILTER
+	to->nfmark = from->nfmark;
+	to->nfcache = from->nfcache;
+	/* Connection association is same as pre-frag packet */
+	nf_conntrack_put(to->nfct);
+	to->nfct = from->nfct;
+	nf_conntrack_get(to->nfct);
+#ifdef CONFIG_NETFILTER_DEBUG
+	to->nf_debug = from->nf_debug;
+#endif
+#endif
 }
 
 /*
@@ -746,8 +434,6 @@
  *	smaller pieces (each of size equal to IP header plus
  *	a block of the data of the original IP data part) that will yet fit in a
  *	single device frame, and queue such a frame for sending.
- *
- *	Yes this is inefficient, feel free to submit a quicker one.
  */
 
 int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
@@ -771,13 +457,111 @@
 
 	iph = skb->nh.iph;
 
+	if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
+		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+			  htonl(dst_pmtu(&rt->u.dst)));
+		kfree_skb(skb);
+		return -EMSGSIZE;
+	}
+
 	/*
 	 *	Setup starting values.
 	 */
 
 	hlen = iph->ihl * 4;
+	mtu = dst_pmtu(&rt->u.dst) - hlen;	/* Size of data space */
+
+	/* When frag_list is given, use it. First, check its validity:
+	 * some transformers could create a wrong frag_list or break an
+	 * existing one; that is not prohibited. In this case fall back to copying.
+	 *
+	 * LATER: this step can be merged into the real generation of fragments;
+	 * we can switch to copying when we see the first bad fragment.
+	 */
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *frag;
+		int first_len = skb_pagelen(skb);
+
+		if (first_len - hlen > mtu ||
+		    ((first_len - hlen) & 7) ||
+		    (iph->frag_off & htons(IP_MF|IP_OFFSET)) ||
+		    skb_cloned(skb))
+			goto slow_path;
+
+		for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
+			/* Correct geometry. */
+			if (frag->len > mtu ||
+			    ((frag->len & 7) && frag->next) ||
+			    skb_headroom(frag) < hlen)
+			    goto slow_path;
+
+			/* Correct socket ownership. */
+			if (frag->sk == NULL)
+				goto slow_path;
+
+			/* Partially cloned skb? */
+			if (skb_shared(frag))
+				goto slow_path;
+		}
+
+		/* Everything is OK. Generate! */
+
+		err = 0;
+		offset = 0;
+		frag = skb_shinfo(skb)->frag_list;
+		skb_shinfo(skb)->frag_list = 0;
+		skb->data_len = first_len - skb_headlen(skb);
+		skb->len = first_len;
+		iph->tot_len = htons(first_len);
+		iph->frag_off |= htons(IP_MF);
+		ip_send_check(iph);
+
+		for (;;) {
+			/* Prepare header of the next frame,
+			 * before previous one went down. */
+			if (frag) {
+				frag->h.raw = frag->data;
+				frag->nh.raw = __skb_push(frag, hlen);
+				memcpy(frag->nh.raw, iph, hlen);
+				iph = frag->nh.iph;
+				iph->tot_len = htons(frag->len);
+				ip_copy_metadata(frag, skb);
+				if (offset == 0)
+					ip_options_fragment(frag);
+				offset += skb->len - hlen;
+				iph->frag_off = htons(offset>>3);
+				if (frag->next != NULL)
+					iph->frag_off |= htons(IP_MF);
+				/* Ready, complete checksum */
+				ip_send_check(iph);
+			}
+
+			err = output(skb);
+
+			if (err || !frag)
+				break;
+
+			skb = frag;
+			frag = skb->next;
+			skb->next = NULL;
+		}
+
+		if (err == 0) {
+			IP_INC_STATS(IpFragOKs);
+			return 0;
+		}
+
+		while (frag) {
+			skb = frag->next;
+			kfree_skb(frag);
+			frag = skb;
+		}
+		IP_INC_STATS(IpFragFails);
+		return err;
+	}
+
+slow_path:
 	left = skb->len - hlen;		/* Space per frame */
-	mtu = rt->u.dst.pmtu - hlen;	/* Size of data space */
 	ptr = raw + hlen;		/* Where to start from */
 
 	/*
@@ -805,7 +589,7 @@
 		 *	Allocate buffer.
 		 */
 
-		if ((skb2 = alloc_skb(len+hlen+dev->hard_header_len+15,GFP_ATOMIC)) == NULL) {
+		if ((skb2 = alloc_skb(len+hlen+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
 			NETDEBUG(printk(KERN_INFO "IP: frag: no memory for new fragment!\n"));
 			err = -ENOMEM;
 			goto fail;
@@ -815,14 +599,11 @@
 		 *	Set up data on packet
 		 */
 
-		skb2->pkt_type = skb->pkt_type;
-		skb2->priority = skb->priority;
-		skb_reserve(skb2, (dev->hard_header_len+15)&~15);
+		ip_copy_metadata(skb2, skb);
+		skb_reserve(skb2, LL_RESERVED_SPACE(rt->u.dst.dev));
 		skb_put(skb2, len + hlen);
 		skb2->nh.raw = skb2->data;
 		skb2->h.raw = skb2->data + hlen;
-		skb2->protocol = skb->protocol;
-		skb2->security = skb->security;
 
 		/*
 		 *	Charge the memory for the fragment to any owner
@@ -831,8 +612,6 @@
 
 		if (skb->sk)
 			skb_set_owner_w(skb2, skb->sk);
-		skb2->dst = dst_clone(skb->dst);
-		skb2->dev = skb->dev;
 
 		/*
 		 *	Copy the packet header into the new buffer.
@@ -862,9 +641,6 @@
 		if (offset == 0)
 			ip_options_fragment(skb);
 
-		/* Copy the flags to each fragment. */
-		IPCB(skb2)->flags = IPCB(skb)->flags;
-
 		/*
 		 *	Added AC : If we are fragmenting a fragment that's not the
 		 *		   last fragment then keep MF on each bit
@@ -874,20 +650,6 @@
 		ptr += len;
 		offset += len;
 
-#ifdef CONFIG_NET_SCHED
-		skb2->tc_index = skb->tc_index;
-#endif
-#ifdef CONFIG_NETFILTER
-		skb2->nfmark = skb->nfmark;
-		skb2->nfcache = skb->nfcache;
-		/* Connection association is same as pre-frag packet */
-		skb2->nfct = skb->nfct;
-		nf_conntrack_get(skb2->nfct);
-#ifdef CONFIG_NETFILTER_DEBUG
-		skb2->nf_debug = skb->nf_debug;
-#endif
-#endif
-
 		/*
 		 *	Put this fragment into the sending queue.
 		 */
@@ -912,40 +674,552 @@
 	return err;
 }
 
+int
+ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
+{
+	struct iovec *iov = from;
+
+	if (skb->ip_summed == CHECKSUM_HW) {
+		if (memcpy_fromiovecend(to, iov, offset, len) < 0)
+			return -EFAULT;
+	} else {
+		unsigned int csum = 0;
+		if (csum_partial_copy_fromiovecend(to, iov, offset, len, &csum) < 0)
+			return -EFAULT;
+		skb->csum = csum_block_add(skb->csum, csum, odd);
+	}
+	return 0;
+}
+
+static inline int
+skb_can_coalesce(struct sk_buff *skb, int i, struct page *page, int off)
+{
+	if (i) {
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
+		return page == frag->page &&
+			off == frag->page_offset+frag->size;
+	}
+	return 0;
+}
+
+static inline unsigned int
+csum_page(struct page *page, int offset, int copy)
+{
+	char *kaddr;
+	unsigned int csum;
+	kaddr = kmap(page);
+	csum = csum_partial(kaddr + offset, copy, 0);
+	kunmap(page);
+	return csum;
+}
+
 /*
- *	Fetch data from kernel space and fill in checksum if needed.
+ *	ip_append_data() and ip_append_page() can make one large IP datagram
+ *	from many pieces of data. Each piece will be held on the socket
+ *	until ip_push_pending_frames() is called. Each piece can be a page
+ *	or non-page data.
+ *	
+ *	Not only UDP, other transport protocols - e.g. raw sockets - can use
+ *	this interface potentially.
+ *
+ *	LATER: length must be adjusted by pad at tail, when it is required.
  */
-static int ip_reply_glue_bits(const void *dptr, char *to, unsigned int offset, 
-			      unsigned int fraglen)
+int ip_append_data(struct sock *sk,
+		   int getfrag(void *from, char *to, int offset, int len,
+			       int odd, struct sk_buff *skb),
+		   void *from, int length, int transhdrlen,
+		   struct ipcm_cookie *ipc, struct rtable *rt,
+		   unsigned int flags)
 {
-        struct ip_reply_arg *dp = (struct ip_reply_arg*)dptr;
-	u16 *pktp = (u16 *)to;
-	struct iovec *iov; 
-	int len; 
-	int hdrflag = 1; 
-
-	iov = &dp->iov[0]; 
-	if (offset >= iov->iov_len) { 
-		offset -= iov->iov_len;
-		iov++; 
-		hdrflag = 0; 
-	}
-	len = iov->iov_len - offset;
-	if (fraglen > len) { /* overlapping. */ 
-		dp->csum = csum_partial_copy_nocheck(iov->iov_base+offset, to, len,
-					     dp->csum);
-		offset = 0;
-		fraglen -= len; 
-		to += len; 
-		iov++;
+	struct inet_opt *inet = inet_sk(sk);
+	struct sk_buff *skb;
+
+	struct ip_options *opt = NULL;
+	int hh_len;
+	int exthdrlen;
+	int mtu;
+	int copy;
+	int err;
+	int offset = 0;
+	unsigned int maxfraglen, fragheaderlen;
+	int csummode = CHECKSUM_NONE;
+
+	if (flags&MSG_PROBE)
+		return 0;
+
+	if (skb_queue_empty(&sk->write_queue)) {
+		/* Setup for corking; on OOM bail out before any cork state changes. */
+		opt = ipc->opt;
+		if (opt) {
+			if (inet->cork.opt == NULL)
+				inet->cork.opt = kmalloc(sizeof(struct ip_options)+40, sk->allocation);
+			if (unlikely(inet->cork.opt == NULL))
+				return -ENOBUFS;
+			memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt->optlen);
+			inet->cork.flags |= IPCORK_OPT;
+			inet->cork.addr = ipc->addr;
+		}
+		dst_hold(&rt->u.dst);
+		inet->cork.fragsize = mtu = dst_pmtu(&rt->u.dst);
+		inet->cork.rt = rt;
+		inet->cork.length = 0;
+		inet->sndmsg_page = NULL;
+		inet->sndmsg_off = 0;
+		if ((exthdrlen = rt->u.dst.header_len) != 0) {
+			length += exthdrlen;
+			transhdrlen += exthdrlen;
+		}
+	} else {
+		rt = inet->cork.rt;
+		if (inet->cork.flags & IPCORK_OPT)
+			opt = inet->cork.opt;
+
+		transhdrlen = 0;
+		exthdrlen = 0;
+		mtu = inet->cork.fragsize;
+	}
+	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
+
+	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
+	maxfraglen = ((mtu-fragheaderlen) & ~7) + fragheaderlen;
+
+	if (inet->cork.length + length > 0xFFFF - fragheaderlen) {
+		ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport, mtu-exthdrlen);
+		return -EMSGSIZE;
 	}
 
-	dp->csum = csum_partial_copy_nocheck(iov->iov_base+offset, to, fraglen, 
-					     dp->csum); 
+	/*
+	 * transhdrlen > 0 means that this is the first fragment and we wish
+	 * it won't be fragmented in the future.
+	 */
+	if (transhdrlen &&
+	    length + fragheaderlen <= maxfraglen &&
+	    rt->u.dst.dev->features&(NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM) &&
+	    !exthdrlen)
+		csummode = CHECKSUM_HW;
+
+	inet->cork.length += length;
+
+	/* So, what's going on in the loop below?
+	 *
+	 * We use calculated fragment length to generate chained skb,
+	 * each of segments is IP fragment ready for sending to network after
+	 * adding appropriate IP header.
+	 *
+	 * Mistake is:
+	 *
+	 *    If mtu-fragheaderlen is not 0 modulo 8, we generate additional
+	 *    small fragment of length (mtu-fragheaderlen)%8, even though
+	 *    it is not necessary. Not a big bug, but needs a fix.
+	 */
+
+	if ((skb = skb_peek_tail(&sk->write_queue)) == NULL)
+		goto alloc_new_skb;
+
+	while (length > 0) {
+		if ((copy = maxfraglen - skb->len) <= 0) {
+			char *data;
+			unsigned int datalen;
+			unsigned int fraglen;
+			unsigned int alloclen;
+			BUG_TRAP(copy == 0);
+
+alloc_new_skb:
+			datalen = maxfraglen - fragheaderlen;
+			if (datalen > length)
+				datalen = length;
+
+			fraglen = datalen + fragheaderlen;
+			if ((flags & MSG_MORE) && 
+			    !(rt->u.dst.dev->features&NETIF_F_SG))
+				alloclen = maxfraglen;
+			else
+				alloclen = datalen + fragheaderlen;
+
+			/* The last fragment gets additional space at tail.
+			 * Note, with MSG_MORE we overallocate on fragments,
+			 * because we have no idea what fragment will be
+			 * the last.
+			 */
+			if (datalen == length)
+				alloclen += rt->u.dst.trailer_len;
+
+			if (transhdrlen) {
+				skb = sock_alloc_send_skb(sk, 
+						alloclen + hh_len + 15,
+						(flags & MSG_DONTWAIT), &err);
+			} else {
+				skb = NULL;
+				if (atomic_read(&sk->wmem_alloc) <= 2*sk->sndbuf)
+					skb = sock_wmalloc(sk, 
+							   alloclen + hh_len + 15, 1,
+							   sk->allocation);
+				if (unlikely(skb == NULL))
+					err = -ENOBUFS;
+			}
+			if (skb == NULL)
+				goto error;
+
+			/*
+			 *	Fill in the control structures
+			 */
+			skb->ip_summed = csummode;
+			skb->csum = 0;
+			skb_reserve(skb, hh_len);
+
+			/*
+			 *	Find where to start putting bytes.
+			 */
+			data = skb_put(skb, fraglen);
+			skb->nh.raw = data + exthdrlen;
+			data += fragheaderlen;
+			skb->h.raw = data + exthdrlen;
+
+			copy = datalen - transhdrlen;
+			if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, 0, skb) < 0) {
+				err = -EFAULT;
+				kfree_skb(skb);
+				goto error;
+			}
 
-	if (hdrflag && dp->csumoffset)
-		*(pktp + dp->csumoffset) = csum_fold(dp->csum); /* fill in checksum */
-	return 0;	       
+			offset += copy;
+			length -= datalen;
+			transhdrlen = 0;
+			exthdrlen = 0;
+			csummode = CHECKSUM_NONE;
+
+			/*
+			 * Put the packet on the pending queue.
+			 */
+			__skb_queue_tail(&sk->write_queue, skb);
+			continue;
+		}
+
+		if (copy > length)
+			copy = length;
+
+		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
+			unsigned int off;
+
+			off = skb->len;
+			if (getfrag(from, skb_put(skb, copy), 
+					offset, copy, off, skb) < 0) {
+				__skb_trim(skb, off);
+				err = -EFAULT;
+				goto error;
+			}
+		} else {
+			int i = skb_shinfo(skb)->nr_frags;
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
+			struct page *page = inet->sndmsg_page;
+			int off = inet->sndmsg_off;
+			unsigned int left;
+
+			if (page && (left = PAGE_SIZE - off) > 0) {
+				if (copy >= left)
+					copy = left;
+				if (page != frag->page) {
+					if (i == MAX_SKB_FRAGS) {
+						err = -EMSGSIZE;
+						goto error;
+					}
+					get_page(page);
+	 				skb_fill_page_desc(skb, i, page, inet->sndmsg_off, 0);
+					frag = &skb_shinfo(skb)->frags[i];
+				}
+			} else if (i < MAX_SKB_FRAGS) {
+				if (copy > PAGE_SIZE)
+					copy = PAGE_SIZE;
+				page = alloc_pages(sk->allocation, 0);
+				if (page == NULL)  {
+					err = -ENOMEM;
+					goto error;
+				}
+				inet->sndmsg_page = page;
+				inet->sndmsg_off = 0;
+
+				skb_fill_page_desc(skb, i, page, 0, 0);
+				frag = &skb_shinfo(skb)->frags[i];
+				skb->truesize += PAGE_SIZE;
+				atomic_add(PAGE_SIZE, &sk->wmem_alloc);
+			} else {
+				err = -EMSGSIZE;
+				goto error;
+			}
+			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
+				err = -EFAULT;
+				goto error;
+			}
+			inet->sndmsg_off += copy;
+			frag->size += copy;
+			skb->len += copy;
+			skb->data_len += copy;
+		}
+		offset += copy;
+		length -= copy;
+	}
+
+	return 0;
+
+error:
+	inet->cork.length -= length;
+	IP_INC_STATS(IpOutDiscards);
+	return err; 
+}
+
+ssize_t	ip_append_page(struct sock *sk, struct page *page,
+		       int offset, size_t size, int flags)
+{
+	struct inet_opt *inet = inet_sk(sk);
+	struct sk_buff *skb;
+	struct rtable *rt;
+	struct ip_options *opt = NULL;
+	int hh_len;
+	int mtu;
+	int len;
+	int err;
+	unsigned int maxfraglen, fragheaderlen;
+
+	if (inet->hdrincl)
+		return -EPERM;
+
+	if (flags&MSG_PROBE)
+		return 0;
+
+	if (skb_queue_empty(&sk->write_queue))
+		return -EINVAL;
+
+	rt = inet->cork.rt;
+	if (inet->cork.flags & IPCORK_OPT)
+		opt = inet->cork.opt;
+
+	if (!(rt->u.dst.dev->features&NETIF_F_SG))
+		return -EOPNOTSUPP;
+
+	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
+	mtu = inet->cork.fragsize;
+
+	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
+	maxfraglen = ((mtu-fragheaderlen) & ~7) + fragheaderlen;
+
+	if (inet->cork.length + size > 0xFFFF - fragheaderlen) {
+		ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport, mtu);
+		return -EMSGSIZE;
+	}
+
+	if ((skb = skb_peek_tail(&sk->write_queue)) == NULL)
+		return -EINVAL;
+
+	inet->cork.length += size;
+
+	while (size > 0) {
+		int i;
+		if ((len = maxfraglen - skb->len) <= 0) {
+			char *data;
+			struct iphdr *iph;
+			BUG_TRAP(len == 0);
+
+			skb = sock_wmalloc(sk, fragheaderlen + hh_len + 15, 1,
+					   sk->allocation);
+			if (unlikely(!skb)) {
+				err = -ENOBUFS;
+				goto error;
+			}
+
+			/*
+			 *	Fill in the control structures
+			 */
+			skb->ip_summed = CHECKSUM_NONE;
+			skb->csum = 0;
+			skb_reserve(skb, hh_len);
+
+			/*
+			 *	Find where to start putting bytes.
+			 */
+			data = skb_put(skb, fragheaderlen);
+			skb->nh.iph = iph = (struct iphdr *)data;
+			data += fragheaderlen;
+			skb->h.raw = data;
+
+			/*
+			 * Put the packet on the pending queue.
+			 */
+			__skb_queue_tail(&sk->write_queue, skb);
+			continue;
+		}
+
+		i = skb_shinfo(skb)->nr_frags;
+		if (len > size)
+			len = size;
+		if (skb_can_coalesce(skb, i, page, offset)) {
+			skb_shinfo(skb)->frags[i-1].size += len;
+		} else if (i < MAX_SKB_FRAGS) {
+			get_page(page);
+			skb_fill_page_desc(skb, i, page, offset, len);
+		} else {
+			err = -EMSGSIZE;
+			goto error;
+		}
+
+		if (skb->ip_summed == CHECKSUM_NONE) {
+			unsigned int csum;
+			csum = csum_page(page, offset, len);
+			skb->csum = csum_block_add(skb->csum, csum, skb->len);
+		}
+
+		skb->len += len;
+		skb->data_len += len;
+		offset += len;
+		size -= len;
+	}
+	return 0;
+
+error:
+	inet->cork.length -= size;
+	IP_INC_STATS(IpOutDiscards);
+	return err;
+}
+
+/*
+ *	Combine all pending IP fragments on the socket into one IP datagram
+ *	and push it out.
+ */
+int ip_push_pending_frames(struct sock *sk)
+{
+	struct sk_buff *skb, *tmp_skb;
+	struct sk_buff **tail_skb;
+	struct inet_opt *inet = inet_sk(sk);
+	struct ip_options *opt = NULL;
+	struct rtable *rt = inet->cork.rt;
+	struct iphdr *iph;
+	int df = 0;
+	__u8 ttl;
+	int err = 0;
+
+	if ((skb = __skb_dequeue(&sk->write_queue)) == NULL)
+		goto out;
+	tail_skb = &(skb_shinfo(skb)->frag_list);
+
+	/* move skb->data to ip header from ext header */
+	if (skb->data < skb->nh.raw)
+		__skb_pull(skb, skb->nh.raw - skb->data);
+	while ((tmp_skb = __skb_dequeue(&sk->write_queue)) != NULL) {
+		__skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
+		*tail_skb = tmp_skb;
+		tail_skb = &(tmp_skb->next);
+		skb->len += tmp_skb->len;
+		skb->data_len += tmp_skb->len;
+#if 0 /* Logically correct, but useless work, ip_fragment() will have to undo */
+		skb->truesize += tmp_skb->truesize;
+		__sock_put(tmp_skb->sk);
+		tmp_skb->destructor = NULL;
+		tmp_skb->sk = NULL;
+#endif
+	}
+
+	/* Unless the user demanded real pmtu discovery (IP_PMTUDISC_DO), we
+	 * allow the frame generated here to be fragmented. No matter how
+	 * transforms change the size of the packet, it will come out.
+	 */
+	if (inet->pmtudisc != IP_PMTUDISC_DO)
+		skb->local_df = 1;
+
+	/* DF bit is set when we want to see DF on outgoing frames.
+	 * If local_df is set too, we still allow to fragment this frame
+	 * locally. */
+	if (inet->pmtudisc == IP_PMTUDISC_DO ||
+	    (!skb_shinfo(skb)->frag_list && ip_dont_fragment(sk, &rt->u.dst)))
+		df = htons(IP_DF);
+
+	if (inet->cork.flags & IPCORK_OPT)
+		opt = inet->cork.opt;
+
+	if (rt->rt_type == RTN_MULTICAST)
+		ttl = inet->mc_ttl;
+	else
+		ttl = ip_select_ttl(inet, &rt->u.dst);
+
+	iph = (struct iphdr *)skb->data;
+	iph->version = 4;
+	iph->ihl = 5;
+	if (opt) {
+		iph->ihl += opt->optlen>>2;
+		ip_options_build(skb, opt, inet->cork.addr, rt, 0);
+	}
+	iph->tos = inet->tos;
+	iph->tot_len = htons(skb->len);
+	iph->frag_off = df;
+	if (!df) {
+		__ip_select_ident(iph, &rt->u.dst);
+	} else {
+		iph->id = htons(inet->id++);
+	}
+	iph->ttl = ttl;
+	iph->protocol = sk->protocol;
+	iph->saddr = rt->rt_src;
+	iph->daddr = rt->rt_dst;
+	ip_send_check(iph);
+
+	skb->priority = sk->priority;
+	skb->dst = dst_clone(&rt->u.dst);
+
+	/* Netfilter gets the whole, not yet fragmented skb. */
+	err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, 
+		      skb->dst->dev, dst_output);
+	if (err) {
+		if (err > 0)
+			err = inet->recverr ? net_xmit_errno(err) : 0;
+		if (err)
+			goto error;
+	}
+
+out:
+	inet->cork.flags &= ~IPCORK_OPT;
+	if (inet->cork.rt) {
+		ip_rt_put(inet->cork.rt);
+		inet->cork.rt = NULL;
+	}
+	return err;
+
+error:
+	IP_INC_STATS(IpOutDiscards);
+	goto out;
+}
+
+/*
+ *	Throw away all pending data on the socket.
+ */
+void ip_flush_pending_frames(struct sock *sk)
+{
+	struct inet_opt *inet = inet_sk(sk);
+	struct sk_buff *skb;
+
+	while ((skb = __skb_dequeue_tail(&sk->write_queue)) != NULL)
+		kfree_skb(skb);
+
+	inet->cork.flags &= ~IPCORK_OPT;
+	if (inet->cork.opt) {
+		kfree(inet->cork.opt);
+		inet->cork.opt = NULL;
+	}
+	if (inet->cork.rt) {
+		ip_rt_put(inet->cork.rt);
+		inet->cork.rt = NULL;
+	}
+}
+
+
+/*
+ *	Fetch data from kernel space and fill in checksum if needed.
+ */
+static int ip_reply_glue_bits(void *dptr, char *to, int offset, 
+			      int len, int odd, struct sk_buff *skb)
+{
+	unsigned int csum;
+
+	csum = csum_partial_copy_nocheck(dptr+offset, to, len, 0);
+	skb->csum = csum_block_add(skb->csum, csum, odd);
+	return 0;  
 }
 
 /* 
@@ -954,6 +1228,8 @@
  *
  *	Should run single threaded per socket because it uses the sock 
  *     	structure to pass arguments.
+ *
+ *	LATER: switch from ip_build_xmit to ip_append_*
  */
 void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
 		   unsigned int len)
@@ -979,8 +1255,19 @@
 			daddr = replyopts.opt.faddr;
 	}
 
-	if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), 0))
-		return;
+	{
+		struct flowi fl = { .nl_u = { .ip4_u =
+					      { .daddr = daddr,
+						.saddr = rt->rt_spec_dst,
+						.tos = RT_TOS(skb->nh.iph->tos) } },
+				    /* Not quite clean, but right. */
+				    .uli_u = { .ports =
+					       { .sport = skb->h.th->dest,
+					         .dport = skb->h.th->source } },
+				    .proto = sk->protocol };
+		if (ip_route_output_key(&rt, &fl))
+			return;
+	}
 
 	/* And let IP do all the hard work.
 
@@ -992,7 +1279,15 @@
 	sk->protinfo.af_inet.tos = skb->nh.iph->tos;
 	sk->priority = skb->priority;
 	sk->protocol = skb->nh.iph->protocol;
-	ip_build_xmit(sk, ip_reply_glue_bits, arg, len, &ipc, rt, MSG_DONTWAIT);
+	ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
+		       &ipc, rt, MSG_DONTWAIT);
+	if ((skb = skb_peek(&sk->write_queue)) != NULL) {
+		if (arg->csumoffset >= 0)
+			*((u16 *)skb->h.raw + arg->csumoffset) = csum_fold(csum_add(skb->csum, arg->csum));
+		skb->ip_summed = CHECKSUM_NONE;
+		ip_push_pending_frames(sk);
+	}
+
 	bh_unlock_sock(sk);
 
 	ip_rt_put(rt);
Index: net/ipv4/ip_sockglue.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/ip_sockglue.c,v
retrieving revision 1.1.1.22
retrieving revision 1.1.1.22.2.1
diff -u -r1.1.1.22 -r1.1.1.22.2.1
--- a/net/ipv4/ip_sockglue.c	14 Apr 2004 13:05:41 -0000	1.1.1.22
+++ b/net/ipv4/ip_sockglue.c	16 Apr 2004 13:16:22 -0000	1.1.1.22.2.1
@@ -36,6 +36,7 @@
 #include <linux/route.h>
 #include <linux/mroute.h>
 #include <net/route.h>
+#include <net/xfrm.h>
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 #include <net/transp_v6.h>
 #endif
@@ -380,6 +381,7 @@
 
 int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
 {
+	struct inet_opt *inet = inet_sk(sk);
 	int val=0,err;
 
 	if (level != SOL_IP)
@@ -431,8 +433,10 @@
 				    (!((1<<sk->state)&(TCPF_LISTEN|TCPF_CLOSE))
 				     && sk->daddr != LOOPBACK4_IPV6)) {
 #endif
+					if (inet->opt)
+						tp->ext_header_len -= inet->opt->optlen;
 					if (opt)
-						tp->ext_header_len = opt->optlen;
+						tp->ext_header_len += opt->optlen;
 					tcp_sync_mss(sk, tp->pmtu_cookie);
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 				}
@@ -492,11 +496,9 @@
 		case IP_TTL:
 			if (optlen<1)
 				goto e_inval;
-			if(val==-1)
-				val = sysctl_ip_default_ttl;
-			if(val<1||val>255)
+			if (val != -1 && (val < 1 || val>255))
 				goto e_inval;
-			sk->protinfo.af_inet.ttl=val;
+			sk->protinfo.af_inet.uc_ttl = val;
 			break;
 		case IP_HDRINCL:
 			if(sk->type!=SOCK_RAW) {
@@ -839,6 +841,11 @@
 			sk->protinfo.af_inet.freebind = !!val; 
 	                break;			
  
+		case IP_IPSEC_POLICY:
+		case IP_XFRM_POLICY:
+			err = xfrm_user_policy(sk, optname, optval, optlen);
+			break;
+
 		default:
 #ifdef CONFIG_NETFILTER
 			err = nf_setsockopt(sk, PF_INET, optname, optval, 
@@ -926,7 +933,9 @@
 			val=sk->protinfo.af_inet.tos;
 			break;
 		case IP_TTL:
-			val=sk->protinfo.af_inet.ttl;
+			val = (sk->protinfo.af_inet.uc_ttl == -1 ?
+			       sysctl_ip_default_ttl :
+			       sk->protinfo.af_inet.uc_ttl);
 			break;
 		case IP_HDRINCL:
 			val=sk->protinfo.af_inet.hdrincl;
@@ -940,7 +949,7 @@
 			val = 0;
 			dst = sk_dst_get(sk);
 			if (dst) {
-				val = dst->pmtu;
+				val = dst_pmtu(dst) - dst->header_len;
 				dst_release(dst);
 			}
 			if (!val) {
Index: net/ipv4/ipcomp.c
===================================================================
RCS file: net/ipv4/ipcomp.c
diff -N net/ipv4/ipcomp.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ b/net/ipv4/ipcomp.c	16 Apr 2004 13:16:22 -0000	1.7.2.1
@@ -0,0 +1,446 @@
+/*
+ * IP Payload Compression Protocol (IPComp) - RFC3173.
+ *
+ * Copyright (c) 2003 James Morris <jmorris@intercode.com.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) 
+ * any later version.
+ *
+ * Todo:
+ *   - Tunable compression parameters.
+ *   - Compression stats.
+ *   - Adaptive compression.
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/scatterlist.h>
+#include <linux/crypto.h>
+#include <linux/pfkeyv2.h>
+#include <net/inet_ecn.h>
+#include <net/ip.h>
+#include <net/xfrm.h>
+#include <net/icmp.h>
+#include <net/ipcomp.h>
+
+static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
+{
+	int err, plen, dlen;
+	struct iphdr *iph;
+	struct ipcomp_data *ipcd = x->data;
+	u8 *start, *scratch = ipcd->scratch;
+	
+	plen = skb->len;
+	dlen = IPCOMP_SCRATCH_SIZE;
+	start = skb->data;
+
+	err = crypto_comp_decompress(ipcd->tfm, start, plen, scratch, &dlen);
+	if (err)
+		goto out;
+
+	if (dlen < (plen + sizeof(struct ip_comp_hdr))) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	err = pskb_expand_head(skb, 0, dlen - plen, GFP_ATOMIC);
+	if (err)
+		goto out;
+		
+	skb_put(skb, dlen - plen);
+	memcpy(skb->data, scratch, dlen);
+	iph = skb->nh.iph;
+	iph->tot_len = htons(dlen + iph->ihl * 4);
+out:	
+	return err;
+}
+
+static int ipcomp_input(struct xfrm_state *x,
+                        struct xfrm_decap_state *decap, struct sk_buff *skb)
+{
+	u8 nexthdr;
+	int err = 0;
+	struct iphdr *iph;
+	union {
+		struct iphdr	iph;
+		char 		buf[60];
+	} tmp_iph;
+
+
+	if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
+	    skb_linearize(skb, GFP_ATOMIC) != 0) {
+	    	err = -ENOMEM;
+	    	goto out;
+	}
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	/* Remove ipcomp header and decompress original payload */	
+	iph = skb->nh.iph;
+	memcpy(&tmp_iph, iph, iph->ihl * 4);
+	nexthdr = *(u8 *)skb->data;
+	skb_pull(skb, sizeof(struct ip_comp_hdr));
+	skb->nh.raw += sizeof(struct ip_comp_hdr);
+	memcpy(skb->nh.raw, &tmp_iph, tmp_iph.iph.ihl * 4);
+	iph = skb->nh.iph;
+	iph->tot_len = htons(ntohs(iph->tot_len) - sizeof(struct ip_comp_hdr));
+	iph->protocol = nexthdr;
+	skb->h.raw = skb->data;
+	err = ipcomp_decompress(x, skb);
+
+out:	
+	return err;
+}
+
+static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb)
+{
+	int err, plen, dlen, ihlen;
+	struct iphdr *iph = skb->nh.iph;
+	struct ipcomp_data *ipcd = x->data;
+	u8 *start, *scratch = ipcd->scratch;
+	
+	ihlen = iph->ihl * 4;
+	plen = skb->len - ihlen;
+	dlen = IPCOMP_SCRATCH_SIZE;
+	start = skb->data + ihlen;
+
+	err = crypto_comp_compress(ipcd->tfm, start, plen, scratch, &dlen);
+	if (err)
+		goto out;
+
+	if ((dlen + sizeof(struct ip_comp_hdr)) >= plen) {
+		err = -EMSGSIZE;
+		goto out;
+	}
+	
+	memcpy(start, scratch, dlen);
+	pskb_trim(skb, ihlen + dlen);
+	
+out:	
+	return err;
+}
+
+static void ipcomp_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb->dst;
+	struct iphdr *iph, *top_iph;
+
+	iph = skb->nh.iph;
+	top_iph = (struct iphdr *)skb_push(skb, sizeof(struct iphdr));
+	top_iph->ihl = 5;
+	top_iph->version = 4;
+	top_iph->tos = iph->tos;
+	top_iph->tot_len = htons(skb->len);
+	if (!(iph->frag_off&htons(IP_DF))) {
+#ifdef NETIF_F_TSO
+		__ip_select_ident(top_iph, dst, 0);
+#else
+		__ip_select_ident(top_iph, dst);
+#endif
+	}
+	top_iph->ttl = iph->ttl;
+	top_iph->check = 0;
+	top_iph->saddr = x->props.saddr.a4;
+	top_iph->daddr = x->id.daddr.a4;
+	top_iph->frag_off = iph->frag_off&~htons(IP_MF|IP_OFFSET);
+	memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
+	skb->nh.raw = skb->data;
+}
+
+static int ipcomp_output(struct sk_buff *skb)
+{
+	int err;
+	struct dst_entry *dst = skb->dst;
+	struct xfrm_state *x = dst->xfrm;
+	struct iphdr *iph, *top_iph;
+	struct ip_comp_hdr *ipch;
+	struct ipcomp_data *ipcd = x->data;
+	union {
+		struct iphdr	iph;
+		char 		buf[60];
+	} tmp_iph;
+	int hdr_len = 0;
+
+	if (skb->ip_summed == CHECKSUM_HW && skb_checksum_help(skb) == NULL) {
+		err = -EINVAL;
+		goto error_nolock;
+	}
+
+	spin_lock_bh(&x->lock);
+	err = xfrm_check_output(x, skb, AF_INET);
+	if (err)
+		goto error;
+
+	/* Don't bother compressing */
+	if (!x->props.mode) {
+		iph = skb->nh.iph;
+		hdr_len = iph->ihl * 4;
+	}
+	if ((skb->len - hdr_len) < ipcd->threshold) {
+		if (x->props.mode) {
+			ipcomp_tunnel_encap(x, skb);
+			iph = skb->nh.iph;
+			iph->protocol = IPPROTO_IPIP;
+			ip_send_check(iph);
+		}
+		goto out_ok;
+	}
+
+	if (x->props.mode) 
+		ipcomp_tunnel_encap(x, skb);
+
+	if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
+	    skb_linearize(skb, GFP_ATOMIC) != 0) {
+	    	err = -ENOMEM;
+	    	goto error;
+	}
+	
+	err = ipcomp_compress(x, skb);
+	if (err) {
+		if (err == -EMSGSIZE) {
+			if (x->props.mode) {
+				iph = skb->nh.iph;
+				iph->protocol = IPPROTO_IPIP;
+				ip_send_check(iph);
+			}
+			goto out_ok;
+		}
+		goto error;
+	}
+
+	/* Install ipcomp header, convert into ipcomp datagram. */
+	iph = skb->nh.iph;
+	memcpy(&tmp_iph, iph, iph->ihl * 4);
+	top_iph = (struct iphdr *)skb_push(skb, sizeof(struct ip_comp_hdr));
+	memcpy(top_iph, &tmp_iph, iph->ihl * 4);
+	iph = top_iph;
+	if (x->props.mode && (x->props.flags & XFRM_STATE_NOECN))
+		IP_ECN_clear(iph);
+	iph->tot_len = htons(skb->len);
+	iph->protocol = IPPROTO_COMP;
+	iph->check = 0;
+	ipch = (struct ip_comp_hdr *)((char *)iph + iph->ihl * 4);
+	ipch->nexthdr = x->props.mode ? IPPROTO_IPIP : tmp_iph.iph.protocol;
+	ipch->flags = 0;
+	ipch->cpi = htons((u16 )ntohl(x->id.spi));
+	ip_send_check(iph);
+	skb->nh.raw = skb->data;
+
+out_ok:
+	x->curlft.bytes += skb->len;
+	x->curlft.packets++;
+	spin_unlock_bh(&x->lock);
+	
+	if ((skb->dst = dst_pop(dst)) == NULL) {
+		err = -EHOSTUNREACH;
+		goto error_nolock;
+	}
+	err = NET_XMIT_BYPASS;
+
+out_exit:
+	return err;
+error:
+	spin_unlock_bh(&x->lock);
+error_nolock:
+	kfree_skb(skb);
+	goto out_exit;
+}
+
+static void ipcomp4_err(struct sk_buff *skb, u32 info)
+{
+	u32 spi;
+	struct iphdr *iph = (struct iphdr *)skb->data;
+	struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
+	struct xfrm_state *x;
+
+	if (skb->h.icmph->type != ICMP_DEST_UNREACH ||
+	    skb->h.icmph->code != ICMP_FRAG_NEEDED)
+		return;
+
+	spi = htonl(ntohs(ipch->cpi));	/* inverse of cpi = htons((u16)ntohl(spi)) in ipcomp_output() */
+	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr,
+	                      spi, IPPROTO_COMP, AF_INET);
+	if (!x)
+		return;
+	printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%u.%u.%u.%u\n",
+	       spi, NIPQUAD(iph->daddr));
+	xfrm_state_put(x);
+}
+
+/* We always hold one tunnel user reference to indicate a tunnel */ 
+static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
+{
+	struct xfrm_state *t;
+	
+	t = xfrm_state_alloc();
+	if (t == NULL)
+		goto out;
+
+	t->id.proto = IPPROTO_IPIP;
+	t->id.spi = x->props.saddr.a4;
+	t->id.daddr.a4 = x->id.daddr.a4;
+	memcpy(&t->sel, &x->sel, sizeof(t->sel));
+	t->props.family = AF_INET;
+	t->props.mode = 1;
+	t->props.saddr.a4 = x->props.saddr.a4;
+	t->props.flags = x->props.flags;
+	
+	t->type = xfrm_get_type(IPPROTO_IPIP, t->props.family);
+	if (t->type == NULL)
+		goto error;
+		
+	if (t->type->init_state(t, NULL))
+		goto error;
+
+	t->km.state = XFRM_STATE_VALID;
+	atomic_set(&t->tunnel_users, 1);
+out:
+	return t;
+
+error:
+	t->km.state = XFRM_STATE_DEAD;
+	xfrm_state_put(t);
+	t = NULL;
+	goto out;
+}
+
+/*
+ * Must be protected by xfrm_cfg_sem.  State and tunnel user references are
+ * always incremented on success.
+ */
+static int ipcomp_tunnel_attach(struct xfrm_state *x)
+{
+	int err = 0;
+	struct xfrm_state *t;
+
+	t = xfrm_state_lookup((xfrm_address_t *)&x->id.daddr.a4,
+	                      x->props.saddr.a4, IPPROTO_IPIP, AF_INET);
+	if (!t) {
+		t = ipcomp_tunnel_create(x);
+		if (!t) {
+			err = -EINVAL;
+			goto out;
+		}
+		xfrm_state_insert(t);
+		xfrm_state_hold(t);
+	}
+	x->tunnel = t;
+	atomic_inc(&t->tunnel_users);
+out:
+	return err;
+}
+
+static void ipcomp_free_data(struct ipcomp_data *ipcd)
+{
+	if (ipcd->tfm)
+		crypto_free_tfm(ipcd->tfm);
+	if (ipcd->scratch)
+		kfree(ipcd->scratch);	
+}
+
+static void ipcomp_destroy(struct xfrm_state *x)
+{
+	struct ipcomp_data *ipcd = x->data;
+	if (!ipcd)
+		return;
+	ipcomp_free_data(ipcd);
+	kfree(ipcd);
+}
+
+static int ipcomp_init_state(struct xfrm_state *x, void *args)
+{
+	int err;
+	struct ipcomp_data *ipcd;
+	struct xfrm_algo_desc *calg_desc;
+
+	err = -EINVAL;
+	if (!x->calg)
+		goto out;
+
+	err = -ENOMEM;
+	ipcd = kmalloc(sizeof(*ipcd), GFP_KERNEL);
+	if (!ipcd)
+		goto error;
+
+	memset(ipcd, 0, sizeof(*ipcd));
+	x->props.header_len = sizeof(struct ip_comp_hdr);
+	if (x->props.mode)
+		x->props.header_len += sizeof(struct iphdr);
+
+	ipcd->scratch = kmalloc(IPCOMP_SCRATCH_SIZE, GFP_KERNEL);
+	if (!ipcd->scratch)
+		goto error;
+	
+	ipcd->tfm = crypto_alloc_tfm(x->calg->alg_name, 0);
+	if (!ipcd->tfm)
+		goto error;
+
+	if (x->props.mode) {
+		err = ipcomp_tunnel_attach(x);
+		if (err)
+			goto error;
+	}
+
+	calg_desc = xfrm_calg_get_byname(x->calg->alg_name);
+	BUG_ON(!calg_desc);
+	ipcd->threshold = calg_desc->uinfo.comp.threshold;
+	x->data = ipcd;
+	err = 0;
+out:
+	return err;
+
+error:
+	if (ipcd) {
+		ipcomp_free_data(ipcd);
+		kfree(ipcd);
+	}
+	goto out;
+}
+
+static struct xfrm_type ipcomp_type =
+{
+	.description	= "IPCOMP4",
+	.proto	     	= IPPROTO_COMP,
+	.init_state	= ipcomp_init_state,
+	.destructor	= ipcomp_destroy,
+	.input		= ipcomp_input,
+	.output		= ipcomp_output
+};
+
+static struct inet_protocol ipcomp4_protocol = {
+	.handler	=	xfrm4_rcv,
+	.err_handler	=	ipcomp4_err,
+	.no_policy	=	1,
+};
+
+static int __init ipcomp4_init(void)
+{
+	SET_MODULE_OWNER(&ipcomp_type);
+	if (xfrm_register_type(&ipcomp_type, AF_INET) < 0) {
+		printk(KERN_INFO "ipcomp init: can't add xfrm type\n");
+		return -EAGAIN;
+	}
+	if (inet_add_protocol(&ipcomp4_protocol, IPPROTO_COMP) < 0) {
+		printk(KERN_INFO "ipcomp init: can't add protocol\n");
+		xfrm_unregister_type(&ipcomp_type, AF_INET);
+		return -EAGAIN;
+	}
+	return 0;
+}
+
+static void __exit ipcomp4_fini(void)
+{
+	if (inet_del_protocol(&ipcomp4_protocol, IPPROTO_COMP) < 0)
+		printk(KERN_INFO "ip ipcomp close: can't remove protocol\n");
+	if (xfrm_unregister_type(&ipcomp_type, AF_INET) < 0)
+		printk(KERN_INFO "ip ipcomp close: can't remove xfrm type\n");
+}
+
+module_init(ipcomp4_init);
+module_exit(ipcomp4_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) - RFC3173");
+MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
+
Index: net/ipv4/ipconfig.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/ipconfig.c,v
retrieving revision 1.1.1.26
retrieving revision 1.1.1.26.2.1
diff -u -r1.1.1.26 -r1.1.1.26.2.1
--- a/net/ipv4/ipconfig.c	28 Nov 2003 18:26:21 -0000	1.1.1.26
+++ b/net/ipv4/ipconfig.c	16 Apr 2004 13:16:22 -0000	1.1.1.26.2.1
@@ -655,7 +655,7 @@
 	struct net_device *dev = d->dev;
 	struct sk_buff *skb;
 	struct bootp_pkt *b;
-	int hh_len = (dev->hard_header_len + 15) & ~15;
+	int hh_len = LL_RESERVED_SPACE(dev);
 	struct iphdr *h;
 
 	/* Allocate packet */
Index: net/ipv4/ipip.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/ipip.c,v
retrieving revision 1.1.1.26
retrieving revision 1.1.1.26.2.1
diff -u -r1.1.1.26 -r1.1.1.26.2.1
--- a/net/ipv4/ipip.c	28 Nov 2003 18:26:21 -0000	1.1.1.26
+++ b/net/ipv4/ipip.c	16 Apr 2004 13:16:22 -0000	1.1.1.26.2.1
@@ -115,6 +115,7 @@
 #include <net/protocol.h>
 #include <net/ipip.h>
 #include <net/inet_ecn.h>
+#include <net/xfrm.h>
 
 #define HASH_SIZE  16
 #define HASH(addr) ((addr^(addr>>4))&0xF)
@@ -207,7 +208,7 @@
 	write_unlock_bh(&ipip_lock);
 }
 
-struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
+static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
 {
 	u32 remote = parms->iph.daddr;
 	u32 local = parms->iph.saddr;
@@ -289,7 +290,7 @@
 	dev_put(dev);
 }
 
-void ipip_err(struct sk_buff *skb, u32 info)
+static void ipip_err(struct sk_buff *skb, void *__unused)
 {
 #ifndef I_WISH_WORLD_WERE_PERFECT
 
@@ -355,6 +356,7 @@
 	int rel_code = 0;
 	int rel_info = 0;
 	struct sk_buff *skb2;
+	struct flowi fl;
 	struct rtable *rt;
 
 	if (len < hlen + sizeof(struct iphdr))
@@ -417,7 +419,11 @@
 	skb2->nh.raw = skb2->data;
 
 	/* Try to guess incoming interface */
-	if (ip_route_output(&rt, eiph->saddr, 0, RT_TOS(eiph->tos), 0)) {
+	memset(&fl, 0, sizeof(fl));	/* start from an all-zero flow key */
+	fl.fl4_dst = eiph->saddr;	/* look up the route back to the inner packet's sender */
+	fl.fl4_tos = RT_TOS(eiph->tos);
+	fl.proto = IPPROTO_IPIP;
+	if (ip_route_output_key(&rt, &fl)) {
 		kfree_skb(skb2);
 		return;
 	}
@@ -427,8 +433,11 @@
 	if (rt->rt_flags&RTCF_LOCAL) {
 		ip_rt_put(rt);
 		rt = NULL;
-		if (ip_route_output(&rt, eiph->daddr, eiph->saddr, eiph->tos, 0) ||
-		    rt->u.dst.dev->type != ARPHRD_IPGRE) {
+		fl.fl4_dst = eiph->daddr;	/* reuse the key for the forward direction */
+		fl.fl4_src = eiph->saddr;
+		fl.fl4_tos = eiph->tos;
+		if (ip_route_output_key(&rt, &fl) ||
+		    rt->u.dst.dev->type != ARPHRD_TUNNEL) {
 			ip_rt_put(rt);
 			kfree_skb(skb2);
 			return;
@@ -436,7 +445,7 @@
 	} else {
 		ip_rt_put(rt);
 		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
-		    skb2->dst->dev->type != ARPHRD_IPGRE) {
+		    skb2->dst->dev->type != ARPHRD_TUNNEL) {
 			kfree_skb(skb2);
 			return;
 		}
@@ -444,11 +453,11 @@
 
 	/* change mtu on this route */
 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
-		if (rel_info > skb2->dst->pmtu) {
+		if (rel_info > dst_pmtu(skb2->dst)) {
 			kfree_skb(skb2);
 			return;
 		}
-		skb2->dst->pmtu = rel_info;
+		skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
 		rel_info = htonl(rel_info);
 	} else if (type == ICMP_TIME_EXCEEDED) {
 		struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv;
@@ -473,7 +482,7 @@
 		IP_ECN_set_ce(inner_iph);
 }
 
-int ipip_rcv(struct sk_buff *skb)
+static int ipip_rcv(struct sk_buff *skb)
 {
 	struct iphdr *iph;
 	struct ip_tunnel *tunnel;
@@ -482,14 +491,22 @@
 		goto out;
 
 	iph = skb->nh.iph;
-	skb->mac.raw = skb->nh.raw;
-	skb->nh.raw = skb->data;
-	memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
-	skb->protocol = htons(ETH_P_IP);
-	skb->pkt_type = PACKET_HOST;
 
 	read_lock(&ipip_lock);
 	if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
+		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+			read_unlock(&ipip_lock);	/* do not return with ipip_lock still held */
+			kfree_skb(skb);
+			return 0;
+		}
+		secpath_reset(skb);
+
+		skb->mac.raw = skb->nh.raw;
+		skb->nh.raw = skb->data;
+		memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
+		skb->protocol = htons(ETH_P_IP);
+		skb->pkt_type = PACKET_HOST;
+
 		tunnel->stat.rx_packets++;
 		tunnel->stat.rx_bytes += skb->len;
 		skb->dev = tunnel->dev;
@@ -509,16 +526,8 @@
 	}
 	read_unlock(&ipip_lock);
 
-	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0);
 out:
-	kfree_skb(skb);
-	return 0;
-}
-
-/* Need this wrapper because NF_HOOK takes the function address */
-static inline int do_ip_send(struct sk_buff *skb)
-{
-	return ip_send(skb);
+	return -1;
 }
 
 /*
@@ -562,9 +571,17 @@
 			goto tx_error_icmp;
 	}
 
-	if (ip_route_output(&rt, dst, tiph->saddr, RT_TOS(tos), tunnel->parms.link)) {
-		tunnel->stat.tx_carrier_errors++;
-		goto tx_error_icmp;
+	{
+		struct flowi fl = { .oif = tunnel->parms.link,
+				    .nl_u = { .ip4_u =
+					      { .daddr = dst,
+						.saddr = tiph->saddr,
+						.tos = RT_TOS(tos) } },
+				    .proto = IPPROTO_IPIP };
+		if (ip_route_output_key(&rt, &fl)) {
+			tunnel->stat.tx_carrier_errors++;
+			goto tx_error_icmp;
+		}
 	}
 	tdev = rt->u.dst.dev;
 
@@ -575,17 +592,17 @@
 	}
 
 	if (tiph->frag_off)
-		mtu = rt->u.dst.pmtu - sizeof(struct iphdr);
+		mtu = dst_pmtu(&rt->u.dst) - sizeof(struct iphdr);
 	else
-		mtu = skb->dst ? skb->dst->pmtu : dev->mtu;
+		mtu = skb->dst ? dst_pmtu(skb->dst) : dev->mtu;
 
 	if (mtu < 68) {
 		tunnel->stat.collisions++;
 		ip_rt_put(rt);
 		goto tx_error;
 	}
-	if (skb->dst && mtu < skb->dst->pmtu)
-		skb->dst->pmtu = mtu;
+	if (skb->dst)
+		skb->dst->ops->update_pmtu(skb->dst, mtu);
 
 	df |= (old_iph->frag_off&htons(IP_DF));
 
@@ -606,7 +623,7 @@
 	/*
 	 * Okay, now see if we can stuff it in the buffer as-is.
 	 */
-	max_headroom = (((tdev->hard_header_len+15)&~15)+sizeof(struct iphdr));
+	max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
 
 	if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
 		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
@@ -824,8 +841,14 @@
 	ipip_tunnel_init_gen(dev);
 
 	if (iph->daddr) {
+		struct flowi fl = { .oif = tunnel->parms.link,
+				    .nl_u = { .ip4_u =
+					      { .daddr = iph->daddr,
+						.saddr = iph->saddr,
+						.tos = RT_TOS(iph->tos) } },
+				    .proto = IPPROTO_IPIP };
 		struct rtable *rt;
-		if (!ip_route_output(&rt, iph->daddr, iph->saddr, RT_TOS(iph->tos), tunnel->parms.link)) {
+		if (!ip_route_output_key(&rt, &fl)) {
 			tdev = rt->u.dst.dev;
 			ip_rt_put(rt);
 		}
@@ -858,7 +881,7 @@
 }
 #endif
 
-int __init ipip_fb_tunnel_init(struct net_device *dev)
+static int __init ipip_fb_tunnel_init(struct net_device *dev)
 {
 	struct iphdr *iph;
 
@@ -878,11 +901,9 @@
 	return 0;
 }
 
-static struct inet_protocol ipip_protocol = {
-	handler:	ipip_rcv,
-	err_handler:	ipip_err,
-	protocol:	IPPROTO_IPIP,
-	name:		"IPIP"
+static struct xfrm_tunnel ipip_handler = {	/* passed to xfrm4_tunnel_register(); supersedes the removed ipip_protocol entry */
+	.handler	=	ipip_rcv,
+	.err_handler	=	ipip_err,
 };
 
 static char banner[] __initdata =
@@ -892,16 +913,20 @@
 {
 	printk(banner);
 
+	if (xfrm4_tunnel_register(&ipip_handler) < 0) {
+		printk(KERN_INFO "ipip init: can't register tunnel\n");
+		return -EAGAIN;
+	}
+
 	ipip_fb_tunnel_dev.priv = (void*)&ipip_fb_tunnel;
 	register_netdev(&ipip_fb_tunnel_dev);
-	inet_add_protocol(&ipip_protocol);
 	return 0;
 }
 
 static void __exit ipip_fini(void)
 {
-	if ( inet_del_protocol(&ipip_protocol) < 0 )
-		printk(KERN_INFO "ipip close: can't remove protocol\n");
+	if (xfrm4_tunnel_deregister(&ipip_handler) < 0)
+		printk(KERN_INFO "ipip close: can't deregister tunnel\n");
 
 	unregister_netdev(&ipip_fb_tunnel_dev);
 }
Index: net/ipv4/ipmr.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/ipmr.c,v
retrieving revision 1.1.1.23
retrieving revision 1.1.1.23.2.1
diff -u -r1.1.1.23 -r1.1.1.23.2.1
--- a/net/ipv4/ipmr.c	28 Nov 2003 18:26:21 -0000	1.1.1.23
+++ b/net/ipv4/ipmr.c	16 Apr 2004 13:16:22 -0000	1.1.1.23.2.1
@@ -108,7 +108,7 @@
 static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
 static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
 
-extern struct inet_protocol pim_protocol;
+static struct inet_protocol pim_protocol;
 
 static struct timer_list ipmr_expire_timer;
 
@@ -928,23 +928,28 @@
 #ifdef CONFIG_IP_PIMSM
 		case MRT_PIM:
 		{
-			int v;
+			int v, ret;
 			if(get_user(v,(int *)optval))
 				return -EFAULT;
 			v = (v)?1:0;
 			rtnl_lock();
+			ret = 0;
 			if (v != mroute_do_pim) {
 				mroute_do_pim = v;
 				mroute_do_assert = v;
 #ifdef CONFIG_IP_PIMSM_V2
 				if (mroute_do_pim)
-					inet_add_protocol(&pim_protocol);
+					ret = inet_add_protocol(&pim_protocol,
+								IPPROTO_PIM);
 				else
-					inet_del_protocol(&pim_protocol);
+					ret = inet_del_protocol(&pim_protocol,
+								IPPROTO_PIM);
+				if (ret < 0)
+					ret = -EAGAIN;
 #endif
 			}
 			rtnl_unlock();
-			return 0;
+			return ret;
 		}
 #endif
 		/*
@@ -1105,16 +1110,14 @@
 
 static inline int ipmr_forward_finish(struct sk_buff *skb)
 {
-	struct ip_options *opt = &(IPCB(skb)->opt);
-	struct dst_entry *dst = skb->dst;
+	struct ip_options * opt	= &(IPCB(skb)->opt);
+
+	IP_INC_STATS_BH(IpForwDatagrams);
 
 	if (unlikely(opt->optlen))
 		ip_forward_options(skb);
 
-	if (skb->len <= dst->pmtu)
-		return dst->output(skb);
-	else
-		return ip_fragment(skb, dst->output);
+	return dst_output(skb);
 }
 
 /*
@@ -1146,17 +1149,28 @@
 #endif
 
 	if (vif->flags&VIFF_TUNNEL) {
-		if (ip_route_output(&rt, vif->remote, vif->local, RT_TOS(iph->tos), vif->link))
+		struct flowi fl = { .oif = vif->link,
+				    .nl_u = { .ip4_u =
+					      { .daddr = vif->remote,
+						.saddr = vif->local,
+						.tos = RT_TOS(iph->tos) } },
+				    .proto = IPPROTO_IPIP };
+		if (ip_route_output_key(&rt, &fl))
 			return;
 		encap = sizeof(struct iphdr);
 	} else {
-		if (ip_route_output(&rt, iph->daddr, 0, RT_TOS(iph->tos), vif->link))
+		struct flowi fl = { .oif = vif->link,
+				    .nl_u = { .ip4_u =
+					      { .daddr = iph->daddr,
+						.tos = RT_TOS(iph->tos) } },
+				    .proto = IPPROTO_IPIP };
+		if (ip_route_output_key(&rt, &fl))
 			return;
 	}
 
 	dev = rt->u.dst.dev;
 
-	if (skb->len+encap > rt->u.dst.pmtu && (ntohs(iph->frag_off) & IP_DF)) {
+	if (skb->len+encap > dst_pmtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
 		/* Do not fragment multicasts. Alas, IPv4 does not
 		   allow to send ICMP, so that packets will disappear
 		   to blackhole.
@@ -1167,7 +1181,7 @@
 		return;
 	}
 
-	encap += dev->hard_header_len;
+	encap += LL_RESERVED_SPACE(dev);
 
 	if (skb_headroom(skb) < encap || skb_cloned(skb) || !last)
 		skb2 = skb_realloc_headroom(skb, (encap + 15)&~15);
@@ -1244,7 +1258,7 @@
 	if (vif_table[vif].dev != skb->dev) {
 		int true_vifi;
 
-		if (((struct rtable*)skb->dst)->key.iif == 0) {
+		if (((struct rtable*)skb->dst)->fl.iif == 0) {
 			/* It is our own packet, looped back.
 			   Very complicated situation...
 
@@ -1394,19 +1408,15 @@
 	struct net_device  *reg_dev = NULL;
 
 	if (skb_is_nonlinear(skb)) {
-		if (skb_linearize(skb, GFP_ATOMIC) != 0) {
-			kfree_skb(skb);
-			return -ENOMEM;
-		}
+		if (skb_linearize(skb, GFP_ATOMIC) != 0) 
+			goto drop;
 		pim = (struct igmphdr*)skb->h.raw;
 	}
 
         if (!mroute_do_pim ||
 	    skb->len < sizeof(*pim) + sizeof(*encap) ||
-	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) {
-		kfree_skb(skb);
-                return -EINVAL;
-        }
+	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) 
+		goto drop;
 
 	encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr));
 	/*
@@ -1416,11 +1426,9 @@
 	   c. packet is not truncated
 	 */
 	if (!MULTICAST(encap->daddr) ||
-	    ntohs(encap->tot_len) == 0 ||
-	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len) {
-		kfree_skb(skb);
-		return -EINVAL;
-	}
+	    encap->tot_len == 0 ||
+	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len) 
+		goto drop;
 
 	read_lock(&mrt_lock);
 	if (reg_vif_num >= 0)
@@ -1429,10 +1437,8 @@
 		dev_hold(reg_dev);
 	read_unlock(&mrt_lock);
 
-	if (reg_dev == NULL) {
-		kfree_skb(skb);
-		return -EINVAL;
-	}
+	if (reg_dev == NULL) 
+		goto drop;
 
 	skb->mac.raw = skb->nh.raw;
 	skb_pull(skb, (u8*)encap - skb->data);
@@ -1453,6 +1459,9 @@
 	netif_rx(skb);
 	dev_put(reg_dev);
 	return 0;
+ drop:
+	kfree_skb(skb);
+	return 0;
 }
 #endif
 
@@ -1464,10 +1473,8 @@
 	struct net_device  *reg_dev = NULL;
 
 	if (skb_is_nonlinear(skb)) {
-		if (skb_linearize(skb, GFP_ATOMIC) != 0) {
-			kfree_skb(skb);
-			return -ENOMEM;
-		}
+		if (skb_linearize(skb, GFP_ATOMIC) != 0) 
+			goto drop;
 		pim = (struct pimreghdr*)skb->h.raw;
 	}
 
@@ -1475,19 +1482,15 @@
 	    pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
 	    (pim->flags&PIM_NULL_REGISTER) ||
 	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
-	     ip_compute_csum((void *)pim, skb->len))) {
-		kfree_skb(skb);
-                return -EINVAL;
-        }
+	     ip_compute_csum((void *)pim, skb->len))) 
+		goto drop;
 
 	/* check if the inner packet is destined to mcast group */
 	encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr));
 	if (!MULTICAST(encap->daddr) ||
-	    ntohs(encap->tot_len) == 0 ||
-	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len) {
-		kfree_skb(skb);
-		return -EINVAL;
-	}
+	    encap->tot_len == 0 ||
+	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len) 
+		goto drop;
 
 	read_lock(&mrt_lock);
 	if (reg_vif_num >= 0)
@@ -1496,10 +1499,8 @@
 		dev_hold(reg_dev);
 	read_unlock(&mrt_lock);
 
-	if (reg_dev == NULL) {
-		kfree_skb(skb);
-		return -EINVAL;
-	}
+	if (reg_dev == NULL) 
+		goto drop;
 
 	skb->mac.raw = skb->nh.raw;
 	skb_pull(skb, (u8*)encap - skb->data);
@@ -1520,6 +1521,9 @@
 	netif_rx(skb);
 	dev_put(reg_dev);
 	return 0;
+ drop:
+	kfree_skb(skb);
+	return 0;
 }
 #endif
 
@@ -1732,15 +1736,8 @@
 #endif	
 
 #ifdef CONFIG_IP_PIMSM_V2
-struct inet_protocol pim_protocol = 
-{
-	pim_rcv,		/* PIM handler		*/
-	NULL,			/* PIM error control	*/
-	NULL,			/* next			*/
-	IPPROTO_PIM,		/* protocol ID		*/
-	0,			/* copy			*/
-	NULL,			/* data			*/
-	"PIM"			/* name			*/
+static struct inet_protocol pim_protocol = {	/* IPPROTO_PIM is now passed to inet_add_protocol()/inet_del_protocol() instead of being stored here */
+	.handler	=	pim_rcv,
 };
 #endif
 
Index: net/ipv4/proc.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/proc.c,v
retrieving revision 1.1.1.16
retrieving revision 1.1.1.16.2.1
diff -u -r1.1.1.16 -r1.1.1.16.2.1
--- a/net/ipv4/proc.c	13 Jun 2003 14:51:39 -0000	1.1.1.16
+++ b/net/ipv4/proc.c	16 Apr 2004 13:16:22 -0000	1.1.1.16.2.1
@@ -116,7 +116,6 @@
  
 int snmp_get_info(char *buffer, char **start, off_t offset, int length)
 {
-	extern int sysctl_ip_default_ttl;
 	int len, i;
 
 	len = sprintf (buffer,
Index: net/ipv4/protocol.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/protocol.c,v
retrieving revision 1.1.1.15
retrieving revision 1.1.1.15.2.1
diff -u -r1.1.1.15 -r1.1.1.15.2.1
--- a/net/ipv4/protocol.c	20 May 2001 00:56:43 -0000	1.1.1.15
+++ b/net/ipv4/protocol.c	16 Apr 2004 13:16:22 -0000	1.1.1.15.2.1
@@ -48,134 +48,52 @@
 #include <net/ipip.h>
 #include <linux/igmp.h>
 
-#define IPPROTO_PREVIOUS NULL
-
-#ifdef CONFIG_IP_MULTICAST
-
-static struct inet_protocol igmp_protocol = {
-	handler:	igmp_rcv,
-	next:		IPPROTO_PREVIOUS,
-	protocol:	IPPROTO_IGMP,
-	name:		"IGMP"
-};
-
-#undef  IPPROTO_PREVIOUS
-#define IPPROTO_PREVIOUS &igmp_protocol
-
-#endif
-
-static struct inet_protocol tcp_protocol = {
-	handler:	tcp_v4_rcv,
-	err_handler:	tcp_v4_err,
-	next:		IPPROTO_PREVIOUS,
-	protocol:	IPPROTO_TCP,
-	name:		"TCP"
-};
-
-#undef  IPPROTO_PREVIOUS
-#define IPPROTO_PREVIOUS &tcp_protocol
-
-static struct inet_protocol udp_protocol = {
-	handler:	udp_rcv,
-	err_handler:	udp_err,
-	next:		IPPROTO_PREVIOUS,
-	protocol:	IPPROTO_UDP,
-	name:		"UDP"
-};
-
-#undef  IPPROTO_PREVIOUS
-#define IPPROTO_PREVIOUS &udp_protocol
-
-static struct inet_protocol icmp_protocol = {
-	handler:	icmp_rcv,
-	next:		IPPROTO_PREVIOUS,
-	protocol:	IPPROTO_ICMP,
-	name:		"ICMP"
-};
-
-#undef  IPPROTO_PREVIOUS
-#define IPPROTO_PREVIOUS &icmp_protocol
-
-
-struct inet_protocol *inet_protocol_base = IPPROTO_PREVIOUS;
-
 struct inet_protocol *inet_protos[MAX_INET_PROTOS];
 
 /*
  *	Add a protocol handler to the hash tables
  */
 
-void inet_add_protocol(struct inet_protocol *prot)
+int inet_add_protocol(struct inet_protocol *prot, unsigned char protocol)
 {
-	unsigned char hash;
-	struct inet_protocol *p2;
+	int hash, ret;
+
+	hash = protocol & (MAX_INET_PROTOS - 1);
 
-	hash = prot->protocol & (MAX_INET_PROTOS - 1);
 	br_write_lock_bh(BR_NETPROTO_LOCK);
-	prot ->next = inet_protos[hash];
-	inet_protos[hash] = prot;
-	prot->copy = 0;
-
-	/*
-	 *	Set the copy bit if we need to. 
-	 */
-	 
-	p2 = (struct inet_protocol *) prot->next;
-	while (p2) {
-		if (p2->protocol == prot->protocol) {
-			prot->copy = 1;
-			break;
-		}
-		p2 = (struct inet_protocol *) p2->next;
+
+	if (inet_protos[hash]) {
+		ret = -1;
+	} else {
+		inet_protos[hash] = prot;
+		ret = 0;
 	}
+
 	br_write_unlock_bh(BR_NETPROTO_LOCK);
+
+	return ret;
 }
 
 /*
  *	Remove a protocol from the hash tables.
  */
  
-int inet_del_protocol(struct inet_protocol *prot)
+int inet_del_protocol(struct inet_protocol *prot, unsigned char protocol)
 {
-	struct inet_protocol *p;
-	struct inet_protocol *lp = NULL;
-	unsigned char hash;
-
-	hash = prot->protocol & (MAX_INET_PROTOS - 1);
-	br_write_lock_bh(BR_NETPROTO_LOCK);
-	if (prot == inet_protos[hash]) {
-		inet_protos[hash] = (struct inet_protocol *) inet_protos[hash]->next;
-		br_write_unlock_bh(BR_NETPROTO_LOCK);
-		return 0;
-	}
+	int hash, ret;
 
-	p = (struct inet_protocol *) inet_protos[hash];
+	hash = protocol & (MAX_INET_PROTOS - 1);
 
-	if (p != NULL && p->protocol == prot->protocol)
-		lp = p;
-
-	while (p) {
-		/*
-		 * We have to worry if the protocol being deleted is
-		 * the last one on the list, then we may need to reset
-		 * someone's copied bit.
-		 */
-		if (p->next && p->next == prot) {
-			/*
-			 * if we are the last one with this protocol and
-			 * there is a previous one, reset its copy bit.
-			 */
-			if (prot->copy == 0 && lp != NULL)
-				lp->copy = 0;
-			p->next = prot->next;
-			br_write_unlock_bh(BR_NETPROTO_LOCK);
-			return 0;
-		}
-		if (p->next != NULL && p->next->protocol == prot->protocol) 
-			lp = p->next;
+	br_write_lock_bh(BR_NETPROTO_LOCK);
 
-		p = (struct inet_protocol *) p->next;
+	if (inet_protos[hash] == prot) {
+		inet_protos[hash] = NULL;
+		ret = 0;
+	} else {
+		ret = -1;
 	}
+
 	br_write_unlock_bh(BR_NETPROTO_LOCK);
-	return -1;
+
+	return ret;
 }
Index: net/ipv4/raw.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/raw.c,v
retrieving revision 1.1.1.22
retrieving revision 1.1.1.22.2.1
diff -u -r1.1.1.22 -r1.1.1.22.2.1
--- a/net/ipv4/raw.c	25 Aug 2003 11:44:44 -0000	1.1.1.22
+++ b/net/ipv4/raw.c	16 Apr 2004 13:16:22 -0000	1.1.1.22.2.1
@@ -64,6 +64,8 @@
 #include <net/raw.h>
 #include <net/inet_common.h>
 #include <net/checksum.h>
+#include <net/xfrm.h>
+#include <linux/netfilter_ipv4.h>
 
 struct sock *raw_v4_htable[RAWV4_HTABLE_SIZE];
 rwlock_t raw_v4_lock = RW_LOCK_UNLOCKED;
@@ -132,13 +134,12 @@
 }
 
 /* IP input processing comes here for RAW socket delivery.
- * This is fun as to avoid copies we want to make no surplus
- * copies.
+ * Caller owns SKB, so we must make clones.
  *
  * RFC 1122: SHOULD pass TOS value up to the transport layer.
  * -> It does. And not only TOS, but all IP header.
  */
-struct sock *raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash)
+void raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash)
 {
 	struct sock *sk;
 
@@ -150,28 +151,19 @@
 			     skb->dev->ifindex);
 
 	while (sk) {
-		struct sock *sknext = __raw_v4_lookup(sk->next, iph->protocol,
-						      iph->saddr, iph->daddr,
-						      skb->dev->ifindex);
-		if (iph->protocol != IPPROTO_ICMP ||
-		    !icmp_filter(sk, skb)) {
-			struct sk_buff *clone;
-
-			if (!sknext)
-				break;
-			clone = skb_clone(skb, GFP_ATOMIC);
+		if (iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) {
+			struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
+
 			/* Not releasing hash table! */
 			if (clone)
 				raw_rcv(sk, clone);
 		}
-		sk = sknext;
+		sk = __raw_v4_lookup(sk->next, iph->protocol,
+				     iph->saddr, iph->daddr,
+				     skb->dev->ifindex);
 	}
 out:
-	if (sk)
-		sock_hold(sk);
 	read_unlock(&raw_v4_lock);
-
-	return sk;
 }
 
 void raw_err (struct sock *sk, struct sk_buff *skb, u32 info)
@@ -244,71 +236,92 @@
 
 int raw_rcv(struct sock *sk, struct sk_buff *skb)
 {
+	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
+		kfree_skb(skb);
+		return NET_RX_DROP;
+	}
+
 	skb_push(skb, skb->data - skb->nh.raw);
 
 	raw_rcv_skb(sk, skb);
 	return 0;
 }
 
-struct rawfakehdr 
-{
-	struct	iovec *iov;
-	u32	saddr;
-	struct	dst_entry *dst;
-};
+static int raw_send_hdrinc(struct sock *sk, void *from, int length,
+			struct rtable *rt, 
+			unsigned int flags)
+{
+	struct inet_opt *inet = inet_sk(sk);
+	int hh_len;
+	struct iphdr *iph;
+	struct sk_buff *skb;
+	int err;
 
-/*
- *	Send a RAW IP packet.
- */
+	if (length > rt->u.dst.dev->mtu) {
+		ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport,
+			       rt->u.dst.dev->mtu);
+		return -EMSGSIZE;
+	}
+	if (flags&MSG_PROBE)
+		goto out;
 
-/*
- *	Callback support is trivial for SOCK_RAW
- */
-  
-static int raw_getfrag(const void *p, char *to, unsigned int offset,
-			unsigned int fraglen)
-{
-	struct rawfakehdr *rfh = (struct rawfakehdr *) p;
-	return memcpy_fromiovecend(to, rfh->iov, offset, fraglen);
-}
+	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
 
-/*
- *	IPPROTO_RAW needs extra work.
- */
- 
-static int raw_getrawfrag(const void *p, char *to, unsigned int offset,
-				unsigned int fraglen)
-{
-	struct rawfakehdr *rfh = (struct rawfakehdr *) p;
+	skb = sock_alloc_send_skb(sk, length+hh_len+15,
+				  flags&MSG_DONTWAIT, &err);
+	if (skb == NULL)
+		goto error; 
+	skb_reserve(skb, hh_len);
 
-	if (memcpy_fromiovecend(to, rfh->iov, offset, fraglen))
-		return -EFAULT;
+	skb->priority = sk->priority;
+	skb->dst = dst_clone(&rt->u.dst);
+
+	skb->nh.iph = iph = (struct iphdr *)skb_put(skb, length);
+
+	skb->ip_summed = CHECKSUM_NONE;
 
-	if (!offset) {
-		struct iphdr *iph = (struct iphdr *)to;
+	skb->h.raw = skb->nh.raw;
+	err = memcpy_fromiovecend((void *)iph, from, 0, length);
+	if (err)
+		goto error_fault;
+
+	/* We don't modify invalid header */
+	if (length >= sizeof(*iph) && iph->ihl * 4 <= length) {
 		if (!iph->saddr)
-			iph->saddr = rfh->saddr;
+			iph->saddr = rt->rt_src;
 		iph->check   = 0;
-		iph->tot_len = htons(fraglen); /* This is right as you can't
-						  frag RAW packets */
-		/*
-	 	 *	Deliberate breach of modularity to keep 
-	 	 *	ip_build_xmit clean (well less messy).
-		 */
+		iph->tot_len = htons(length);
 		if (!iph->id)
-			ip_select_ident(iph, rfh->dst, NULL);
+			ip_select_ident(iph, &rt->u.dst, NULL);
+
 		iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 	}
+
+	err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
+		      dst_output);
+	if (err > 0)
+		err = inet->recverr ? net_xmit_errno(err) : 0;
+	if (err)
+		goto error;
+out:
 	return 0;
+
+error_fault:
+	err = -EFAULT;
+	kfree_skb(skb);
+error:
+	IP_INC_STATS(IpOutDiscards);
+	return err; 
 }
 
 static int raw_sendmsg(struct sock *sk, struct msghdr *msg, int len)
 {
+	struct inet_opt *inet = inet_sk(sk);
 	struct ipcm_cookie ipc;
-	struct rawfakehdr rfh;
 	struct rtable *rt = NULL;
 	int free = 0;
 	u32 daddr;
+	u32 saddr;
 	u8  tos;
 	int err;
 
@@ -378,7 +391,7 @@
 			free = 1;
 	}
 
-	rfh.saddr = ipc.addr;
+	saddr = ipc.addr;
 	ipc.addr = daddr;
 
 	if (!ipc.opt)
@@ -404,12 +417,19 @@
 	if (MULTICAST(daddr)) {
 		if (!ipc.oif)
 			ipc.oif = sk->protinfo.af_inet.mc_index;
-		if (!rfh.saddr)
-			rfh.saddr = sk->protinfo.af_inet.mc_addr;
+		if (!saddr)
+			saddr = sk->protinfo.af_inet.mc_addr;
 	}
 
-	err = ip_route_output(&rt, daddr, rfh.saddr, tos, ipc.oif);
-
+	{
+		struct flowi fl = { .oif = ipc.oif,
+				    .nl_u = { .ip4_u =
+					      { .daddr = daddr,
+						.saddr = saddr,
+						.tos = tos } },
+				    .proto = inet->hdrincl ? IPPROTO_RAW : sk->protocol };
+		err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT));
+	}
 	if (err)
 		goto done;
 
@@ -421,14 +441,22 @@
 		goto do_confirm;
 back_from_confirm:
 
-	rfh.iov		= msg->msg_iov;
-	rfh.saddr	= rt->rt_src;
-	rfh.dst		= &rt->u.dst;
-	if (!ipc.addr)
-		ipc.addr = rt->rt_dst;
-	err = ip_build_xmit(sk, sk->protinfo.af_inet.hdrincl ? raw_getrawfrag :
-		       	    raw_getfrag, &rfh, len, &ipc, rt, msg->msg_flags);
-
+	if (inet->hdrincl)
+		err = raw_send_hdrinc(sk, msg->msg_iov, len, 
+					rt, msg->msg_flags);
+	
+	 else {
+		if (!ipc.addr)
+			ipc.addr = rt->rt_dst;
+		lock_sock(sk);
+		err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0,
+					&ipc, rt, msg->msg_flags);
+		if (err)
+			ip_flush_pending_frames(sk);
+		else if (!(msg->msg_flags & MSG_MORE))
+			err = ip_push_pending_frames(sk);
+		release_sock(sk);
+	}
 done:
 	if (free)
 		kfree(ipc.opt);
Index: net/ipv4/route.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/route.c,v
retrieving revision 1.1.1.30
retrieving revision 1.1.1.30.2.1
diff -u -r1.1.1.30 -r1.1.1.30.2.1
--- a/net/ipv4/route.c	28 Nov 2003 18:26:21 -0000	1.1.1.30
+++ b/net/ipv4/route.c	16 Apr 2004 13:16:22 -0000	1.1.1.30.2.1
@@ -95,6 +95,7 @@
 #include <net/arp.h>
 #include <net/tcp.h>
 #include <net/icmp.h>
+#include <net/xfrm.h>
 #ifdef CONFIG_SYSCTL
 #include <linux/sysctl.h>
 #endif
@@ -132,11 +133,10 @@
  */
 
 static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
-static struct dst_entry *ipv4_dst_reroute(struct dst_entry *dst,
-					   struct sk_buff *skb);
 static void		 ipv4_dst_destroy(struct dst_entry *dst);
 static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
 static void		 ipv4_link_failure(struct sk_buff *skb);
+static void		 ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
 static int rt_garbage_collect(void);
 
 
@@ -145,10 +145,10 @@
 	protocol:		__constant_htons(ETH_P_IP),
 	gc:			rt_garbage_collect,
 	check:			ipv4_dst_check,
-	reroute:		ipv4_dst_reroute,
 	destroy:		ipv4_dst_destroy,
 	negative_advice:	ipv4_negative_advice,
 	link_failure:		ipv4_link_failure,
+	update_pmtu:		ip_rt_update_pmtu,
 	entry_size:		sizeof(struct rtable),
 };
 
@@ -248,11 +248,12 @@
 				r->u.dst.__use,
 				0,
 				(unsigned long)r->rt_src,
-				(r->u.dst.advmss ?
-				 (int) r->u.dst.advmss + 40 : 0),
-				r->u.dst.window,
-				(int)((r->u.dst.rtt >> 3) + r->u.dst.rttvar),
-				r->key.tos,
+				(dst_metric(&r->u.dst, RTAX_ADVMSS) ?
+				 (int) dst_metric(&r->u.dst, RTAX_ADVMSS) + 40 : 0),
+				dst_metric(&r->u.dst, RTAX_WINDOW),
+				(int)((dst_metric(&r->u.dst, RTAX_RTT) >> 3)
+				      + dst_metric(&r->u.dst, RTAX_RTTVAR)),
+				r->fl.fl4_tos,
 				r->u.dst.hh ?
 					atomic_read(&r->u.dst.hh->hh_refcnt) :
 					-1,
@@ -337,7 +338,7 @@
 	/* Kill broadcast/multicast entries very aggresively, if they
 	   collide in hash table with more useful entries */
 	return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) &&
-		rth->key.iif && rth->u.rt_next;
+		rth->fl.iif && rth->u.rt_next;
 }
 
 static __inline__ int rt_valuable(struct rtable *rth)
@@ -382,7 +383,7 @@
 	if (rt_valuable(rt))
 		score |= (1<<31);
 
-	if (!rt->key.iif ||
+	if (!rt->fl.iif ||
 	    !(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL)))
 		score |= (1<<30);
 
@@ -647,6 +648,13 @@
 out:	return 0;
 }
 
+/* Flow-key equality: same oif/iif and byte-identical IPv4 part (nl_u.ip4_u). */
+static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
+{
+	return fl1->oif == fl2->oif && fl1->iif == fl2->iif &&
+	       !memcmp(&fl1->nl_u.ip4_u, &fl2->nl_u.ip4_u, sizeof(fl1->nl_u.ip4_u));
+}
+
 static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp)
 {
 	struct rtable	*rth, **rthp;
@@ -667,7 +675,7 @@
 
 	write_lock_bh(&rt_hash_table[hash].lock);
 	while ((rth = *rthp) != NULL) {
-		if (memcmp(&rth->key, &rt->key, sizeof(rt->key)) == 0) {
+		if (compare_keys(&rth->fl, &rt->fl)) {
 			/* Put it first */
 			*rthp = rth->u.rt_next;
 			rth->u.rt_next = rt_hash_table[hash].chain;
@@ -714,7 +722,7 @@
 	/* Try to bind route to arp only if it is output
 	   route or unicast forwarding path.
 	 */
-	if (rt->rt_type == RTN_UNICAST || rt->key.iif == 0) {
+	if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
 		int err = arp_bind_neighbour(&rt->u.dst);
 		if (err) {
 			write_unlock_bh(&rt_hash_table[hash].lock);
@@ -877,11 +885,11 @@
 			while ((rth = *rthp) != NULL) {
 				struct rtable *rt;
 
-				if (rth->key.dst != daddr ||
-				    rth->key.src != skeys[i] ||
-				    rth->key.tos != tos ||
-				    rth->key.oif != ikeys[k] ||
-				    rth->key.iif != 0) {
+				if (rth->fl.fl4_dst != daddr ||
+				    rth->fl.fl4_src != skeys[i] ||
+				    rth->fl.fl4_tos != tos ||
+				    rth->fl.oif != ikeys[k] ||
+				    rth->fl.iif != 0) {
 					rthp = &rth->u.rt_next;
 					continue;
 				}
@@ -907,12 +915,15 @@
 				*rt = *rth;
 				rt->u.dst.__use		= 1;
 				atomic_set(&rt->u.dst.__refcnt, 1);
+				rt->u.dst.child		= NULL;
 				if (rt->u.dst.dev)
 					dev_hold(rt->u.dst.dev);
+				rt->u.dst.obsolete	= 0;
 				rt->u.dst.lastuse	= jiffies;
+				rt->u.dst.path		= &rt->u.dst;
 				rt->u.dst.neighbour	= NULL;
 				rt->u.dst.hh		= NULL;
-				rt->u.dst.obsolete	= 0;
+				rt->u.dst.xfrm		= NULL;
 
 				rt->rt_flags		|= RTCF_REDIRECTED;
 
@@ -972,14 +983,14 @@
 			ret = NULL;
 		} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
 			   rt->u.dst.expires) {
-			unsigned hash = rt_hash_code(rt->key.dst,
-						     rt->key.src ^
-							(rt->key.oif << 5),
-						     rt->key.tos);
+			unsigned hash = rt_hash_code(rt->fl.fl4_dst,
+						     rt->fl.fl4_src ^
+							(rt->fl.oif << 5),
+						     rt->fl.fl4_tos);
 #if RT_CACHE_DEBUG >= 1
 			printk(KERN_DEBUG "ip_rt_advice: redirect to "
 					  "%u.%u.%u.%u/%02x dropped\n",
-				NIPQUAD(rt->rt_dst), rt->key.tos);
+				NIPQUAD(rt->rt_dst), rt->fl.fl4_tos);
 #endif
 			rt_del(hash, rt);
 			ret = NULL;
@@ -1124,34 +1135,34 @@
 		read_lock(&rt_hash_table[hash].lock);
 		for (rth = rt_hash_table[hash].chain; rth;
 		     rth = rth->u.rt_next) {
-			if (rth->key.dst == daddr &&
-			    rth->key.src == skeys[i] &&
+			if (rth->fl.fl4_dst == daddr &&
+			    rth->fl.fl4_src == skeys[i] &&
 			    rth->rt_dst  == daddr &&
 			    rth->rt_src  == iph->saddr &&
-			    rth->key.tos == tos &&
-			    rth->key.iif == 0 &&
-			    !(rth->u.dst.mxlock & (1 << RTAX_MTU))) {
+			    rth->fl.fl4_tos == tos &&
+			    rth->fl.iif == 0 &&
+			    !(dst_metric_locked(&rth->u.dst, RTAX_MTU))) {
 				unsigned short mtu = new_mtu;
 
 				if (new_mtu < 68 || new_mtu >= old_mtu) {
 
 					/* BSD 4.2 compatibility hack :-( */
 					if (mtu == 0 &&
-					    old_mtu >= rth->u.dst.pmtu &&
+					    old_mtu >= rth->u.dst.metrics[RTAX_MTU-1] &&
 					    old_mtu >= 68 + (iph->ihl << 2))
 						old_mtu -= iph->ihl << 2;
 
 					mtu = guess_mtu(old_mtu);
 				}
-				if (mtu <= rth->u.dst.pmtu) {
-					if (mtu < rth->u.dst.pmtu) { 
+				if (mtu <= rth->u.dst.metrics[RTAX_MTU-1]) {
+					if (mtu < rth->u.dst.metrics[RTAX_MTU-1]) { 
 						dst_confirm(&rth->u.dst);
 						if (mtu < ip_rt_min_pmtu) {
 							mtu = ip_rt_min_pmtu;
-							rth->u.dst.mxlock |=
+							rth->u.dst.metrics[RTAX_LOCK-1] |=
 								(1 << RTAX_MTU);
 						}
-						rth->u.dst.pmtu = mtu;
+						rth->u.dst.metrics[RTAX_MTU-1] = mtu;
 						dst_set_expires(&rth->u.dst,
 							ip_rt_mtu_expires);
 					}
@@ -1164,15 +1175,15 @@
 	return est_mtu ? : new_mtu;
 }
 
-void ip_rt_update_pmtu(struct dst_entry *dst, unsigned mtu)
+static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
 {
-	if (dst->pmtu > mtu && mtu >= 68 &&
-	    !(dst->mxlock & (1 << RTAX_MTU))) {
+	if (dst->metrics[RTAX_MTU-1] > mtu && mtu >= 68 &&
+	    !(dst_metric_locked(dst, RTAX_MTU))) {
 		if (mtu < ip_rt_min_pmtu) {
 			mtu = ip_rt_min_pmtu;
-			dst->mxlock |= (1 << RTAX_MTU);
+			dst->metrics[RTAX_LOCK-1] |= (1 << RTAX_MTU);
 		}
-		dst->pmtu = mtu;
+		dst->metrics[RTAX_MTU-1] = mtu;
 		dst_set_expires(dst, ip_rt_mtu_expires);
 	}
 }
@@ -1183,12 +1194,6 @@
 	return NULL;
 }
 
-static struct dst_entry *ipv4_dst_reroute(struct dst_entry *dst,
-					  struct sk_buff *skb)
-{
-	return NULL;
-}
-
 static void ipv4_dst_destroy(struct dst_entry *dst)
 {
 	struct rtable *rt = (struct rtable *) dst;
@@ -1234,9 +1239,9 @@
 	u32 src;
 	struct fib_result res;
 
-	if (rt->key.iif == 0)
+	if (rt->fl.iif == 0)
 		src = rt->rt_src;
-	else if (fib_lookup(&rt->key, &res) == 0) {
+	else if (fib_lookup(&rt->fl, &res) == 0) {
 #ifdef CONFIG_IP_ROUTE_NAT
 		if (res.type == RTN_NAT)
 			src = inet_select_addr(rt->u.dst.dev, rt->rt_gateway,
@@ -1269,28 +1274,30 @@
 		if (FIB_RES_GW(*res) &&
 		    FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
 			rt->rt_gateway = FIB_RES_GW(*res);
-		memcpy(&rt->u.dst.mxlock, fi->fib_metrics,
-			sizeof(fi->fib_metrics));
+		memcpy(rt->u.dst.metrics, fi->fib_metrics,
+		       sizeof(rt->u.dst.metrics));
 		if (fi->fib_mtu == 0) {
-			rt->u.dst.pmtu = rt->u.dst.dev->mtu;
-			if (rt->u.dst.mxlock & (1 << RTAX_MTU) &&
+			rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu;
+			if (rt->u.dst.metrics[RTAX_LOCK-1] & (1 << RTAX_MTU) &&
 			    rt->rt_gateway != rt->rt_dst &&
-			    rt->u.dst.pmtu > 576)
-				rt->u.dst.pmtu = 576;
+			    rt->u.dst.dev->mtu > 576)
+				rt->u.dst.metrics[RTAX_MTU-1] = 576;
 		}
 #ifdef CONFIG_NET_CLS_ROUTE
 		rt->u.dst.tclassid = FIB_RES_NH(*res).nh_tclassid;
 #endif
 	} else
-		rt->u.dst.pmtu	= rt->u.dst.dev->mtu;
+		rt->u.dst.metrics[RTAX_MTU-1]= rt->u.dst.dev->mtu;
 
-	if (rt->u.dst.pmtu > IP_MAX_MTU)
-		rt->u.dst.pmtu = IP_MAX_MTU;
-	if (rt->u.dst.advmss == 0)
-		rt->u.dst.advmss = max_t(unsigned int, rt->u.dst.dev->mtu - 40,
+	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
+		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl;
+	if (rt->u.dst.metrics[RTAX_MTU-1] > IP_MAX_MTU)
+		rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU;
+	if (rt->u.dst.metrics[RTAX_ADVMSS-1] == 0)
+		rt->u.dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->u.dst.dev->mtu - 40,
 				       ip_rt_min_advmss);
-	if (rt->u.dst.advmss > 65535 - 40)
-		rt->u.dst.advmss = 65535 - 40;
+	if (rt->u.dst.metrics[RTAX_ADVMSS-1] > 65535 - 40)
+		rt->u.dst.metrics[RTAX_ADVMSS-1] = 65535 - 40;
 
 #ifdef CONFIG_NET_CLS_ROUTE
 #ifdef CONFIG_IP_MULTIPLE_TABLES
@@ -1335,13 +1342,15 @@
 
 	atomic_set(&rth->u.dst.__refcnt, 1);
 	rth->u.dst.flags= DST_HOST;
-	rth->key.dst	= daddr;
+	if (in_dev->cnf.no_policy)
+		rth->u.dst.flags |= DST_NOPOLICY;
+	rth->fl.fl4_dst	= daddr;
 	rth->rt_dst	= daddr;
-	rth->key.tos	= tos;
+	rth->fl.fl4_tos	= tos;
 #ifdef CONFIG_IP_ROUTE_FWMARK
-	rth->key.fwmark	= skb->nfmark;
+	rth->fl.fl4_fwmark= skb->nfmark;
 #endif
-	rth->key.src	= saddr;
+	rth->fl.fl4_src	= saddr;
 	rth->rt_src	= saddr;
 #ifdef CONFIG_IP_ROUTE_NAT
 	rth->rt_dst_map	= daddr;
@@ -1351,10 +1360,10 @@
 	rth->u.dst.tclassid = itag;
 #endif
 	rth->rt_iif	=
-	rth->key.iif	= dev->ifindex;
+	rth->fl.iif	= dev->ifindex;
 	rth->u.dst.dev	= &loopback_dev;
 	dev_hold(rth->u.dst.dev);
-	rth->key.oif	= 0;
+	rth->fl.oif	= 0;
 	rth->rt_gateway	= daddr;
 	rth->rt_spec_dst= spec_dst;
 	rth->rt_type	= RTN_MULTICAST;
@@ -1396,10 +1405,19 @@
 int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr,
 			u8 tos, struct net_device *dev)
 {
-	struct rt_key	key;
 	struct fib_result res;
 	struct in_device *in_dev = in_dev_get(dev);
 	struct in_device *out_dev = NULL;
+	struct flowi fl = { .nl_u = { .ip4_u =
+				      { .daddr = daddr,
+					.saddr = saddr,
+					.tos = tos,
+					.scope = RT_SCOPE_UNIVERSE,
+#ifdef CONFIG_IP_ROUTE_FWMARK
+					.fwmark = skb->nfmark
+#endif
+				      } },
+			    .iif = dev->ifindex };
 	unsigned	flags = 0;
 	u32		itag = 0;
 	struct rtable * rth;
@@ -1413,17 +1431,7 @@
 	if (!in_dev)
 		goto out;
 
-	key.dst		= daddr;
-	key.src		= saddr;
-	key.tos		= tos;
-#ifdef CONFIG_IP_ROUTE_FWMARK
-	key.fwmark	= skb->nfmark;
-#endif
-	key.iif		= dev->ifindex;
-	key.oif		= 0;
-	key.scope	= RT_SCOPE_UNIVERSE;
-
-	hash = rt_hash_code(daddr, saddr ^ (key.iif << 5), tos);
+	hash = rt_hash_code(daddr, saddr ^ (fl.iif << 5), tos);
 
 	/* Check for the most weird martians, which can be not detected
 	   by fib_lookup.
@@ -1447,7 +1455,7 @@
 	/*
 	 *	Now we are ready to route packet.
 	 */
-	if ((err = fib_lookup(&key, &res)) != 0) {
+	if ((err = fib_lookup(&fl, &res)) != 0) {
 		if (!IN_DEV_FORWARD(in_dev))
 			goto e_inval;
 		goto no_route;
@@ -1467,17 +1475,17 @@
 			src_map = fib_rules_policy(saddr, &res, &flags);
 
 		if (res.type == RTN_NAT) {
-			key.dst = fib_rules_map_destination(daddr, &res);
+			fl.fl4_dst = fib_rules_map_destination(daddr, &res);
 			fib_res_put(&res);
 			free_res = 0;
-			if (fib_lookup(&key, &res))
+			if (fib_lookup(&fl, &res))
 				goto e_inval;
 			free_res = 1;
 			if (res.type != RTN_UNICAST)
 				goto e_inval;
 			flags |= RTCF_DNAT;
 		}
-		key.src = src_map;
+		fl.fl4_src = src_map;
 	}
 #endif
 
@@ -1503,8 +1511,8 @@
 		goto martian_destination;
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-	if (res.fi->fib_nhs > 1 && key.oif == 0)
-		fib_select_multipath(&key, &res);
+	if (res.fi->fib_nhs > 1 && fl.oif == 0)
+		fib_select_multipath(&fl, &res);
 #endif
 	out_dev = in_dev_get(FIB_RES_DEV(res));
 	if (out_dev == NULL) {
@@ -1541,26 +1549,30 @@
 
 	atomic_set(&rth->u.dst.__refcnt, 1);
 	rth->u.dst.flags= DST_HOST;
-	rth->key.dst	= daddr;
+	if (in_dev->cnf.no_policy)
+		rth->u.dst.flags |= DST_NOPOLICY;
+	if (in_dev->cnf.no_xfrm)
+		rth->u.dst.flags |= DST_NOXFRM;
+	rth->fl.fl4_dst	= daddr;
 	rth->rt_dst	= daddr;
-	rth->key.tos	= tos;
+	rth->fl.fl4_tos	= tos;
 #ifdef CONFIG_IP_ROUTE_FWMARK
-	rth->key.fwmark	= skb->nfmark;
+	rth->fl.fl4_fwmark= skb->nfmark;
 #endif
-	rth->key.src	= saddr;
+	rth->fl.fl4_src	= saddr;
 	rth->rt_src	= saddr;
 	rth->rt_gateway	= daddr;
 #ifdef CONFIG_IP_ROUTE_NAT
-	rth->rt_src_map	= key.src;
-	rth->rt_dst_map	= key.dst;
+	rth->rt_src_map	= fl.fl4_src;
+	rth->rt_dst_map	= fl.fl4_dst;
 	if (flags&RTCF_DNAT)
-		rth->rt_gateway	= key.dst;
+		rth->rt_gateway	= fl.fl4_dst;
 #endif
 	rth->rt_iif 	=
-	rth->key.iif	= dev->ifindex;
+	rth->fl.iif	= dev->ifindex;
 	rth->u.dst.dev	= out_dev->dev;
 	dev_hold(rth->u.dst.dev);
-	rth->key.oif 	= 0;
+	rth->fl.oif 	= 0;
 	rth->rt_spec_dst= spec_dst;
 
 	rth->u.dst.input = ip_forward;
@@ -1618,26 +1630,27 @@
 
 	atomic_set(&rth->u.dst.__refcnt, 1);
 	rth->u.dst.flags= DST_HOST;
-	rth->key.dst	= daddr;
+	if (in_dev->cnf.no_policy)
+		rth->u.dst.flags |= DST_NOPOLICY;
+	rth->fl.fl4_dst	= daddr;
 	rth->rt_dst	= daddr;
-	rth->key.tos	= tos;
+	rth->fl.fl4_tos	= tos;
 #ifdef CONFIG_IP_ROUTE_FWMARK
-	rth->key.fwmark	= skb->nfmark;
+	rth->fl.fl4_fwmark= skb->nfmark;
 #endif
-	rth->key.src	= saddr;
+	rth->fl.fl4_src	= saddr;
 	rth->rt_src	= saddr;
 #ifdef CONFIG_IP_ROUTE_NAT
-	rth->rt_dst_map	= key.dst;
-	rth->rt_src_map	= key.src;
+	rth->rt_dst_map	= fl.fl4_dst;
+	rth->rt_src_map	= fl.fl4_src;
 #endif
 #ifdef CONFIG_NET_CLS_ROUTE
 	rth->u.dst.tclassid = itag;
 #endif
 	rth->rt_iif	=
-	rth->key.iif	= dev->ifindex;
+	rth->fl.iif	= dev->ifindex;
 	rth->u.dst.dev	= &loopback_dev;
 	dev_hold(rth->u.dst.dev);
-	rth->key.oif 	= 0;
 	rth->rt_gateway	= daddr;
 	rth->rt_spec_dst= spec_dst;
 	rth->u.dst.input= ip_local_deliver;
@@ -1715,14 +1728,14 @@
 
 	read_lock(&rt_hash_table[hash].lock);
 	for (rth = rt_hash_table[hash].chain; rth; rth = rth->u.rt_next) {
-		if (rth->key.dst == daddr &&
-		    rth->key.src == saddr &&
-		    rth->key.iif == iif &&
-		    rth->key.oif == 0 &&
+		if (rth->fl.fl4_dst == daddr &&
+		    rth->fl.fl4_src == saddr &&
+		    rth->fl.iif == iif &&
+		    rth->fl.oif == 0 &&
 #ifdef CONFIG_IP_ROUTE_FWMARK
-		    rth->key.fwmark == skb->nfmark &&
+		    rth->fl.fl4_fwmark == skb->nfmark &&
 #endif
-		    rth->key.tos == tos) {
+		    rth->fl.fl4_tos == tos) {
 			rth->u.dst.lastuse = jiffies;
 			dst_hold(&rth->u.dst);
 			rth->u.dst.__use++;
@@ -1772,43 +1785,45 @@
  * Major route resolver routine.
  */
 
-int ip_route_output_slow(struct rtable **rp, const struct rt_key *oldkey)
+int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
 {
-	struct rt_key key;
+	u32 tos	= oldflp->fl4_tos & (IPTOS_RT_MASK | RTO_ONLINK);
+	struct flowi fl = { .nl_u = { .ip4_u =
+				      { .daddr = oldflp->fl4_dst,
+					.saddr = oldflp->fl4_src,
+					.tos = tos & IPTOS_RT_MASK,
+					.scope = ((tos & RTO_ONLINK) ?
+						  RT_SCOPE_LINK :
+						  RT_SCOPE_UNIVERSE),
+#ifdef CONFIG_IP_ROUTE_FWMARK
+					.fwmark = oldflp->fl4_fwmark
+#endif
+				      } },
+			    .iif = loopback_dev.ifindex,
+			    .oif = oldflp->oif };
 	struct fib_result res;
 	unsigned flags = 0;
 	struct rtable *rth;
 	struct net_device *dev_out = NULL;
+	struct in_device *in_dev = NULL;
 	unsigned hash;
 	int free_res = 0;
 	int err;
-	u32 tos;
 
-	tos		= oldkey->tos & (IPTOS_RT_MASK | RTO_ONLINK);
-	key.dst		= oldkey->dst;
-	key.src		= oldkey->src;
-	key.tos		= tos & IPTOS_RT_MASK;
-	key.iif		= loopback_dev.ifindex;
-	key.oif		= oldkey->oif;
-#ifdef CONFIG_IP_ROUTE_FWMARK
-	key.fwmark	= oldkey->fwmark;
-#endif
-	key.scope	= (tos & RTO_ONLINK) ? RT_SCOPE_LINK :
-						RT_SCOPE_UNIVERSE;
 	res.fi		= NULL;
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 	res.r		= NULL;
 #endif
 
-	if (oldkey->src) {
+	if (oldflp->fl4_src) {
 		err = -EINVAL;
-		if (MULTICAST(oldkey->src) ||
-		    BADCLASS(oldkey->src) ||
-		    ZERONET(oldkey->src))
+		if (MULTICAST(oldflp->fl4_src) ||
+		    BADCLASS(oldflp->fl4_src) ||
+		    ZERONET(oldflp->fl4_src))
 			goto out;
 
 		/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
-		dev_out = ip_dev_find(oldkey->src);
+		dev_out = ip_dev_find(oldflp->fl4_src);
 		if (dev_out == NULL)
 			goto out;
 
@@ -1820,8 +1835,8 @@
 		      of another iface. --ANK
 		 */
 
-		if (oldkey->oif == 0
-		    && (MULTICAST(oldkey->dst) || oldkey->dst == 0xFFFFFFFF)) {
+		if (oldflp->oif == 0
+		    && (MULTICAST(oldflp->fl4_dst) || oldflp->fl4_dst == 0xFFFFFFFF)) {
 			/* Special hack: user can direct multicasts
 			   and limited broadcast via necessary interface
 			   without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
@@ -1837,15 +1852,15 @@
 			   Luckily, this hack is good workaround.
 			 */
 
-			key.oif = dev_out->ifindex;
+			fl.oif = dev_out->ifindex;
 			goto make_route;
 		}
 		if (dev_out)
 			dev_put(dev_out);
 		dev_out = NULL;
 	}
-	if (oldkey->oif) {
-		dev_out = dev_get_by_index(oldkey->oif);
+	if (oldflp->oif) {
+		dev_out = dev_get_by_index(oldflp->oif);
 		err = -ENODEV;
 		if (dev_out == NULL)
 			goto out;
@@ -1854,39 +1869,39 @@
 			goto out;	/* Wrong error code */
 		}
 
-		if (LOCAL_MCAST(oldkey->dst) || oldkey->dst == 0xFFFFFFFF) {
-			if (!key.src)
-				key.src = inet_select_addr(dev_out, 0,
-								RT_SCOPE_LINK);
+		if (LOCAL_MCAST(oldflp->fl4_dst) || oldflp->fl4_dst == 0xFFFFFFFF) {
+			if (!fl.fl4_src)
+				fl.fl4_src = inet_select_addr(dev_out, 0,
+							      RT_SCOPE_LINK);
 			goto make_route;
 		}
-		if (!key.src) {
-			if (MULTICAST(oldkey->dst))
-				key.src = inet_select_addr(dev_out, 0,
-								key.scope);
-			else if (!oldkey->dst)
-				key.src = inet_select_addr(dev_out, 0,
-								RT_SCOPE_HOST);
+		if (!fl.fl4_src) {
+			if (MULTICAST(oldflp->fl4_dst))
+				fl.fl4_src = inet_select_addr(dev_out, 0,
+							      fl.fl4_scope);
+			else if (!oldflp->fl4_dst)
+				fl.fl4_src = inet_select_addr(dev_out, 0,
+							      RT_SCOPE_HOST);
 		}
 	}
 
-	if (!key.dst) {
-		key.dst = key.src;
-		if (!key.dst)
-			key.dst = key.src = htonl(INADDR_LOOPBACK);
+	if (!fl.fl4_dst) {
+		fl.fl4_dst = fl.fl4_src;
+		if (!fl.fl4_dst)
+			fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK);
 		if (dev_out)
 			dev_put(dev_out);
 		dev_out = &loopback_dev;
 		dev_hold(dev_out);
-		key.oif = loopback_dev.ifindex;
+		fl.oif = loopback_dev.ifindex;
 		res.type = RTN_LOCAL;
 		flags |= RTCF_LOCAL;
 		goto make_route;
 	}
 
-	if (fib_lookup(&key, &res)) {
+	if (fib_lookup(&fl, &res)) {
 		res.fi = NULL;
-		if (oldkey->oif) {
+		if (oldflp->oif) {
 			/* Apparently, routing tables are wrong. Assume,
 			   that the destination is on link.
 
@@ -1905,9 +1920,9 @@
 			   likely IPv6, but we do not.
 			 */
 
-			if (key.src == 0)
-				key.src = inet_select_addr(dev_out, 0,
-							   RT_SCOPE_LINK);
+			if (fl.fl4_src == 0)
+				fl.fl4_src = inet_select_addr(dev_out, 0,
+							      RT_SCOPE_LINK);
 			res.type = RTN_UNICAST;
 			goto make_route;
 		}
@@ -1922,13 +1937,13 @@
 		goto e_inval;
 
 	if (res.type == RTN_LOCAL) {
-		if (!key.src)
-			key.src = key.dst;
+		if (!fl.fl4_src)
+			fl.fl4_src = fl.fl4_dst;
 		if (dev_out)
 			dev_put(dev_out);
 		dev_out = &loopback_dev;
 		dev_hold(dev_out);
-		key.oif = dev_out->ifindex;
+		fl.oif = dev_out->ifindex;
 		if (res.fi)
 			fib_info_put(res.fi);
 		res.fi = NULL;
@@ -1937,36 +1952,40 @@
 	}
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-	if (res.fi->fib_nhs > 1 && key.oif == 0)
-		fib_select_multipath(&key, &res);
+	if (res.fi->fib_nhs > 1 && fl.oif == 0)
+		fib_select_multipath(&fl, &res);
 	else
 #endif
-	if (!res.prefixlen && res.type == RTN_UNICAST && !key.oif)
-		fib_select_default(&key, &res);
+	if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif)
+		fib_select_default(&fl, &res);
 
-	if (!key.src)
-		key.src = FIB_RES_PREFSRC(res);
+	if (!fl.fl4_src)
+		fl.fl4_src = FIB_RES_PREFSRC(res);
 
 	if (dev_out)
 		dev_put(dev_out);
 	dev_out = FIB_RES_DEV(res);
 	dev_hold(dev_out);
-	key.oif = dev_out->ifindex;
+	fl.oif = dev_out->ifindex;
 
 make_route:
-	if (LOOPBACK(key.src) && !(dev_out->flags&IFF_LOOPBACK))
+	if (LOOPBACK(fl.fl4_src) && !(dev_out->flags&IFF_LOOPBACK))
 		goto e_inval;
 
-	if (key.dst == 0xFFFFFFFF)
+	if (fl.fl4_dst == 0xFFFFFFFF)
 		res.type = RTN_BROADCAST;
-	else if (MULTICAST(key.dst))
+	else if (MULTICAST(fl.fl4_dst))
 		res.type = RTN_MULTICAST;
-	else if (BADCLASS(key.dst) || ZERONET(key.dst))
+	else if (BADCLASS(fl.fl4_dst) || ZERONET(fl.fl4_dst))
 		goto e_inval;
 
 	if (dev_out->flags & IFF_LOOPBACK)
 		flags |= RTCF_LOCAL;
 
+	in_dev = in_dev_get(dev_out);
+	if (!in_dev)
+		goto e_inval;
+
 	if (res.type == RTN_BROADCAST) {
 		flags |= RTCF_BROADCAST | RTCF_LOCAL;
 		if (res.fi) {
@@ -1975,11 +1994,8 @@
 		}
 	} else if (res.type == RTN_MULTICAST) {
 		flags |= RTCF_MULTICAST|RTCF_LOCAL;
-		read_lock(&inetdev_lock);
-		if (!__in_dev_get(dev_out) ||
-		    !ip_check_mc(__in_dev_get(dev_out),oldkey->dst,oldkey->src))
+		if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src))
 			flags &= ~RTCF_LOCAL;
-		read_unlock(&inetdev_lock);
 		/* If multicast route do not exist use
 		   default one, but do not gateway in this case.
 		   Yes, it is hack.
@@ -1996,25 +2012,28 @@
 
 	atomic_set(&rth->u.dst.__refcnt, 1);
 	rth->u.dst.flags= DST_HOST;
-	rth->key.dst	= oldkey->dst;
-	rth->key.tos	= tos;
-	rth->key.src	= oldkey->src;
-	rth->key.iif	= 0;
-	rth->key.oif	= oldkey->oif;
+	if (in_dev->cnf.no_xfrm)
+		rth->u.dst.flags |= DST_NOXFRM;
+	if (in_dev->cnf.no_policy)
+		rth->u.dst.flags |= DST_NOPOLICY;
+	rth->fl.fl4_dst	= oldflp->fl4_dst;
+	rth->fl.fl4_tos	= tos;
+	rth->fl.fl4_src	= oldflp->fl4_src;
+	rth->fl.oif	= oldflp->oif;
 #ifdef CONFIG_IP_ROUTE_FWMARK
-	rth->key.fwmark	= oldkey->fwmark;
+	rth->fl.fl4_fwmark= oldflp->fl4_fwmark;
 #endif
-	rth->rt_dst	= key.dst;
-	rth->rt_src	= key.src;
+	rth->rt_dst	= fl.fl4_dst;
+	rth->rt_src	= fl.fl4_src;
 #ifdef CONFIG_IP_ROUTE_NAT
-	rth->rt_dst_map	= key.dst;
-	rth->rt_src_map	= key.src;
+	rth->rt_dst_map	= fl.fl4_dst;
+	rth->rt_src_map	= fl.fl4_src;
 #endif
-	rth->rt_iif	= oldkey->oif ? : dev_out->ifindex;
+	rth->rt_iif	= oldflp->oif ? : dev_out->ifindex;
 	rth->u.dst.dev	= dev_out;
 	dev_hold(dev_out);
-	rth->rt_gateway = key.dst;
-	rth->rt_spec_dst= key.src;
+	rth->rt_gateway = fl.fl4_dst;
+	rth->rt_spec_dst= fl.fl4_src;
 
 	rth->u.dst.output=ip_output;
 
@@ -2022,40 +2041,39 @@
 
 	if (flags & RTCF_LOCAL) {
 		rth->u.dst.input = ip_local_deliver;
-		rth->rt_spec_dst = key.dst;
+		rth->rt_spec_dst = fl.fl4_dst;
 	}
 	if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
-		rth->rt_spec_dst = key.src;
+		rth->rt_spec_dst = fl.fl4_src;
 		if (flags & RTCF_LOCAL && !(dev_out->flags & IFF_LOOPBACK)) {
 			rth->u.dst.output = ip_mc_output;
 			rt_cache_stat[smp_processor_id()].out_slow_mc++;
 		}
 #ifdef CONFIG_IP_MROUTE
 		if (res.type == RTN_MULTICAST) {
-			struct in_device *in_dev = in_dev_get(dev_out);
-			if (in_dev) {
-				if (IN_DEV_MFORWARD(in_dev) &&
-				    !LOCAL_MCAST(oldkey->dst)) {
-					rth->u.dst.input = ip_mr_input;
-					rth->u.dst.output = ip_mc_output;
-				}
-				in_dev_put(in_dev);
+			if (IN_DEV_MFORWARD(in_dev) &&
+			    !LOCAL_MCAST(oldflp->fl4_dst)) {
+				rth->u.dst.input = ip_mr_input;
+				rth->u.dst.output = ip_mc_output;
 			}
 		}
 #endif
 	}
 
 	rt_set_nexthop(rth, &res, 0);
+
 
 	rth->rt_flags = flags;
 
-	hash = rt_hash_code(oldkey->dst, oldkey->src ^ (oldkey->oif << 5), tos);
+	hash = rt_hash_code(oldflp->fl4_dst, oldflp->fl4_src ^ (oldflp->oif << 5), tos);
 	err = rt_intern_hash(hash, rth, rp);
 done:
 	if (free_res)
 		fib_res_put(&res);
 	if (dev_out)
 		dev_put(dev_out);
+	if (in_dev)
+		in_dev_put(in_dev);
 out:	return err;
 
 e_inval:
@@ -2066,23 +2084,23 @@
 	goto done;
 }
 
-int ip_route_output_key(struct rtable **rp, const struct rt_key *key)
+int __ip_route_output_key(struct rtable **rp, const struct flowi *flp)
 {
 	unsigned hash;
 	struct rtable *rth;
 
-	hash = rt_hash_code(key->dst, key->src ^ (key->oif << 5), key->tos);
+	hash = rt_hash_code(flp->fl4_dst, flp->fl4_src ^ (flp->oif << 5), flp->fl4_tos);
 
 	read_lock_bh(&rt_hash_table[hash].lock);
 	for (rth = rt_hash_table[hash].chain; rth; rth = rth->u.rt_next) {
-		if (rth->key.dst == key->dst &&
-		    rth->key.src == key->src &&
-		    rth->key.iif == 0 &&
-		    rth->key.oif == key->oif &&
+		if (rth->fl.fl4_dst == flp->fl4_dst &&
+		    rth->fl.fl4_src == flp->fl4_src &&
+		    rth->fl.iif == 0 &&
+		    rth->fl.oif == flp->oif &&
 #ifdef CONFIG_IP_ROUTE_FWMARK
-		    rth->key.fwmark == key->fwmark &&
+		    rth->fl.fl4_fwmark == flp->fl4_fwmark &&
 #endif
-		    !((rth->key.tos ^ key->tos) &
+		    !((rth->fl.fl4_tos ^ flp->fl4_tos) &
 			    (IPTOS_RT_MASK | RTO_ONLINK))) {
 			rth->u.dst.lastuse = jiffies;
 			dst_hold(&rth->u.dst);
@@ -2096,8 +2114,26 @@
 	}
 	read_unlock_bh(&rt_hash_table[hash].lock);
 
-	return ip_route_output_slow(rp, key);
-}	
+	return ip_route_output_slow(rp, flp);
+}
+
+/* Resolve @flp through __ip_route_output_key().  If that succeeds and the
+ * flow names a protocol (flp->proto != 0), hand the result to xfrm_lookup().
+ */
+int ip_route_output_key(struct rtable **rp, struct flowi *flp)
+{
+	int err = __ip_route_output_key(rp, flp);
+
+	return (err || !flp->proto) ? err : xfrm_lookup((struct dst_entry **)rp, flp, NULL, 0);
+}
+
+/* As ip_route_output_key(), but passes @sk and @flags on to xfrm_lookup(). */
+int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags)
+{
+	int err = __ip_route_output_key(rp, flp);
+
+	return (err || !flp->proto) ? err : xfrm_lookup((struct dst_entry **)rp, flp, sk, flags);
+}
 
 static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 			int nowait)
@@ -2116,7 +2152,7 @@
 	r->rtm_family	 = AF_INET;
 	r->rtm_dst_len	= 32;
 	r->rtm_src_len	= 0;
-	r->rtm_tos	= rt->key.tos;
+	r->rtm_tos	= rt->fl.fl4_tos;
 	r->rtm_table	= RT_TABLE_MAIN;
 	r->rtm_type	= rt->rt_type;
 	r->rtm_scope	= RT_SCOPE_UNIVERSE;
@@ -2125,9 +2161,9 @@
 	if (rt->rt_flags & RTCF_NOTIFY)
 		r->rtm_flags |= RTM_F_NOTIFY;
 	RTA_PUT(skb, RTA_DST, 4, &rt->rt_dst);
-	if (rt->key.src) {
+	if (rt->fl.fl4_src) {
 		r->rtm_src_len = 32;
-		RTA_PUT(skb, RTA_SRC, 4, &rt->key.src);
+		RTA_PUT(skb, RTA_SRC, 4, &rt->fl.fl4_src);
 	}
 	if (rt->u.dst.dev)
 		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex);
@@ -2135,13 +2171,13 @@
 	if (rt->u.dst.tclassid)
 		RTA_PUT(skb, RTA_FLOW, 4, &rt->u.dst.tclassid);
 #endif
-	if (rt->key.iif)
+	if (rt->fl.iif)
 		RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_spec_dst);
-	else if (rt->rt_src != rt->key.src)
+	else if (rt->rt_src != rt->fl.fl4_src)
 		RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_src);
 	if (rt->rt_dst != rt->rt_gateway)
 		RTA_PUT(skb, RTA_GATEWAY, 4, &rt->rt_gateway);
-	if (rtnetlink_put_metrics(skb, &rt->u.dst.mxlock) < 0)
+	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
 		goto rtattr_failure;
 	ci.rta_lastuse	= jiffies - rt->u.dst.lastuse;
 	ci.rta_used	= rt->u.dst.__use;
@@ -2163,7 +2199,7 @@
 	eptr = (struct rtattr*)skb->tail;
 #endif
 	RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
-	if (rt->key.iif) {
+	if (rt->fl.iif) {
 #ifdef CONFIG_IP_MROUTE
 		u32 dst = rt->rt_dst;
 
@@ -2183,7 +2219,7 @@
 			}
 		} else
 #endif
-			RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->key.iif);
+			RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif);
 	}
 
 	nlh->nlmsg_len = skb->tail - b;
@@ -2237,10 +2273,14 @@
 		if (!err && rt->u.dst.error)
 			err = -rt->u.dst.error;
 	} else {
+		struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dst,
+							 .saddr = src,
+							 .tos = rtm->rtm_tos } } };
 		int oif = 0;
 		if (rta[RTA_OIF - 1])
 			memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int));
-		err = ip_route_output(&rt, dst, src, rtm->rtm_tos, oif);
+		fl.oif = oif;
+		err = ip_route_output_key(&rt, &fl);
 	}
 	if (err)
 		goto out_free;
@@ -2629,4 +2669,8 @@
 #ifdef CONFIG_NET_CLS_ROUTE
 	create_proc_read_entry("net/rt_acct", 0, 0, ip_rt_acct_read, NULL);
 #endif
+#ifdef CONFIG_XFRM
+	xfrm_init();
+	xfrm4_init();
+#endif
 }
Index: net/ipv4/syncookies.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/syncookies.c,v
retrieving revision 1.1.1.21
retrieving revision 1.1.1.21.2.1
diff -u -r1.1.1.21 -r1.1.1.21.2.1
--- a/net/ipv4/syncookies.c	3 Aug 2002 00:39:46 -0000	1.1.1.21
+++ b/net/ipv4/syncookies.c	16 Apr 2004 13:16:22 -0000	1.1.1.21.2.1
@@ -169,18 +169,25 @@
 	 * hasn't changed since we received the original syn, but I see
 	 * no easy way to do this. 
 	 */
-	if (ip_route_output(&rt,
-			    opt && 
-			    opt->srr ? opt->faddr : req->af.v4_req.rmt_addr,
-			    req->af.v4_req.loc_addr,
-			    RT_CONN_FLAGS(sk),
-			    0)) { 
-		tcp_openreq_free(req);
-		goto out; 
+	{
+		struct flowi fl = { .nl_u = { .ip4_u =
+					      { .daddr = ((opt && opt->srr) ?
+							  opt->faddr :
+							  req->af.v4_req.rmt_addr),
+						.saddr = req->af.v4_req.loc_addr,
+						.tos = RT_CONN_FLAGS(sk) } },
+				    .proto = IPPROTO_TCP,
+				    .uli_u = { .ports =
+					       { .sport = skb->h.th->dest,
+						 .dport = skb->h.th->source } } };
+		if (ip_route_output_key(&rt, &fl)) {
+			tcp_openreq_free(req);
+			goto out; 
+		}
 	}
 
 	/* Try to redo what tcp_v4_send_synack did. */
-	req->window_clamp = rt->u.dst.window;  
+	req->window_clamp = dst_metric(&rt->u.dst, RTAX_WINDOW);
 	tcp_select_initial_window(tcp_full_space(sk), req->mss,
 				  &req->rcv_wnd, &req->window_clamp, 
 				  0, &rcv_wscale);
Index: net/ipv4/sysctl_net_ipv4.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/sysctl_net_ipv4.c,v
retrieving revision 1.1.1.20
retrieving revision 1.1.1.20.2.1
diff -u -r1.1.1.20 -r1.1.1.20.2.1
--- a/net/ipv4/sysctl_net_ipv4.c	14 Apr 2004 13:05:41 -0000	1.1.1.20
+++ b/net/ipv4/sysctl_net_ipv4.c	16 Apr 2004 13:16:22 -0000	1.1.1.20.2.1
@@ -82,14 +82,39 @@
 			 void *newval, size_t newlen, 
 			 void **context)
 {
+	int *valp = table->data;
 	int new;
+
+	if (!newval || !newlen)
+		return 0;
+
 	if (newlen != sizeof(int))
 		return -EINVAL;
-	if (get_user(new,(int *)newval))
-		return -EFAULT; 
-	if (new != ipv4_devconf.forwarding) 
-		inet_forward_change(new); 
-	return 0; /* caller does change again and handles handles oldval */ 
+
+	if (get_user(new, (int *)newval))
+		return -EFAULT;
+
+	if (new == *valp)
+		return 0;
+
+	if (oldval && oldlenp) {
+		size_t len;
+
+		if (get_user(len, oldlenp))
+			return -EFAULT;
+
+		if (len) {
+			if (len > table->maxlen)
+				len = table->maxlen;
+			if (copy_to_user(oldval, valp, len))
+				return -EFAULT;
+			if (put_user(len, oldlenp))
+				return -EFAULT;
+		}
+	}
+
+	inet_forward_change(new);
+	return 1;
 }
 
 ctl_table ipv4_table[] = {
@@ -110,7 +135,7 @@
          &ipv4_sysctl_forward,&ipv4_sysctl_forward_strategy},
         {NET_IPV4_DEFAULT_TTL, "ip_default_ttl",
          &sysctl_ip_default_ttl, sizeof(int), 0644, NULL,
-         &proc_dointvec},
+         &ipv4_doint_and_flush, &ipv4_doint_and_flush_strategy},
         {NET_IPV4_AUTOCONFIG, "ip_autoconfig",
          &ipv4_config.autoconfig, sizeof(int), 0644, NULL,
          &proc_dointvec},
Index: net/ipv4/tcp.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/tcp.c,v
retrieving revision 1.1.1.28
retrieving revision 1.1.1.28.2.1
diff -u -r1.1.1.28 -r1.1.1.28.2.1
--- a/net/ipv4/tcp.c	18 Feb 2004 13:36:32 -0000	1.1.1.28
+++ b/net/ipv4/tcp.c	16 Apr 2004 13:16:22 -0000	1.1.1.28.2.1
@@ -204,6 +204,8 @@
  *		Andi Kleen 	:	Make poll agree with SIGIO
  *	Salvatore Sanfilippo	:	Support SO_LINGER with linger == 1 and
  *					lingertime == 0 (RFC 793 ABORT Call)
+ *	Hirokazu Takahashi	:	Use copy_from_user() instead of
+ *					csum_and_copy_from_user() if possible.
  *					
  *		This program is free software; you can redistribute it and/or
  *		modify it under the terms of the GNU General Public License
@@ -256,6 +258,7 @@
 
 #include <net/icmp.h>
 #include <net/tcp.h>
+#include <net/xfrm.h>
 
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
@@ -953,8 +956,8 @@
 	return res;
 }
 
-#define TCP_PAGE(sk)	(sk->tp_pinfo.af_tcp.sndmsg_page)
-#define TCP_OFF(sk)	(sk->tp_pinfo.af_tcp.sndmsg_off)
+#define TCP_PAGE(sk)	(inet_sk(sk)->sndmsg_page)
+#define TCP_OFF(sk)	(inet_sk(sk)->sndmsg_off)
 
 static inline int
 tcp_copy_to_page(struct sock *sk, char *from, struct sk_buff *skb,
@@ -963,18 +966,22 @@
 	int err = 0;
 	unsigned int csum;
 
-	csum = csum_and_copy_from_user(from, page_address(page)+off,
+	if (skb->ip_summed == CHECKSUM_NONE) {
+		csum = csum_and_copy_from_user(from, page_address(page) + off,
 				       copy, 0, &err);
-	if (!err) {
-		if (skb->ip_summed == CHECKSUM_NONE)
-			skb->csum = csum_block_add(skb->csum, csum, skb->len);
-		skb->len += copy;
-		skb->data_len += copy;
-		skb->truesize += copy;
-		sk->wmem_queued += copy;
-		sk->forward_alloc -= copy;
+		if (err)
+			return err;
+		skb->csum = csum_block_add(skb->csum, csum, skb->len);
+	} else if (copy_from_user(page_address(page) + off, from, copy)) {
+		return -EFAULT;	/* nothing accounted on a failed copy */
 	}
-	return err;
+
+	skb->len += copy;
+	skb->data_len += copy;
+	skb->truesize += copy;
+	sk->wmem_queued += copy;
+	sk->forward_alloc -= copy;
+	return 0;
 }
 
 static inline int
@@ -984,11 +991,16 @@
 	unsigned int csum;
 	int off = skb->len;
 
-	csum = csum_and_copy_from_user(from, skb_put(skb, copy),
+	if (skb->ip_summed == CHECKSUM_NONE) {
+		csum = csum_and_copy_from_user(from, skb_put(skb, copy),
 				       copy, 0, &err);
-	if (!err) {
-		skb->csum = csum_block_add(skb->csum, csum, off);
-		return 0;
+		if (!err) {
+			skb->csum = csum_block_add(skb->csum, csum, off);
+			return 0;
+		}
+	} else {
+		if (!copy_from_user(skb_put(skb, copy), from, copy))
+			return 0;
 	}
 
 	__skb_trim(skb, off);
@@ -1070,6 +1082,12 @@
 				if (skb == NULL)
 					goto wait_for_memory;
 
+				/*
+				 * Check whether we can use HW checksum.
+				 */
+				if (sk->route_caps & (NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM))
+					skb->ip_summed = CHECKSUM_HW;
+
 				skb_entail(sk, tp, skb);
 				copy = mss_now;
 			}
@@ -1890,6 +1908,8 @@
 
 	tcp_kill_sk_queues(sk);
 
+	xfrm_sk_free_policy(sk);
+
 #ifdef INET_REFCNT_DEBUG
 	if (atomic_read(&sk->refcnt) != 1) {
 		printk(KERN_DEBUG "Destruction TCP %p delayed, c=%d\n", sk, atomic_read(&sk->refcnt));
Index: net/ipv4/tcp_input.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/tcp_input.c,v
retrieving revision 1.1.1.32
retrieving revision 1.1.1.32.2.1
diff -u -r1.1.1.32 -r1.1.1.32.2.1
--- a/net/ipv4/tcp_input.c	14 Apr 2004 13:05:41 -0000	1.1.1.32
+++ b/net/ipv4/tcp_input.c	16 Apr 2004 13:16:22 -0000	1.1.1.32.2.1
@@ -529,25 +529,25 @@
 			 * Probably, no packets returned in time.
 			 * Reset our results.
 			 */
-			if (!(dst->mxlock&(1<<RTAX_RTT)))
-				dst->rtt = 0;
+			if (!(dst_metric_locked(dst, RTAX_RTT)))
+				dst->metrics[RTAX_RTT-1] = 0;
 			return;
 		}
 
-		m = dst->rtt - tp->srtt;
+		m = dst_metric(dst, RTAX_RTT) - tp->srtt;
 
 		/* If newly calculated rtt larger than stored one,
 		 * store new one. Otherwise, use EWMA. Remember,
 		 * rtt overestimation is always better than underestimation.
 		 */
-		if (!(dst->mxlock&(1<<RTAX_RTT))) {
+		if (!(dst_metric_locked(dst, RTAX_RTT))) {
 			if (m <= 0)
-				dst->rtt = tp->srtt;
+				dst->metrics[RTAX_RTT-1] = tp->srtt;
 			else
-				dst->rtt -= (m>>3);
+				dst->metrics[RTAX_RTT-1] -= (m>>3);
 		}
 
-		if (!(dst->mxlock&(1<<RTAX_RTTVAR))) {
+		if (!(dst_metric_locked(dst, RTAX_RTTVAR))) {
 			if (m < 0)
 				m = -m;
 
@@ -556,67 +556,61 @@
 			if (m < tp->mdev)
 				m = tp->mdev;
 
-			if (m >= dst->rttvar)
-				dst->rttvar = m;
+			if (m >= dst_metric(dst, RTAX_RTTVAR))
+				dst->metrics[RTAX_RTTVAR-1] = m;
 			else
-				dst->rttvar -= (dst->rttvar - m)>>2;
+				dst->metrics[RTAX_RTTVAR-1] -=
+					(dst->metrics[RTAX_RTTVAR-1] - m)>>2;
 		}
 
 		if (tp->snd_ssthresh >= 0xFFFF) {
 			/* Slow start still did not finish. */
-			if (dst->ssthresh &&
-			    !(dst->mxlock&(1<<RTAX_SSTHRESH)) &&
-			    (tp->snd_cwnd>>1) > dst->ssthresh)
-				dst->ssthresh = (tp->snd_cwnd>>1);
-			if (!(dst->mxlock&(1<<RTAX_CWND)) &&
-			    tp->snd_cwnd > dst->cwnd)
-				dst->cwnd = tp->snd_cwnd;
+			if (dst_metric(dst, RTAX_SSTHRESH) &&
+			    !dst_metric_locked(dst, RTAX_SSTHRESH) &&
+			    (tp->snd_cwnd >> 1) > dst_metric(dst, RTAX_SSTHRESH))
+				dst->metrics[RTAX_SSTHRESH-1] = tp->snd_cwnd >> 1;
+			if (!dst_metric_locked(dst, RTAX_CWND) &&
+			    tp->snd_cwnd > dst_metric(dst, RTAX_CWND))
+				dst->metrics[RTAX_CWND-1] = tp->snd_cwnd;
 		} else if (tp->snd_cwnd > tp->snd_ssthresh &&
 			   tp->ca_state == TCP_CA_Open) {
 			/* Cong. avoidance phase, cwnd is reliable. */
-			if (!(dst->mxlock&(1<<RTAX_SSTHRESH)))
-				dst->ssthresh = max(tp->snd_cwnd>>1, tp->snd_ssthresh);
-			if (!(dst->mxlock&(1<<RTAX_CWND)))
-				dst->cwnd = (dst->cwnd + tp->snd_cwnd)>>1;
+			if (!dst_metric_locked(dst, RTAX_SSTHRESH))
+				dst->metrics[RTAX_SSTHRESH-1] =
+					max(tp->snd_cwnd >> 1, tp->snd_ssthresh);
+			if (!dst_metric_locked(dst, RTAX_CWND))
+				dst->metrics[RTAX_CWND-1] = (dst->metrics[RTAX_CWND-1] + tp->snd_cwnd) >> 1;
 		} else {
 			/* Else slow start did not finish, cwnd is non-sense,
 			   ssthresh may be also invalid.
 			 */
-			if (!(dst->mxlock&(1<<RTAX_CWND)))
-				dst->cwnd = (dst->cwnd + tp->snd_ssthresh)>>1;
-			if (dst->ssthresh &&
-			    !(dst->mxlock&(1<<RTAX_SSTHRESH)) &&
-			    tp->snd_ssthresh > dst->ssthresh)
-				dst->ssthresh = tp->snd_ssthresh;
+			if (!dst_metric_locked(dst, RTAX_CWND))
+				dst->metrics[RTAX_CWND-1] = (dst->metrics[RTAX_CWND-1] + tp->snd_ssthresh) >> 1;
+			if (dst->metrics[RTAX_SSTHRESH-1] &&
+			    !dst_metric_locked(dst, RTAX_SSTHRESH) &&
+			    tp->snd_ssthresh > dst->metrics[RTAX_SSTHRESH-1])
+				dst->metrics[RTAX_SSTHRESH-1] = tp->snd_ssthresh;
 		}
 
-		if (!(dst->mxlock&(1<<RTAX_REORDERING))) {
-			if (dst->reordering < tp->reordering &&
+		if (!dst_metric_locked(dst, RTAX_REORDERING)) {
+			if (dst->metrics[RTAX_REORDERING-1] < tp->reordering &&
 			    tp->reordering != sysctl_tcp_reordering)
-				dst->reordering = tp->reordering;
+				dst->metrics[RTAX_REORDERING-1] = tp->reordering;
 		}
 	}
 }
 
-/* Increase initial CWND conservatively: if estimated
- * RTT is low enough (<20msec) or if we have some preset ssthresh.
- *
- * Numbers are taken from RFC2414.
- */
-__u32 tcp_init_cwnd(struct tcp_opt *tp)
+/* Numbers are taken from RFC2414.  */
+__u32 tcp_init_cwnd(struct tcp_opt *tp, struct dst_entry *dst)
 {
-	__u32 cwnd;
-
-	if (tp->mss_cache > 1460)
-		return 2;
-
-	cwnd = (tp->mss_cache > 1095) ? 3 : 4;
-
-	if (!tp->srtt || (tp->snd_ssthresh >= 0xFFFF && tp->srtt > ((HZ/50)<<3)))
-		cwnd = 2;
-	else if (cwnd > tp->snd_ssthresh)
-		cwnd = tp->snd_ssthresh;
+	__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
 
+	if (!cwnd) {
+		if (tp->mss_cache > 1460)
+			cwnd = 2;
+		else
+			cwnd = (tp->mss_cache > 1095) ? 3 : 4;
+	}
 	return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
 }
 
@@ -632,22 +626,23 @@
 
 	dst_confirm(dst);
 
-	if (dst->mxlock&(1<<RTAX_CWND))
-		tp->snd_cwnd_clamp = dst->cwnd;
-	if (dst->ssthresh) {
-		tp->snd_ssthresh = dst->ssthresh;
+	if (dst_metric_locked(dst, RTAX_CWND))
+		tp->snd_cwnd_clamp = dst_metric(dst, RTAX_CWND);
+	if (dst_metric(dst, RTAX_SSTHRESH)) {
+		tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH);
 		if (tp->snd_ssthresh > tp->snd_cwnd_clamp)
 			tp->snd_ssthresh = tp->snd_cwnd_clamp;
 	}
-	if (dst->reordering && tp->reordering != dst->reordering) {
+	if (dst_metric(dst, RTAX_REORDERING) &&
+	    tp->reordering != dst_metric(dst, RTAX_REORDERING)) {
 		tp->sack_ok &= ~2;
-		tp->reordering = dst->reordering;
+		tp->reordering = dst_metric(dst, RTAX_REORDERING);
 	}
 
-	if (dst->rtt == 0)
+	if (dst_metric(dst, RTAX_RTT) == 0)
 		goto reset;
 
-	if (!tp->srtt && dst->rtt < (TCP_TIMEOUT_INIT<<3))
+	if (!tp->srtt && dst_metric(dst, RTAX_RTT) < (TCP_TIMEOUT_INIT << 3))
 		goto reset;
 
 	/* Initial rtt is determined from SYN,SYN-ACK.
@@ -664,17 +659,17 @@
 	 * to low value, and then abruptly stops to do it and starts to delay
 	 * ACKs, wait for troubles.
 	 */
-	if (dst->rtt > tp->srtt)
-		tp->srtt = dst->rtt;
-	if (dst->rttvar > tp->mdev) {
-		tp->mdev = dst->rttvar;
+	if (dst_metric(dst, RTAX_RTT) > tp->srtt)
+		tp->srtt = dst_metric(dst, RTAX_RTT);
+	if (dst_metric(dst, RTAX_RTTVAR) > tp->mdev) {
+		tp->mdev = dst_metric(dst, RTAX_RTTVAR);
 		tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN);
 	}
 	tcp_set_rto(tp);
 	tcp_bound_rto(tp);
 	if (tp->rto < TCP_TIMEOUT_INIT && !tp->saw_tstamp)
 		goto reset;
-	tp->snd_cwnd = tcp_init_cwnd(tp);
+	tp->snd_cwnd = tcp_init_cwnd(tp, dst);
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 	return;
 
@@ -3923,7 +3918,24 @@
 
 		tcp_sync_mss(sk, tp->pmtu_cookie);
 		tcp_initialize_rcv_mss(sk);
+
+		/* Remember, tcp_poll() does not lock socket!
+		 * Change state from SYN-SENT only after copied_seq
+		 * is initialized. */
+		tp->copied_seq = tp->rcv_nxt;
+		mb();
+		tcp_set_state(sk, TCP_ESTABLISHED);
+
+		/* Make sure socket is routed, for correct metrics.  */
+		tp->af_specific->rebuild_header(sk);
+
 		tcp_init_metrics(sk);
+
+		/* Prevent spurious tcp_cwnd_restart() on first data
+		 * packet.
+		 */
+		tp->lsndtime = tcp_time_stamp;
+
 		tcp_init_buffer_space(sk);
 
 		if (sk->keepopen)
@@ -3934,13 +3946,6 @@
 		else
 			tp->pred_flags = 0;
 
-		/* Remember, tcp_poll() does not lock socket!
-		 * Change state from SYN-SENT only after copied_seq
-		 * is initialized. */
-		tp->copied_seq = tp->rcv_nxt;
-		mb();
-		tcp_set_state(sk, TCP_ESTABLISHED);
-
 		if(!sk->dead) {
 			sk->state_change(sk);
 			sk_wake_async(sk, 0, POLL_OUT);
@@ -4186,7 +4191,18 @@
 				if (tp->tstamp_ok)
 					tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
 
+				/* Make sure socket is routed, for
+				 * correct metrics.
+				 */
+				tp->af_specific->rebuild_header(sk);
+
 				tcp_init_metrics(sk);
+
+				/* Prevent spurious tcp_cwnd_restart() on
+				 * first data packet.
+				 */
+				tp->lsndtime = tcp_time_stamp;
+
 				tcp_initialize_rcv_mss(sk);
 				tcp_init_buffer_space(sk);
 				tcp_fast_path_on(tp);
Index: net/ipv4/tcp_ipv4.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/tcp_ipv4.c,v
retrieving revision 1.1.1.29
retrieving revision 1.1.1.29.2.1
diff -u -r1.1.1.29 -r1.1.1.29.2.1
--- a/net/ipv4/tcp_ipv4.c	14 Apr 2004 13:05:41 -0000	1.1.1.29
+++ b/net/ipv4/tcp_ipv4.c	16 Apr 2004 13:16:22 -0000	1.1.1.29.2.1
@@ -63,13 +63,12 @@
 #include <net/tcp.h>
 #include <net/ipv6.h>
 #include <net/inet_common.h>
+#include <net/xfrm.h>
 
 #include <linux/inet.h>
 #include <linux/stddef.h>
-#include <linux/ipsec.h>
 
 extern int sysctl_ip_dynaddr;
-extern int sysctl_ip_default_ttl;
 int sysctl_tcp_tw_reuse = 0;
 int sysctl_tcp_low_latency = 0;
 
@@ -785,7 +784,9 @@
 	}
 
 	tmp = ip_route_connect(&rt, nexthop, sk->saddr,
-			       RT_CONN_FLAGS(sk), sk->bound_dev_if);
+			       RT_CONN_FLAGS(sk), sk->bound_dev_if,
+			       IPPROTO_TCP,
+			       sk->sport, usin->sin_port, sk);
 	if (tmp < 0)
 		return tmp;
 
@@ -794,9 +795,6 @@
 		return -ENETUNREACH;
 	}
 
-	__sk_dst_set(sk, &rt->u.dst);
-	sk->route_caps = rt->u.dst.dev->features;
-
 	if (!sk->protinfo.af_inet.opt || !sk->protinfo.af_inet.opt->srr)
 		daddr = rt->rt_dst;
 
@@ -846,6 +844,15 @@
 	if (err)
 		goto failure;
 
+	err = ip_route_newports(&rt, sk->sport, sk->dport, sk);
+	if (err)
+		goto failure;
+
+	/* OK, now commit destination to socket.  */
+	__sk_dst_set(sk, &rt->u.dst);
+	sk->route_caps = rt->u.dst.dev->features;
+	tp->ext2_header_len = rt->u.dst.header_len;
+
 	if (!tp->write_seq)
 		tp->write_seq = secure_tcp_sequence_number(sk->saddr, sk->daddr,
 							   sk->sport, usin->sin_port);
@@ -853,14 +860,16 @@
 	sk->protinfo.af_inet.id = tp->write_seq^jiffies;
 
 	err = tcp_connect(sk);
+	rt = NULL;
 	if (err)
 		goto failure;
 
 	return 0;
 
 failure:
+	/* This unhashes the socket and releases the local port, if necessary. */
 	tcp_set_state(sk, TCP_CLOSE);
-	__sk_dst_reset(sk);
+	ip_rt_put(rt);
 	sk->route_caps = 0;
 	sk->dport = 0;
 	return err;
@@ -922,7 +931,7 @@
 /* 
  * This routine does path mtu discovery as defined in RFC1191.
  */
-static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *ip, unsigned mtu)
+static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *ip, u32 mtu)
 {
 	struct dst_entry *dst;
 	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
@@ -943,17 +952,19 @@
 	if ((dst = __sk_dst_check(sk, 0)) == NULL)
 		return;
 
-	ip_rt_update_pmtu(dst, mtu);
+	dst->ops->update_pmtu(dst, mtu);
 
 	/* Something is about to be wrong... Remember soft error
 	 * for the case, if this connection will not able to recover.
 	 */
-	if (mtu < dst->pmtu && ip_dont_fragment(sk, dst))
+	if (mtu < dst_pmtu(dst) && ip_dont_fragment(sk, dst))
 		sk->err_soft = EMSGSIZE;
 
+	mtu = dst_pmtu(dst);
+
 	if (sk->protinfo.af_inet.pmtudisc != IP_PMTUDISC_DONT &&
-	    tp->pmtu_cookie > dst->pmtu) {
-		tcp_sync_mss(sk, dst->pmtu);
+	    tp->pmtu_cookie > mtu) {
+		tcp_sync_mss(sk, mtu);
 
 		/* Resend the TCP packet because it's  
 		 * clear that the old packet has been
@@ -1191,10 +1202,8 @@
 				      sizeof(struct tcphdr),
 				      IPPROTO_TCP,
 				      0); 
-	arg.n_iov = 1;
 	arg.csumoffset = offsetof(struct tcphdr, check) / 2; 
 
-	tcp_socket->sk->protinfo.af_inet.ttl = sysctl_ip_default_ttl;
 	ip_send_reply(tcp_socket->sk, skb, &arg, sizeof rth);
 
 	TCP_INC_STATS_BH(TcpOutSegs);
@@ -1219,7 +1228,6 @@
 
 	arg.iov[0].iov_base = (unsigned char *)&rep; 
 	arg.iov[0].iov_len  = sizeof(rep.th);
-	arg.n_iov = 1;
 	if (ts) {
 		rep.tsopt[0] = htonl((TCPOPT_NOP << 24) |
 				     (TCPOPT_NOP << 16) |
@@ -1270,14 +1278,20 @@
 static struct dst_entry* tcp_v4_route_req(struct sock *sk, struct open_request *req)
 {
 	struct rtable *rt;
-	struct ip_options *opt;
+	struct ip_options *opt = req->af.v4_req.opt;
+	struct flowi fl = { .oif = sk->bound_dev_if,
+			    .nl_u = { .ip4_u =
+				      { .daddr = ((opt && opt->srr) ?
+						  opt->faddr :
+						  req->af.v4_req.rmt_addr),
+					.saddr = req->af.v4_req.loc_addr,
+					.tos = RT_CONN_FLAGS(sk) } },
+			    .proto = IPPROTO_TCP,
+			    .uli_u = { .ports =
+				       { .sport = sk->sport,
+					 .dport = req->rmt_port } } };
 
-	opt = req->af.v4_req.opt;
-	if(ip_route_output(&rt, ((opt && opt->srr) ?
-				 opt->faddr :
-				 req->af.v4_req.rmt_addr),
-			   req->af.v4_req.loc_addr,
-			   RT_CONN_FLAGS(sk), sk->bound_dev_if)) {
+	if (ip_route_output_flow(&rt, &fl, sk, 0)) {
 		IP_INC_STATS_BH(IpOutNoRoutes);
 		return NULL;
 	}
@@ -1500,7 +1514,7 @@
 			 (sysctl_max_syn_backlog - tcp_synq_len(sk)
 			  < (sysctl_max_syn_backlog>>2)) &&
 			 (!peer || !peer->tcp_ts_stamp) &&
-			 (!dst || !dst->rtt)) {
+			 (!dst || !dst_metric(dst, RTAX_RTT))) {
 			/* Without syncookies last quarter of
 			 * backlog is filled with destinations, proven to be alive.
 			 * It means that we continue to communicate
@@ -1572,10 +1586,11 @@
 	newtp->ext_header_len = 0;
 	if (newsk->protinfo.af_inet.opt)
 		newtp->ext_header_len = newsk->protinfo.af_inet.opt->optlen;
+	newtp->ext2_header_len = dst->header_len;
 	newsk->protinfo.af_inet.id = newtp->write_seq^jiffies;
 
-	tcp_sync_mss(newsk, dst->pmtu);
-	newtp->advmss = dst->advmss;
+	tcp_sync_mss(newsk, dst_pmtu(dst));
+	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);;
 	tcp_initialize_rcv_mss(newsk);
 
 	__tcp_v4_hash(newsk, 0);
@@ -1760,12 +1775,12 @@
 		goto no_tcp_socket;
 
 process:
-	if(!ipsec_sk_policy(sk,skb))
-		goto discard_and_relse;
-
 	if (sk->state == TCP_TIME_WAIT)
 		goto do_time_wait;
 
+	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
+		goto discard_and_relse;
+
 	if (sk_filter(sk, skb, 0))
 		goto discard_and_relse;
 
@@ -1785,6 +1800,9 @@
 	return ret;
 
 no_tcp_socket:
+	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
+		goto discard_it;
+
 	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
 bad_packet:
 		TCP_INC_STATS_BH(TcpInErrs);
@@ -1802,6 +1820,9 @@
 	goto discard_it;
 
 do_time_wait:
+	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
+		goto discard_and_relse;
+
 	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
 		TCP_INC_STATS_BH(TcpInErrs);
 		tcp_tw_put((struct tcp_tw_bucket *) sk);
@@ -1856,12 +1877,15 @@
 	/* Query new route. */
 	err = ip_route_connect(&rt, daddr, 0,
 			       RT_TOS(sk->protinfo.af_inet.tos)|sk->localroute,
-			       sk->bound_dev_if);
+			       sk->bound_dev_if,
+			       IPPROTO_TCP,
+			       sk->sport, sk->dport, sk);
 	if (err)
 		return err;
 
 	__sk_dst_set(sk, &rt->u.dst);
 	sk->route_caps = rt->u.dst.dev->features;
+	tcp_sk(sk)->ext2_header_len = rt->u.dst.header_len;
 
 	new_saddr = rt->rt_src;
 
@@ -1904,11 +1928,23 @@
 	if(sk->protinfo.af_inet.opt && sk->protinfo.af_inet.opt->srr)
 		daddr = sk->protinfo.af_inet.opt->faddr;
 
-	err = ip_route_output(&rt, daddr, sk->saddr,
-			      RT_CONN_FLAGS(sk), sk->bound_dev_if);
+	{
+		struct flowi fl = { .oif = sk->bound_dev_if,
+				    .nl_u = { .ip4_u =
+					      { .daddr = daddr,
+						.saddr = sk->saddr,
+						.tos = RT_CONN_FLAGS(sk) } },
+				    .proto = IPPROTO_TCP,
+				    .uli_u = { .ports =
+					       { .sport = sk->sport,
+						 .dport = sk->dport } } };
+						
+		err = ip_route_output_flow(&rt, &fl, sk, 0);
+	}
 	if (!err) {
 		__sk_dst_set(sk, &rt->u.dst);
 		sk->route_caps = rt->u.dst.dev->features;
+		tcp_sk(sk)->ext2_header_len = rt->u.dst.header_len;
 		return 0;
 	}
 
@@ -2070,8 +2106,8 @@
 		tcp_put_port(sk);
 
 	/* If sendmsg cached page exists, toss it. */
-	if (tp->sndmsg_page != NULL)
-		__free_page(tp->sndmsg_page);
+	if (inet_sk(sk)->sndmsg_page)
+		__free_page(inet_sk(sk)->sndmsg_page);
 
 	atomic_dec(&tcp_sockets_allocated);
 
@@ -2329,7 +2365,7 @@
 	if ((err=ops->create(tcp_socket, IPPROTO_TCP))<0)
 		panic("Failed to create the TCP control socket.\n");
 	tcp_socket->sk->allocation=GFP_ATOMIC;
-	tcp_socket->sk->protinfo.af_inet.ttl = MAXTTL;
+	tcp_socket->sk->protinfo.af_inet.uc_ttl = -1;
 
 	/* Unhash it so that IP input processing does not even
 	 * see it, we do not wish this socket to see incoming
Index: net/ipv4/tcp_minisocks.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/tcp_minisocks.c,v
retrieving revision 1.1.1.22
retrieving revision 1.1.1.22.2.1
diff -u -r1.1.1.22 -r1.1.1.22.2.1
--- a/net/ipv4/tcp_minisocks.c	25 Aug 2003 11:44:44 -0000	1.1.1.22
+++ b/net/ipv4/tcp_minisocks.c	16 Apr 2004 13:16:23 -0000	1.1.1.22.2.1
@@ -25,6 +25,7 @@
 #include <linux/sysctl.h>
 #include <net/tcp.h>
 #include <net/inet_common.h>
+#include <net/xfrm.h>
 
 #ifdef CONFIG_SYSCTL
 #define SYNC_INIT 0 /* let the user enable it */
@@ -683,6 +684,13 @@
 		if ((filter = newsk->filter) != NULL)
 			sk_filter_charge(newsk, filter);
 #endif
+		if (unlikely(xfrm_sk_clone_policy(newsk))) {
+			/* It is still raw copy of parent, so invalidate
+			 * destructor and make plain sk_free() */
+			newsk->destruct = NULL;
+			sk_free(newsk);
+			return NULL;
+		}
 
 		/* Now setup tcp_opt */
 		newtp = &(newsk->tp_pinfo.af_tcp);
Index: net/ipv4/tcp_output.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/tcp_output.c,v
retrieving revision 1.1.1.27
retrieving revision 1.1.1.27.2.1
diff -u -r1.1.1.27 -r1.1.1.27.2.1
--- a/net/ipv4/tcp_output.c	28 Nov 2003 18:26:21 -0000	1.1.1.27
+++ b/net/ipv4/tcp_output.c	16 Apr 2004 13:16:23 -0000	1.1.1.27.2.1
@@ -89,8 +89,8 @@
 	struct dst_entry *dst = __sk_dst_get(sk);
 	int mss = tp->advmss;
 
-	if (dst && dst->advmss < mss) {
-		mss = dst->advmss;
+	if (dst && dst_metric(dst, RTAX_ADVMSS) < mss) {
+		mss = dst_metric(dst, RTAX_ADVMSS);
 		tp->advmss = mss;
 	}
 
@@ -99,10 +99,10 @@
 
 /* RFC2861. Reset CWND after idle period longer RTO to "restart window".
  * This is the first part of cwnd validation mechanism. */
-static void tcp_cwnd_restart(struct tcp_opt *tp)
+static void tcp_cwnd_restart(struct tcp_opt *tp, struct dst_entry *dst)
 {
 	s32 delta = tcp_time_stamp - tp->lsndtime;
-	u32 restart_cwnd = tcp_init_cwnd(tp);
+	u32 restart_cwnd = tcp_init_cwnd(tp, dst);
 	u32 cwnd = tp->snd_cwnd;
 
 	tp->snd_ssthresh = tcp_current_ssthresh(tp);
@@ -115,12 +115,12 @@
 	tp->snd_cwnd_used = 0;
 }
 
-static __inline__ void tcp_event_data_sent(struct tcp_opt *tp, struct sk_buff *skb)
+static __inline__ void tcp_event_data_sent(struct tcp_opt *tp, struct sk_buff *skb, struct sock *sk)
 {
 	u32 now = tcp_time_stamp;
 
 	if (!tp->packets_out && (s32)(now - tp->lsndtime) > tp->rto)
-		tcp_cwnd_restart(tp);
+		tcp_cwnd_restart(tp, __sk_dst_get(sk));
 
 	tp->lsndtime = now;
 
@@ -271,7 +271,7 @@
 			tcp_event_ack_sent(sk);
 
 		if (skb->len != tcp_header_size)
-			tcp_event_data_sent(tp, skb);
+			tcp_event_data_sent(tp, skb, sk);
 
 		TCP_INC_STATS(TcpOutSegs);
 
@@ -502,13 +502,16 @@
 
 int tcp_sync_mss(struct sock *sk, u32 pmtu)
 {
-	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+	struct tcp_opt *tp = tcp_sk(sk);
+	struct dst_entry *dst = __sk_dst_get(sk);
 	int mss_now;
 
+	if (dst && dst->ops->get_mss)
+		pmtu = dst->ops->get_mss(dst, pmtu);
+
 	/* Calculate base mss without TCP options:
 	   It is MMS_S - sizeof(tcphdr) of rfc1122
 	 */
-
 	mss_now = pmtu - tp->af_specific->net_header_len - sizeof(struct tcphdr);
 
 	/* Clamp it (mss_clamp does not include tcp options) */
@@ -516,7 +519,7 @@
 		mss_now = tp->mss_clamp;
 
 	/* Now subtract optional transport overhead */
-	mss_now -= tp->ext_header_len;
+	mss_now -= tp->ext_header_len + tp->ext2_header_len;
 
 	/* Then reserve room for full set of TCP options and 8 bytes of data */
 	if (mss_now < 48)
@@ -1131,10 +1134,10 @@
 	if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
 		__u8 rcv_wscale; 
 		/* Set this up on the first call only */
-		req->window_clamp = tp->window_clamp ? : dst->window;
+		req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
 		/* tcp_full_space because it is guaranteed to be the first packet */
 		tcp_select_initial_window(tcp_full_space(sk), 
-			dst->advmss - (req->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
+			dst_metric(dst, RTAX_ADVMSS) - (req->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
 			&req->rcv_wnd,
 			&req->window_clamp,
 			req->wscale_ok,
@@ -1146,7 +1149,7 @@
 	th->window = htons(req->rcv_wnd);
 
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
-	tcp_syn_build_options((__u32 *)(th + 1), dst->advmss, req->tstamp_ok,
+	tcp_syn_build_options((__u32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), req->tstamp_ok,
 			      req->sack_ok, req->wscale_ok, req->rcv_wscale,
 			      TCP_SKB_CB(skb)->when,
 			      req->ts_recent);
@@ -1175,11 +1178,11 @@
 	if (tp->user_mss)
 		tp->mss_clamp = tp->user_mss;
 	tp->max_window = 0;
-	tcp_sync_mss(sk, dst->pmtu);
+	tcp_sync_mss(sk, dst_pmtu(dst));
 
 	if (!tp->window_clamp)
-		tp->window_clamp = dst->window;
-	tp->advmss = dst->advmss;
+		tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
+	tp->advmss = dst_metric(dst, RTAX_ADVMSS);
 	tcp_initialize_rcv_mss(sk);
 
 	tcp_select_initial_window(tcp_full_space(sk),
Index: net/ipv4/udp.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/udp.c,v
retrieving revision 1.1.1.25
retrieving revision 1.1.1.25.2.1
diff -u -r1.1.1.25 -r1.1.1.25.2.1
--- a/net/ipv4/udp.c	14 Apr 2004 13:05:41 -0000	1.1.1.25
+++ b/net/ipv4/udp.c	16 Apr 2004 13:16:23 -0000	1.1.1.25.2.1
@@ -11,6 +11,7 @@
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
  *		Alan Cox, <Alan.Cox@linux.org>
+ *		Hirokazu Takahashi, <taka@valinux.co.jp>
  *
  * Fixes:
  *		Alan Cox	:	verify_area() calls
@@ -64,6 +65,10 @@
  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
  *	Alexey Kuznetsov:		allow both IPv4 and IPv6 sockets to bind
  *					a single port at the same time.
+ *	Hirokazu Takahashi	:	HW checksumming for outgoing UDP
+ *					datagrams.
+ *	Hirokazu Takahashi	:	sendfile() on UDP works now.
+ *	Derek Atkins <derek@ihtfp.com>: Add Encapsulation Support
  *
  *
  *		This program is free software; you can redistribute it and/or
@@ -97,6 +102,7 @@
 #include <net/route.h>
 #include <net/inet_common.h>
 #include <net/checksum.h>
+#include <net/xfrm.h>
 
 /*
  *	Snmp MIB for the UDP layer
@@ -371,80 +377,119 @@
 	sock_put(sk);
 }
 
-
-static unsigned short udp_check(struct udphdr *uh, int len, unsigned long saddr, unsigned long daddr, unsigned long base)
-{
-	return(csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base));
-}
-
-struct udpfakehdr 
-{
-	struct udphdr uh;
-	u32 saddr;
-	u32 daddr;
-	struct iovec *iov;
-	u32 wcheck;
-};
-
 /*
- *	Copy and checksum a UDP packet from user space into a buffer.
+ * Throw away all pending data and cancel the corking. Socket is locked.
  */
- 
-static int udp_getfrag(const void *p, char * to, unsigned int offset, unsigned int fraglen) 
+static void udp_flush_pending_frames(struct sock *sk)
 {
-	struct udpfakehdr *ufh = (struct udpfakehdr *)p;
-	if (offset==0) {
-		if (csum_partial_copy_fromiovecend(to+sizeof(struct udphdr), ufh->iov, offset,
-						   fraglen-sizeof(struct udphdr), &ufh->wcheck))
-			return -EFAULT;
- 		ufh->wcheck = csum_partial((char *)ufh, sizeof(struct udphdr),
-					   ufh->wcheck);
-		ufh->uh.check = csum_tcpudp_magic(ufh->saddr, ufh->daddr, 
-					  ntohs(ufh->uh.len),
-					  IPPROTO_UDP, ufh->wcheck);
-		if (ufh->uh.check == 0)
-			ufh->uh.check = -1;
-		memcpy(to, ufh, sizeof(struct udphdr));
-		return 0;
+	struct udp_opt *up = udp_sk(sk);
+
+	if (up->pending) {
+		up->len = 0;
+		up->pending = 0;
+		ip_flush_pending_frames(sk);
 	}
-	if (csum_partial_copy_fromiovecend(to, ufh->iov, offset-sizeof(struct udphdr),
-					   fraglen, &ufh->wcheck))
-		return -EFAULT;
-	return 0;
 }
 
 /*
- *	Copy a UDP packet from user space into a buffer without checksumming.
+ * Push out all pending data as one UDP datagram. Socket is locked.
  */
- 
-static int udp_getfrag_nosum(const void *p, char * to, unsigned int offset, unsigned int fraglen) 
+static int udp_push_pending_frames(struct sock *sk, struct udp_opt *up)
 {
-	struct udpfakehdr *ufh = (struct udpfakehdr *)p;
+	struct sk_buff *skb;
+	struct udphdr *uh;
+	int err = 0;
+
+	/* Grab the skbuff where UDP header space exists. */
+	if ((skb = skb_peek(&sk->write_queue)) == NULL)
+		goto out;
+
+	/*
+	 * Create a UDP header
+	 */
+	uh = skb->h.uh;
+	uh->source = up->sport;
+	uh->dest = up->dport;
+	uh->len = htons(up->len);
+	uh->check = 0;
 
-	if (offset==0) {
-		memcpy(to, ufh, sizeof(struct udphdr));
-		return memcpy_fromiovecend(to+sizeof(struct udphdr), ufh->iov, offset,
-					   fraglen-sizeof(struct udphdr));
+	if (sk->no_check == UDP_CSUM_NOXMIT) {
+		skb->ip_summed = CHECKSUM_NONE;
+		goto send;
 	}
-	return memcpy_fromiovecend(to, ufh->iov, offset-sizeof(struct udphdr),
-				   fraglen);
+
+	if (skb_queue_len(&sk->write_queue) == 1) {
+		/*
+		 * Only one fragment on the socket.
+		 */
+		if (skb->ip_summed == CHECKSUM_HW) {
+			skb->csum = offsetof(struct udphdr, check);
+			uh->check = ~csum_tcpudp_magic(up->saddr, up->daddr,
+					up->len, IPPROTO_UDP, 0);
+		} else {
+			skb->csum = csum_partial((char *)uh,
+					sizeof(struct udphdr), skb->csum);
+			uh->check = csum_tcpudp_magic(up->saddr, up->daddr,
+					up->len, IPPROTO_UDP, skb->csum);
+			if (uh->check == 0)
+				uh->check = -1;
+		}
+	} else {
+		unsigned int csum = 0;
+		/*
+		 * HW-checksum won't work as there are two or more 
+		 * fragments on the socket so that all csums of sk_buffs
+		 * should be together.
+		 */
+		if (skb->ip_summed == CHECKSUM_HW) {
+			int offset = (unsigned char *)uh - skb->data;
+			skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
+
+			skb->ip_summed = CHECKSUM_NONE;
+		} else {
+			skb->csum = csum_partial((char *)uh,
+					sizeof(struct udphdr), skb->csum);
+		}
+
+		skb_queue_walk(&sk->write_queue, skb) {
+			csum = csum_add(csum, skb->csum);
+		}
+		uh->check = csum_tcpudp_magic(up->saddr, up->daddr,
+				up->len, IPPROTO_UDP, csum);
+		if (uh->check == 0)
+			uh->check = -1;
+	}
+send:
+	err = ip_push_pending_frames(sk);
+out:
+	up->len = 0;
+	up->pending = 0;
+	return err;
+}
+
+
+static unsigned short udp_check(struct udphdr *uh, int len, unsigned long saddr, unsigned long daddr, unsigned long base)
+{
+	return(csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base));
 }
 
 int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len)
 {
-	int ulen = len + sizeof(struct udphdr);
+	struct udp_opt *up = udp_sk(sk);
+	int ulen = len;
 	struct ipcm_cookie ipc;
-	struct udpfakehdr ufh;
 	struct rtable *rt = NULL;
 	int free = 0;
 	int connected = 0;
-	u32 daddr;
+	u32 daddr, faddr, saddr;
+	u16 dport;
 	u8  tos;
 	int err;
+	int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
 
 	/* This check is ONLY to check for arithmetic overflow
 	   on integer(!) len. Not more! Real check will be made
-	   in ip_build_xmit --ANK
+	   in ip_append_* --ANK
 
 	   BTW socket.c -> af_*.c -> ... make multiple
 	   invalid conversions size_t -> int. We MUST repair it f.e.
@@ -463,10 +508,23 @@
 	if (msg->msg_flags&MSG_OOB)	/* Mirror BSD error message compatibility */
 		return -EOPNOTSUPP;
 
+	ipc.opt = NULL;
+
+	if (up->pending) {
+		/*
+		 * There are pending frames.
+	 	 * The socket lock must be held while it's corked.
+		 */
+		lock_sock(sk);
+		if (likely(up->pending))
+ 			goto do_append_data;
+		release_sock(sk);
+	}
+	ulen += sizeof(struct udphdr);
+
 	/*
 	 *	Get and verify the address. 
 	 */
-	 
 	if (msg->msg_name) {
 		struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name;
 		if (msg->msg_namelen < sizeof(*usin))
@@ -476,24 +534,22 @@
 				return -EINVAL;
 		}
 
-		ufh.daddr = usin->sin_addr.s_addr;
-		ufh.uh.dest = usin->sin_port;
-		if (ufh.uh.dest == 0)
+		daddr = usin->sin_addr.s_addr;
+		dport = usin->sin_port;
+		if (dport == 0)
 			return -EINVAL;
 	} else {
 		if (sk->state != TCP_ESTABLISHED)
 			return -EDESTADDRREQ;
-		ufh.daddr = sk->daddr;
-		ufh.uh.dest = sk->dport;
+		daddr = sk->daddr;
+		dport = sk->dport;
 		/* Open fast path for connected socket.
 		   Route will not be used, if at least one option is set.
 		 */
 		connected = 1;
   	}
 	ipc.addr = sk->saddr;
-	ufh.uh.source = sk->sport;
 
-	ipc.opt = NULL;
 	ipc.oif = sk->bound_dev_if;
 	if (msg->msg_controllen) {
 		err = ip_cmsg_send(msg, &ipc);
@@ -506,13 +562,13 @@
 	if (!ipc.opt)
 		ipc.opt = sk->protinfo.af_inet.opt;
 
-	ufh.saddr = ipc.addr;
-	ipc.addr = daddr = ufh.daddr;
+	saddr = ipc.addr;
+	ipc.addr = faddr = daddr;
 
 	if (ipc.opt && ipc.opt->srr) {
 		if (!daddr)
 			return -EINVAL;
-		daddr = ipc.opt->faddr;
+		faddr = ipc.opt->faddr;
 		connected = 0;
 	}
 	tos = RT_TOS(sk->protinfo.af_inet.tos);
@@ -525,8 +581,8 @@
 	if (MULTICAST(daddr)) {
 		if (!ipc.oif)
 			ipc.oif = sk->protinfo.af_inet.mc_index;
-		if (!ufh.saddr)
-			ufh.saddr = sk->protinfo.af_inet.mc_addr;
+		if (!saddr)
+			saddr = sk->protinfo.af_inet.mc_addr;
 		connected = 0;
 	}
 
@@ -534,7 +590,16 @@
 		rt = (struct rtable*)sk_dst_check(sk, 0);
 
 	if (rt == NULL) {
-		err = ip_route_output(&rt, daddr, ufh.saddr, tos, ipc.oif);
+		struct flowi fl = { .oif = ipc.oif,
+				    .nl_u = { .ip4_u =
+					      { .daddr = faddr,
+						.saddr = saddr,
+						.tos = tos } },
+				    .proto = IPPROTO_UDP,
+				    .uli_u = { .ports =
+					       { .sport = sk->sport,
+						 .dport = dport } } };
+		err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT));
 		if (err)
 			goto out;
 
@@ -549,23 +614,39 @@
 		goto do_confirm;
 back_from_confirm:
 
-	ufh.saddr = rt->rt_src;
+	saddr = rt->rt_src;
 	if (!ipc.addr)
-		ufh.daddr = ipc.addr = rt->rt_dst;
-	ufh.uh.len = htons(ulen);
-	ufh.uh.check = 0;
-	ufh.iov = msg->msg_iov;
-	ufh.wcheck = 0;
-
-	/* RFC1122: OK.  Provides the checksumming facility (MUST) as per */
-	/* 4.1.3.4. It's configurable by the application via setsockopt() */
-	/* (MAY) and it defaults to on (MUST). */
-
-	err = ip_build_xmit(sk,
-			    (sk->no_check == UDP_CSUM_NOXMIT ?
-			     udp_getfrag_nosum :
-			     udp_getfrag),
-			    &ufh, ulen, &ipc, rt, msg->msg_flags);
+		daddr = ipc.addr = rt->rt_dst;
+
+	lock_sock(sk);
+	if (unlikely(up->pending)) {
+		/* The socket is already corked while preparing it. */
+		/* ... which is an evident application bug. --ANK */
+		release_sock(sk);
+
+		NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp cork app bug 2\n"));
+		err = -EINVAL;
+		goto out;
+	}
+	/*
+	 *	Now cork the socket to pend data.
+	 */
+	up->daddr = daddr;
+	up->dport = dport;
+	up->saddr = saddr;
+	up->sport = sk->sport;
+	up->pending = 1;
+
+do_append_data:
+	up->len += ulen;
+	err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen, 
+			sizeof(struct udphdr), &ipc, rt, 
+			corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
+	if (err)
+		udp_flush_pending_frames(sk);
+	else if (!corkreq)
+		err = udp_push_pending_frames(sk, up);
+	release_sock(sk);
 
 out:
 	ip_rt_put(rt);
@@ -585,6 +666,52 @@
 	goto out;
 }
 
+int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags)
+{
+	struct udp_opt *up = udp_sk(sk);
+	int ret;
+	/* Append one page to the pending datagram, corking the socket first if needed. */
+	if (!up->pending) {
+		struct msghdr msg = {	.msg_flags = flags|MSG_MORE };
+
+		/* Call udp_sendmsg to specify destination address which
+		 * sendpage interface can't pass.
+		 * This will succeed only when the socket is connected.
+		 */
+		ret = udp_sendmsg(sk, &msg, 0);
+		if (ret < 0)
+			return ret;
+	}
+
+	lock_sock(sk);
+	/* Nothing pending even now that we hold the lock: report -EINVAL. */
+	if (unlikely(!up->pending)) {
+		release_sock(sk);
+
+		NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp cork app bug 3\n"));
+		return -EINVAL;
+	}
+
+	ret = ip_append_page(sk, page, offset, size, flags);
+	if (ret == -EOPNOTSUPP) {	/* hand the request to sock_no_sendpage() instead */
+		release_sock(sk);
+		return sock_no_sendpage(sk->socket, page, offset, size, flags);
+	}
+	if (ret < 0) {			/* any other failure throws away the pending data */
+		udp_flush_pending_frames(sk);
+		goto out;
+	}
+	/* Page queued: account for it, and send now unless corking was requested. */
+	up->len += size;
+	if (!(up->corkflag || (flags&MSG_MORE)))
+		ret = udp_push_pending_frames(sk, up);
+	if (!ret)
+		ret = size;		/* a zero result is reported as the byte count */
+out:
+	release_sock(sk);
+	return ret;
+}
+
 /*
  *	IOCTL requests applicable to the UDP protocol
  */
@@ -754,7 +881,9 @@
 			saddr = sk->protinfo.af_inet.mc_addr;
 	}
 	err = ip_route_connect(&rt, usin->sin_addr.s_addr, saddr,
-			       RT_CONN_FLAGS(sk), oif);
+			       RT_CONN_FLAGS(sk), oif,
+			       IPPROTO_UDP,
+			       sk->sport, usin->sin_port, sk);
 	if (err)
 		return err;
 	if ((rt->rt_flags&RTCF_BROADCAST) && !sk->broadcast) {
@@ -805,11 +934,129 @@
 	inet_sock_release(sk);
 }
 
+/* Inspect a received datagram against the socket's encap_type.  Returns:
+ * 	1  if the UDP system should process it
+ *	0  if we should drop this packet
+ * 	-1 if it should get processed by xfrm4_rcv_encap
+ */
+static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
+{
+#ifndef CONFIG_XFRM
+	return 1; 
+#else
+	struct udp_opt *up = udp_sk(sk);
+  	struct udphdr *uh = skb->h.uh;
+	struct iphdr *iph;
+	int iphlen, len;
+  
+	__u8 *udpdata = (__u8 *)uh + sizeof(struct udphdr);
+	__u32 *udpdata32 = (__u32 *)udpdata;
+	__u16 encap_type = up->encap_type;
+
+	/* if we're overly short, let UDP handle it */
+	if (udpdata > skb->tail)
+		return 1;
+
+	/* if this is not an encapsulation socket, then just return now */
+	if (!encap_type)
+		return 1;
+
+	len = skb->tail - udpdata;	/* bytes between the UDP header and skb->tail */
+
+	switch (encap_type) {
+	case UDP_ENCAP_ESPINUDP:
+		/* Check if this is a keepalive packet.  If so, eat it. */
+		if (len == 1 && udpdata[0] == 0xff) {
+			return 0;
+		} else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0 ) {
+			/* ESP Packet without Non-ESP header */
+			len = sizeof(struct udphdr);	/* from here on: bytes to strip */
+		} else
+			/* Must be an IKE packet.. pass it through */
+			return 1;
+
+		/* At this point we are sure that this is an ESPinUDP packet,
+		 * so we need to remove 'len' bytes from the packet (the UDP
+		 * header and optional ESP marker bytes) and then modify the
+		 * protocol to ESP, and then call into the transform receiver.
+		 */
+
+		/* Now we can update and verify the packet length... */
+		iph = skb->nh.iph;
+		iphlen = iph->ihl << 2;
+		iph->tot_len = htons(ntohs(iph->tot_len) - len);	/* written before the size check below */
+		if (skb->len < iphlen + len) {
+			/* packet is too small!?! */
+			return 0;
+		}
+
+		/* pull the data buffer up to the ESP header and set the
+		 * transport header to point to ESP.  Keep UDP on the stack
+		 * for later.
+		 */
+		skb->h.raw = skb_pull(skb, len);
+
+		/* modify the protocol (it's ESP!) */
+		iph->protocol = IPPROTO_ESP;
+
+		/* and let the caller know to send this into the ESP processor... */
+		return -1;
+
+	default:
+		if (net_ratelimit())
+			printk(KERN_INFO "udp_encap_rcv(): Unhandled UDP encap type: %u\n",
+			       encap_type);
+		return 1;	/* unknown type: treat as an ordinary UDP datagram */
+	}
+#endif
+}
+
+/* returns:
+ *  -1: error
+ *   0: success
+ *  >0: "udp encap" protocol resubmission
+ *
+ * Note that in the success and error cases, the skb is assumed to
+ * have either been requeued or freed.
+ */
 static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 {
+	struct udp_opt *up = udp_sk(sk);
+
 	/*
 	 *	Charge it to the socket, dropping if the queue is full.
 	 */
+	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
+		kfree_skb(skb);
+		return -1;
+	}
+
+	if (up->encap_type) {
+		/*
+		 * This is an encapsulation socket, so let's see if this is
+		 * an encapsulated packet.
+		 * If it's a keepalive packet, then just eat it.
+		 * If it's an encapsulated packet, then pass it to the
+		 * IPsec xfrm input and return the response
+		 * appropriately.  Otherwise, just fall through and
+		 * pass this up the UDP socket.
+		 */
+		int ret;
+
+		ret = udp_encap_rcv(sk, skb);
+		if (ret == 0) {
+			/* Eat the packet .. */
+			kfree_skb(skb);
+			return 0;
+		}
+		if (ret < 0) {
+			/* process the ESP packet */
+			ret = xfrm4_rcv_encap(skb, up->encap_type);
+			UDP_INC_STATS_BH(UdpInDatagrams);
+			return -ret;
+		}
+		/* FALLTHROUGH -- it's a UDP Packet */
+	}
 
 #if defined(CONFIG_FILTER)
 	if (sk->filter && skb->ip_summed != CHECKSUM_UNNECESSARY) {
@@ -862,8 +1109,13 @@
 			if(sknext)
 				skb1 = skb_clone(skb, GFP_ATOMIC);
 
-			if(skb1)
-				udp_queue_rcv_skb(sk, skb1);
+			if(skb1) {
+				int ret = udp_queue_rcv_skb(sk, skb1);
+				if (ret > 0)
+					/* we should probably re-process instead
+					 * of dropping packets here. */
+					kfree_skb(skb1);
+			}
 			sk = sknext;
 		} while(sknext);
 	} else
@@ -938,11 +1190,20 @@
 	sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest, skb->dev->ifindex);
 
 	if (sk != NULL) {
-		udp_queue_rcv_skb(sk, skb);
+		int ret = udp_queue_rcv_skb(sk, skb);
 		sock_put(sk);
+
+		/* a return value > 0 means to resubmit the input, but
+		 * it wants the return to be -protocol, or 0
+		 */
+		if (ret > 0)
+			return -ret;
 		return 0;
 	}
 
+	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
+		goto drop;
+
 	/* No socket. Drop packet silently, if checksum is wrong */
 	if (udp_checksum_complete(skb))
 		goto csum_error;
@@ -983,6 +1244,7 @@
 			NIPQUAD(daddr),
 			ntohs(uh->dest),
 			ulen));
+drop:
 	UDP_INC_STATS_BH(UdpInErrors);
 	kfree_skb(skb);
 	return(0);
@@ -1047,16 +1309,107 @@
 	return len;
 }
 
+static int udp_destroy_sock(struct sock *sk)
+{
+	lock_sock(sk);
+	udp_flush_pending_frames(sk);
+	release_sock(sk);
+	return 0;
+}
+
+/*
+ *	Socket option code for UDP
+ */
+static int udp_setsockopt(struct sock *sk, int level, int optname, 
+			  char *optval, int optlen)
+{
+	struct udp_opt *up = udp_sk(sk);
+	int val;
+	int err = 0;
+
+	if (level != SOL_UDP)
+		return ip_setsockopt(sk, level, optname, optval, optlen);
+
+	if(optlen<sizeof(int))
+		return -EINVAL;
+
+	if (get_user(val, (int *)optval))
+		return -EFAULT;
+
+	switch(optname) {
+	case UDP_CORK:
+		if (val != 0) {
+			up->corkflag = 1;
+		} else {
+			up->corkflag = 0;
+			lock_sock(sk);
+			udp_push_pending_frames(sk, up);
+			release_sock(sk);
+		}
+		break;
+		
+	case UDP_ENCAP:
+		up->encap_type = val;
+		break;
+
+	default:
+		err = -ENOPROTOOPT;
+		break;
+	};
+
+	return err;
+}
+
+static int udp_getsockopt(struct sock *sk, int level, int optname,
+			  char *optval, int *optlen)
+{
+	struct udp_opt *up = udp_sk(sk);
+	int val, len;
+
+	if (level != SOL_UDP)
+		return ip_getsockopt(sk, level, optname, optval, optlen);
+
+	if(get_user(len,optlen))
+		return -EFAULT;
+
+	/* Reject a negative length before min_t() casts it to unsigned. */
+	if(len < 0)
+		return -EINVAL;
+	len = min_t(unsigned int, len, sizeof(int));
+
+	switch(optname) {
+	case UDP_CORK:
+		val = up->corkflag;
+		break;
+
+	case UDP_ENCAP:
+		val = up->encap_type;
+		break;
+
+	default:
+		return -ENOPROTOOPT;
+	};
+
+	if(put_user(len, optlen))
+		return -EFAULT;
+	if(copy_to_user(optval, &val,len))
+		return -EFAULT;
+	return 0;
+}
+
+
 struct proto udp_prot = {
  	name:		"UDP",
 	close:		udp_close,
 	connect:	udp_connect,
 	disconnect:	udp_disconnect,
 	ioctl:		udp_ioctl,
-	setsockopt:	ip_setsockopt,
-	getsockopt:	ip_getsockopt,
+	destroy:	udp_destroy_sock,
+	setsockopt:	udp_setsockopt,
+	getsockopt:	udp_getsockopt,
 	sendmsg:	udp_sendmsg,
 	recvmsg:	udp_recvmsg,
+	sendpage:	udp_sendpage,
 	backlog_rcv:	udp_queue_rcv_skb,
 	hash:		udp_v4_hash,
 	unhash:		udp_v4_unhash,
Index: net/ipv4/xfrm4_input.c
===================================================================
RCS file: net/ipv4/xfrm4_input.c
diff -N net/ipv4/xfrm4_input.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ b/net/ipv4/xfrm4_input.c	16 Apr 2004 13:16:23 -0000	1.6.14.1
@@ -0,0 +1,158 @@
+/*
+ * xfrm4_input.c
+ *
+ * Changes:
+ *	YOSHIFUJI Hideaki @USAGI
+ *		Split up af-specific portion
+ *	Derek Atkins <derek@ihtfp.com>
+ *		Add Encapsulation support
+ * 	
+ */
+
+#include <linux/string.h>
+#include <net/inet_ecn.h>
+#include <net/ip.h>
+#include <net/xfrm.h>
+
+int xfrm4_rcv(struct sk_buff *skb)
+{
+	return xfrm4_rcv_encap(skb, 0);
+}
+
+static inline void ipip_ecn_decapsulate(struct sk_buff *skb)
+{
+	struct iphdr *outer_iph = skb->nh.iph;
+	struct iphdr *inner_iph = skb->h.ipiph;
+
+	if (INET_ECN_is_ce(outer_iph->tos) &&
+	    INET_ECN_is_not_ce(inner_iph->tos))
+		IP_ECN_set_ce(inner_iph);
+}
+
+static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq)
+{
+	switch (nexthdr) {
+	case IPPROTO_IPIP:
+		if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+			return -EINVAL;
+		*spi = skb->nh.iph->saddr;
+		*seq = 0;
+		return 0;
+	}
+
+	return xfrm_parse_spi(skb, nexthdr, spi, seq);
+}
+
+int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
+{
+	int err;
+	u32 spi, seq;
+	struct sec_decap_state xfrm_vec[XFRM_MAX_DEPTH];
+	struct xfrm_state *x;
+	int xfrm_nr = 0;
+	int decaps = 0;
+	int hhlen;
+
+	hhlen = skb->nh.raw - skb->mac.raw;
+
+	if ((err = xfrm4_parse_spi(skb, skb->nh.iph->protocol, &spi, &seq)) != 0)
+		goto drop;
+
+	do {
+		struct iphdr *iph = skb->nh.iph;
+
+		if (xfrm_nr == XFRM_MAX_DEPTH)
+			goto drop_put;
+
+		x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi, iph->protocol, AF_INET);
+		if (x == NULL)
+			goto drop_put;
+
+		spin_lock(&x->lock);
+		if (unlikely(x->km.state != XFRM_STATE_VALID))
+			goto drop_unlock;
+
+		if (x->props.replay_window && xfrm_replay_check(x, seq))
+			goto drop_unlock;
+
+		if (xfrm_state_check_expire(x))
+			goto drop_unlock;
+
+		xfrm_vec[xfrm_nr].decap.decap_type = encap_type;
+		if (x->type->input(x, &(xfrm_vec[xfrm_nr].decap), skb))
+			goto drop_unlock;
+
+		/* only the first xfrm gets the encap type */
+		encap_type = 0;
+
+		if (x->props.replay_window)
+			xfrm_replay_advance(x, seq);
+
+		x->curlft.bytes += skb->len;
+		x->curlft.packets++;
+
+		spin_unlock(&x->lock);
+
+		xfrm_vec[xfrm_nr++].xvec = x;
+
+		if (x->props.mode) {
+			if (skb->nh.iph->protocol != IPPROTO_IPIP)
+				goto drop_put;
+			decaps = 1;
+			break;
+		}
+
+		if ((err = xfrm_parse_spi(skb, skb->nh.iph->protocol, &spi, &seq)) < 0)
+			goto drop_put;
+	} while (!err);
+
+	/* Allocate new secpath or COW existing one. */
+
+	if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
+		struct sec_path *sp;
+		sp = secpath_dup(skb->sp);
+		if (!sp)
+			goto drop_put;
+		if (skb->sp)
+			secpath_put(skb->sp);
+		skb->sp = sp;
+	}
+	if (xfrm_nr + skb->sp->len > XFRM_MAX_DEPTH)
+		goto drop_put;
+
+	memcpy(skb->sp->x+skb->sp->len, xfrm_vec, xfrm_nr*sizeof(struct sec_decap_state));
+	skb->sp->len += xfrm_nr;
+
+	if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+		goto drop;
+
+	if (decaps) {
+		skb->mac.raw = memmove(skb->data - hhlen, skb->mac.raw, hhlen);
+		if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+			goto drop;
+		if (!(x->props.flags & XFRM_STATE_NOECN))
+			ipip_ecn_decapsulate(skb);
+		skb->nh.raw = skb->data;
+		memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
+		if (!(skb->dev->flags&IFF_LOOPBACK)) {
+			dst_release(skb->dst);
+			skb->dst = NULL;
+		}
+		netif_rx(skb);
+		return 0;
+	} else {
+		skb->mac.raw = memmove(skb->nh.raw - hhlen, skb->mac.raw,
+				       hhlen);
+		return -skb->nh.iph->protocol;
+	}
+
+drop_unlock:
+	spin_unlock(&x->lock);
+	xfrm_state_put(x);
+drop_put:
+	while (--xfrm_nr >= 0)
+		xfrm_state_put(xfrm_vec[xfrm_nr].xvec);
+drop:
+	kfree_skb(skb);
+	return 0;
+}
Index: net/ipv4/xfrm4_policy.c
===================================================================
RCS file: net/ipv4/xfrm4_policy.c
diff -N net/ipv4/xfrm4_policy.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ b/net/ipv4/xfrm4_policy.c	16 Apr 2004 13:16:23 -0000	1.4.18.1
@@ -0,0 +1,278 @@
+/* 
+ * xfrm4_policy.c
+ *
+ * Changes:
+ *	Kazunori MIYAZAWA @USAGI
+ * 	YOSHIFUJI Hideaki @USAGI
+ *		Split up af-specific portion
+ * 	
+ */
+
+#include <linux/config.h>
+#include <net/xfrm.h>
+#include <net/ip.h>
+
+extern struct dst_ops xfrm4_dst_ops;
+extern struct xfrm_policy_afinfo xfrm4_policy_afinfo;
+
+static struct xfrm_type_map xfrm4_type_map = { .lock = RW_LOCK_UNLOCKED };
+
+static int xfrm4_dst_lookup(struct xfrm_dst **dst, struct flowi *fl)
+{
+	return __ip_route_output_key((struct rtable**)dst, fl);
+}
+
+/* Check that the bundle accepts the flow and its components are
+ * still valid.
+ */
+
+static int __xfrm4_bundle_ok(struct xfrm_dst *xdst, struct flowi *fl)
+{
+	do {
+		if (xdst->u.dst.ops != &xfrm4_dst_ops)
+			return 1;
+
+		if (!xfrm_selector_match(&xdst->u.dst.xfrm->sel, fl, AF_INET))
+			return 0;
+		if (xdst->u.dst.xfrm->km.state != XFRM_STATE_VALID ||
+		    xdst->u.dst.path->obsolete > 0)
+			return 0;
+		xdst = (struct xfrm_dst*)xdst->u.dst.child;
+	} while (xdst);
+	return 0;
+}
+
+static struct dst_entry *
+__xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
+{
+	struct dst_entry *dst;
+
+	read_lock_bh(&policy->lock);
+	for (dst = policy->bundles; dst; dst = dst->next) {
+		struct xfrm_dst *xdst = (struct xfrm_dst*)dst;
+		if (xdst->u.rt.fl.oif == fl->oif &&	/*XXX*/
+		    xdst->u.rt.fl.fl4_dst == fl->fl4_dst &&
+	    	    xdst->u.rt.fl.fl4_src == fl->fl4_src &&
+		    __xfrm4_bundle_ok(xdst, fl)) {
+			dst_clone(dst);
+			break;
+		}
+	}
+	read_unlock_bh(&policy->lock);
+	return dst;
+}
+
+/* Allocate chain of dst_entry's, attach known xfrm's, calculate
+ * all the metrics... Shortly, bundle a bundle.
+ */
+
+static int
+__xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
+		      struct flowi *fl, struct dst_entry **dst_p)
+{
+	struct dst_entry *dst, *dst_prev;
+	struct rtable *rt0 = (struct rtable*)(*dst_p);
+	struct rtable *rt = rt0;
+	u32 remote = fl->fl4_dst;
+	u32 local  = fl->fl4_src;
+	int i;
+	int err;
+	int header_len = 0;
+	int trailer_len = 0;
+
+	dst = dst_prev = NULL;
+
+	for (i = 0; i < nx; i++) {
+		struct dst_entry *dst1 = dst_alloc(&xfrm4_dst_ops);
+
+		if (unlikely(dst1 == NULL)) {
+			err = -ENOBUFS;
+			goto error;
+		}
+
+		dst1->xfrm = xfrm[i];
+		if (!dst)
+			dst = dst1;
+		else {
+			dst_prev->child = dst1;
+			dst1->flags |= DST_NOHASH;
+			dst_clone(dst1);
+		}
+		dst_prev = dst1;
+		if (xfrm[i]->props.mode) {
+			remote = xfrm[i]->id.daddr.a4;
+			local  = xfrm[i]->props.saddr.a4;
+		}
+		header_len += xfrm[i]->props.header_len;
+		trailer_len += xfrm[i]->props.trailer_len;
+	}
+
+	if (remote != fl->fl4_dst) {
+		struct flowi fl_tunnel = { .nl_u = { .ip4_u =
+						     { .daddr = remote,
+						       .saddr = local }
+					           }
+				         };
+		err = xfrm_dst_lookup((struct xfrm_dst**)&rt, &fl_tunnel, AF_INET);
+		if (err)
+			goto error;
+	} else {
+		dst_hold(&rt->u.dst);
+	}
+	dst_prev->child = &rt->u.dst;
+	for (dst_prev = dst; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) {
+		struct xfrm_dst *x = (struct xfrm_dst*)dst_prev;
+		x->u.rt.fl = *fl;
+
+		dst_prev->dev = rt->u.dst.dev;
+		if (rt->u.dst.dev)
+			dev_hold(rt->u.dst.dev);
+		dst_prev->obsolete	= -1;
+		dst_prev->flags	       |= DST_HOST;
+		dst_prev->lastuse	= jiffies;
+		dst_prev->header_len	= header_len;
+		dst_prev->trailer_len	= trailer_len;
+		memcpy(&dst_prev->metrics, &rt->u.dst.metrics, sizeof(dst_prev->metrics));
+		dst_prev->path		= &rt->u.dst;
+
+		/* Copy neighbour for reachability confirmation */
+		dst_prev->neighbour	= neigh_clone(rt->u.dst.neighbour);
+		dst_prev->input		= rt->u.dst.input;
+		dst_prev->output	= dst_prev->xfrm->type->output;
+		if (rt->peer)
+			atomic_inc(&rt->peer->refcnt);
+		x->u.rt.peer = rt->peer;
+		/* Sheit... I remember I did this right. Apparently,
+		 * it was magically lost, so this code needs audit */
+		x->u.rt.rt_flags = rt0->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL);
+		x->u.rt.rt_type = rt->rt_type;
+		x->u.rt.rt_src = rt0->rt_src;
+		x->u.rt.rt_dst = rt0->rt_dst;
+		x->u.rt.rt_gateway = rt->rt_gateway;
+		x->u.rt.rt_spec_dst = rt0->rt_spec_dst;
+		header_len -= x->u.dst.xfrm->props.header_len;
+		trailer_len -= x->u.dst.xfrm->props.trailer_len;
+	}
+	*dst_p = dst;
+	return 0;
+
+error:
+	if (dst)
+		dst_free(dst);
+	return err;
+}
+
+static void
+_decode_session4(struct sk_buff *skb, struct flowi *fl)
+{
+	struct iphdr *iph = skb->nh.iph;
+	u8 *xprth = skb->nh.raw + iph->ihl*4;
+
+	memset(fl, 0, sizeof(struct flowi));
+	if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) {
+		switch (iph->protocol) {
+		case IPPROTO_UDP:
+		case IPPROTO_TCP:
+		case IPPROTO_SCTP:
+			if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
+				u16 *ports = (u16 *)xprth;
+
+				fl->fl_ip_sport = ports[0];
+				fl->fl_ip_dport = ports[1];
+			}
+			break;
+
+		case IPPROTO_ESP:
+			if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
+				u32 *ehdr = (u32 *)xprth;
+
+				fl->fl_ipsec_spi = ehdr[0];
+			}
+			break;
+
+		case IPPROTO_AH:
+			if (pskb_may_pull(skb, xprth + 8 - skb->data)) {
+				u32 *ah_hdr = (u32*)xprth;
+
+				fl->fl_ipsec_spi = ah_hdr[1];
+			}
+			break;
+
+		case IPPROTO_COMP:
+			if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
+				u16 *ipcomp_hdr = (u16 *)xprth;
+
+				fl->fl_ipsec_spi = ntohl(ntohs(ipcomp_hdr[1]));
+			}
+			break;
+		default:
+			fl->fl_ipsec_spi = 0;
+			break;
+		};
+	}
+	fl->proto = iph->protocol;
+	fl->fl4_dst = iph->daddr;
+	fl->fl4_src = iph->saddr;
+}
+
+static inline int xfrm4_garbage_collect(void)
+{
+	read_lock(&xfrm4_policy_afinfo.lock);
+	xfrm4_policy_afinfo.garbage_collect();
+	read_unlock(&xfrm4_policy_afinfo.lock);
+	return (atomic_read(&xfrm4_dst_ops.entries) > xfrm4_dst_ops.gc_thresh*2);
+}
+
+static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu)
+{
+	struct dst_entry *path = dst->path;
+
+	if (mtu < 68 + dst->header_len)
+		return;
+
+	path->ops->update_pmtu(path, mtu);
+}
+
+struct dst_ops xfrm4_dst_ops = {
+	.family =		AF_INET,
+	.protocol =		__constant_htons(ETH_P_IP),
+	.gc =			xfrm4_garbage_collect,
+	.update_pmtu =		xfrm4_update_pmtu,
+	.gc_thresh =		1024,
+	.entry_size =		sizeof(struct xfrm_dst),
+};
+
+struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
+	.family = 		AF_INET,
+	.lock = 		RW_LOCK_UNLOCKED,
+	.type_map = 		&xfrm4_type_map,
+	.dst_ops =		&xfrm4_dst_ops,
+	.dst_lookup =		xfrm4_dst_lookup,
+	.find_bundle = 		__xfrm4_find_bundle,
+	.bundle_create =	__xfrm4_bundle_create,
+	.decode_session =	_decode_session4,
+};
+
+void __init xfrm4_policy_init(void)
+{
+	xfrm_policy_register_afinfo(&xfrm4_policy_afinfo);
+}
+
+void __exit xfrm4_policy_fini(void)
+{
+	xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo);
+}
+
+void __init xfrm4_init(void)
+{
+	xfrm4_state_init();
+	xfrm4_policy_init();
+}
+
+void __exit xfrm4_fini(void)
+{
+	//xfrm4_input_fini();
+	xfrm4_policy_fini();
+	xfrm4_state_fini();
+}
+
Index: net/ipv4/xfrm4_state.c
===================================================================
RCS file: net/ipv4/xfrm4_state.c
diff -N net/ipv4/xfrm4_state.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ b/net/ipv4/xfrm4_state.c	16 Apr 2004 13:16:23 -0000	1.3.18.1
@@ -0,0 +1,128 @@
+/*
+ * xfrm4_state.c
+ *
+ * Changes:
+ * 	YOSHIFUJI Hideaki @USAGI
+ * 		Split up af-specific portion
+ *
+ */
+
+#include <net/xfrm.h>
+#include <linux/pfkeyv2.h>
+#include <linux/ipsec.h>
+
+static struct xfrm_state_afinfo xfrm4_state_afinfo;
+
+static void
+__xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl,
+		     struct xfrm_tmpl *tmpl,
+		     xfrm_address_t *daddr, xfrm_address_t *saddr)
+{
+	x->sel.daddr.a4 = fl->fl4_dst;
+	x->sel.saddr.a4 = fl->fl4_src;
+	x->sel.dport = fl->fl_ip_dport;
+	x->sel.dport_mask = ~0;
+	x->sel.sport = fl->fl_ip_sport;
+	x->sel.sport_mask = ~0;
+	x->sel.prefixlen_d = 32;
+	x->sel.prefixlen_s = 32;
+	x->sel.proto = fl->proto;
+	x->sel.ifindex = fl->oif;
+	x->id = tmpl->id;
+	if (x->id.daddr.a4 == 0)
+		x->id.daddr.a4 = daddr->a4;
+	x->props.saddr = tmpl->saddr;
+	if (x->props.saddr.a4 == 0)
+		x->props.saddr.a4 = saddr->a4;
+	x->props.mode = tmpl->mode;
+	x->props.reqid = tmpl->reqid;
+	x->props.family = AF_INET;
+}
+
+static struct xfrm_state *
+__xfrm4_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto)
+{
+	unsigned h = __xfrm4_spi_hash(daddr, spi, proto);
+	struct xfrm_state *x;
+
+	list_for_each_entry(x, xfrm4_state_afinfo.state_byspi+h, byspi) {
+		if (x->props.family == AF_INET &&
+		    spi == x->id.spi &&
+		    daddr->a4 == x->id.daddr.a4 &&
+		    proto == x->id.proto) {
+			xfrm_state_hold(x);
+			return x;
+		}
+	}
+	return NULL;
+}
+
+static struct xfrm_state *
+__xfrm4_find_acq(u8 mode, u32 reqid, u8 proto, 
+		 xfrm_address_t *daddr, xfrm_address_t *saddr, 
+		 int create)
+{
+	struct xfrm_state *x, *x0;
+	unsigned h = __xfrm4_dst_hash(daddr);
+
+	x0 = NULL;
+
+	list_for_each_entry(x, xfrm4_state_afinfo.state_bydst+h, bydst) {
+		if (x->props.family == AF_INET &&
+		    daddr->a4 == x->id.daddr.a4 &&
+		    mode == x->props.mode &&
+		    proto == x->id.proto &&
+		    saddr->a4 == x->props.saddr.a4 &&
+		    reqid == x->props.reqid &&
+		    x->km.state == XFRM_STATE_ACQ) {
+			    if (!x0)
+				    x0 = x;
+			    if (x->id.spi)
+				    continue;
+			    x0 = x;
+			    break;
+		    }
+	}
+	if (x0) {
+		xfrm_state_hold(x0);
+	} else if (create && (x0 = xfrm_state_alloc()) != NULL) {
+		x0->sel.daddr.a4 = daddr->a4;
+		x0->sel.saddr.a4 = saddr->a4;
+		x0->sel.prefixlen_d = 32;
+		x0->sel.prefixlen_s = 32;
+		x0->props.saddr.a4 = saddr->a4;
+		x0->km.state = XFRM_STATE_ACQ;
+		x0->id.daddr.a4 = daddr->a4;
+		x0->id.proto = proto;
+		x0->props.family = AF_INET;
+		x0->props.mode = mode;
+		x0->props.reqid = reqid;
+		x0->props.family = AF_INET;
+		x0->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
+		xfrm_state_hold(x0);
+		mod_timer(&x0->timer, jiffies + XFRM_ACQ_EXPIRES*HZ);
+		xfrm_state_hold(x0);
+		list_add_tail(&x0->bydst, xfrm4_state_afinfo.state_bydst+h);
+		wake_up(&km_waitq);
+	}
+	return x0;
+}
+
+static struct xfrm_state_afinfo xfrm4_state_afinfo = {
+	.family			= AF_INET,
+	.lock			= RW_LOCK_UNLOCKED,
+	.init_tempsel		= __xfrm4_init_tempsel,
+	.state_lookup		= __xfrm4_state_lookup,
+	.find_acq		= __xfrm4_find_acq,
+};
+
+void __init xfrm4_state_init(void)
+{
+	xfrm_state_register_afinfo(&xfrm4_state_afinfo);
+}
+
+void __exit xfrm4_state_fini(void)
+{
+	xfrm_state_unregister_afinfo(&xfrm4_state_afinfo);
+}
+
Index: net/ipv4/xfrm4_tunnel.c
===================================================================
RCS file: net/ipv4/xfrm4_tunnel.c
diff -N net/ipv4/xfrm4_tunnel.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ b/net/ipv4/xfrm4_tunnel.c	16 Apr 2004 13:16:23 -0000	1.5.14.1
@@ -0,0 +1,204 @@
+/* xfrm4_tunnel.c: Generic IP tunnel transformer.
+ *
+ * Copyright (C) 2003 David S. Miller (davem@redhat.com)
+ */
+
+#include <linux/skbuff.h>
+#include <net/xfrm.h>
+#include <net/ip.h>
+#include <net/icmp.h>
+#include <net/inet_ecn.h>
+
+int xfrm4_tunnel_check_size(struct sk_buff *skb)
+{
+	int mtu, ret = 0;
+	struct dst_entry *dst;
+	struct iphdr *iph = skb->nh.iph;
+
+	if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE)
+		goto out;
+
+	IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
+	
+	if (!(iph->frag_off & htons(IP_DF)))
+		goto out;
+
+	dst = skb->dst;
+	mtu = dst_pmtu(dst) - dst->header_len - dst->trailer_len;
+	if (skb->len > mtu) {
+		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
+		ret = -EMSGSIZE;
+	}
+out:
+	return ret;
+}
+
+static int ipip_output(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb->dst;
+	struct xfrm_state *x = dst->xfrm;
+	struct iphdr *iph, *top_iph;
+	int tos, err;
+
+	if ((err = xfrm4_tunnel_check_size(skb)) != 0)
+		goto error_nolock;
+		
+	iph = skb->nh.iph;
+
+	spin_lock_bh(&x->lock);
+
+	tos = iph->tos;
+
+	top_iph = (struct iphdr *) skb_push(skb, x->props.header_len);
+	top_iph->ihl = 5;
+	top_iph->version = 4;
+	top_iph->tos = INET_ECN_encapsulate(tos, iph->tos);
+	top_iph->tot_len = htons(skb->len);
+	top_iph->frag_off = iph->frag_off & ~htons(IP_MF|IP_OFFSET);
+	if (!(iph->frag_off & htons(IP_DF))) {
+#ifdef NETIF_F_TSO
+		__ip_select_ident(top_iph, dst, 0);
+#else
+		__ip_select_ident(top_iph, dst);
+#endif
+	}
+	top_iph->ttl = iph->ttl;
+	top_iph->protocol = IPPROTO_IPIP;
+	top_iph->check = 0;
+	top_iph->saddr = x->props.saddr.a4;
+	top_iph->daddr = x->id.daddr.a4;
+	memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
+	ip_send_check(top_iph);
+
+	skb->nh.raw = skb->data;
+	x->curlft.bytes += skb->len;
+	x->curlft.packets++;
+
+	spin_unlock_bh(&x->lock);
+
+	if ((skb->dst = dst_pop(dst)) == NULL) {
+		/* skb is freed once at error_nolock; freeing here too would double free */
+		err = -EHOSTUNREACH;
+		goto error_nolock;
+	}
+	return NET_XMIT_BYPASS;
+
+error_nolock:
+	kfree_skb(skb);
+	return err;
+}
+
+static int ipip_xfrm_rcv(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
+{
+	return 0;
+}
+
+static struct xfrm_tunnel *ipip_handler;
+static DECLARE_MUTEX(xfrm4_tunnel_sem);
+
+int xfrm4_tunnel_register(struct xfrm_tunnel *handler)
+{
+	int ret;
+
+	down(&xfrm4_tunnel_sem);
+	ret = 0;
+	if (ipip_handler != NULL)
+		ret = -EINVAL;
+	if (!ret)
+		ipip_handler = handler;
+	up(&xfrm4_tunnel_sem);
+
+	return ret;
+}
+
+int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler)
+{
+	int ret;
+
+	down(&xfrm4_tunnel_sem);
+	ret = 0;
+	if (ipip_handler != handler)
+		ret = -EINVAL;
+	if (!ret)
+		ipip_handler = NULL;
+	up(&xfrm4_tunnel_sem);
+
+	synchronize_net();
+
+	return ret;
+}
+
+static int ipip_rcv(struct sk_buff *skb)
+{
+	struct xfrm_tunnel *handler = ipip_handler;
+
+	/* Tunnel devices take precedence.  */
+	if (handler && handler->handler(skb) == 0)
+		return 0;
+
+	return xfrm4_rcv_encap(skb, 0);
+}
+
+static void ipip_err(struct sk_buff *skb, u32 info)
+{
+	struct xfrm_tunnel *handler = ipip_handler;
+	u32 arg = info;
+
+	if (handler)
+		handler->err_handler(skb, &arg);
+}
+
+static int ipip_init_state(struct xfrm_state *x, void *args)
+{
+	if (!x->props.mode)
+		return -EINVAL;
+	x->props.header_len = sizeof(struct iphdr);
+
+	return 0;
+}
+
+static void ipip_destroy(struct xfrm_state *x)
+{
+}
+
+static struct xfrm_type ipip_type = {
+	.description	= "IPIP",
+	.proto	     	= IPPROTO_IPIP,
+	.init_state	= ipip_init_state,
+	.destructor	= ipip_destroy,
+	.input		= ipip_xfrm_rcv,
+	.output		= ipip_output
+};
+
+static struct inet_protocol ipip_protocol = {
+	.handler	=	ipip_rcv,
+	.err_handler	=	ipip_err,
+	.no_policy	=	1,
+};
+
+static int __init ipip_init(void)
+{
+	SET_MODULE_OWNER(&ipip_type);
+	if (xfrm_register_type(&ipip_type, AF_INET) < 0) {
+		printk(KERN_INFO "ipip init: can't add xfrm type\n");
+		return -EAGAIN;
+	}
+	if (inet_add_protocol(&ipip_protocol, IPPROTO_IPIP) < 0) {
+		printk(KERN_INFO "ipip init: can't add protocol\n");
+		xfrm_unregister_type(&ipip_type, AF_INET);
+		return -EAGAIN;
+	}
+	return 0;
+}
+
+static void __exit ipip_fini(void)
+{
+	if (inet_del_protocol(&ipip_protocol, IPPROTO_IPIP) < 0)
+		printk(KERN_INFO "ipip close: can't remove protocol\n");
+	if (xfrm_unregister_type(&ipip_type, AF_INET) < 0)
+		printk(KERN_INFO "ipip close: can't remove xfrm type\n");
+}
+
+module_init(ipip_init);
+module_exit(ipip_fini);
+MODULE_LICENSE("GPL");
Index: net/ipv4/ipvs/ip_vs_conn.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/ipvs/ip_vs_conn.c,v
retrieving revision 1.1.1.7
retrieving revision 1.1.1.7.2.1
diff -u -r1.1.1.7 -r1.1.1.7.2.1
--- a/net/ipv4/ipvs/ip_vs_conn.c	14 Apr 2004 13:05:41 -0000	1.1.1.7
+++ b/net/ipv4/ipvs/ip_vs_conn.c	16 Apr 2004 13:16:23 -0000	1.1.1.7.2.1
@@ -606,17 +606,25 @@
 	struct iphdr  *iph = skb->nh.iph;
 	u8     tos = iph->tos;
 	int    mtu;
+	struct flowi fl = {
+		.oif = 0,
+		.nl_u = {
+			.ip4_u = {
+				.daddr = iph->daddr,
+				.saddr = 0,
+				.tos = RT_TOS(tos), } },
+	};
 
 	EnterFunction(10);
 
-	if (ip_route_output(&rt, iph->daddr, 0, RT_TOS(tos), 0)) {
+	if (ip_route_output_key(&rt, &fl)) {
 		IP_VS_DBG_RL("ip_vs_bypass_xmit(): ip_route_output error, "
 			     "dest: %u.%u.%u.%u\n", NIPQUAD(iph->daddr));
 		goto tx_error_icmp;
 	}
 
 	/* MTU checking */
-	mtu = rt->u.dst.pmtu;
+	mtu = dst_pmtu(&rt->u.dst);
 	if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) {
 		ip_rt_put(rt);
 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
@@ -642,8 +650,7 @@
 #ifdef CONFIG_NETFILTER_DEBUG
 	skb->nf_debug = 1 << NF_IP_LOCAL_OUT;
 #endif /* CONFIG_NETFILTER_DEBUG */
-	skb->nfcache |= NFC_IPVS_PROPERTY;
-	ip_send(skb);
+	IP_VS_XMIT(skb, rt);
 
 	LeaveFunction(10);
 	return NF_STOLEN;
@@ -742,7 +749,7 @@
 		goto tx_error_icmp;
 
 	/* MTU checking */
-	mtu = rt->u.dst.pmtu;
+	mtu = dst_pmtu(&rt->u.dst);
 	if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) {
 		ip_rt_put(rt);
 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
@@ -814,8 +821,7 @@
 #ifdef CONFIG_NETFILTER_DEBUG
 	skb->nf_debug = 1 << NF_IP_LOCAL_OUT;
 #endif /* CONFIG_NETFILTER_DEBUG */
-	skb->nfcache |= NFC_IPVS_PROPERTY;
-	ip_send(skb);
+	IP_VS_XMIT(skb, rt);
 
 	LeaveFunction(10);
 	return NF_STOLEN;
@@ -870,14 +876,14 @@
 
 	tdev = rt->u.dst.dev;
 
-	mtu = rt->u.dst.pmtu - sizeof(struct iphdr);
+	mtu = dst_pmtu(&rt->u.dst) - sizeof(struct iphdr);
 	if (mtu < 68) {
 		ip_rt_put(rt);
 		IP_VS_DBG_RL("ip_vs_tunnel_xmit(): mtu less than 68\n");
 		goto tx_error;
 	}
-	if (skb->dst && mtu < skb->dst->pmtu)
-		skb->dst->pmtu = mtu;
+	if (skb->dst)
+		skb->dst->ops->update_pmtu(skb->dst, mtu);
 
 	df |= (old_iph->frag_off&__constant_htons(IP_DF));
 
@@ -939,8 +945,7 @@
 #ifdef CONFIG_NETFILTER_DEBUG
 	skb->nf_debug = 1 << NF_IP_LOCAL_OUT;
 #endif /* CONFIG_NETFILTER_DEBUG */
-	skb->nfcache |= NFC_IPVS_PROPERTY;
-	ip_send(skb);
+	IP_VS_XMIT(skb, rt);
 
 	LeaveFunction(10);
 
@@ -969,7 +974,7 @@
 		goto tx_error_icmp;
 
 	/* MTU checking */
-	mtu = rt->u.dst.pmtu;
+	mtu = dst_pmtu(&rt->u.dst);
 	if ((iph->frag_off&__constant_htons(IP_DF)) && skb->len > mtu) {
 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
 		ip_rt_put(rt);
@@ -995,8 +1000,7 @@
 #ifdef CONFIG_NETFILTER_DEBUG
 	skb->nf_debug = 1 << NF_IP_LOCAL_OUT;
 #endif /* CONFIG_NETFILTER_DEBUG */
-	skb->nfcache |= NFC_IPVS_PROPERTY;
-	ip_send(skb);
+	IP_VS_XMIT(skb, rt);
 
 #if 0000
 	NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
Index: net/ipv4/ipvs/ip_vs_core.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/ipvs/ip_vs_core.c,v
retrieving revision 1.1.1.7
retrieving revision 1.1.1.7.2.1
diff -u -r1.1.1.7 -r1.1.1.7.2.1
--- a/net/ipv4/ipvs/ip_vs_core.c	28 Nov 2003 18:26:21 -0000	1.1.1.7
+++ b/net/ipv4/ipvs/ip_vs_core.c	16 Apr 2004 13:16:23 -0000	1.1.1.7.2.1
@@ -953,7 +953,7 @@
 		goto tx_error_icmp;
 
 	/* MTU checking */
-	mtu = rt->u.dst.pmtu;
+	mtu = dst_pmtu(&rt->u.dst);
 	if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) {
 		ip_rt_put(rt);
 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
@@ -1001,7 +1001,7 @@
 #ifdef CONFIG_NETFILTER_DEBUG
 	skb->nf_debug = 1 << NF_IP_LOCAL_OUT;
 #endif /* CONFIG_NETFILTER_DEBUG */
-	ip_send(skb);
+	IP_VS_XMIT(skb, rt);
 	ip_vs_conn_put(cp);
 	return NF_STOLEN;
 
Index: net/ipv4/netfilter/ip_conntrack_standalone.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/netfilter/ip_conntrack_standalone.c,v
retrieving revision 1.1.1.22
retrieving revision 1.1.1.22.2.1
diff -u -r1.1.1.22 -r1.1.1.22.2.1
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c	18 Feb 2004 13:36:32 -0000	1.1.1.22
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c	16 Apr 2004 13:16:23 -0000	1.1.1.22.2.1
@@ -204,7 +204,7 @@
 	/* Local packets are never produced too large for their
 	   interface.  We degfragment them at LOCAL_OUT, however,
 	   so we have to refragment them here. */
-	if ((*pskb)->len > rt->u.dst.pmtu) {
+	if ((*pskb)->len > dst_pmtu(&rt->u.dst)) {
 		/* No hook can be after us, so this should be OK. */
 		ip_fragment(*pskb, okfn);
 		return NF_STOLEN;
Index: net/ipv4/netfilter/ip_fw_compat_masq.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/netfilter/ip_fw_compat_masq.c,v
retrieving revision 1.1.1.18
retrieving revision 1.1.1.18.2.1
diff -u -r1.1.1.18 -r1.1.1.18.2.1
--- a/net/ipv4/netfilter/ip_fw_compat_masq.c	5 Jan 2004 13:53:56 -0000	1.1.1.18
+++ b/net/ipv4/netfilter/ip_fw_compat_masq.c	16 Apr 2004 13:16:23 -0000	1.1.1.18.2.1
@@ -68,12 +68,13 @@
 	/* Setup the masquerade, if not already */
 	if (!info->initialized) {
 		u_int32_t newsrc;
+		struct flowi fl = { .nl_u = { .ip4_u = { .daddr = iph->daddr } } };
 		struct rtable *rt;
 		struct ip_nat_multi_range range;
 
 		/* Pass 0 instead of saddr, since it's going to be changed
 		   anyway. */
-		if (ip_route_output(&rt, iph->daddr, 0, 0, 0) != 0) {
+		if (ip_route_output_key(&rt, &fl) != 0) {
 			DEBUGP("ipnat_rule_masquerade: Can't reroute.\n");
 			return NF_DROP;
 		}
Index: net/ipv4/netfilter/ip_nat_core.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/netfilter/ip_nat_core.c,v
retrieving revision 1.1.1.23
retrieving revision 1.1.1.23.2.1
diff -u -r1.1.1.23 -r1.1.1.23.2.1
--- a/net/ipv4/netfilter/ip_nat_core.c	18 Feb 2004 13:36:32 -0000	1.1.1.23
+++ b/net/ipv4/netfilter/ip_nat_core.c	16 Apr 2004 13:16:23 -0000	1.1.1.23.2.1
@@ -204,10 +204,11 @@
 static int
 do_extra_mangle(u_int32_t var_ip, u_int32_t *other_ipp)
 {
+	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = var_ip } } };
 	struct rtable *rt;
 
 	/* FIXME: IPTOS_TOS(iph->tos) --RR */
-	if (ip_route_output(&rt, var_ip, 0, 0, 0) != 0) {
+	if (ip_route_output_key(&rt, &fl) != 0) {
 		DEBUGP("do_extra_mangle: Can't get route to %u.%u.%u.%u\n",
 		       NIPQUAD(var_ip));
 		return 0;
Index: net/ipv4/netfilter/ipt_MASQUERADE.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/netfilter/ipt_MASQUERADE.c,v
retrieving revision 1.1.1.18
retrieving revision 1.1.1.18.2.1
diff -u -r1.1.1.18 -r1.1.1.18.2.1
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c	14 Apr 2004 13:05:41 -0000	1.1.1.18
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c	16 Apr 2004 13:16:23 -0000	1.1.1.18.2.1
@@ -69,7 +69,6 @@
 	struct ip_nat_multi_range newrange;
 	u_int32_t newsrc;
 	struct rtable *rt;
-	struct rt_key key;
 
 	IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING);
 
@@ -84,25 +83,28 @@
 
 	mr = targinfo;
 
-	key.dst = (*pskb)->nh.iph->daddr;
-	key.src = 0; /* Unknown: that's what we're trying to establish */
-	key.tos = RT_TOS((*pskb)->nh.iph->tos)|RTO_CONN;
-	key.oif = 0;
+	{
+		struct flowi fl = { .nl_u = { .ip4_u =
+					      { .daddr = (*pskb)->nh.iph->daddr,
+						.tos = (RT_TOS((*pskb)->nh.iph->tos) |
+							RTO_CONN),
 #ifdef CONFIG_IP_ROUTE_FWMARK
-	key.fwmark = (*pskb)->nfmark;
+						.fwmark = (*pskb)->nfmark
 #endif
-	if (ip_route_output_key(&rt, &key) != 0) {
-                /* Funky routing can do this. */
-                if (net_ratelimit())
-                        printk("MASQUERADE:"
-                               " No route: Rusty's brain broke!\n");
-                return NF_DROP;
-        }
-        if (rt->u.dst.dev != out) {
-                if (net_ratelimit())
-                        printk("MASQUERADE:"
-                               " Route sent us somewhere else.\n");
-		return NF_DROP;
+					      } } };
+		if (ip_route_output_key(&rt, &fl) != 0) {
+	                /* Funky routing can do this. */
+	                if (net_ratelimit())
+				printk("MASQUERADE:"
+				       " No route: Rusty's brain broke!\n");
+			return NF_DROP;
+		}
+	        if (rt->u.dst.dev != out) {
+	                if (net_ratelimit())
+	                        printk("MASQUERADE:"
+	                               " Route sent us somewhere else.\n");
+			return NF_DROP;
+		}
 	}
 
 	newsrc = rt->rt_src;
Index: net/ipv4/netfilter/ipt_MIRROR.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/netfilter/ipt_MIRROR.c,v
retrieving revision 1.1.1.18
retrieving revision 1.1.1.18.2.1
diff -u -r1.1.1.18 -r1.1.1.18.2.1
--- a/net/ipv4/netfilter/ipt_MIRROR.c	25 Aug 2003 11:44:44 -0000	1.1.1.18
+++ b/net/ipv4/netfilter/ipt_MIRROR.c	16 Apr 2004 13:16:23 -0000	1.1.1.18.2.1
@@ -44,21 +44,21 @@
 {
         struct iphdr *iph = skb->nh.iph;
 	struct dst_entry *odst;
-	struct rt_key key = {};
+	struct flowi fl = {};
 	struct rtable *rt;
 
 	if (local) {
-		key.dst = iph->saddr;
-		key.src = iph->daddr;
-		key.tos = RT_TOS(iph->tos);
+		fl.nl_u.ip4_u.daddr = iph->saddr;
+		fl.nl_u.ip4_u.saddr = iph->daddr;
+		fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
 
-		if (ip_route_output_key(&rt, &key) != 0)
+		if (ip_route_output_key(&rt, &fl) != 0)
 			return NULL;
 	} else {
 		/* non-local src, find valid iif to satisfy
 		 * rp-filter when calling ip_route_input. */
-		key.dst = iph->daddr;
-		if (ip_route_output_key(&rt, &key) != 0)
+		fl.nl_u.ip4_u.daddr = iph->daddr;
+		if (ip_route_output_key(&rt, &fl) != 0)
 			return NULL;
 
 		odst = skb->dst;
Index: net/ipv4/netfilter/ipt_REJECT.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/netfilter/ipt_REJECT.c,v
retrieving revision 1.1.1.23
retrieving revision 1.1.1.23.2.1
diff -u -r1.1.1.23 -r1.1.1.23.2.1
--- a/net/ipv4/netfilter/ipt_REJECT.c	28 Nov 2003 18:26:21 -0000	1.1.1.23
+++ b/net/ipv4/netfilter/ipt_REJECT.c	16 Apr 2004 13:16:23 -0000	1.1.1.23.2.1
@@ -38,22 +38,22 @@
 {
 	struct iphdr *iph = skb->nh.iph;
 	struct dst_entry *odst;
-	struct rt_key key = {};
+	struct flowi fl = {};
 	struct rtable *rt;
 
 	if (hook != NF_IP_FORWARD) {
-		key.dst = iph->saddr;
+		fl.nl_u.ip4_u.daddr = iph->saddr;
 		if (hook == NF_IP_LOCAL_IN)
-			key.src = iph->daddr;
-		key.tos = RT_TOS(iph->tos);
+			fl.nl_u.ip4_u.saddr = iph->daddr;
+		fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
 
-		if (ip_route_output_key(&rt, &key) != 0)
+		if (ip_route_output_key(&rt, &fl) != 0)
 			return NULL;
 	} else {
 		/* non-local src, find valid iif to satisfy
 		 * rp-filter when calling ip_route_input. */
-		key.dst = iph->daddr;
-		if (ip_route_output_key(&rt, &key) != 0)
+		fl.nl_u.ip4_u.daddr = iph->daddr;
+		if (ip_route_output_key(&rt, &fl) != 0)
 			return NULL;
 
 		odst = skb->dst;
@@ -188,7 +188,7 @@
 					   nskb->nh.iph->ihl);
 
 	/* "Never happens" */
-	if (nskb->len > nskb->dst->pmtu)
+	if (nskb->len > dst_pmtu(nskb->dst))
 		goto free_nskb;
 
 	connection_attach(nskb, oldskb->nfct);
@@ -268,14 +268,19 @@
 
 	tos = (iph->tos & IPTOS_TOS_MASK) | IPTOS_PREC_INTERNETCONTROL;
 
-	if (ip_route_output(&rt, iph->saddr, saddr, RT_TOS(tos), 0))
-		return;
-
+	{
+		struct flowi fl = { .nl_u = { .ip4_u =
+					      { .daddr = iph->saddr,
+						.saddr = saddr,
+						.tos = RT_TOS(tos) } } };
+		if (ip_route_output_key(&rt, &fl))
+			return;
+	}
 	/* RFC says return as much as we can without exceeding 576 bytes. */
 	length = skb_in->len + sizeof(struct iphdr) + sizeof(struct icmphdr);
 
-	if (length > rt->u.dst.pmtu)
-		length = rt->u.dst.pmtu;
+	if (length > dst_pmtu(&rt->u.dst))
+		length = dst_pmtu(&rt->u.dst);
 	if (length > 576)
 		length = 576;
 
Index: net/ipv4/netfilter/ipt_TCPMSS.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/netfilter/ipt_TCPMSS.c,v
retrieving revision 1.1.1.15
retrieving revision 1.1.1.15.2.1
diff -u -r1.1.1.15 -r1.1.1.15.2.1
--- a/net/ipv4/netfilter/ipt_TCPMSS.c	21 Dec 2001 17:42:05 -0000	1.1.1.15
+++ b/net/ipv4/netfilter/ipt_TCPMSS.c	16 Apr 2004 13:16:23 -0000	1.1.1.15.2.1
@@ -85,14 +85,14 @@
 			return NF_DROP; /* or IPT_CONTINUE ?? */
 		}
 
-		if((*pskb)->dst->pmtu <= (sizeof(struct iphdr) + sizeof(struct tcphdr))) {
+		if(dst_pmtu((*pskb)->dst) <= (sizeof(struct iphdr) + sizeof(struct tcphdr))) {
 			if (net_ratelimit())
 				printk(KERN_ERR
-		       			"ipt_tcpmss_target: unknown or invalid path-MTU (%d)\n", (*pskb)->dst->pmtu);
+		       			"ipt_tcpmss_target: unknown or invalid path-MTU (%d)\n", dst_pmtu((*pskb)->dst));
 			return NF_DROP; /* or IPT_CONTINUE ?? */
 		}
 
-		newmss = (*pskb)->dst->pmtu - sizeof(struct iphdr) - sizeof(struct tcphdr);
+		newmss = dst_pmtu((*pskb)->dst) - sizeof(struct iphdr) - sizeof(struct tcphdr);
 	} else
 		newmss = tcpmssinfo->mss;
 
Index: net/ipv4/netfilter/ipt_multiport.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv4/netfilter/ipt_multiport.c,v
retrieving revision 1.1.1.13
retrieving revision 1.1.1.13.2.1
diff -u -r1.1.1.13 -r1.1.1.13.2.1
--- a/net/ipv4/netfilter/ipt_multiport.c	13 Jun 2003 14:51:39 -0000	1.1.1.13
+++ b/net/ipv4/netfilter/ipt_multiport.c	16 Apr 2004 13:16:23 -0000	1.1.1.13.2.1
@@ -4,6 +4,7 @@
 #include <linux/types.h>
 #include <linux/udp.h>
 #include <linux/skbuff.h>
+#include <linux/socket.h>
 
 #include <linux/netfilter_ipv4/ipt_multiport.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
Index: net/ipv6/Config.in
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv6/Config.in,v
retrieving revision 1.1.1.16
retrieving revision 1.1.1.16.2.1
diff -u -r1.1.1.16 -r1.1.1.16.2.1
--- a/net/ipv6/Config.in	21 Dec 2001 17:42:05 -0000	1.1.1.16
+++ b/net/ipv6/Config.in	16 Apr 2004 13:16:23 -0000	1.1.1.16.2.1
@@ -2,9 +2,14 @@
 # IPv6 configuration
 # 
 
-#bool '    IPv6: flow policy support' CONFIG_RT6_POLICY
-#bool '    IPv6: firewall support' CONFIG_IPV6_FIREWALL
+bool 'IPv6: Privacy Extensions (RFC 3041) support' CONFIG_IPV6_PRIVACY
 
 if [ "$CONFIG_NETFILTER" != "n" ]; then
    source net/ipv6/netfilter/Config.in
 fi
+
+tristate 'IPv6: AH transformation' CONFIG_INET6_AH
+tristate 'IPv6: ESP transformation' CONFIG_INET6_ESP
+tristate 'IPv6: IPComp transformation' CONFIG_INET6_IPCOMP
+
+tristate 'IPv6: IPv6-in-IPv6 tunnel' CONFIG_IPV6_TUNNEL
Index: net/ipv6/Makefile
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv6/Makefile,v
retrieving revision 1.1.1.20
retrieving revision 1.1.1.20.2.1
diff -u -r1.1.1.20 -r1.1.1.20.2.1
--- a/net/ipv6/Makefile	28 Nov 2003 18:26:21 -0000	1.1.1.20
+++ b/net/ipv6/Makefile	16 Apr 2004 13:16:23 -0000	1.1.1.20.2.1
@@ -9,7 +9,13 @@
 
 O_TARGET := ipv6.o
 
-obj-y :=	af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \
+mod-subdirs := netfilter
+
+ifeq ($(CONFIG_IPV6),m)
+obj-m += ipv6.o
+endif
+
+ipv6-objs :=	af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \
 		route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o raw.o \
 		protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
 		exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \
@@ -17,8 +23,29 @@
 
 export-objs := ipv6_syms.o
 
-obj-m  := $(O_TARGET)
+ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o
+ipv6-objs += $(ipv6-y)
+
+obj-$(CONFIG_INET6_AH) += ah6.o
+obj-$(CONFIG_INET6_ESP) += esp6.o
+obj-$(CONFIG_INET6_IPCOMP) += ipcomp6.o
+
+obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
 
-#obj-$(CONFIG_IPV6_FIREWALL) += ip6_fw.o
+subdir-$(CONFIG_NETFILTER) += netfilter
+
+ifeq ($(CONFIG_NETFILTER),y)
+obj-y += netfilter/netfilter.o
+endif
+
+ifeq ($(CONFIG_IPV6),y)
+obj-y += $(ipv6-objs)
+endif
 
 include $(TOPDIR)/Rules.make
+
+
+ifeq ($(CONFIG_IPV6),m)
+ipv6.o: $(ipv6-objs)
+	$(LD) -r -o $@ $(ipv6-objs)
+endif
Index: net/ipv6/addrconf.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv6/addrconf.c,v
retrieving revision 1.1.1.28
retrieving revision 1.1.1.28.2.1
diff -u -r1.1.1.28 -r1.1.1.28.2.1
--- a/net/ipv6/addrconf.c	28 Nov 2003 18:26:21 -0000	1.1.1.28
+++ b/net/ipv6/addrconf.c	16 Apr 2004 13:16:23 -0000	1.1.1.28.2.1
@@ -28,6 +28,8 @@
  *						packets.
  *	YOSHIFUJI Hideaki @USAGI	:	improved accuracy of
  *						address validation timer.
+ *	YOSHIFUJI Hideaki @USAGI	:	Privacy Extensions (RFC3041)
+ *						support.
  *	Yuji SEKIYA @USAGI		:	Don't assign a same IPv6
  *						address on a same interface.
  *	YOSHIFUJI Hideaki @USAGI	:	ARCnet support
@@ -66,6 +68,12 @@
 #include <linux/if_tunnel.h>
 #include <linux/rtnetlink.h>
 
+#ifdef CONFIG_IPV6_PRIVACY
+#include <linux/random.h>
+#include <linux/crypto.h>
+#include <asm/scatterlist.h>
+#endif
+
 #include <asm/uaccess.h>
 
 #define IPV6_MAX_ADDRESSES 16
@@ -87,6 +95,18 @@
 int inet6_dev_count;
 int inet6_ifa_count;
 
+#ifdef CONFIG_IPV6_PRIVACY
+static int __ipv6_regen_rndid(struct inet6_dev *idev);
+static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); 
+static void ipv6_regen_rndid(unsigned long data);
+
+static int desync_factor = MAX_DESYNC_FACTOR * HZ;
+static struct crypto_tfm *md5_tfm;
+static spinlock_t md5_tfm_lock = SPIN_LOCK_UNLOCKED;
+#endif
+
+static int ipv6_count_addresses(struct inet6_dev *idev);
+
 /*
  *	Configured unicast address hash table
  */
@@ -125,6 +145,13 @@
 	MAX_RTR_SOLICITATIONS,		/* router solicits	*/
 	RTR_SOLICITATION_INTERVAL,	/* rtr solicit interval	*/
 	MAX_RTR_SOLICITATION_DELAY,	/* rtr solicit delay	*/
+#ifdef CONFIG_IPV6_PRIVACY
+	.use_tempaddr 			= 0,
+	.temp_valid_lft			= TEMP_VALID_LIFETIME,
+	.temp_prefered_lft		= TEMP_PREFERRED_LIFETIME,
+	.regen_max_retry		= REGEN_MAX_RETRY,
+	.max_desync_factor		= MAX_DESYNC_FACTOR,
+#endif
 };
 
 static struct ipv6_devconf ipv6_devconf_dflt =
@@ -139,6 +166,13 @@
 	MAX_RTR_SOLICITATIONS,		/* router solicits	*/
 	RTR_SOLICITATION_INTERVAL,	/* rtr solicit interval	*/
 	MAX_RTR_SOLICITATION_DELAY,	/* rtr solicit delay	*/
+#ifdef CONFIG_IPV6_PRIVACY
+	.use_tempaddr			= 0,
+	.temp_valid_lft			= TEMP_VALID_LIFETIME,
+	.temp_prefered_lft		= TEMP_PREFERRED_LIFETIME,
+	.regen_max_retry		= REGEN_MAX_RETRY,
+	.max_desync_factor		= MAX_DESYNC_FACTOR,
+#endif
 };
 
 /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
@@ -170,15 +204,8 @@
 		};
 		return type;
 	}
-	/* check for reserved anycast addresses */
-	
-	if ((st & htonl(0xE0000000)) &&
-	    ((addr->s6_addr32[2] == htonl(0xFDFFFFFF) &&
-	    (addr->s6_addr32[3] | htonl(0x7F)) == (u32)~0) ||
-	    (addr->s6_addr32[2] == 0 && addr->s6_addr32[3] == 0)))
-		type = IPV6_ADDR_ANYCAST;
-	else
-		type = IPV6_ADDR_UNICAST;
+
+	type = IPV6_ADDR_UNICAST;
 
 	/* Consider all addresses with the first three bits different of
 	   000 and 111 as finished.
@@ -299,10 +326,32 @@
 		/* We refer to the device */
 		dev_hold(dev);
 
+		/* One reference from device.  We must do this before
+		 * we invoke __ipv6_regen_rndid().
+		 */
+		in6_dev_hold(ndev);
+
+#ifdef CONFIG_IPV6_PRIVACY
+		get_random_bytes(ndev->rndid, sizeof(ndev->rndid));
+		get_random_bytes(ndev->entropy, sizeof(ndev->entropy));
+		init_timer(&ndev->regen_timer);
+		ndev->regen_timer.function = ipv6_regen_rndid;
+		ndev->regen_timer.data = (unsigned long) ndev;
+		if ((dev->flags&IFF_LOOPBACK) ||
+		    dev->type == ARPHRD_TUNNEL ||
+		    dev->type == ARPHRD_SIT) {
+			printk(KERN_INFO
+				"Disabled Privacy Extensions on device %p(%s)\n",
+				dev, dev->name);
+			ndev->cnf.use_tempaddr = -1;
+		} else {
+			in6_dev_hold(ndev);
+			ipv6_regen_rndid((unsigned long) ndev);
+		}
+#endif
+
 		write_lock_bh(&addrconf_lock);
 		dev->ip6_ptr = ndev;
-		/* One reference from device */
-		in6_dev_hold(ndev);
 		write_unlock_bh(&addrconf_lock);
 
 		ipv6_mc_init_dev(ndev);
@@ -330,38 +379,6 @@
 	return idev;
 }
 
-void ipv6_addr_prefix(struct in6_addr *prefix,
-	struct in6_addr *addr, int prefix_len)
-{
-	unsigned long mask;
-	int ncopy, nbits;
-
-	memset(prefix, 0, sizeof(*prefix));
-
-	if (prefix_len <= 0)
-		return;
-	if (prefix_len > 128)
-		prefix_len = 128;
-
-	ncopy = prefix_len / 32;
-	switch (ncopy) {
-	case 4:	prefix->s6_addr32[3] = addr->s6_addr32[3];
-	case 3:	prefix->s6_addr32[2] = addr->s6_addr32[2];
-	case 2:	prefix->s6_addr32[1] = addr->s6_addr32[1];
-	case 1:	prefix->s6_addr32[0] = addr->s6_addr32[0];
-	case 0:	break;
-	}
-	nbits = prefix_len % 32;
-	if (nbits == 0)
-		return;
-
-	mask = ~((1 << (32 - nbits)) - 1);
-	mask = htonl(mask);
-
-	prefix->s6_addr32[ncopy] = addr->s6_addr32[ncopy] & mask;
-}
-
-
 static void dev_forward_change(struct inet6_dev *idev)
 {
 	struct net_device *dev;
@@ -501,6 +518,18 @@
 	/* Add to inet6_dev unicast addr list. */
 	ifa->if_next = idev->addr_list;
 	idev->addr_list = ifa;
+
+#ifdef CONFIG_IPV6_PRIVACY
+	ifa->regen_count = 0;
+	if (ifa->flags&IFA_F_TEMPORARY) {
+		ifa->tmp_next = idev->tempaddr_list;
+		idev->tempaddr_list = ifa;
+		in6_ifa_hold(ifa);
+	} else {
+		ifa->tmp_next = NULL;
+	}
+#endif
+
 	in6_ifa_hold(ifa);
 	write_unlock_bh(&idev->lock);
 	read_unlock(&addrconf_lock);
@@ -523,6 +552,15 @@
 
 	ifp->dead = 1;
 
+#ifdef CONFIG_IPV6_PRIVACY
+	spin_lock_bh(&ifp->lock);
+	if (ifp->ifpub) {
+		__in6_ifa_put(ifp->ifpub);
+		ifp->ifpub = NULL;
+	}
+	spin_unlock_bh(&ifp->lock);
+#endif
+
 	write_lock_bh(&addrconf_hash_lock);
 	for (ifap = &inet6_addr_lst[hash]; (ifa=*ifap) != NULL;
 	     ifap = &ifa->lst_next) {
@@ -536,6 +574,24 @@
 	write_unlock_bh(&addrconf_hash_lock);
 
 	write_lock_bh(&idev->lock);
+#ifdef CONFIG_IPV6_PRIVACY
+	if (ifp->flags&IFA_F_TEMPORARY) {
+		for (ifap = &idev->tempaddr_list; (ifa=*ifap) != NULL;
+		     ifap = &ifa->tmp_next) {
+			if (ifa == ifp) {
+				*ifap = ifa->tmp_next;
+				if (ifp->ifpub) {
+					__in6_ifa_put(ifp->ifpub);
+					ifp->ifpub = NULL;
+				}
+				__in6_ifa_put(ifp);
+				ifa->tmp_next = NULL;
+				break;
+			}
+		}
+	}
+#endif
+
 	for (ifap = &idev->addr_list; (ifa=*ifap) != NULL;
 	     ifap = &ifa->if_next) {
 		if (ifa == ifp) {
@@ -556,6 +612,96 @@
 	in6_ifa_put(ifp);
 }
 
+#ifdef CONFIG_IPV6_PRIVACY
+static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *ift)
+{	/* Add a temporary address under public address ifp; returns 0 or -1. */
+	struct inet6_dev *idev;
+	struct in6_addr addr, *tmpaddr;
+	unsigned long tmp_prefered_lft, tmp_valid_lft;
+	int tmp_plen;
+	int ret = 0;
+
+	if (ift) {	/* existing temporary address: remember its low 64 bits */
+		spin_lock_bh(&ift->lock);
+		memcpy(&addr.s6_addr[8], &ift->addr.s6_addr[8], 8);
+		spin_unlock_bh(&ift->lock);
+		tmpaddr = &addr;
+	} else {
+		tmpaddr = NULL;
+	}
+retry:	/* re-entered when adding the address failed; regen_count bounds the loop */
+	spin_lock_bh(&ifp->lock);
+	in6_ifa_hold(ifp);	/* on success this reference stays with ift->ifpub */
+	idev = ifp->idev;
+	in6_dev_hold(idev);
+	memcpy(addr.s6_addr, ifp->addr.s6_addr, 8);	/* high 64 bits from ifp */
+	write_lock(&idev->lock);
+	if (idev->cnf.use_tempaddr <= 0) {
+		write_unlock(&idev->lock);
+		spin_unlock_bh(&ifp->lock);
+		printk(KERN_INFO
+			"ipv6_create_tempaddr(): use_tempaddr is disabled.\n");
+		in6_dev_put(idev);
+		in6_ifa_put(ifp);
+		ret = -1;
+		goto out;
+	}
+	if (ifp->regen_count++ >= idev->cnf.regen_max_retry) {
+		idev->cnf.use_tempaddr = -1;	/* XXX: turns the feature off for the whole device */
+		write_unlock(&idev->lock);
+		spin_unlock_bh(&ifp->lock);
+		printk(KERN_WARNING
+			"ipv6_create_tempaddr(): regeneration time exceeded. disabled temporary address support.\n");
+		in6_dev_put(idev);
+		in6_ifa_put(ifp);
+		ret = -1;
+		goto out;
+	}
+	if (__ipv6_try_regen_rndid(idev, tmpaddr) < 0) {	/* new rndid only if it equals tmpaddr's id */
+		write_unlock(&idev->lock);
+		spin_unlock_bh(&ifp->lock);
+		printk(KERN_WARNING
+			"ipv6_create_tempaddr(): regeneration of randomized interface id failed.\n");
+		in6_dev_put(idev);
+		in6_ifa_put(ifp);
+		ret = -1;
+		goto out;
+	}
+	memcpy(&addr.s6_addr[8], idev->rndid, 8);	/* low 64 bits: randomized id */
+	tmp_valid_lft = min_t(__u32,
+			      ifp->valid_lft,
+			      idev->cnf.temp_valid_lft);
+	tmp_prefered_lft = min_t(__u32, 
+				 ifp->prefered_lft, 
+				 idev->cnf.temp_prefered_lft - desync_factor / HZ);
+	tmp_plen = ifp->prefix_len;
+	write_unlock(&idev->lock);
+	spin_unlock_bh(&ifp->lock);
+	ift = ipv6_count_addresses(idev) < IPV6_MAX_ADDRESSES ?	/* ift is reused for the new address */
+		ipv6_add_addr(idev, &addr, tmp_plen,
+			      ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK, IFA_F_TEMPORARY) : 0;
+	if (!ift || IS_ERR(ift)) {
+		in6_dev_put(idev);
+		in6_ifa_put(ifp);
+		printk(KERN_INFO
+			"ipv6_create_tempaddr(): retry temporary address regeneration.\n");
+		tmpaddr = &addr;	/* addr holds the rndid just tried, so the next pass regenerates it */
+		goto retry;
+	}
+	spin_lock_bh(&ift->lock);
+	ift->ifpub = ifp;
+	ift->valid_lft = tmp_valid_lft;
+	ift->prefered_lft = tmp_prefered_lft;
+	ift->tstamp = ifp->tstamp;
+	spin_unlock_bh(&ift->lock);
+	addrconf_dad_start(ift, 0);
+	in6_ifa_put(ift);
+	in6_dev_put(idev);
+out:
+	return ret;
+}
+#endif
+
 /*
  *	Choose an apropriate source address
  *	should do:
@@ -564,6 +710,22 @@
  *		an address of the attached interface 
  *	iii)	don't use deprecated addresses
  */
+static inline int ipv6_saddr_pref(const struct inet6_ifaddr *ifp, u8 invpref)
+{
+	int score = (ifp->flags & IFA_F_DEPRECATED) ? 0 : 2;	/* 2: not deprecated */
+#ifdef CONFIG_IPV6_PRIVACY
+	if (!((ifp->flags ^ invpref) & IFA_F_TEMPORARY))
+		score |= 1;	/* 1: IFA_F_TEMPORARY bit equals the one in invpref */
+#endif
+	return score;
+}
+
+#ifdef CONFIG_IPV6_PRIVACY
+#define IPV6_GET_SADDR_MAXSCORE(score)	((score) == 3)	/* both ipv6_saddr_pref() bits set */
+#else
+#define IPV6_GET_SADDR_MAXSCORE(score)	(score)	/* score is 0 or 2 here; non-zero is the best */
+#endif
+
 int ipv6_dev_get_saddr(struct net_device *dev,
 		   struct in6_addr *daddr, struct in6_addr *saddr, int onlink)
 {
@@ -572,6 +734,7 @@
 	struct inet6_dev *idev;
 	int scope;
 	int err;
+	int hiscore = -1, score;
 
 
 	if (!onlink)
@@ -594,17 +757,27 @@
 			read_lock_bh(&idev->lock);
 			for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
 				if (ifp->scope == scope) {
-					if (!(ifp->flags & (IFA_F_DEPRECATED|IFA_F_TENTATIVE))) {
-						in6_ifa_hold(ifp);
+					if (ifp->flags&IFA_F_TENTATIVE)
+						continue;
+#ifdef CONFIG_IPV6_PRIVACY
+					score = ipv6_saddr_pref(ifp, idev->cnf.use_tempaddr > 1 ? IFA_F_TEMPORARY : 0);
+#else
+					score = ipv6_saddr_pref(ifp, 0);
+#endif
+					if (score <= hiscore)
+						continue;
+
+					if (match)
+						in6_ifa_put(match);
+					match = ifp;
+					hiscore = score;
+					in6_ifa_hold(ifp);
+
+					if (IPV6_GET_SADDR_MAXSCORE(score)) {
 						read_unlock_bh(&idev->lock);
 						read_unlock(&addrconf_lock);
 						goto out;
 					}
-
-					if (!match && !(ifp->flags & IFA_F_TENTATIVE)) {
-						match = ifp;
-						in6_ifa_hold(ifp);
-					}
 				}
 			}
 			read_unlock_bh(&idev->lock);
@@ -627,16 +800,26 @@
 			read_lock_bh(&idev->lock);
 			for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
 				if (ifp->scope == scope) {
-					if (!(ifp->flags&(IFA_F_DEPRECATED|IFA_F_TENTATIVE))) {
-						in6_ifa_hold(ifp);
+					if (ifp->flags&IFA_F_TENTATIVE)
+						continue;
+#ifdef CONFIG_IPV6_PRIVACY
+					score = ipv6_saddr_pref(ifp, idev->cnf.use_tempaddr > 1 ? IFA_F_TEMPORARY : 0);
+#else
+					score = ipv6_saddr_pref(ifp, 0);
+#endif
+					if (score <= hiscore)
+						continue;
+
+					if (match)
+						in6_ifa_put(match);
+					match = ifp;
+					hiscore = score;
+					in6_ifa_hold(ifp);
+
+					if (IPV6_GET_SADDR_MAXSCORE(score)) {
 						read_unlock_bh(&idev->lock);
 						goto out_unlock_base;
 					}
-
-					if (!match && !(ifp->flags&IFA_F_TENTATIVE)) {
-						match = ifp;
-						in6_ifa_hold(ifp);
-					}
 				}
 			}
 			read_unlock_bh(&idev->lock);
@@ -648,24 +831,16 @@
 	read_unlock(&dev_base_lock);
 
 out:
-	if (ifp == NULL) {
-		ifp = match;
-		match = NULL;
-	}
-
 	err = -EADDRNOTAVAIL;
-	if (ifp) {
-		ipv6_addr_copy(saddr, &ifp->addr);
+	if (match) {
+		ipv6_addr_copy(saddr, &match->addr);
 		err = 0;
-		in6_ifa_put(ifp);
-	}
-	if (match)
 		in6_ifa_put(match);
+	}
 
 	return err;
 }
 
-
 int ipv6_get_saddr(struct dst_entry *dst,
 		   struct in6_addr *daddr, struct in6_addr *saddr)
 {
@@ -706,7 +881,7 @@
 	return err;
 }
 
-int ipv6_count_addresses(struct inet6_dev *idev)
+static int ipv6_count_addresses(struct inet6_dev *idev)
 {
 	int cnt = 0;
 	struct inet6_ifaddr *ifp;
@@ -785,6 +960,21 @@
 		ifp->flags |= IFA_F_TENTATIVE;
 		spin_unlock_bh(&ifp->lock);
 		in6_ifa_put(ifp);
+#ifdef CONFIG_IPV6_PRIVACY
+	} else if (ifp->flags&IFA_F_TEMPORARY) {
+		struct inet6_ifaddr *ifpub;
+		spin_lock_bh(&ifp->lock);
+		ifpub = ifp->ifpub;
+		if (ifpub) {
+			in6_ifa_hold(ifpub);
+			spin_unlock_bh(&ifp->lock);
+			ipv6_create_tempaddr(ifpub, ifp);
+			in6_ifa_put(ifpub);
+		} else {
+			spin_unlock_bh(&ifp->lock);
+		}
+		ipv6_del_addr(ifp);
+#endif
 	} else
 		ipv6_del_addr(ifp);
 }
@@ -857,6 +1047,110 @@
 	return err;
 }
 
+#ifdef CONFIG_IPV6_PRIVACY
+/* (Re)generate idev->rndid (RFC 3041 3.2, 3.5); returns 0, or -1 when md5_tfm is NULL. */
+static int __ipv6_regen_rndid(struct inet6_dev *idev)
+{
+	struct net_device *dev;
+	struct scatterlist sg[2];
+
+	sg[0].page = virt_to_page(idev->entropy);	/* hash input, part 1: stored entropy */
+	sg[0].offset = ((long) idev->entropy & ~PAGE_MASK);
+	sg[0].length = 8;
+	sg[1].page = virt_to_page(idev->work_eui64);	/* hash input, part 2: EUI-64 (filled in below) */
+	sg[1].offset = ((long) idev->work_eui64 & ~PAGE_MASK);
+	sg[1].length = 8;
+
+	dev = idev->dev;
+
+	if (ipv6_generate_eui64(idev->work_eui64, dev)) {
+		printk(KERN_INFO
+			"__ipv6_regen_rndid(idev=%p): cannot get EUI64 identifier; use random bytes.\n",
+			idev);
+		get_random_bytes(idev->work_eui64, sizeof(idev->work_eui64));
+	}
+regen:	/* each pass hashes the entropy saved by the previous pass */
+	spin_lock(&md5_tfm_lock);
+	if (unlikely(md5_tfm == NULL)) {
+		spin_unlock(&md5_tfm_lock);
+		return -1;
+	}
+	crypto_digest_init(md5_tfm);
+	crypto_digest_update(md5_tfm, sg, 2);
+	crypto_digest_final(md5_tfm, idev->work_digest);
+	spin_unlock(&md5_tfm_lock);
+
+	memcpy(idev->rndid, &idev->work_digest[0], 8);	/* first half: the new identifier */
+	idev->rndid[0] &= ~0x02;	/* clear the universal/local bit */
+	memcpy(idev->entropy, &idev->work_digest[8], 8);	/* second half: next round's entropy */
+
+	/*
+	 * <draft-ietf-ipngwg-temp-addresses-v2-00.txt>:
+	 * reject the generated identifier and try again if it is one of
+	 *
+	 *  - Reserved subnet anycast (RFC 2526)
+	 *	11111101 11....11 1xxxxxxx  (bytes 1..6 must be all ones)
+	 *  - ISATAP (draft-ietf-ngtrans-isatap-01.txt) 4.3
+	 *	00-00-5E-FE-xx-xx-xx-xx
+	 *  - value 0
+	 *  - XXX: already assigned to an address on the device (not checked)
+	 */
+	if (idev->rndid[0] == 0xfd &&
+	    (idev->rndid[1]&idev->rndid[2]&idev->rndid[3]&idev->rndid[4]&idev->rndid[5]&idev->rndid[6]) == 0xff &&
+	    (idev->rndid[7]&0x80))
+		goto regen;
+	if ((idev->rndid[0]|idev->rndid[1]) == 0) {
+		if (idev->rndid[2] == 0x5e && idev->rndid[3] == 0xfe)
+			goto regen;
+		if ((idev->rndid[2]|idev->rndid[3]|idev->rndid[4]|idev->rndid[5]|idev->rndid[6]|idev->rndid[7]) == 0x00)
+			goto regen;
+	}
+
+	return 0;
+}
+
+static void ipv6_regen_rndid(unsigned long data)	/* regen_timer handler; data is the inet6_dev */
+{
+	struct inet6_dev *idev = (struct inet6_dev *) data;
+	unsigned long expires;
+
+	read_lock_bh(&addrconf_lock);
+	write_lock_bh(&idev->lock);
+
+	if (idev->dead)
+		goto out;
+
+	if (__ipv6_regen_rndid(idev) < 0)
+		goto out;
+	
+	expires = jiffies +	/* preferred lifetime minus the DAD retry budget and desync_factor */
+		idev->cnf.temp_prefered_lft * HZ - 
+		idev->cnf.regen_max_retry * idev->cnf.dad_transmits * idev->nd_parms->retrans_time - desync_factor;
+	if (time_before(expires, jiffies)) {
+		printk(KERN_WARNING
+			"ipv6_regen_rndid(): too short regeneration interval; timer disabled for %s.\n",
+			idev->dev->name);
+		goto out;
+	}
+
+	if (!mod_timer(&idev->regen_timer, expires))	/* 0: the timer was not pending before */
+		in6_dev_hold(idev);	/* reference owned by the newly armed timer */
+
+out:
+	write_unlock_bh(&idev->lock);
+	read_unlock_bh(&addrconf_lock);
+	in6_dev_put(idev);	/* every invocation consumes one idev reference */
+}
+
+static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr)
+{
+	/* Keep the current rndid unless it equals tmpaddr's low 64 bits. */
+	if (!tmpaddr || memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) != 0)
+		return 0;
+	return __ipv6_regen_rndid(idev);
+}
+#endif
+
 /*
  *	Add prefix route.
  */
@@ -883,7 +1177,7 @@
 	if (dev->type == ARPHRD_SIT && (dev->flags&IFF_POINTOPOINT))
 		rtmsg.rtmsg_flags |= RTF_NONEXTHOP;
 
-	ip6_route_add(&rtmsg, NULL);
+	ip6_route_add(&rtmsg, NULL, NULL);
 }
 
 /* Create "default" multicast route to the interface */
@@ -900,7 +1194,7 @@
 	rtmsg.rtmsg_ifindex = dev->ifindex;
 	rtmsg.rtmsg_flags = RTF_UP;
 	rtmsg.rtmsg_type = RTMSG_NEWROUTE;
-	ip6_route_add(&rtmsg, NULL);
+	ip6_route_add(&rtmsg, NULL, NULL);
 }
 
 static void sit_route_add(struct net_device *dev)
@@ -917,7 +1211,7 @@
 	rtmsg.rtmsg_flags	= RTF_UP|RTF_NONEXTHOP;
 	rtmsg.rtmsg_ifindex	= dev->ifindex;
 
-	ip6_route_add(&rtmsg, NULL);
+	ip6_route_add(&rtmsg, NULL, NULL);
 }
 
 static void addrconf_add_lroute(struct net_device *dev)
@@ -948,7 +1242,6 @@
 void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 {
 	struct prefix_info *pinfo;
-	struct rt6_info *rt;
 	__u32 valid_lft;
 	__u32 prefered_lft;
 	int addr_type;
@@ -1004,32 +1297,33 @@
 	else
 		rt_expires = jiffies + valid_lft * HZ;
 
-	rt = rt6_lookup(&pinfo->prefix, NULL, dev->ifindex, 1);
-
-	if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
-		if (rt->rt6i_flags&RTF_EXPIRES) {
-			if (pinfo->onlink == 0 || valid_lft == 0) {
-				ip6_del_rt(rt, NULL);
-				rt = NULL;
-			} else {
-				rt->rt6i_expires = rt_expires;
+	if (pinfo->onlink) {
+		struct rt6_info *rt;
+		rt = rt6_lookup(&pinfo->prefix, NULL, dev->ifindex, 1);
+
+		if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
+			if (rt->rt6i_flags&RTF_EXPIRES) {
+				if (valid_lft == 0) {
+					ip6_del_rt(rt, NULL, NULL);
+					rt = NULL;
+				} else {
+					rt->rt6i_expires = rt_expires;
+				}
 			}
+		} else if (valid_lft) {
+			addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
+					      dev, rt_expires, RTF_ADDRCONF|RTF_EXPIRES|RTF_PREFIX_RT);
 		}
-	} else if (pinfo->onlink && valid_lft) {
-		addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
-				      dev, rt_expires, RTF_ADDRCONF|RTF_EXPIRES|RTF_PREFIX_RT);
+		if (rt)
+			dst_release(&rt->u.dst);
 	}
-	if (rt)
-		dst_release(&rt->u.dst);
 
 	/* Try to figure out our local address for this prefix */
 
 	if (pinfo->autoconf && in6_dev->cnf.autoconf) {
 		struct inet6_ifaddr * ifp;
 		struct in6_addr addr;
-		int plen;
-
-		plen = pinfo->prefix_len >> 3;
+		int create = 0, update_lft = 0;
 
 		if (pinfo->prefix_len == 64) {
 			memcpy(&addr, &pinfo->prefix, 8);
@@ -1058,33 +1352,95 @@
 				ifp = ipv6_add_addr(in6_dev, &addr, pinfo->prefix_len,
 						    addr_type&IPV6_ADDR_SCOPE_MASK, 0);
 
-			if (IS_ERR(ifp)) {
+			if (!ifp || IS_ERR(ifp)) {
 				in6_dev_put(in6_dev);
 				return;
 			}
 
+			update_lft = create = 1;
 			addrconf_dad_start(ifp, RTF_ADDRCONF|RTF_PREFIX_RT);
 		}
 
-		if (ifp && valid_lft == 0) {
-			ipv6_del_addr(ifp);
-			ifp = NULL;
-		}
-
 		if (ifp) {
 			int flags;
+			unsigned long now;
+#ifdef CONFIG_IPV6_PRIVACY
+			struct inet6_ifaddr *ift;
+#endif
+			u32 stored_lft;
 
+			/* update lifetime (RFC2462 5.5.3 e) */
 			spin_lock(&ifp->lock);
-			ifp->valid_lft = valid_lft;
-			ifp->prefered_lft = prefered_lft;
-			ifp->tstamp = jiffies;
-			flags = ifp->flags;
-			ifp->flags &= ~IFA_F_DEPRECATED;
-			spin_unlock(&ifp->lock);
-
-			if (!(flags&IFA_F_TENTATIVE))
-				ipv6_ifa_notify((flags&IFA_F_DEPRECATED) ?
-						0 : RTM_NEWADDR, ifp);
+			now = jiffies;
+			if (ifp->valid_lft > (now - ifp->tstamp) / HZ)
+				stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ;
+			else
+				stored_lft = 0;
+			if (!update_lft && stored_lft) {
+				if (valid_lft > MIN_VALID_LIFETIME ||
+				    valid_lft > stored_lft)
+					update_lft = 1;
+				else if (stored_lft <= MIN_VALID_LIFETIME) {
+					/* valid_lft <= stored_lft is always true */
+					/* XXX: IPsec */
+					update_lft = 0;
+				} else {
+					valid_lft = MIN_VALID_LIFETIME;
+					if (valid_lft < prefered_lft)
+						prefered_lft = valid_lft;
+					update_lft = 1;
+				}
+			}
+
+			if (update_lft) {
+				ifp->valid_lft = valid_lft;
+				ifp->prefered_lft = prefered_lft;
+				ifp->tstamp = now;
+				flags = ifp->flags;
+				ifp->flags &= ~IFA_F_DEPRECATED;
+				spin_unlock(&ifp->lock);
+
+				if (!(flags&IFA_F_TENTATIVE))
+					ipv6_ifa_notify((flags&IFA_F_DEPRECATED) ?
+							0 : RTM_NEWADDR, ifp);
+			} else
+				spin_unlock(&ifp->lock);
+
+#ifdef CONFIG_IPV6_PRIVACY
+			read_lock_bh(&in6_dev->lock);
+			/* update all temporary addresses in the list */
+			for (ift=in6_dev->tempaddr_list; ift; ift=ift->tmp_next) {
+				/*
+				 * When adjusting the lifetimes of an existing
+				 * temporary address, only lower the lifetimes.
+				 * Implementations must not increase the
+				 * lifetimes of an existing temporary address
+				 * when processing a Prefix Information Option.
+				 */
+				spin_lock(&ift->lock);
+				flags = ift->flags;
+				if (ift->valid_lft > valid_lft &&
+				    ift->valid_lft - valid_lft > (jiffies - ift->tstamp) / HZ)
+					ift->valid_lft = valid_lft + (jiffies - ift->tstamp) / HZ;
+				if (ift->prefered_lft > prefered_lft &&
+				    ift->prefered_lft - prefered_lft > (jiffies - ift->tstamp) / HZ)
+					ift->prefered_lft = prefered_lft + (jiffies - ift->tstamp) / HZ;
+				spin_unlock(&ift->lock);
+				if (!(flags&IFA_F_TENTATIVE))
+					ipv6_ifa_notify(0, ift);
+			}
+
+			if (create && in6_dev->cnf.use_tempaddr > 0) {
+				/*
+				 * When a new public address is created as described in [ADDRCONF],
+				 * also create a new temporary address.
+				 */
+				read_unlock_bh(&in6_dev->lock); 
+				ipv6_create_tempaddr(ifp, NULL);
+			} else {
+				read_unlock_bh(&in6_dev->lock);
+			}
+#endif
 			in6_ifa_put(ifp);
 			addrconf_verify(0);
 		}
@@ -1515,6 +1871,27 @@
 	/* Step 3: clear address list */
 
 	write_lock_bh(&idev->lock);
+#ifdef CONFIG_IPV6_PRIVACY
+	if (how == 1 && del_timer(&idev->regen_timer))
+		in6_dev_put(idev);
+
+	/* clear tempaddr list */
+	while ((ifa = idev->tempaddr_list) != NULL) {
+		idev->tempaddr_list = ifa->tmp_next;
+		ifa->tmp_next = NULL;
+		ifa->dead = 1;
+		write_unlock_bh(&idev->lock);
+		spin_lock_bh(&ifa->lock);
+
+		if (ifa->ifpub) {
+			in6_ifa_put(ifa->ifpub);
+			ifa->ifpub = NULL;
+		}
+		spin_unlock_bh(&ifa->lock);
+		in6_ifa_put(ifa);
+		write_lock_bh(&idev->lock);
+	}
+#endif
 	while ((ifa = idev->addr_list) != NULL) {
 		idev->addr_list = ifa->if_next;
 		ifa->if_next = NULL;
@@ -1539,10 +1916,11 @@
 	/* Shot the device (if unregistered) */
 
 	if (how == 1) {
-		neigh_parms_release(&nd_tbl, idev->nd_parms);
 #ifdef CONFIG_SYSCTL
 		addrconf_sysctl_unregister(&idev->cnf);
+		neigh_sysctl_unregister(idev->nd_parms);
 #endif
+		neigh_parms_release(&nd_tbl, idev->nd_parms);
 		in6_dev_put(idev);
 	}
 	return 0;
@@ -1592,7 +1970,7 @@
 
 		rtmsg.rtmsg_ifindex = ifp->idev->dev->ifindex;
 
-		ip6_route_add(&rtmsg, NULL);
+		ip6_route_add(&rtmsg, NULL, NULL);
 	}
 
 out:
@@ -1612,7 +1990,8 @@
 	addrconf_join_solict(dev, &ifp->addr);
 
 	if (ifp->prefix_len != 128 && (ifp->flags&IFA_F_PERMANENT))
-		addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, 0, flags);
+		addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, 0,
+					flags);
 
 	net_srandom(ifp->addr.s6_addr32[3]);
 	rand_num = net_random() % (ifp->idev->cnf.rtr_solicit_delay ? : 1);
@@ -1787,6 +2166,9 @@
 		write_lock(&addrconf_hash_lock);
 		for (ifp=inet6_addr_lst[i]; ifp; ifp=ifp->lst_next) {
 			unsigned long age;
+#ifdef CONFIG_IPV6_PRIVACY
+			unsigned long regen_advance;
+#endif
 
 			if (ifp->flags & IFA_F_PERMANENT)
 				continue;
@@ -1794,6 +2176,12 @@
 			spin_lock(&ifp->lock);
 			age = (now - ifp->tstamp) / HZ;
 
+#ifdef CONFIG_IPV6_PRIVACY
+			regen_advance = ifp->idev->cnf.regen_max_retry * 
+					ifp->idev->cnf.dad_transmits * 
+					ifp->idev->nd_parms->retrans_time / HZ;
+#endif
+
 			if (age >= ifp->valid_lft) {
 				spin_unlock(&ifp->lock);
 				in6_ifa_hold(ifp);
@@ -1822,6 +2210,28 @@
 					in6_ifa_put(ifp);
 					goto restart;
 				}
+#ifdef CONFIG_IPV6_PRIVACY
+			} else if ((ifp->flags&IFA_F_TEMPORARY) &&
+				   !(ifp->flags&IFA_F_TENTATIVE)) {
+				if (age >= ifp->prefered_lft - regen_advance) {
+					struct inet6_ifaddr *ifpub = ifp->ifpub;
+					if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next))
+						next = ifp->tstamp + ifp->prefered_lft * HZ;
+					if (!ifp->regen_count && ifpub) {
+						ifp->regen_count++;
+						in6_ifa_hold(ifp);
+						in6_ifa_hold(ifpub);
+						spin_unlock(&ifp->lock);
+						write_unlock(&addrconf_hash_lock);
+						ipv6_create_tempaddr(ifpub, ifp);
+						in6_ifa_put(ifpub);
+						in6_ifa_put(ifp);
+						goto restart;
+					}
+				} else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next))
+					next = ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ;
+				spin_unlock(&ifp->lock);
+#endif
 			} else {
 				/* ifp->prefered_lft <= ifp->valid_lft */
 				if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next))
@@ -2106,7 +2516,7 @@
 
 	switch (event) {
 	case RTM_NEWADDR:
-		ip6_rt_addr_add(&ifp->addr, ifp->idev->dev);
+		ip6_rt_addr_add(&ifp->addr, ifp->idev->dev, 0);
 		break;
 	case RTM_DELADDR:
 		addrconf_leave_solict(ifp->idev->dev, &ifp->addr);
@@ -2157,7 +2567,7 @@
 static struct addrconf_sysctl_table
 {
 	struct ctl_table_header *sysctl_header;
-	ctl_table addrconf_vars[11];
+	ctl_table addrconf_vars[16];
 	ctl_table addrconf_dev[2];
 	ctl_table addrconf_conf_dir[2];
 	ctl_table addrconf_proto_dir[2];
@@ -2204,6 +2614,28 @@
          &ipv6_devconf.rtr_solicit_delay, sizeof(int), 0644, NULL,
          &proc_dointvec_jiffies},
 
+#ifdef CONFIG_IPV6_PRIVACY
+	{NET_IPV6_USE_TEMPADDR, "use_tempaddr",
+	 &ipv6_devconf.use_tempaddr, sizeof(int), 0644, NULL,
+	 &proc_dointvec},
+
+	{NET_IPV6_TEMP_VALID_LFT, "temp_valid_lft",
+	 &ipv6_devconf.temp_valid_lft, sizeof(int), 0644, NULL,
+	 &proc_dointvec},
+
+	{NET_IPV6_TEMP_PREFERED_LFT, "temp_prefered_lft",
+	 &ipv6_devconf.temp_prefered_lft, sizeof(int), 0644, NULL,
+	 &proc_dointvec},
+
+	{NET_IPV6_REGEN_MAX_RETRY, "regen_max_retry",
+	 &ipv6_devconf.regen_max_retry, sizeof(int), 0644, NULL,
+	 &proc_dointvec},
+
+	{NET_IPV6_MAX_DESYNC_FACTOR, "max_desync_factor",
+	 &ipv6_devconf.max_desync_factor, sizeof(int), 0644, NULL,
+	 &proc_dointvec},
+#endif
+
 	{0}},
 
 	{{NET_PROTO_CONF_ALL, "all", NULL, 0, 0555, addrconf_sysctl.addrconf_vars},{0}},
@@ -2222,7 +2654,7 @@
 	if (t == NULL)
 		return;
 	memcpy(t, &addrconf_sysctl, sizeof(*t));
-	for (i=0; i<sizeof(t->addrconf_vars)/sizeof(t->addrconf_vars[0])-1; i++) {
+	for (i=0; t->addrconf_vars[i].data; i++) {
 		t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf;
 		t->addrconf_vars[i].de = NULL;
 		t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */
@@ -2285,7 +2717,16 @@
 {
 #ifdef MODULE
 	struct net_device *dev;
+#endif
 
+#ifdef CONFIG_IPV6_PRIVACY
+	md5_tfm = crypto_alloc_tfm("md5", 0);
+	if (unlikely(md5_tfm == NULL))
+		printk(KERN_WARNING
+			"failed to load transform for md5\n");
+#endif
+
+#ifdef MODULE
 	/* This takes sense only during module load. */
 	rtnl_lock();
 	for (dev = dev_base; dev; dev = dev->next) {
@@ -2371,6 +2812,13 @@
 
 	rtnl_unlock();
 
+#ifdef CONFIG_IPV6_PRIVACY
+	if (likely(md5_tfm != NULL)) {
+		crypto_free_tfm(md5_tfm);
+		md5_tfm = NULL;
+	}
+#endif
+
 #ifdef CONFIG_PROC_FS
 	proc_net_remove("if_inet6");
 #endif
Index: net/ipv6/af_inet6.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv6/af_inet6.c,v
retrieving revision 1.1.1.21
retrieving revision 1.1.1.21.2.1
diff -u -r1.1.1.21 -r1.1.1.21.2.1
--- a/net/ipv6/af_inet6.c	28 Nov 2003 18:26:21 -0000	1.1.1.21
+++ b/net/ipv6/af_inet6.c	16 Apr 2004 13:16:24 -0000	1.1.1.21.2.1
@@ -58,6 +58,9 @@
 #include <net/transp_v6.h>
 #include <net/ip6_route.h>
 #include <net/addrconf.h>
+#ifdef CONFIG_IPV6_TUNNEL
+#include <net/ip6_tunnel.h>
+#endif
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -181,7 +184,7 @@
 	/* Init the ipv4 part of the socket since we can have sockets
 	 * using v6 API for ipv4.
 	 */
-	sk->protinfo.af_inet.ttl	= 64;
+	sk->protinfo.af_inet.uc_ttl	= -1;
 
 	sk->protinfo.af_inet.mc_loop	= 1;
 	sk->protinfo.af_inet.mc_ttl	= 1;
@@ -630,6 +633,11 @@
 	 */
 	inet6_register_protosw(&rawv6_protosw);
 
+	/* Register the family here so that the init calls below will
+	 * be able to create sockets. (?? is this dangerous ??)
+	 */
+	(void) sock_register(&inet6_family_ops);
+
 	/*
 	 *	ipngwg API draft makes clear that the correct semantics
 	 *	for TCP and UDP is to consider one TCP and UDP instance
@@ -646,6 +654,11 @@
 	err = ndisc_init(&inet6_family_ops);
 	if (err)
 		goto ndisc_fail;
+#ifdef CONFIG_IPV6_TUNNEL
+	err = ip6_tunnel_init();
+	if (err)
+		goto ip6_tunnel_fail;
+#endif
 	err = igmp6_init(&inet6_family_ops);
 	if (err)
 		goto igmp_fail;
@@ -671,15 +684,17 @@
 	ip6_flowlabel_init();
 	addrconf_init();
 	sit_init();
+
+	/* Init v6 extension headers. */
+	ipv6_rthdr_init();
 	ipv6_frag_init();
+	ipv6_nodata_init();
+	ipv6_destopt_init();
 
 	/* Init v6 transport protocols. */
 	udpv6_init();
 	tcpv6_init();
 
-	/* Now the userspace is allowed to create INET6 sockets. */
-	(void) sock_register(&inet6_family_ops);
-	
 	return 0;
 
 #ifdef CONFIG_PROC_FS
@@ -697,6 +712,10 @@
 	igmp6_cleanup();
 #endif
 igmp_fail:
+#ifdef CONFIG_IPV6_TUNNEL
+	ip6_tunnel_cleanup();
+ip6_tunnel_fail:
+#endif
 	ndisc_cleanup();
 ndisc_fail:
 	icmpv6_cleanup();
@@ -730,6 +749,9 @@
 	ip6_route_cleanup();
 	ipv6_packet_cleanup();
 	igmp6_cleanup();
+#ifdef CONFIG_IPV6_TUNNEL
+	ip6_tunnel_cleanup();
+#endif
 	ndisc_cleanup();
 	icmpv6_cleanup();
 #ifdef CONFIG_SYSCTL
Index: net/ipv6/ah6.c
===================================================================
RCS file: net/ipv6/ah6.c
diff -N net/ipv6/ah6.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ b/net/ipv6/ah6.c	16 Apr 2004 13:16:24 -0000	1.8.2.1
@@ -0,0 +1,521 @@
+/*
+ * Copyright (C)2002 USAGI/WIDE Project
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * Authors
+ *
+ *	Mitsuru KANDA @USAGI       : IPv6 Support 
+ * 	Kazunori MIYAZAWA @USAGI   :
+ * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
+ * 	
+ * 	This file is derived from net/ipv4/ah.c.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <net/inet_ecn.h>
+#include <net/ip.h>
+#include <net/xfrm.h>
+#include <net/ah.h>
+#include <linux/crypto.h>
+#include <linux/pfkeyv2.h>
+#include <net/icmp.h>
+#include <net/ipv6.h>
+#include <net/xfrm.h>
+#include <asm/scatterlist.h>
+
+/* XXX not AH-specific.  Expands to eight comma-separated arguments: s6_addr16[0..7] of addr, each passed through ntohs(). */
+#define NIP6(addr) \
+	ntohs((addr).s6_addr16[0]),\
+	ntohs((addr).s6_addr16[1]),\
+	ntohs((addr).s6_addr16[2]),\
+	ntohs((addr).s6_addr16[3]),\
+	ntohs((addr).s6_addr16[4]),\
+	ntohs((addr).s6_addr16[5]),\
+	ntohs((addr).s6_addr16[6]),\
+	ntohs((addr).s6_addr16[7])
+
+/*
+ * Walk the TLV options of one options header and zero the data bytes of
+ * every option whose type has bit 0x20 set.
+ *
+ * Returns 1 when the options exactly fill the header, 0 when an option
+ * would run past its end.
+ */
+static int zero_out_mutable_opts(struct ipv6_opt_hdr *opthdr)
+{
+	u8 *base = (u8 *)opthdr;
+	int pos = 2;				/* options start after the first two bytes */
+	int remaining = ipv6_optlen(opthdr) - 2;
+
+	while (remaining > 0) {
+		int step;
+
+		if (base[pos] == IPV6_TLV_PAD0) {
+			/* Pad0 is counted as a single byte. */
+			step = 1;
+		} else {
+			/* Type byte, length byte, then that many data bytes. */
+			if (remaining < 2)
+				return 0;
+			step = base[pos+1] + 2;
+			if (remaining < step)
+				return 0;
+			if (base[pos] & 0x20)
+				memset(&base[pos+2], 0, base[pos+1]);
+		}
+
+		pos += step;
+		remaining -= step;
+	}
+
+	/* Success only if the options consumed the header exactly. */
+	return remaining == 0;
+}
+
+/* Clear the mutable parts of the extension headers that follow the IPv6 header:
+ * flagged option data, routing segments_left and, for dir == XFRM_POLICY_OUT,
+ * the AH auth data.  Returns the nexthdr where the walk stopped, 0 on bad options. */
+static int ipv6_clear_mutable_options(struct sk_buff *skb, u16 *nh_offset, int dir)
+{
+	u16 offset = sizeof(struct ipv6hdr);
+	struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+	unsigned int packet_len = skb->tail - skb->nh.raw;
+	u8 nexthdr = skb->nh.ipv6h->nexthdr;
+	u8 nextnexthdr = 0;	/* NOTE(review): written in the AH case but never read */
+	/* Until an extension header is visited, *nh_offset names the fixed header's nexthdr byte. */
+	*nh_offset = ((unsigned char *)&skb->nh.ipv6h->nexthdr) - skb->nh.raw;
+
+	while (offset + 1 <= packet_len) {
+		switch (nexthdr) {
+		case NEXTHDR_HOP:
+			*nh_offset = offset;	/* offset of this header from skb->nh.raw */
+			offset += ipv6_optlen(exthdr);
+			if (!zero_out_mutable_opts(exthdr)) {
+				if (net_ratelimit())
+					printk(KERN_WARNING "overrun hopopts\n"); 
+				return 0;
+			}
+			nexthdr = exthdr->nexthdr;
+			exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+			break;
+
+		case NEXTHDR_ROUTING:
+			*nh_offset = offset;
+			offset += ipv6_optlen(exthdr);
+			((struct ipv6_rt_hdr*)exthdr)->segments_left = 0; 
+			nexthdr = exthdr->nexthdr;
+			exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+			break;
+
+		case NEXTHDR_DEST:
+			*nh_offset = offset;
+			offset += ipv6_optlen(exthdr);
+			if (!zero_out_mutable_opts(exthdr))  {
+				if (net_ratelimit())
+					printk(KERN_WARNING "overrun destopt\n"); 
+				return 0;
+			}
+			nexthdr = exthdr->nexthdr;
+			exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+			break;
+
+		case NEXTHDR_AUTH:
+			if (dir == XFRM_POLICY_OUT) {
+				memset(((struct ipv6_auth_hdr*)exthdr)->auth_data, 0, 
+				       (((struct ipv6_auth_hdr*)exthdr)->hdrlen - 1) << 2);
+			}
+			if (exthdr->nexthdr == NEXTHDR_DEST) {	/* destination options directly after AH */
+				offset += (((struct ipv6_auth_hdr*)exthdr)->hdrlen + 2) << 2;
+				exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+				nextnexthdr = exthdr->nexthdr;
+				if (!zero_out_mutable_opts(exthdr)) {
+					if (net_ratelimit())
+						printk(KERN_WARNING "overrun destopt\n");
+					return 0;
+				}
+			}
+			return nexthdr;
+		default :
+			return nexthdr;
+		}
+	}
+	return nexthdr;
+}
+
+/* Insert an AH header into an outgoing IPv6 packet and compute its ICV.
+ * Returns NET_XMIT_BYPASS on success; on failure the skb is freed and an
+ * error code is returned. */
+int ah6_output(struct sk_buff *skb)
+{
+	int err;
+	int hdr_len = sizeof(struct ipv6hdr);
+	struct dst_entry *dst = skb->dst;
+	struct xfrm_state *x  = dst->xfrm;
+	struct ipv6hdr *iph = NULL;
+	struct ip_auth_hdr *ah;
+	struct ah_data *ahp;
+	u16 nh_offset = 0;
+	u8 nexthdr;
+
+	if (skb->ip_summed == CHECKSUM_HW && skb_checksum_help(skb) == NULL) {
+		err = -EINVAL;
+		goto error_nolock;
+	}
+
+	spin_lock_bh(&x->lock);
+	err = xfrm_check_output(x, skb, AF_INET6);
+	if (err)
+		goto error;
+
+	if (x->props.mode) {	/* prepend a new outer IPv6 header; iph stays on the inner one */
+		iph = skb->nh.ipv6h;
+		skb->nh.ipv6h = (struct ipv6hdr*)skb_push(skb, x->props.header_len);
+		skb->nh.ipv6h->version = 6;
+		skb->nh.ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+		skb->nh.ipv6h->nexthdr = IPPROTO_AH;
+		memcpy(&skb->nh.ipv6h->saddr, &x->props.saddr, sizeof(struct in6_addr));
+		memcpy(&skb->nh.ipv6h->daddr, &x->id.daddr, sizeof(struct in6_addr));
+		ah = (struct ip_auth_hdr*)(skb->nh.ipv6h+1);
+		ah->nexthdr = IPPROTO_IPV6;
+	} else {		/* keep the existing headers: save a copy in iph, move them to the new front */
+		hdr_len = skb->h.raw - skb->nh.raw;
+		iph = kmalloc(hdr_len, GFP_ATOMIC);
+		if (!iph) {
+			err = -ENOMEM;
+			goto error;
+		}
+		memcpy(iph, skb->data, hdr_len);
+		skb->nh.ipv6h = (struct ipv6hdr*)skb_push(skb, x->props.header_len);
+		memcpy(skb->nh.ipv6h, iph, hdr_len);
+		nexthdr = ipv6_clear_mutable_options(skb, &nh_offset, XFRM_POLICY_OUT);
+		if (nexthdr == 0) {
+			kfree(iph);	/* the saved header copy would otherwise leak */
+			err = -EINVAL;	/* err still holds 0 from xfrm_check_output() */
+			goto error;
+		}
+		skb->nh.raw[nh_offset] = IPPROTO_AH;
+		skb->nh.ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+		ah = (struct ip_auth_hdr*)(skb->nh.raw+hdr_len);
+		skb->h.raw = (unsigned char*) ah;
+		ah->nexthdr = nexthdr;
+	}
+	/* These IPv6 header fields are zeroed while the ICV is computed. */
+	skb->nh.ipv6h->priority    = 0;
+	skb->nh.ipv6h->flow_lbl[0] = 0;
+	skb->nh.ipv6h->flow_lbl[1] = 0;
+	skb->nh.ipv6h->flow_lbl[2] = 0;
+	skb->nh.ipv6h->hop_limit    = 0;
+	ahp = x->data;
+	ah->hdrlen  = (XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_trunc_len) >> 2) - 2;
+	ah->reserved = 0;
+	ah->spi = x->id.spi;
+	ah->seq_no = htonl(++x->replay.oseq);
+	ahp->icv(ahp, skb, ah->auth_data);
+	/* Put back the header fields that were cleared for the ICV computation. */
+	if (x->props.mode) {
+		skb->nh.ipv6h->hop_limit   = iph->hop_limit;
+		skb->nh.ipv6h->priority    = iph->priority;
+		skb->nh.ipv6h->flow_lbl[0] = iph->flow_lbl[0];
+		skb->nh.ipv6h->flow_lbl[1] = iph->flow_lbl[1];
+		skb->nh.ipv6h->flow_lbl[2] = iph->flow_lbl[2];
+		if (x->props.flags & XFRM_STATE_NOECN)
+			IP6_ECN_clear(skb->nh.ipv6h);
+	} else {
+		memcpy(skb->nh.ipv6h, iph, hdr_len);
+		skb->nh.raw[nh_offset] = IPPROTO_AH;
+		skb->nh.ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+		kfree (iph);
+	}
+	skb->nh.raw = skb->data;
+	x->curlft.bytes += skb->len;
+	x->curlft.packets++;
+	spin_unlock_bh(&x->lock);
+	if ((skb->dst = dst_pop(dst)) == NULL) {
+		err = -EHOSTUNREACH;
+		goto error_nolock;
+	}
+	return NET_XMIT_BYPASS;
+error:
+	spin_unlock_bh(&x->lock);
+error_nolock:
+	kfree_skb(skb);
+	return err;
+}
+
+int ah6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
+{
+	/*
+	 * Verify the AH ICV of an inbound packet and strip the AH header.
+	 * Returns the protocol that followed AH, or -EINVAL on any failure.
+	 *
+	 * Before AH processing:
+	 * [IPv6][Ext1][Ext2][AH][Dest][Payload]
+	 * |<-------------->| hdr_len
+	 * |<------------------------>| cleared_hlen
+	 *
+	 * A copy of the headers is saved before their mutable fields are
+	 * cleared for the ICV check.  Afterwards skb->nh.raw is advanced by
+	 * the AH length and the first hdr_len saved bytes are copied back,
+	 * followed by the saved destination options header if one trailed AH:
+	 *
+	 * |<>|[IPv6][Ext1][Ext2][Dest][Payload]
+	 */
+
+	struct ipv6_auth_hdr *ah;
+	struct ah_data *ahp;
+	unsigned char *tmp_hdr = NULL;
+	u16 hdr_len;
+	u16 ah_hlen;
+	u16 cleared_hlen;
+	u16 nh_offset = 0;
+	u8 nexthdr = 0;
+	u8 *prevhdr;
+
+	if (!pskb_may_pull(skb, sizeof(struct ip_auth_hdr)))
+		goto out;
+
+	/* We are going to _remove_ AH header to keep sockets happy,
+	 * so... Later this can change. */
+	if (skb_cloned(skb) &&
+	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+		goto out;
+
+	hdr_len = skb->data - skb->nh.raw;
+	cleared_hlen = hdr_len;
+	ah = (struct ipv6_auth_hdr*)skb->data;
+	ahp = x->data;
+	nexthdr = ah->nexthdr;
+	ah_hlen = (ah->hdrlen + 2) << 2;
+	cleared_hlen += ah_hlen;
+	/* NOTE(review): dsthdr is read at skb->data + ah_hlen before ah_hlen is validated or pulled - TODO confirm it stays in bounds. */
+	if (nexthdr == NEXTHDR_DEST) {
+		struct ipv6_opt_hdr *dsthdr = (struct ipv6_opt_hdr*)(skb->data + ah_hlen);
+		cleared_hlen += ipv6_optlen(dsthdr);
+	}
+
+        if (ah_hlen != XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_full_len) &&
+            ah_hlen != XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_trunc_len))
+                goto out;
+
+	if (!pskb_may_pull(skb, ah_hlen))
+		goto out;
+
+	tmp_hdr = kmalloc(cleared_hlen, GFP_ATOMIC);
+	if (!tmp_hdr)
+		goto out;
+	memcpy(tmp_hdr, skb->nh.raw, cleared_hlen);
+	ipv6_clear_mutable_options(skb, &nh_offset, XFRM_POLICY_IN);	/* NOTE(review): a 0 (bad options) return is not checked */
+	skb->nh.ipv6h->priority    = 0;
+	skb->nh.ipv6h->flow_lbl[0] = 0;
+	skb->nh.ipv6h->flow_lbl[1] = 0;
+	skb->nh.ipv6h->flow_lbl[2] = 0;
+	skb->nh.ipv6h->hop_limit   = 0;
+	/* Recompute the ICV with the ICV field zeroed and compare it with the received value. */
+        {
+		u8 auth_data[MAX_AH_AUTH_LEN];
+
+		memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
+		memset(ah->auth_data, 0, ahp->icv_trunc_len);
+		skb_push(skb, skb->data - skb->nh.raw);	/* skb->data now equals skb->nh.raw */
+		ahp->icv(ahp, skb, ah->auth_data);
+		if (memcmp(ah->auth_data, auth_data, ahp->icv_trunc_len)) {
+			if (net_ratelimit())
+				printk(KERN_WARNING "ipsec ah authentication error\n");
+			x->stats.integrity_failed++;
+			goto free_out;
+		}
+	}
+
+	skb->nh.raw = skb_pull(skb, ah_hlen);
+	memcpy(skb->nh.raw, tmp_hdr, hdr_len);
+	if (nexthdr == NEXTHDR_DEST) {
+		memcpy(skb->nh.raw + hdr_len,
+		       tmp_hdr + hdr_len + ah_hlen,
+		       cleared_hlen - hdr_len - ah_hlen);
+	}
+	prevhdr = (u8*)(skb->nh.raw + nh_offset);
+	*prevhdr = nexthdr;
+	skb->nh.ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+	skb_pull(skb, hdr_len);
+	skb->h.raw = skb->data;
+
+	kfree(tmp_hdr);
+
+	return nexthdr;
+
+free_out:
+	kfree(tmp_hdr);
+out:
+	return -EINVAL;
+}
+
+/* ICMPv6 error handler for AH: logs the SA matching daddr/SPI of the packet at skb->data. */
+void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+	 int type, int code, int offset, __u32 info)
+{
+	struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
+	struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+offset);
+	struct xfrm_state *x;
+
+	/* Only these two ICMPv6 types are handled; with "||" the test was always true. */
+	if (type != ICMPV6_DEST_UNREACH &&
+	    type != ICMPV6_PKT_TOOBIG)
+		return;
+
+	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6);
+	if (!x)
+		return;
+	printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/"
+			"%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+	       ntohl(ah->spi), NIP6(iph->daddr));
+	xfrm_state_put(x);
+}
+
+/* Build the per-SA AH state (digest transform, ICV lengths, ICV work buffer)
+ * and set x->props.header_len.  Returns 0, -ENOMEM or -EINVAL. */
+static int ah6_init_state(struct xfrm_state *x, void *args)
+{
+	struct ah_data *ahp = NULL;
+	struct xfrm_algo_desc *aalg_desc;
+
+	if (!x->aalg)
+		goto error;
+
+	/* null auth can use a zero length key, so only the upper bound is checked */
+	if (x->aalg->alg_key_len > 512)
+		goto error;
+
+	ahp = kmalloc(sizeof(*ahp), GFP_KERNEL);
+	if (ahp == NULL)
+		return -ENOMEM;
+	memset(ahp, 0, sizeof(*ahp));	/* lets the error path test work_icv/tfm safely */
+
+	ahp->key = x->aalg->alg_key;
+	ahp->key_len = (x->aalg->alg_key_len+7)/8;
+	ahp->tfm = crypto_alloc_tfm(x->aalg->alg_name, 0);
+	if (!ahp->tfm)
+		goto error;
+	ahp->icv = ah_hmac_digest;
+	
+	/*
+	 * Lookup the algorithm description maintained by xfrm_algo,
+	 * verify crypto transform properties, and store information
+	 * we need for AH processing.  This lookup cannot fail here
+	 * after a successful crypto_alloc_tfm().
+	 */
+	aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name);
+	BUG_ON(!aalg_desc);
+
+	if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
+	    crypto_tfm_alg_digestsize(ahp->tfm)) {
+		printk(KERN_INFO "AH: %s digestsize %u != %hu\n",
+		       x->aalg->alg_name, crypto_tfm_alg_digestsize(ahp->tfm),
+		       aalg_desc->uinfo.auth.icv_fullbits/8);
+		goto error;
+	}
+	
+	ahp->icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8;
+	ahp->icv_trunc_len = aalg_desc->uinfo.auth.icv_truncbits/8;
+	/* ah6_input() copies icv_trunc_len bytes into a MAX_AH_AUTH_LEN-byte stack buffer. */
+	BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN);
+	
+	ahp->work_icv = kmalloc(ahp->icv_full_len, GFP_KERNEL);
+	if (!ahp->work_icv)
+		goto error;
+	
+	x->props.header_len = XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_trunc_len);
+	if (x->props.mode)
+		x->props.header_len += sizeof(struct ipv6hdr);
+	x->data = ahp;
+	return 0;
+
+error:
+	if (ahp) {
+		if (ahp->work_icv)
+			kfree(ahp->work_icv);
+		if (ahp->tfm)
+			crypto_free_tfm(ahp->tfm);
+		kfree(ahp);
+	}
+	return -EINVAL;
+}
+
+/* xfrm destructor for AH state: release work_icv, the transform, then the ah_data itself. */
+static void ah6_destroy(struct xfrm_state *x)
+{
+	struct ah_data *priv = x->data;
+
+	if (priv != NULL) {
+		if (priv->work_icv != NULL) {
+			kfree(priv->work_icv);
+			priv->work_icv = NULL;
+		}
+		if (priv->tfm != NULL) {
+			crypto_free_tfm(priv->tfm);
+			priv->tfm = NULL;
+		}
+		kfree(priv);
+	}
+}
+
+static struct xfrm_type ah6_type =	/* registered for AF_INET6 by ah6_init() */
+{
+	.description	= "AH6",
+	.owner		= THIS_MODULE,
+	.proto	     	= IPPROTO_AH,
+	.init_state	= ah6_init_state,
+	.destructor	= ah6_destroy,
+	.input		= ah6_input,
+	.output		= ah6_output
+};
+
+static struct inet6_protocol ah6_protocol = {	/* added under IPPROTO_AH by ah6_init() */
+	.handler	=	xfrm6_rcv,
+	.err_handler	=	ah6_err,
+	.flags		=	INET6_PROTO_NOPOLICY,
+};
+
+/* Module init: register the AH xfrm type for AF_INET6, then the IPPROTO_AH handler. */
+int __init ah6_init(void)
+{
+	if (xfrm_register_type(&ah6_type, AF_INET6) < 0) {
+		printk(KERN_INFO "ipv6 ah init: can't add xfrm type\n");
+		goto fail;
+	}
+	if (inet6_add_protocol(&ah6_protocol, IPPROTO_AH) >= 0)
+		return 0;
+
+	printk(KERN_INFO "ipv6 ah init: can't add protocol\n");
+	xfrm_unregister_type(&ah6_type, AF_INET6);
+fail:
+	return -EAGAIN;
+}
+
+/* Module exit: remove the IPPROTO_AH handler, then the xfrm type (the reverse
+ * of ah6_init()).  Failures are only logged. */
+static void __exit ah6_fini(void)
+{
+	if (inet6_del_protocol(&ah6_protocol, IPPROTO_AH) < 0)
+		printk(KERN_INFO "ipv6 ah close: can't remove protocol\n");
+	if (xfrm_unregister_type(&ah6_type, AF_INET6) < 0)
+		printk(KERN_INFO "ipv6 ah close: can't remove xfrm type\n");
+}
+
+module_init(ah6_init);
+module_exit(ah6_fini);
+
+MODULE_LICENSE("GPL");
Index: net/ipv6/anycast.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv6/anycast.c,v
retrieving revision 1.1.1.11
retrieving revision 1.1.1.11.2.1
diff -u -r1.1.1.11 -r1.1.1.11.2.1
--- a/net/ipv6/anycast.c	25 Aug 2003 11:44:44 -0000	1.1.1.11
+++ b/net/ipv6/anycast.c	16 Apr 2004 13:16:24 -0000	1.1.1.11.2.1
@@ -95,7 +95,6 @@
 	return onlink;
 }
 
-
 /*
  *	socket join an anycast group
  */
@@ -109,8 +108,12 @@
 	int	ishost = !ipv6_devconf.forwarding;
 	int	err = 0;
 
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
 	if (ipv6_addr_type(addr) & IPV6_ADDR_MULTICAST)
 		return -EINVAL;
+	if (ipv6_chk_addr(addr, NULL))
+		return -EINVAL;
 
 	pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
 	if (pac == NULL)
@@ -160,21 +163,12 @@
 	 * For hosts, allow link-local or matching prefix anycasts.
 	 * This obviates the need for propagating anycast routes while
 	 * still allowing some non-router anycast participation.
-	 *
-	 * allow anyone to join anycasts that don't require a special route
-	 * and can't be spoofs of unicast addresses (reserved anycast only)
 	 */
 	if (!ip6_onlink(addr, dev)) {
 		if (ishost)
 			err = -EADDRNOTAVAIL;
-		else if (!capable(CAP_NET_ADMIN))
-			err = -EPERM;
 		if (err)
 			goto out_dev_put;
-	} else if (!(ipv6_addr_type(addr) & IPV6_ADDR_ANYCAST) &&
-		   !capable(CAP_NET_ADMIN)) {
-		err = -EPERM;
-		goto out_dev_put;
 	}
 
 	err = ipv6_dev_ac_inc(dev, addr);
@@ -265,6 +259,13 @@
 		dev_put(dev);
 }
 
+#if 0
+/* This function is not used, which is funny. Apparently the author
+ * meant to use it to filter out datagrams inside udp/raw but forgot.
+ *
+ * That is OK: anycast delivery is not special compared to unicast delivery.
+ */
+
 int inet6_ac_check(struct sock *sk, struct in6_addr *addr, int ifindex)
 {
 	struct ipv6_ac_socklist *pac;
@@ -285,6 +286,8 @@
 	return found;
 }
 
+#endif
+
 static void aca_put(struct ifacaddr6 *ac)
 {
 	if (atomic_dec_and_test(&ac->aca_refcnt)) {
@@ -346,7 +349,7 @@
 	idev->ac_list = aca;
 	write_unlock_bh(&idev->lock);
 
-	ip6_rt_addr_add(&aca->aca_addr, dev);
+	ip6_rt_addr_add(&aca->aca_addr, dev, 1);
 
 	addrconf_join_solict(dev, &aca->aca_addr);
 
Index: net/ipv6/datagram.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv6/datagram.c,v
retrieving revision 1.1.1.19
retrieving revision 1.1.1.19.2.1
diff -u -r1.1.1.19 -r1.1.1.19.2.1
--- a/net/ipv6/datagram.c	13 Jun 2003 14:51:39 -0000	1.1.1.19
+++ b/net/ipv6/datagram.c	16 Apr 2004 13:16:24 -0000	1.1.1.19.2.1
@@ -78,7 +78,7 @@
 
 	iph = (struct ipv6hdr*)skb_put(skb, sizeof(struct ipv6hdr));
 	skb->nh.ipv6h = iph;
-	memcpy(&iph->daddr, fl->fl6_dst, 16);
+	ipv6_addr_copy(&iph->daddr, &fl->fl6_dst);
 
 	serr = SKB_EXT_ERR(skb);
 	serr->ee.ee_errno = err;
@@ -89,7 +89,7 @@
 	serr->ee.ee_info = info;
 	serr->ee.ee_data = 0;
 	serr->addr_offset = (u8*)&iph->daddr - skb->nh.raw;
-	serr->port = fl->uli_u.ports.dport;
+	serr->port = fl->fl_ip_dport;
 
 	skb->h.raw = skb->tail;
 	__skb_pull(skb, skb->tail - skb->data);
@@ -291,7 +291,8 @@
 					goto exit_f;
 				}
 
-				fl->fl6_src = &src_info->ipi6_addr;
+				ipv6_addr_copy(&fl->fl6_src,
+					       &src_info->ipi6_addr);
 			}
 
 			break;
Index: net/ipv6/esp6.c
===================================================================
RCS file: net/ipv6/esp6.c
diff -N net/ipv6/esp6.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ b/net/ipv6/esp6.c	16 Apr 2004 13:16:24 -0000	1.6.18.1
@@ -0,0 +1,500 @@
+/*
+ * Copyright (C)2002 USAGI/WIDE Project
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * Authors
+ *
+ *	Mitsuru KANDA @USAGI       : IPv6 Support 
+ * 	Kazunori MIYAZAWA @USAGI   :
+ * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
+ * 	
+ * 	This file is derived from net/ipv4/esp.c
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <net/inet_ecn.h>
+#include <net/ip.h>
+#include <net/xfrm.h>
+#include <net/esp.h>
+#include <asm/scatterlist.h>
+#include <linux/crypto.h>
+#include <linux/pfkeyv2.h>
+#include <linux/random.h>
+#include <net/icmp.h>
+#include <net/ipv6.h>
+#include <linux/icmpv6.h>
+
+#define MAX_SG_ONSTACK 4
+
+/* XXX not ESP-specific.  Expands to eight comma-separated arguments: s6_addr16[0..7] of addr, each passed through ntohs(). */
+#define NIP6(addr) \
+	ntohs((addr).s6_addr16[0]),\
+	ntohs((addr).s6_addr16[1]),\
+	ntohs((addr).s6_addr16[2]),\
+	ntohs((addr).s6_addr16[3]),\
+	ntohs((addr).s6_addr16[4]),\
+	ntohs((addr).s6_addr16[5]),\
+	ntohs((addr).s6_addr16[6]),\
+	ntohs((addr).s6_addr16[7])
+
+/* ESP output transform for IPv6: pads and encrypts the payload, prepends the
+ * ESP header (plus an outer IPv6 header when x->props.mode is set) and appends
+ * the ICV.  Returns NET_XMIT_BYPASS, or an error code after freeing the skb. */
+int esp6_output(struct sk_buff *skb)
+{
+	int err;
+	int hdr_len = 0;
+	struct dst_entry *dst = skb->dst;
+	struct xfrm_state *x  = dst->xfrm;
+	struct ipv6hdr *iph = NULL, *top_iph;
+	struct ipv6_esp_hdr *esph;
+	struct crypto_tfm *tfm;
+	struct esp_data *esp;
+	struct sk_buff *trailer;
+	int blksize;
+	int clen;
+	int alen;
+	int nfrags;
+	u8 *prevhdr;
+	u8 nexthdr = 0;
+
+	/* First, if the skb is not checksummed, complete checksum. */
+	if (skb->ip_summed == CHECKSUM_HW && skb_checksum_help(skb) == NULL) {
+		err = -EINVAL;
+		goto error_nolock;
+	}
+
+	spin_lock_bh(&x->lock);
+	err = xfrm_check_output(x, skb, AF_INET6);
+	if (err)
+		goto error;
+	err = -ENOMEM;	/* value returned by the allocation failures below */
+
+	/* Strip IP header in transport mode. Save it. */
+	if (!x->props.mode) {
+		hdr_len = ip6_find_1stfragopt(skb, &prevhdr);
+		nexthdr = *prevhdr;
+		*prevhdr = IPPROTO_ESP;
+		iph = kmalloc(hdr_len, GFP_ATOMIC);
+		if (!iph) {
+			err = -ENOMEM;
+			goto error;
+		}
+		memcpy(iph, skb->nh.raw, hdr_len);
+		__skb_pull(skb, hdr_len);
+	}
+
+	/* Now skb is pure payload to encrypt */
+	/* Round to block size */
+	clen = skb->len;
+
+	esp = x->data;
+	alen = esp->auth.icv_trunc_len;
+	tfm = esp->conf.tfm;
+	blksize = (crypto_tfm_alg_blocksize(tfm) + 3) & ~3;
+	clen = (clen + 2 + blksize-1)&~(blksize-1);	/* +2: pad-length and next-header bytes written below */
+	if (esp->conf.padlen)
+		clen = (clen + esp->conf.padlen-1)&~(esp->conf.padlen-1);
+
+	if ((nfrags = skb_cow_data(skb, clen-skb->len+alen, &trailer)) < 0) {
+		if (!x->props.mode && iph) kfree(iph);
+		goto error;
+	}
+
+	/* Fill padding... */
+	do {
+		int i;
+		for (i=0; i<clen-skb->len - 2; i++)
+			*(u8*)(trailer->tail + i) = i+1;
+	} while (0);
+	*(u8*)(trailer->tail + clen-skb->len - 2) = (clen - skb->len)-2;
+	pskb_put(skb, trailer, clen - skb->len);
+
+	if (x->props.mode) {
+		iph = skb->nh.ipv6h;
+		top_iph = (struct ipv6hdr*)skb_push(skb, x->props.header_len);
+		esph = (struct ipv6_esp_hdr*)(top_iph+1);
+		*(u8*)(trailer->tail - 1) = IPPROTO_IPV6;
+		top_iph->version = 6;
+		top_iph->priority = iph->priority;
+		top_iph->flow_lbl[0] = iph->flow_lbl[0];
+		top_iph->flow_lbl[1] = iph->flow_lbl[1];
+		top_iph->flow_lbl[2] = iph->flow_lbl[2];
+		if (x->props.flags & XFRM_STATE_NOECN)
+			IP6_ECN_clear(top_iph);
+		top_iph->nexthdr = IPPROTO_ESP;
+		top_iph->payload_len = htons(skb->len + alen - sizeof(struct ipv6hdr));
+		top_iph->hop_limit = iph->hop_limit;
+		memcpy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr, sizeof(struct in6_addr));
+		memcpy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr, sizeof(struct in6_addr));
+	} else { 
+		esph = (struct ipv6_esp_hdr*)skb_push(skb, x->props.header_len);
+		skb->h.raw = (unsigned char*)esph;
+		top_iph = (struct ipv6hdr*)skb_push(skb, hdr_len);
+		memcpy(top_iph, iph, hdr_len);	/* put the saved headers back in front of ESP */
+		kfree(iph);
+		top_iph->payload_len = htons(skb->len + alen - sizeof(struct ipv6hdr));
+		*(u8*)(trailer->tail - 1) = nexthdr;
+	}
+
+	esph->spi = x->id.spi;
+	esph->seq_no = htonl(++x->replay.oseq);
+
+	if (esp->conf.ivlen)
+		crypto_cipher_set_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
+
+	do {
+		struct scatterlist sgbuf[nfrags>MAX_SG_ONSTACK ? 0 : nfrags];
+		struct scatterlist *sg = sgbuf;
+
+		if (unlikely(nfrags > MAX_SG_ONSTACK)) {
+			sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC);
+			if (!sg)
+				goto error;
+		}
+		skb_to_sgvec(skb, sg, esph->enc_data+esp->conf.ivlen-skb->data, clen);
+		crypto_cipher_encrypt(tfm, sg, sg, clen);
+		if (unlikely(sg != sgbuf))
+			kfree(sg);
+	} while (0);
+
+	if (esp->conf.ivlen) {
+		memcpy(esph->enc_data, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
+		crypto_cipher_get_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
+	}
+
+	if (esp->auth.icv_full_len) {
+		esp->auth.icv(esp, skb, (u8*)esph-skb->data,
+			sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen+clen, trailer->tail);
+		pskb_put(skb, trailer, alen);
+	}
+
+	skb->nh.raw = skb->data;
+	x->curlft.bytes += skb->len;
+	x->curlft.packets++;
+	spin_unlock_bh(&x->lock);
+	if ((skb->dst = dst_pop(dst)) == NULL) {
+		err = -EHOSTUNREACH;
+		goto error_nolock;
+	}
+	return NET_XMIT_BYPASS;
+
+error:
+	spin_unlock_bh(&x->lock);
+error_nolock:
+	kfree_skb(skb);
+	return err;
+}
+
+/* ESP input transform for IPv6: verifies the ICV (if configured), decrypts in
+ * place, strips ESP header, IV, padding and ICV, and moves the saved IPv6
+ * headers up.  Returns the inner next-header value or a negative errno. */
+int esp6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
+{
+	struct ipv6hdr *iph;
+	struct ipv6_esp_hdr *esph;
+	struct esp_data *esp = x->data;
+	struct sk_buff *trailer;
+	int blksize = crypto_tfm_alg_blocksize(esp->conf.tfm);
+	int alen = esp->auth.icv_trunc_len;
+	int elen = skb->len - sizeof(struct ipv6_esp_hdr) - esp->conf.ivlen - alen;
+	int hdr_len = skb->h.raw - skb->nh.raw;
+	int nfrags;
+	unsigned char *tmp_hdr = NULL;
+	int ret = 0;
+
+	if (!pskb_may_pull(skb, sizeof(struct ipv6_esp_hdr))) {
+		ret = -EINVAL;
+		goto out_nofree;
+	}
+
+	if (elen <= 0 || (elen & (blksize-1))) {
+		ret = -EINVAL;
+		goto out_nofree;
+	}
+
+	tmp_hdr = kmalloc(hdr_len, GFP_ATOMIC);
+	if (!tmp_hdr) {
+		ret = -ENOMEM;
+		goto out_nofree;
+	}
+	memcpy(tmp_hdr, skb->nh.raw, hdr_len);	/* saved headers are copied back after decapsulation */
+
+	/* If integrity check is required, do this. */
+        if (esp->auth.icv_full_len) {
+		u8 sum[esp->auth.icv_full_len];
+		u8 sum1[alen];
+
+		esp->auth.icv(esp, skb, 0, skb->len-alen, sum);
+		if (skb_copy_bits(skb, skb->len-alen, sum1, alen))
+			BUG();
+
+		if (unlikely(memcmp(sum, sum1, alen))) {
+			x->stats.integrity_failed++;
+			ret = -EINVAL;
+			goto out;
+		}
+	}
+
+	if ((nfrags = skb_cow_data(skb, 0, &trailer)) < 0) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	esph = (struct ipv6_esp_hdr*)skb->data;
+	iph = skb->nh.ipv6h;	/* NOTE(review): assigned but not read again in this function */
+
+	/* Get ivec. This can be wrong, check against another impls. */
+	if (esp->conf.ivlen)
+		crypto_cipher_set_iv(esp->conf.tfm, esph->enc_data, crypto_tfm_alg_ivsize(esp->conf.tfm));
+
+        {
+		u8 nexthdr[2];	/* [0] = pad length, [1] = next header, read from the decrypted trailer */
+		struct scatterlist sgbuf[nfrags>MAX_SG_ONSTACK ? 0 : nfrags];
+		struct scatterlist *sg = sgbuf;
+		u8 padlen;
+		u8 *prevhdr;
+
+		if (unlikely(nfrags > MAX_SG_ONSTACK)) {
+			sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC);
+			if (!sg) {
+				ret = -ENOMEM;
+				goto out;
+			}
+		}
+		skb_to_sgvec(skb, sg, sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen, elen);
+		crypto_cipher_decrypt(esp->conf.tfm, sg, sg, elen);
+		if (unlikely(sg != sgbuf))
+			kfree(sg);
+
+		if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2))
+			BUG();
+		padlen = nexthdr[0];
+		if (padlen+2 >= elen) {
+			if (net_ratelimit()) {
+				printk(KERN_WARNING "ipsec esp packet is garbage padlen=%d, elen=%d\n", padlen+2, elen);
+			}
+			ret = -EINVAL;
+			goto out;
+		}
+		/* ... check padding bits here. Silly. :-) */
+
+		pskb_trim(skb, skb->len - alen - padlen - 2);
+		skb->h.raw = skb_pull(skb, sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen);
+		skb->nh.raw += sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen;
+		memcpy(skb->nh.raw, tmp_hdr, hdr_len);
+		skb->nh.ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+		ip6_find_1stfragopt(skb, &prevhdr);
+		ret = *prevhdr = nexthdr[1];
+	}
+
+out:
+	kfree(tmp_hdr);
+out_nofree:
+	return ret;
+}
+
+/* Size an mtu-byte payload can grow to under this SA: cipher-block padding,
+ * optional padlen alignment, header_len and the full-length ICV. */
+static u32 esp6_get_max_size(struct xfrm_state *x, int mtu)
+{
+	struct esp_data *priv = x->data;
+	u32 block = crypto_tfm_alg_blocksize(priv->conf.tfm);
+
+	if (!x->props.mode)
+		mtu += 2 + block;	/* The worst case. */
+	else
+		mtu = (mtu + 2 + block-1)&~(block-1);
+	if (priv->conf.padlen)
+		mtu = (mtu + priv->conf.padlen-1)&~(priv->conf.padlen-1);
+
+	return mtu + x->props.header_len + priv->auth.icv_full_len;
+}
+
+/* ICMPv6 error handler for ESP: logs the SA matching daddr/SPI of the packet at skb->data. */
+void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+		int type, int code, int offset, __u32 info)
+{
+	struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
+	struct ipv6_esp_hdr *esph = (struct ipv6_esp_hdr*)(skb->data+offset);
+	struct xfrm_state *x;
+
+	if (type != ICMPV6_DEST_UNREACH &&
+	    type != ICMPV6_PKT_TOOBIG)
+		return;	/* only these two types are handled; with "||" this was always taken */
+	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6);
+	if (!x)
+		return;
+	printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/"
+			"%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+			ntohl(esph->spi), NIP6(iph->daddr));
+	xfrm_state_put(x);
+}
+
+/* xfrm destructor for ESP state: release both transforms, the IV, the ICV work buffer, then esp_data. */
+void esp6_destroy(struct xfrm_state *x)
+{
+	struct esp_data *priv = x->data;
+
+	if (priv != NULL) {
+		if (priv->conf.tfm != NULL) {
+			crypto_free_tfm(priv->conf.tfm);
+			priv->conf.tfm = NULL;
+		}
+		if (priv->conf.ivec != NULL) {
+			kfree(priv->conf.ivec);
+			priv->conf.ivec = NULL;
+		}
+		if (priv->auth.tfm != NULL) {
+			crypto_free_tfm(priv->auth.tfm);
+			priv->auth.tfm = NULL;
+		}
+		if (priv->auth.work_icv != NULL) {
+			kfree(priv->auth.work_icv);
+			priv->auth.work_icv = NULL;
+		}
+		kfree(priv);
+	}
+}
+
+/* Set up the ESP state for an SA: allocates the auth/cipher transforms, the
+ * ICV work buffer and the IV, and keys the cipher.  Returns 0, -ENOMEM or -EINVAL. */
+int esp6_init_state(struct xfrm_state *x, void *args)
+{
+	struct esp_data *esp = NULL;
+
+	if (x->aalg &&
+	    (x->aalg->alg_key_len == 0 || x->aalg->alg_key_len > 512))
+		goto error;
+	if (x->ealg == NULL)
+		goto error;
+
+	esp = kmalloc(sizeof(*esp), GFP_KERNEL);
+	if (esp == NULL)
+		return -ENOMEM;
+	memset(esp, 0, sizeof(*esp));	/* lets the error path test each pointer safely */
+
+	if (x->aalg) {
+		struct xfrm_algo_desc *aalg_desc;
+
+		esp->auth.key = x->aalg->alg_key;
+		esp->auth.key_len = (x->aalg->alg_key_len+7)/8;
+		esp->auth.tfm = crypto_alloc_tfm(x->aalg->alg_name, 0);
+		if (esp->auth.tfm == NULL)
+			goto error;
+		esp->auth.icv = esp_hmac_digest;
+
+		aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name);
+		BUG_ON(!aalg_desc);
+		if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
+		    crypto_tfm_alg_digestsize(esp->auth.tfm)) {
+			printk(KERN_INFO "ESP: %s digestsize %u != %hu\n",
+			       x->aalg->alg_name, crypto_tfm_alg_digestsize(esp->auth.tfm),
+			       aalg_desc->uinfo.auth.icv_fullbits/8);
+			goto error;
+		}
+		esp->auth.icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8;
+		esp->auth.icv_trunc_len = aalg_desc->uinfo.auth.icv_truncbits/8;
+		esp->auth.work_icv = kmalloc(esp->auth.icv_full_len, GFP_KERNEL);
+		if (!esp->auth.work_icv)
+			goto error;
+	}
+	esp->conf.key = x->ealg->alg_key;
+	esp->conf.key_len = (x->ealg->alg_key_len+7)/8;
+	if (x->props.ealgo == SADB_EALG_NULL)
+		esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_ECB);
+	else
+		esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_CBC);
+	if (esp->conf.tfm == NULL)
+		goto error;
+	esp->conf.ivlen = crypto_tfm_alg_ivsize(esp->conf.tfm);
+	esp->conf.padlen = 0;
+	if (esp->conf.ivlen) {
+		esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL);
+		if (unlikely(esp->conf.ivec == NULL))	/* was passed to get_random_bytes() unchecked */
+			goto error;
+		get_random_bytes(esp->conf.ivec, esp->conf.ivlen);
+	}
+	if (crypto_cipher_setkey(esp->conf.tfm, esp->conf.key, esp->conf.key_len))
+		goto error;	/* do not install an SA whose cipher rejected the key */
+	x->props.header_len = sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen;
+	if (x->props.mode)
+		x->props.header_len += sizeof(struct ipv6hdr);
+	x->data = esp;
+	return 0;
+error:
+	if (esp) {
+		if (esp->auth.tfm)
+			crypto_free_tfm(esp->auth.tfm);
+		if (esp->auth.work_icv)
+			kfree(esp->auth.work_icv);
+		if (esp->conf.tfm)
+			crypto_free_tfm(esp->conf.tfm);
+		if (esp->conf.ivec)
+			kfree(esp->conf.ivec);
+		kfree(esp);
+	}
+	return -EINVAL;
+}
+
+static struct xfrm_type esp6_type =	/* registered for AF_INET6 by esp6_init() */
+{
+	.description	= "ESP6",
+	.owner	     	= THIS_MODULE,
+	.proto	     	= IPPROTO_ESP,
+	.init_state	= esp6_init_state,
+	.destructor	= esp6_destroy,
+	.get_max_size	= esp6_get_max_size,
+	.input		= esp6_input,
+	.output		= esp6_output
+};
+
+static struct inet6_protocol esp6_protocol = {	/* added under IPPROTO_ESP by esp6_init() */
+	.handler 	=	xfrm6_rcv,
+	.err_handler	=	esp6_err,
+	.flags		=	INET6_PROTO_NOPOLICY,
+};
+
+/* Module init: register the ESP xfrm type for AF_INET6, then the IPPROTO_ESP handler. */
+int __init esp6_init(void)
+{
+	if (xfrm_register_type(&esp6_type, AF_INET6) < 0) {
+		printk(KERN_INFO "ipv6 esp init: can't add xfrm type\n");
+		goto fail;
+	}
+	if (inet6_add_protocol(&esp6_protocol, IPPROTO_ESP) >= 0)
+		return 0;
+	printk(KERN_INFO "ipv6 esp init: can't add protocol\n");
+	xfrm_unregister_type(&esp6_type, AF_INET6);
+fail:
+	return -EAGAIN;
+}
+
+static void __exit esp6_fini(void)	/* module exit: reverse of esp6_init(); failures are only logged */
+{
+	if (inet6_del_protocol(&esp6_protocol, IPPROTO_ESP) < 0)
+		printk(KERN_INFO "ipv6 esp close: can't remove protocol\n");
+	if (xfrm_unregister_type(&esp6_type, AF_INET6) < 0)
+		printk(KERN_INFO "ipv6 esp close: can't remove xfrm type\n");
+}
+
+module_init(esp6_init);
+module_exit(esp6_fini);
+
+MODULE_LICENSE("GPL");
Index: net/ipv6/exthdrs.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv6/exthdrs.c,v
retrieving revision 1.1.1.17
retrieving revision 1.1.1.17.2.1
diff -u -r1.1.1.17 -r1.1.1.17.2.1
--- a/net/ipv6/exthdrs.c	14 Apr 2004 13:05:41 -0000	1.1.1.17
+++ b/net/ipv6/exthdrs.c	16 Apr 2004 13:16:24 -0000	1.1.1.17.2.1
@@ -18,6 +18,9 @@
 /* Changes:
  *	yoshfuji		: ensure not to overrun while parsing 
  *				  tlv options.
+ *	Mitsuru KANDA @USAGI and: Remove ipv6_parse_exthdrs().
+ *	YOSHIFUJI Hideaki @USAGI  Register inbound extension header
+ *				  handlers as inet6_protocol{}.
  */
 
 #include <linux/errno.h>
@@ -44,20 +47,6 @@
 #include <asm/uaccess.h>
 
 /*
- *	Parsing inbound headers.
- *
- *	Parsing function "func" returns offset wrt skb->nh of the place,
- *	where next nexthdr value is stored or NULL, if parsing
- *	failed. It should also update skb->h tp point at the next header.
- */
-
-struct hdrtype_proc
-{
-	int	type;
-	int	(*func) (struct sk_buff **, int offset);
-};
-
-/*
  *	Parsing tlv encoded headers.
  *
  *	Parsing function "func" returns 1, if parsing succeed
@@ -164,9 +153,9 @@
 	{-1,			NULL}
 };
 
-static int ipv6_dest_opt(struct sk_buff **skb_ptr, int nhoff)
+static int ipv6_destopt_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
 {
-	struct sk_buff *skb=*skb_ptr;
+	struct sk_buff *skb = *skbp;
 	struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb;
 
 	if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) ||
@@ -179,29 +168,56 @@
 
 	if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) {
 		skb->h.raw += ((skb->h.raw[1]+1)<<3);
-		return opt->dst1;
+		*nhoffp = opt->dst1;
+		return 1;
 	}
 
 	return -1;
 }
 
+static struct inet6_protocol destopt_protocol =
+{
+	.handler	=	ipv6_destopt_rcv,
+	.flags		=	INET6_PROTO_NOPOLICY,
+};
+
+void __init ipv6_destopt_init(void)
+{
+	if (inet6_add_protocol(&destopt_protocol, IPPROTO_DSTOPTS) < 0)
+		printk(KERN_ERR "ipv6_destopt_init: Could not register protocol\n");
+}
+
 /********************************
   NONE header. No data in packet.
  ********************************/
 
-static int ipv6_nodata(struct sk_buff **skb_ptr, int nhoff)
+static int ipv6_nodata_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
 {
-	kfree_skb(*skb_ptr);
-	return -1;
+	struct sk_buff *skb = *skbp;
+
+	kfree_skb(skb);
+	return 0;
+}
+
+static struct inet6_protocol nodata_protocol =
+{
+	.handler	=	ipv6_nodata_rcv,
+	.flags		=	INET6_PROTO_NOPOLICY,
+};
+
+void __init ipv6_nodata_init(void)
+{
+	if (inet6_add_protocol(&nodata_protocol, IPPROTO_NONE) < 0)
+		printk(KERN_ERR "ipv6_nodata_init: Could not register protocol\n");
 }
 
 /********************************
   Routing header.
  ********************************/
 
-static int ipv6_routing_header(struct sk_buff **skb_ptr, int nhoff)
+static int ipv6_rthdr_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
 {
-	struct sk_buff *skb = *skb_ptr;
+	struct sk_buff *skb = *skbp;
 	struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb;
 	struct in6_addr *addr;
 	struct in6_addr daddr;
@@ -232,7 +248,8 @@
 		skb->h.raw += (hdr->hdrlen + 1) << 3;
 		opt->dst0 = opt->dst1;
 		opt->dst1 = 0;
-		return (&hdr->nexthdr) - skb->nh.raw;
+		*nhoffp = (&hdr->nexthdr) - skb->nh.raw;
+		return 1;
 	}
 
 	if (hdr->type != IPV6_SRCRT_TYPE_0) {
@@ -247,7 +264,7 @@
 
 	/*
 	 *	This is the routing header forwarding algorithm from
-	 *	RFC 1883, page 17.
+	 *	RFC 2460, page 16.
 	 */
 
 	n = hdr->hdrlen >> 1;
@@ -265,7 +282,7 @@
 		kfree_skb(skb);
 		if (skb2 == NULL)
 			return -1;
-		*skb_ptr = skb = skb2;
+		*skbp = skb = skb2;
 		opt = (struct inet6_skb_parm *)skb2->cb;
 		hdr = (struct ipv6_rt_hdr *) skb2->h.raw;
 	}
@@ -293,7 +310,7 @@
 	dst_release(xchg(&skb->dst, NULL));
 	ip6_route_input(skb);
 	if (skb->dst->error) {
-		skb->dst->input(skb);
+		dst_input(skb);
 		return -1;
 	}
 	if (skb->dst->dev->flags&IFF_LOOPBACK) {
@@ -307,10 +324,22 @@
 		goto looped_back;
 	}
 
-	skb->dst->input(skb);
+	dst_input(skb);
 	return -1;
 }
 
+static struct inet6_protocol rthdr_protocol =
+{
+	.handler	=	ipv6_rthdr_rcv,
+	.flags		=	INET6_PROTO_NOPOLICY,
+};
+
+void __init ipv6_rthdr_init(void)
+{
+	if (inet6_add_protocol(&rthdr_protocol, IPPROTO_ROUTING) < 0)
+		printk(KERN_ERR "ipv6_rthdr_init: Could not register protocol\n");
+}
+
 /*
    This function inverts received rthdr.
    NOTE: specs allow to make it automatically only if
@@ -376,97 +405,6 @@
 	return opt;
 }
 
-/********************************
-  AUTH header.
- ********************************/
-
-/*
-   rfc1826 said, that if a host does not implement AUTH header
-   it MAY ignore it. We use this hole 8)
-
-   Actually, now we can implement OSPFv6 without kernel IPsec.
-   Authentication for poors may be done in user space with the same success.
-
-   Yes, it means, that we allow application to send/receive
-   raw authentication header. Apparently, we suppose, that it knows
-   what it does and calculates authentication data correctly.
-   Certainly, it is possible only for udp and raw sockets, but not for tcp.
-
-   AUTH header has 4byte granular length, which kills all the idea
-   behind AUTOMATIC 64bit alignment of IPv6. Now we will lose
-   cpu ticks, checking that sender did not something stupid
-   and opt->hdrlen is even. Shit!		--ANK (980730)
- */
-
-static int ipv6_auth_hdr(struct sk_buff **skb_ptr, int nhoff)
-{
-	struct sk_buff *skb=*skb_ptr;
-	struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb;
-	int len;
-
-	if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8))
-		goto fail;
-
-	/*
-	 * RFC2402 2.2 Payload Length
-	 * The 8-bit field specifies the length of AH in 32-bit words 
-	 * (4-byte units), minus "2".
-	 * -- Noriaki Takamiya @USAGI Project
-	 */
-	len = (skb->h.raw[1]+2)<<2;
-
-	if (len&7)
-		goto fail;
-
-	if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+len))
-		goto fail;
-
-	opt->auth = skb->h.raw - skb->nh.raw;
-	skb->h.raw += len;
-	return opt->auth;
-
-fail:
-	kfree_skb(skb);
-	return -1;
-}
-
-/* This list MUST NOT contain entry for NEXTHDR_HOP.
-   It is parsed immediately after packet received
-   and if it occurs somewhere in another place we must
-   generate error.
- */
-
-struct hdrtype_proc hdrproc_lst[] = {
-	{NEXTHDR_FRAGMENT,	ipv6_reassembly},
-	{NEXTHDR_ROUTING,	ipv6_routing_header},
-	{NEXTHDR_DEST,		ipv6_dest_opt},
-	{NEXTHDR_NONE,		ipv6_nodata},
-	{NEXTHDR_AUTH,		ipv6_auth_hdr},
-   /*
-	{NEXTHDR_ESP,		ipv6_esp_hdr},
-    */
-	{-1,			NULL}
-};
-
-int ipv6_parse_exthdrs(struct sk_buff **skb_in, int nhoff)
-{
-	struct hdrtype_proc *hdrt;
-	u8 nexthdr = (*skb_in)->nh.raw[nhoff];
-
-restart:
-	for (hdrt=hdrproc_lst; hdrt->type >= 0; hdrt++) {
-		if (hdrt->type == nexthdr) {
-			if ((nhoff = hdrt->func(skb_in, nhoff)) >= 0) {
-				nexthdr = (*skb_in)->nh.raw[nhoff];
-				goto restart;
-			}
-			return -1;
-		}
-	}
-	return nhoff;
-}
-
-
 /**********************************
   Hop-by-hop options.
  **********************************/
@@ -498,7 +436,7 @@
 	}
 
 	pkt_len = ntohl(*(u32*)(skb->nh.raw+optoff+2));
-	if (pkt_len < 0x10000) {
+	if (pkt_len <= IPV6_MAXPLEN) {
 		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2);
 		return 0;
 	}
Index: net/ipv6/icmp.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv6/icmp.c,v
retrieving revision 1.1.1.24
retrieving revision 1.1.1.24.2.1
diff -u -r1.1.1.24 -r1.1.1.24.2.1
--- a/net/ipv6/icmp.c	14 Apr 2004 13:05:41 -0000	1.1.1.24
+++ b/net/ipv6/icmp.c	16 Apr 2004 13:16:24 -0000	1.1.1.24.2.1
@@ -26,6 +26,7 @@
  *	yoshfuji		:	ensure to sent parameter problem for
  *					fragments.
  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
+ *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
  */
 
 #include <linux/module.h>
@@ -74,17 +75,11 @@
 #define icmpv6_socket	__icmpv6_socket[smp_processor_id()]
 #define icmpv6_socket_cpu(X) __icmpv6_socket[(X)]
 
-int icmpv6_rcv(struct sk_buff *skb);
+static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp);
 
-static struct inet6_protocol icmpv6_protocol = 
-{
-	icmpv6_rcv,		/* handler		*/
-	NULL,			/* error control	*/
-	NULL,			/* next			*/
-	IPPROTO_ICMPV6,		/* protocol ID		*/
-	0,			/* copy			*/
-	NULL,			/* data			*/
-	"ICMPv6"	       	/* name			*/
+static struct inet6_protocol icmpv6_protocol = {
+	.handler	=	icmpv6_rcv,
+	.flags		=	INET6_PROTO_FINAL,
 };
 
 struct icmpv6_msg {
@@ -116,40 +111,6 @@
 	spin_unlock_bh(&icmpv6_socket->sk->lock.slock);
 }
 
-/*
- *	getfrag callback
- */
-
-static int icmpv6_getfrag(const void *data, struct in6_addr *saddr, 
-			   char *buff, unsigned int offset, unsigned int len)
-{
-	struct icmpv6_msg *msg = (struct icmpv6_msg *) data;
-	struct icmp6hdr *icmph;
-	__u32 csum;
-
-	if (offset) {
-		csum = skb_copy_and_csum_bits(msg->skb, msg->offset +
-					      (offset - sizeof(struct icmp6hdr)),
-					      buff, len, msg->csum);
-		msg->csum = csum;
-		return 0;
-	}
-
-	csum = csum_partial_copy_nocheck((void *) &msg->icmph, buff,
-					 sizeof(struct icmp6hdr), msg->csum);
-
-	csum = skb_copy_and_csum_bits(msg->skb, msg->offset,
-				      buff + sizeof(struct icmp6hdr),
-				      len - sizeof(struct icmp6hdr), csum);
-
-	icmph = (struct icmp6hdr *) buff;
-
-	icmph->icmp6_cksum = csum_ipv6_magic(saddr, msg->daddr, msg->len,
-					     IPPROTO_ICMPV6, csum);
-	return 0; 
-}
-
-
 /* 
  * Slightly more convenient version of icmpv6_send.
  */
@@ -252,21 +213,74 @@
 	return (optval&0xC0) == 0x80;
 }
 
+int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6hdr *thdr, int len)
+{
+	struct sk_buff *skb;
+	struct icmp6hdr *icmp6h;
+	int err = 0;
+
+	if ((skb = skb_peek(&sk->write_queue)) == NULL)
+		goto out;
+
+	icmp6h = (struct icmp6hdr*) skb->h.raw;
+	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
+	icmp6h->icmp6_cksum = 0;
+
+	if (skb_queue_len(&sk->write_queue) == 1) {
+		skb->csum = csum_partial((char *)icmp6h,
+					sizeof(struct icmp6hdr), skb->csum);
+		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
+						      &fl->fl6_dst,
+						      len, fl->proto,
+						      skb->csum);
+	} else {
+		u32 tmp_csum = 0;
+
+		skb_queue_walk(&sk->write_queue, skb) {
+			tmp_csum = csum_add(tmp_csum, skb->csum);
+		}
+
+		tmp_csum = csum_partial((char *)icmp6h,
+					sizeof(struct icmp6hdr), tmp_csum);
+		tmp_csum = csum_ipv6_magic(&fl->fl6_src,
+					   &fl->fl6_dst,
+					   len, fl->proto, tmp_csum);
+		icmp6h->icmp6_cksum = tmp_csum;
+	}
+	if (icmp6h->icmp6_cksum == 0)
+		icmp6h->icmp6_cksum = -1;
+	ip6_push_pending_frames(sk);
+out:
+	return err;
+}
+
+static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
+{
+	struct sk_buff *org_skb = (struct sk_buff *)from;
+	__u32 csum = 0;
+	csum = skb_copy_and_csum_bits(org_skb, offset, to, len, csum);
+	skb->csum = csum_block_add(skb->csum, csum, odd);
+	return 0;
+}
+
 /*
  *	Send an ICMP message in response to a packet in error
  */
-
 void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, 
 		 struct net_device *dev)
 {
 	struct ipv6hdr *hdr = skb->nh.ipv6h;
 	struct sock *sk = icmpv6_socket->sk;
+	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct in6_addr *saddr = NULL;
-	int iif = 0;
-	struct icmpv6_msg msg;
+	struct dst_entry *dst;
+	struct icmp6hdr tmp_hdr;
 	struct flowi fl;
+	int iif = 0;
 	int addr_type = 0;
-	int len;
+	int len, plen;
+	int hlimit = -1;
+	int err = 0;
 
 	if ((u8*)hdr < skb->head || (u8*)(hdr+1) > skb->tail)
 		return;
@@ -324,13 +338,14 @@
 		return;
 	}
 
+	memset(&fl, 0, sizeof(fl));
 	fl.proto = IPPROTO_ICMPV6;
-	fl.nl_u.ip6_u.daddr = &hdr->saddr;
-	fl.nl_u.ip6_u.saddr = saddr;
+	ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr);
+	if (saddr)
+		ipv6_addr_copy(&fl.fl6_src, saddr);
 	fl.oif = iif;
-	fl.fl6_flowlabel = 0;
-	fl.uli_u.icmpt.type = type;
-	fl.uli_u.icmpt.code = code;
+	fl.fl_icmp_type = type;
+	fl.fl_icmp_code = code;
 
 	if (icmpv6_xmit_lock())
 		return;
@@ -338,37 +353,52 @@
 	if (!icmpv6_xrlim_allow(sk, type, &fl))
 		goto out;
 
-	/*
-	 *	ok. kick it. checksum will be provided by the 
-	 *	getfrag_t callback.
-	 */
+	tmp_hdr.icmp6_type = type;
+	tmp_hdr.icmp6_code = code;
+	tmp_hdr.icmp6_cksum = 0;
+	tmp_hdr.icmp6_pointer = htonl(info);
+
+	if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
+		fl.oif = np->mcast_oif;
 
-	msg.icmph.icmp6_type = type;
-	msg.icmph.icmp6_code = code;
-	msg.icmph.icmp6_cksum = 0;
-	msg.icmph.icmp6_pointer = htonl(info);
-
-	msg.skb = skb;
-	msg.offset = skb->nh.raw - skb->data;
-	msg.csum = 0;
-	msg.daddr = &hdr->saddr;
+	err = ip6_dst_lookup(sk, &dst, &fl);
+	if (err)
+		goto out;
 
-	len = skb->len - msg.offset + sizeof(struct icmp6hdr);
-	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr));
+	if (hlimit < 0) {
+		if (ipv6_addr_is_multicast(&fl.fl6_dst))
+			hlimit = np->mcast_hops;
+		else
+			hlimit = np->hop_limit;
+		if (hlimit < 0)
+			hlimit = dst_metric(dst, RTAX_HOPLIMIT);
+	}
 
+	plen = skb->nh.raw - skb->data;
+	__skb_pull(skb, plen);
+	len = skb->len;
+	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr));
 	if (len < 0) {
 		if (net_ratelimit())
 			printk(KERN_DEBUG "icmp: len problem\n");
-		goto out;
+		__skb_push(skb, plen);
+		goto out_dst_release;
 	}
 
-	msg.len = len;
+	err = ip6_append_data(sk, icmpv6_getfrag, skb, len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr),
+				hlimit, NULL, &fl, (struct rt6_info*)dst, MSG_DONTWAIT);
+	if (err) {
+		ip6_flush_pending_frames(sk);
+		goto out_dst_release;
+	}
+	err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, len + sizeof(struct icmp6hdr));
+	__skb_push(skb, plen);
 
-	ip6_build_xmit(sk, icmpv6_getfrag, &msg, &fl, len, NULL, -1,
-		       MSG_DONTWAIT);
 	if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB)
 		(&(icmpv6_statistics[smp_processor_id()*2].Icmp6OutDestUnreachs))[type-1]++;
 	ICMP6_INC_STATS_BH(Icmp6OutMsgs);
+out_dst_release:
+	dst_release(dst);
 out:
 	icmpv6_xmit_unlock();
 }
@@ -376,45 +406,66 @@
 static void icmpv6_echo_reply(struct sk_buff *skb)
 {
 	struct sock *sk = icmpv6_socket->sk;
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct in6_addr *saddr = NULL;
 	struct icmp6hdr *icmph = (struct icmp6hdr *) skb->h.raw;
-	struct in6_addr *saddr;
-	struct icmpv6_msg msg;
+	struct icmp6hdr tmp_hdr;
 	struct flowi fl;
+	struct dst_entry *dst;
+	int err = 0;
+	int hlimit = -1;
 
 	saddr = &skb->nh.ipv6h->daddr;
 
-	if (ipv6_addr_type(saddr) & IPV6_ADDR_MULTICAST ||
-	    ipv6_chk_acast_addr(0, saddr)) 
+	if (!ipv6_unicast_destination(skb))
 		saddr = NULL;
 
-	msg.icmph.icmp6_type = ICMPV6_ECHO_REPLY;
-	msg.icmph.icmp6_code = 0;
-	msg.icmph.icmp6_cksum = 0;
-	msg.icmph.icmp6_identifier = icmph->icmp6_identifier;
-	msg.icmph.icmp6_sequence = icmph->icmp6_sequence;
-
-	msg.skb = skb;
-	msg.offset = 0;
-	msg.csum = 0;
-	msg.len = skb->len + sizeof(struct icmp6hdr);
-	msg.daddr =  &skb->nh.ipv6h->saddr;
+	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
+	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
 
+	memset(&fl, 0, sizeof(fl));
 	fl.proto = IPPROTO_ICMPV6;
-	fl.nl_u.ip6_u.daddr = msg.daddr;
-	fl.nl_u.ip6_u.saddr = saddr;
+	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
+	if (saddr)
+		ipv6_addr_copy(&fl.fl6_src, saddr);
 	fl.oif = skb->dev->ifindex;
-	fl.fl6_flowlabel = 0;
-	fl.uli_u.icmpt.type = ICMPV6_ECHO_REPLY;
-	fl.uli_u.icmpt.code = 0;
+	fl.fl_icmp_type = ICMPV6_ECHO_REPLY;
 
 	if (icmpv6_xmit_lock())
 		return;
 
-	ip6_build_xmit(sk, icmpv6_getfrag, &msg, &fl, msg.len, NULL, -1,
-		       MSG_DONTWAIT);
-	ICMP6_INC_STATS_BH(Icmp6OutEchoReplies);
-	ICMP6_INC_STATS_BH(Icmp6OutMsgs);
+	if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
+		fl.oif = np->mcast_oif;
+
+	err = ip6_dst_lookup(sk, &dst, &fl);
+	if (err)
+		goto out;
 
+	if (hlimit < 0) {
+		if (ipv6_addr_is_multicast(&fl.fl6_dst))
+			hlimit = np->mcast_hops;
+		else
+			hlimit = np->hop_limit;
+		if (hlimit < 0)
+			hlimit = dst_metric(dst, RTAX_HOPLIMIT);
+	}
+
+	err = ip6_append_data(sk, icmpv6_getfrag, skb, skb->len + sizeof(struct icmp6hdr),
+				sizeof(struct icmp6hdr), hlimit, NULL, &fl,
+				(struct rt6_info*)dst, MSG_DONTWAIT);
+  
+	if (err) {
+		ip6_flush_pending_frames(sk);
+		goto out_dst_release;
+	}
+	err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, skb->len + sizeof(struct icmp6hdr));
+
+        ICMP6_INC_STATS_BH(Icmp6OutEchoReplies);
+        ICMP6_INC_STATS_BH(Icmp6OutMsgs);
+
+out_dst_release:
+	dst_release(dst);
+out: 
 	icmpv6_xmit_unlock();
 }
 
@@ -456,15 +507,9 @@
 
 	hash = nexthdr & (MAX_INET_PROTOS - 1);
 
-	for (ipprot = (struct inet6_protocol *) inet6_protos[hash]; 
-	     ipprot != NULL; 
-	     ipprot=(struct inet6_protocol *)ipprot->next) {
-		if (ipprot->protocol != nexthdr)
-			continue;
-
-		if (ipprot->err_handler)
-			ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
-	}
+	ipprot = inet6_protos[hash];
+	if (ipprot && ipprot->err_handler)
+		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
 
 	read_lock(&raw_v6_lock);
 	if ((sk = raw_v6_htable[hash]) != NULL) {
@@ -480,8 +525,9 @@
  *	Handle icmp messages
  */
 
-int icmpv6_rcv(struct sk_buff *skb)
+static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
 {
+	struct sk_buff *skb = *pskb;
 	struct net_device *dev = skb->dev;
 	struct in6_addr *saddr, *daddr;
 	struct ipv6hdr *orig_hdr;
@@ -658,7 +704,12 @@
 		sk->prot->unhash(sk);
 	}
 
-	inet6_add_protocol(&icmpv6_protocol);
+	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) {
+		printk(KERN_ERR "Failed to register ICMP6 protocol\n");
+		sock_release(icmpv6_socket);
+		icmpv6_socket = NULL;
+		return -EAGAIN;
+	}
 
 	return 0;
 fail:
@@ -677,7 +728,7 @@
 		sock_release(icmpv6_socket_cpu(i));
 		icmpv6_socket_cpu(i) = NULL;
 	}
-	inet6_del_protocol(&icmpv6_protocol);
+	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
 }
 
 static struct icmp6_err {
Index: net/ipv6/ip6_fib.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv6/ip6_fib.c,v
retrieving revision 1.1.1.21
retrieving revision 1.1.1.21.2.1
diff -u -r1.1.1.21 -r1.1.1.21.2.1
--- a/net/ipv6/ip6_fib.c	25 Aug 2003 11:44:44 -0000	1.1.1.21
+++ b/net/ipv6/ip6_fib.c	16 Apr 2004 13:16:24 -0000	1.1.1.21.2.1
@@ -40,7 +40,6 @@
 #include <net/ip6_route.h>
 
 #define RT6_DEBUG 2
-#undef CONFIG_IPV6_SUBTREES
 
 #if RT6_DEBUG >= 3
 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
@@ -453,7 +452,6 @@
 			 */
 
 			if ((iter->rt6i_dev == rt->rt6i_dev) &&
-			    (iter->rt6i_flowr == rt->rt6i_flowr) &&
 			    (ipv6_addr_cmp(&iter->rt6i_gateway,
 					   &rt->rt6i_gateway) == 0)) {
 				if (!(iter->rt6i_flags&RTF_EXPIRES))
@@ -500,13 +498,19 @@
 		mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
 }
 
+void fib6_force_start_gc(void)
+{
+	if (ip6_fib_timer.expires == 0)
+		mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
+}
+
 /*
  *	Add routing information to the routing tree.
  *	<destination addr>/<source addr>
  *	with source addr info in sub-trees
  */
 
-int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nlmsghdr *nlh)
+int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
 {
 	struct fib6_node *fn;
 	int err = -ENOMEM;
@@ -597,8 +601,8 @@
 	   is orphan. If it is, shoot it.
 	 */
 st_failure:
-	if (fn && !(fn->fn_flags&RTN_RTINFO|RTN_ROOT))
-		fib_repair_tree(fn);
+	if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
+		fib6_repair_tree(fn);
 	dst_free(&rt->u.dst);
 	return err;
 #endif
@@ -888,7 +892,7 @@
 }
 
 static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
-    struct nlmsghdr *nlh)
+    struct nlmsghdr *nlh, void *_rtattr)
 {
 	struct fib6_walker_t *w;
 	struct rt6_info *rt = *rtp;
@@ -947,7 +951,7 @@
 	rt6_release(rt);
 }
 
-int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh)
+int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
 {
 	struct fib6_node *fn = rt->rt6i_node;
 	struct rt6_info **rtp;
@@ -972,7 +976,7 @@
 
 	for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.next) {
 		if (*rtp == rt) {
-			fib6_del_route(fn, rtp, nlh);
+			fib6_del_route(fn, rtp, nlh, _rtattr);
 			return 0;
 		}
 	}
@@ -1101,7 +1105,7 @@
 		res = c->func(rt, c->arg);
 		if (res < 0) {
 			w->leaf = rt;
-			res = fib6_del(rt, NULL);
+			res = fib6_del(rt, NULL, NULL);
 			if (res) {
 #if RT6_DEBUG >= 2
 				printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res);
@@ -1218,6 +1222,7 @@
 
 
 	write_lock_bh(&rt6_lock);
+	ndisc_dst_gc(&gc_args.more);
 	fib6_clean_tree(&ip6_routing_table, fib6_age, 0, NULL);
 	write_unlock_bh(&rt6_lock);
 
@@ -1232,17 +1237,17 @@
 
 void __init fib6_init(void)
 {
-	if (!fib6_node_kmem)
-		fib6_node_kmem = kmem_cache_create("fib6_nodes",
-						   sizeof(struct fib6_node),
-						   0, SLAB_HWCACHE_ALIGN,
-						   NULL, NULL);
+	fib6_node_kmem = kmem_cache_create("fib6_nodes",
+					   sizeof(struct fib6_node),
+					   0, SLAB_HWCACHE_ALIGN,
+					   NULL, NULL);
 }
 
 #ifdef MODULE
 void fib6_gc_cleanup(void)
 {
 	del_timer(&ip6_fib_timer);
+	kmem_cache_destroy(fib6_node_kmem);
 }
 #endif
 
Index: net/ipv6/ip6_fw.c
===================================================================
RCS file: net/ipv6/ip6_fw.c
diff -N net/ipv6/ip6_fw.c
--- a/net/ipv6/ip6_fw.c	21 Dec 2001 17:42:05 -0000	1.1.1.4
+++ /dev/null	1 Jan 1970 00:00:00 -0000
@@ -1,390 +0,0 @@
-/*
- *	IPv6 Firewall
- *	Linux INET6 implementation
- *
- *	Authors:
- *	Pedro Roque		<roque@di.fc.ul.pt>	
- *
- *	$Id: ip6_fw.c,v 1.16 2001/10/31 08:17:58 davem Exp $
- *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- */
-
-#include <linux/config.h>
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <linux/route.h>
-#include <linux/netdevice.h>
-#include <linux/in6.h>
-#include <linux/udp.h>
-#include <linux/init.h>
-
-#include <net/ipv6.h>
-#include <net/ip6_route.h>
-#include <net/ip6_fw.h>
-#include <net/netlink.h>
-
-static unsigned long ip6_fw_rule_cnt;
-static struct ip6_fw_rule ip6_fw_rule_list = {
-	{0},
-	NULL, NULL,
-	{0},
-	IP6_FW_REJECT
-};
-
-static int ip6_fw_accept(struct dst_entry *dst, struct fl_acc_args *args);
-
-struct flow_rule_ops ip6_fw_ops = {
-	ip6_fw_accept
-};
-
-
-static struct rt6_info ip6_fw_null_entry = {
-	{{NULL, 0, 0, NULL,
-	  0, 0, 0, 0, 0, 0, 0, 0, -ENETUNREACH, NULL, NULL,
-	  ip6_pkt_discard, ip6_pkt_discard, NULL}},
-	NULL, {{{0}}}, 256, RTF_REJECT|RTF_NONEXTHOP, ~0UL,
-	0, &ip6_fw_rule_list, {{{{0}}}, 128}, {{{{0}}}, 128}
-};
-
-static struct fib6_node ip6_fw_fib = {
-	NULL, NULL, NULL, NULL,
-	&ip6_fw_null_entry,
-	0, RTN_ROOT|RTN_TL_ROOT, 0
-};
-
-rwlock_t ip6_fw_lock = RW_LOCK_UNLOCKED;
-
-
-static void ip6_rule_add(struct ip6_fw_rule *rl)
-{
-	struct ip6_fw_rule *next;
-
-	write_lock_bh(&ip6_fw_lock);
-	ip6_fw_rule_cnt++;
-	next = &ip6_fw_rule_list;
-	rl->next = next;
-	rl->prev = next->prev;
-	rl->prev->next = rl;
-	next->prev = rl;
-	write_unlock_bh(&ip6_fw_lock);
-}
-
-static void ip6_rule_del(struct ip6_fw_rule *rl)
-{
-	struct ip6_fw_rule *next, *prev;
-
-	write_lock_bh(&ip6_fw_lock);
-	ip6_fw_rule_cnt--;
-	next = rl->next;
-	prev = rl->prev;
-	next->prev = prev;
-	prev->next = next;
-	write_unlock_bh(&ip6_fw_lock);
-}
-
-static __inline__ struct ip6_fw_rule * ip6_fwrule_alloc(void)
-{
-	struct ip6_fw_rule *rl;
-
-	rl = kmalloc(sizeof(struct ip6_fw_rule), GFP_ATOMIC);
-	if (rl)
-	{
-		memset(rl, 0, sizeof(struct ip6_fw_rule));
-		rl->flowr.ops = &ip6_fw_ops;
-	}
-	return rl;
-}
-
-static __inline__ void ip6_fwrule_free(struct ip6_fw_rule * rl)
-{
-	kfree(rl);
-}
-
-static __inline__ int port_match(int rl_port, int fl_port)
-{
-	int res = 0;
-	if (rl_port == 0 || (rl_port == fl_port))
-		res = 1;
-	return res;
-}
-
-static int ip6_fw_accept_trans(struct ip6_fw_rule *rl,
-			       struct fl_acc_args *args)
-{
-	int res = FLOWR_NODECISION;
-	int proto = 0;
-	int sport = 0;
-	int dport = 0;
-
-	switch (args->type) {
-	case FL_ARG_FORWARD:
-	{
-		struct sk_buff *skb = args->fl_u.skb;
-		struct ipv6hdr *hdr = skb->nh.ipv6h;
-		int len;
-
-		len = skb->len - sizeof(struct ipv6hdr);
-
-		proto = hdr->nexthdr;
-
-		switch (proto) {
-		case IPPROTO_TCP:
-		{
-			struct tcphdr *th;
-
-			if (len < sizeof(struct tcphdr)) {
-				res = FLOWR_ERROR;
-				goto out;
-			}
-			th = (struct tcphdr *)(hdr + 1);
-			sport = th->source;
-			dport = th->dest;
-			break;
-		}
-		case IPPROTO_UDP:
-		{
-			struct udphdr *uh;
-
-			if (len < sizeof(struct udphdr)) {
-				res = FLOWR_ERROR;
-				goto out;
-			}
-			uh = (struct udphdr *)(hdr + 1);
-			sport = uh->source;
-			dport = uh->dest;
-			break;
-		}
-		default:
-			goto out;
-		};
-		break;
-	}
-
-	case FL_ARG_ORIGIN:
-	{
-		proto = args->fl_u.fl_o.flow->proto;
-
-		if (proto == IPPROTO_ICMPV6) {
-			goto out;
-		} else {
-			sport = args->fl_u.fl_o.flow->uli_u.ports.sport;
-			dport = args->fl_u.fl_o.flow->uli_u.ports.dport;
-		}
-		break;
-	}
-
-	if (proto == rl->info.proto &&
-	    port_match(args->fl_u.fl_o.flow->uli_u.ports.sport, sport) &&
-	    port_match(args->fl_u.fl_o.flow->uli_u.ports.dport, dport)) {
-		if (rl->policy & IP6_FW_REJECT)
-			res = FLOWR_SELECT;
-		else
-			res = FLOWR_CLEAR;
-	}
-
-	default:
-#if IP6_FW_DEBUG >= 1
-		printk(KERN_DEBUG "ip6_fw_accept: unknown arg type\n");
-#endif
-		goto out;
-	};
-
-out:
-	return res;
-}
-
-static int ip6_fw_accept(struct dst_entry *dst, struct fl_acc_args *args)
-{
-	struct rt6_info *rt;
-	struct ip6_fw_rule *rl;
-	int proto;
-	int res = FLOWR_NODECISION;
-
-	rt = (struct rt6_info *) dst;
-	rl = (struct ip6_fw_rule *) rt->rt6i_flowr;
-
-	proto = rl->info.proto;
-
-	switch (proto) {
-	case 0:
-		if (rl->policy & IP6_FW_REJECT)
-			res = FLOWR_SELECT;
-		else
-			res = FLOWR_CLEAR;
-		break;
-	case IPPROTO_TCP:
-	case IPPROTO_UDP:
-		res = ip6_fw_accept_trans(rl, args);
-		break;
-	case IPPROTO_ICMPV6:
-	};
-
-	return res;
-}
-
-static struct dst_entry * ip6_fw_dup(struct dst_entry *frule,
-				     struct dst_entry *rt,
-				     struct fl_acc_args *args)
-{
-	struct ip6_fw_rule *rl;
-	struct rt6_info *nrt;
-	struct rt6_info *frt;
-
-	frt = (struct rt6_info *) frule;
-
-	rl = (struct ip6_fw_rule *) frt->rt6i_flowr;
-
-	nrt = ip6_rt_copy((struct rt6_info *) rt);
-
-	if (nrt) {
-		nrt->u.dst.input = frule->input;
-		nrt->u.dst.output = frule->output;
-
-		nrt->rt6i_flowr = flow_clone(frt->rt6i_flowr);
-
-		nrt->rt6i_flags |= RTF_CACHE;
-		nrt->rt6i_tstamp = jiffies;
-	}
-
-	return (struct dst_entry *) nrt;
-}
-
-int ip6_fw_reject(struct sk_buff *skb)
-{
-#if IP6_FW_DEBUG >= 1
-	printk(KERN_DEBUG "packet rejected: \n");
-#endif
-
-	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADM_PROHIBITED, 0,
-		    skb->dev);
-	/*
-	 *	send it via netlink, as (rule, skb)
-	 */
-
-	kfree_skb(skb);
-	return 0;
-}
-
-int ip6_fw_discard(struct sk_buff *skb)
-{
-	printk(KERN_DEBUG "ip6_fw: BUG fw_reject called\n");
-	kfree_skb(skb);
-	return 0;
-}
-
-int ip6_fw_msg_add(struct ip6_fw_msg *msg)
-{
-	struct in6_rtmsg rtmsg;
-	struct ip6_fw_rule *rl;
-	struct rt6_info *rt;
-	int err;
-
-	ipv6_addr_copy(&rtmsg.rtmsg_dst, &msg->dst);
-	ipv6_addr_copy(&rtmsg.rtmsg_src, &msg->src);
-	rtmsg.rtmsg_dst_len = msg->dst_len;
-	rtmsg.rtmsg_src_len = msg->src_len;
-	rtmsg.rtmsg_metric = IP6_RT_PRIO_FW;
-
-	rl = ip6_fwrule_alloc();
-
-	if (rl == NULL)
-		return -ENOMEM;
-
-	rl->policy = msg->policy;
-	rl->info.proto = msg->proto;
-	rl->info.uli_u.data = msg->u.data;
-
-	rtmsg.rtmsg_flags = RTF_NONEXTHOP|RTF_POLICY;
-	err = ip6_route_add(&rtmsg);
-
-	if (err) {
-		ip6_fwrule_free(rl);
-		return err;
-	}
-
-	/* The rest will not work for now. --ABK (989725) */
-
-#ifndef notdef
-	ip6_fwrule_free(rl);
-	return -EPERM;
-#else
-	rt->u.dst.error = -EPERM;
-
-	if (msg->policy == IP6_FW_ACCEPT) {
-		/*
-		 *	Accept rules are never selected
-		 *	(i.e. packets use normal forwarding)
-		 */
-		rt->u.dst.input = ip6_fw_discard;
-		rt->u.dst.output = ip6_fw_discard;
-	} else {
-		rt->u.dst.input = ip6_fw_reject;
-		rt->u.dst.output = ip6_fw_reject;
-	}
-
-	ip6_rule_add(rl);
-
-	rt->rt6i_flowr = flow_clone((struct flow_rule *)rl);
-
-	return 0;
-#endif
-}
-
-static int ip6_fw_msgrcv(int unit, struct sk_buff *skb)
-{
-	int count = 0;
-
-	while (skb->len) {
-		struct ip6_fw_msg *msg;
-
-		if (skb->len < sizeof(struct ip6_fw_msg)) {
-			count = -EINVAL;
-			break;
-		}
-
-		msg = (struct ip6_fw_msg *) skb->data;
-		skb_pull(skb, sizeof(struct ip6_fw_msg));
-		count += sizeof(struct ip6_fw_msg);
-
-		switch (msg->action) {
-		case IP6_FW_MSG_ADD:
-			ip6_fw_msg_add(msg);
-			break;
-		case IP6_FW_MSG_DEL:
-			break;
-		default:
-			return -EINVAL;
-		};
-	}
-
-	return count;
-}
-
-static void ip6_fw_destroy(struct flow_rule *rl)
-{
-	ip6_fwrule_free((struct ip6_fw_rule *)rl);
-}
-
-#ifdef MODULE
-#define ip6_fw_init module_init
-#endif
-
-void __init ip6_fw_init(void)
-{
-	netlink_attach(NETLINK_IP6_FW, ip6_fw_msgrcv);
-}
-
-#ifdef MODULE
-void cleanup_module(void)
-{
-	netlink_detach(NETLINK_IP6_FW);
-}
-#endif
Index: net/ipv6/ip6_input.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv6/ip6_input.c,v
retrieving revision 1.1.1.15
retrieving revision 1.1.1.15.2.1
diff -u -r1.1.1.15 -r1.1.1.15.2.1
--- a/net/ipv6/ip6_input.c	25 Aug 2003 11:44:44 -0000	1.1.1.15
+++ b/net/ipv6/ip6_input.c	16 Apr 2004 13:16:25 -0000	1.1.1.15.2.1
@@ -15,6 +15,11 @@
  *      as published by the Free Software Foundation; either version
  *      2 of the License, or (at your option) any later version.
  */
+/* Changes
+ *
+ * 	Mitsuru KANDA @USAGI and
+ * 	YOSHIFUJI Hideaki @USAGI: Remove ipv6_parse_exthdrs().
+ */
 
 #include <linux/errno.h>
 #include <linux/types.h>
@@ -39,6 +44,7 @@
 #include <net/ndisc.h>
 #include <net/ip6_route.h>
 #include <net/addrconf.h>
+#include <net/xfrm.h>
 
 
 
@@ -47,7 +53,7 @@
 	if (skb->dst == NULL)
 		ip6_route_input(skb);
 
-	return skb->dst->input(skb);
+	return dst_input(skb);
 }
 
 int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
@@ -121,13 +127,12 @@
 
 static inline int ip6_input_finish(struct sk_buff *skb)
 {
-	struct ipv6hdr *hdr = skb->nh.ipv6h;
 	struct inet6_protocol *ipprot;
 	struct sock *raw_sk;
-	int nhoff;
+	unsigned int nhoff;
 	int nexthdr;
-	int found = 0;
 	u8 hash;
+	int cksum_sub = 0;
 
 	skb->h.raw = skb->nh.raw + sizeof(struct ipv6hdr);
 
@@ -135,7 +140,7 @@
 	 *	Parse extension headers
 	 */
 
-	nexthdr = hdr->nexthdr;
+	nexthdr = skb->nh.ipv6h->nexthdr;
 	nhoff = offsetof(struct ipv6hdr, nexthdr);
 
 	/* Skip  hop-by-hop options, they are already parsed. */
@@ -145,58 +150,46 @@
 		skb->h.raw += (skb->h.raw[1]+1)<<3;
 	}
 
-	/* This check is sort of optimization.
-	   It would be stupid to detect for optional headers,
-	   which are missing with probability of 200%
-	 */
-	if (nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP) {
-		nhoff = ipv6_parse_exthdrs(&skb, nhoff);
-		if (nhoff < 0)
-			return 0;
-		nexthdr = skb->nh.raw[nhoff];
-		hdr = skb->nh.ipv6h;
-	}
-
+resubmit:
 	if (!pskb_pull(skb, skb->h.raw - skb->data))
 		goto discard;
+	nexthdr = skb->nh.raw[nhoff];
 
-	if (skb->ip_summed == CHECKSUM_HW)
-		skb->csum = csum_sub(skb->csum,
-				     csum_partial(skb->nh.raw, skb->h.raw-skb->nh.raw, 0));
-
-	raw_sk = raw_v6_htable[nexthdr&(MAX_INET_PROTOS-1)];
+	raw_sk = raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)];
 	if (raw_sk)
-		raw_sk = ipv6_raw_deliver(skb, nexthdr);
+		ipv6_raw_deliver(skb, nexthdr);
 
 	hash = nexthdr & (MAX_INET_PROTOS - 1);
-	for (ipprot = (struct inet6_protocol *) inet6_protos[hash]; 
-	     ipprot != NULL; 
-	     ipprot = (struct inet6_protocol *) ipprot->next) {
-		struct sk_buff *buff = skb;
-
-		if (ipprot->protocol != nexthdr)
-			continue;
-
-		if (ipprot->copy || raw_sk)
-			buff = skb_clone(skb, GFP_ATOMIC);
-
-		if (buff)
-			ipprot->handler(buff);
-		found = 1;
-	}
-
-	if (raw_sk) {
-		rawv6_rcv(raw_sk, skb);
-		sock_put(raw_sk);
-		found = 1;
-	}
-
-	/*
-	 *	not found: send ICMP parameter problem back
-	 */
-	if (!found) {
-		IP6_INC_STATS_BH(Ip6InUnknownProtos);
-		icmpv6_param_prob(skb, ICMPV6_UNK_NEXTHDR, nhoff);
+	if ((ipprot = inet6_protos[hash]) != NULL) {
+		int ret;
+		
+		if (ipprot->flags & INET6_PROTO_FINAL) {
+			if (!cksum_sub && skb->ip_summed == CHECKSUM_HW) {
+				skb->csum = csum_sub(skb->csum,
+						     csum_partial(skb->nh.raw, skb->h.raw-skb->nh.raw, 0));
+				cksum_sub++;
+			}
+		}
+		if (!(ipprot->flags & INET6_PROTO_NOPOLICY) &&
+		    !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+			kfree_skb(skb);
+			return 0;
+		}
+		
+		ret = ipprot->handler(&skb, &nhoff);
+		if (ret > 0)
+			goto resubmit;
+		else if (ret == 0)
+			IP6_INC_STATS_BH(Ip6InDelivers);
+	} else {
+		if (!raw_sk) {
+			if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+				IP6_INC_STATS_BH(Ip6InUnknownProtos);
+				icmpv6_param_prob(skb, ICMPV6_UNK_NEXTHDR, nhoff);
+			}
+		} else {
+			kfree_skb(skb);
+		}
 	}
 
 	return 0;
@@ -246,7 +239,7 @@
 				skb2 = skb;
 			}
 
-			dst->output(skb2);
+			dst_output(skb2);
 		}
 	}
 #endif
Index: net/ipv6/ip6_output.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv6/ip6_output.c,v
retrieving revision 1.1.1.21
retrieving revision 1.1.1.21.2.1
diff -u -r1.1.1.21 -r1.1.1.21.2.1
--- a/net/ipv6/ip6_output.c	25 Aug 2003 11:44:44 -0000	1.1.1.21
+++ b/net/ipv6/ip6_output.c	16 Apr 2004 13:16:25 -0000	1.1.1.21.2.1
@@ -23,6 +23,9 @@
  *
  *      H. von Brand    :       Added missing #include <linux/string.h>
  *	Imran Patel	: 	frag id should be in NBO
+ *      Kazunori MIYAZAWA @USAGI
+ *			:       add ip6_append_data and related functions
+ *				for datagram xmit
  */
 
 #include <linux/config.h>
@@ -49,6 +52,9 @@
 #include <net/addrconf.h>
 #include <net/rawv6.h>
 #include <net/icmp.h>
+#include <net/xfrm.h>
+
+static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*));
 
 static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
 {
@@ -99,7 +105,7 @@
 }
 
 
-int ip6_output(struct sk_buff *skb)
+int ip6_output2(struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb->dst;
 	struct net_device *dev = dst->dev;
@@ -134,21 +140,27 @@
 	return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
 }
 
+int ip6_output(struct sk_buff *skb)
+{
+	if ((skb->len > dst_pmtu(skb->dst) || skb_shinfo(skb)->frag_list))
+		return ip6_fragment(skb, ip6_output2);
+	else
+		return ip6_output2(skb);
+}
 
 #ifdef CONFIG_NETFILTER
 int ip6_route_me_harder(struct sk_buff *skb)
 {
 	struct ipv6hdr *iph = skb->nh.ipv6h;
 	struct dst_entry *dst;
-	struct flowi fl;
-
-	fl.proto = iph->nexthdr;
-	fl.fl6_dst = &iph->daddr;
-	fl.fl6_src = &iph->saddr;
-	fl.oif = skb->sk ? skb->sk->bound_dev_if : 0;
-	fl.fl6_flowlabel = 0;
-	fl.uli_u.ports.dport = 0;
-	fl.uli_u.ports.sport = 0;
+	struct flowi fl = {
+		.oif = skb->sk ? skb->sk->bound_dev_if : 0,
+		.nl_u =
+		{ .ip6_u =
+		  { .daddr = iph->daddr,
+		    .saddr = iph->saddr, } },
+		.proto = iph->nexthdr,
+	};
 
 	dst = ip6_route_output(skb->sk, &fl);
 
@@ -177,7 +189,7 @@
 		}
 	}
 #endif /* CONFIG_NETFILTER */
-	return skb->dst->output(skb);
+	return dst_output(skb);
 }
 
 /*
@@ -188,12 +200,13 @@
 	     struct ipv6_txoptions *opt)
 {
 	struct ipv6_pinfo * np = sk ? &sk->net_pinfo.af_inet6 : NULL;
-	struct in6_addr *first_hop = fl->nl_u.ip6_u.daddr;
+	struct in6_addr *first_hop = &fl->fl6_dst;
 	struct dst_entry *dst = skb->dst;
 	struct ipv6hdr *hdr;
 	u8  proto = fl->proto;
 	int seg_len = skb->len;
 	int hlimit;
+	u32 mtu;
 
 	if (opt) {
 		int head_room;
@@ -231,16 +244,17 @@
 	if (np)
 		hlimit = np->hop_limit;
 	if (hlimit < 0)
-		hlimit = ((struct rt6_info*)dst)->rt6i_hoplimit;
+		hlimit = dst_metric(dst, RTAX_HOPLIMIT);
 
 	hdr->payload_len = htons(seg_len);
 	hdr->nexthdr = proto;
 	hdr->hop_limit = hlimit;
 
-	ipv6_addr_copy(&hdr->saddr, fl->nl_u.ip6_u.saddr);
+	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
 	ipv6_addr_copy(&hdr->daddr, first_hop);
 
-	if (skb->len <= dst->pmtu) {
+	mtu = dst_pmtu(dst);
+	if (skb->len <= mtu) {
 		IP6_INC_STATS(Ip6OutRequests);
 		return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, ip6_maybe_reroute);
 	}
@@ -248,7 +262,7 @@
 	if (net_ratelimit())
 		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
 	skb->dev = dst->dev;
-	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst->pmtu, skb->dev);
+	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
 	kfree_skb(skb);
 	return -EMSGSIZE;
 }
@@ -302,8 +316,8 @@
 	hdr->hop_limit = hlimit;
 	hdr->nexthdr = fl->proto;
 
-	ipv6_addr_copy(&hdr->saddr, fl->nl_u.ip6_u.saddr);
-	ipv6_addr_copy(&hdr->daddr, fl->nl_u.ip6_u.daddr);
+	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
+	ipv6_addr_copy(&hdr->daddr, &fl->fl6_dst);
 	return hdr;
 }
 
@@ -507,19 +521,19 @@
 		   struct ipv6_txoptions *opt, int hlimit, int flags)
 {
 	struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
-	struct in6_addr *final_dst = NULL;
+	struct in6_addr final_dst_buf, *final_dst = NULL;
 	struct dst_entry *dst;
 	int err = 0;
 	unsigned int pktlength, jumbolen, mtu;
-	struct in6_addr saddr;
 
 	if (opt && opt->srcrt) {
 		struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
-		final_dst = fl->fl6_dst;
-		fl->fl6_dst = rt0->addr;
+		ipv6_addr_copy(&final_dst_buf, &fl->fl6_dst);
+		final_dst = &final_dst_buf;
+		ipv6_addr_copy(&fl->fl6_dst, rt0->addr);
 	}
 
-	if (!fl->oif && ipv6_addr_is_multicast(fl->nl_u.ip6_u.daddr))
+	if (!fl->oif && ipv6_addr_is_multicast(&fl->fl6_dst))
 		fl->oif = np->mcast_oif;
 
 	dst = __sk_dst_check(sk, np->dst_cookie);
@@ -545,9 +559,9 @@
 			 */
 
 		if (((rt->rt6i_dst.plen != 128 ||
-		      ipv6_addr_cmp(fl->fl6_dst, &rt->rt6i_dst.addr))
+		      ipv6_addr_cmp(&fl->fl6_dst, &rt->rt6i_dst.addr))
 		     && (np->daddr_cache == NULL ||
-			 ipv6_addr_cmp(fl->fl6_dst, np->daddr_cache)))
+			 ipv6_addr_cmp(&fl->fl6_dst, np->daddr_cache)))
 		    || (fl->oif && fl->oif != dst->dev->ifindex)) {
 			dst = NULL;
 		} else
@@ -563,8 +577,8 @@
 		return -ENETUNREACH;
 	}
 
-	if (fl->fl6_src == NULL) {
-		err = ipv6_get_saddr(dst, fl->fl6_dst, &saddr);
+	if (ipv6_addr_any(&fl->fl6_src)) {
+		err = ipv6_get_saddr(dst, &fl->fl6_dst, &fl->fl6_src);
 
 		if (err) {
 #if IP6_DEBUG >= 2
@@ -573,17 +587,23 @@
 #endif
 			goto out;
 		}
-		fl->fl6_src = &saddr;
 	}
 	pktlength = length;
 
+        if (dst) {
+		if ((err = xfrm_lookup(&dst, fl, sk, 0)) < 0) {
+			dst_release(dst);	
+			return -ENETUNREACH;
+		}
+        }
+
 	if (hlimit < 0) {
-		if (ipv6_addr_is_multicast(fl->fl6_dst))
+		if (ipv6_addr_is_multicast(&fl->fl6_dst))
 			hlimit = np->mcast_hops;
 		else
 			hlimit = np->hop_limit;
 		if (hlimit < 0)
-			hlimit = ((struct rt6_info*)dst)->rt6i_hoplimit;
+			hlimit = dst_metric(dst, RTAX_HOPLIMIT);
 	}
 
 	jumbolen = 0;
@@ -593,7 +613,7 @@
 		if (opt)
 			pktlength += opt->opt_flen + opt->opt_nflen;
 
-		if (pktlength > 0xFFFF + sizeof(struct ipv6hdr)) {
+		if (pktlength > sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
 			/* Jumbo datagram.
 			   It is assumed, that in the case of hdrincl
 			   jumbo option is supplied by user.
@@ -603,7 +623,7 @@
 		}
 	}
 
-	mtu = dst->pmtu;
+	mtu = dst_pmtu(dst);
 	if (np->frag_size < mtu) {
 		if (np->frag_size)
 			mtu = np->frag_size;
@@ -631,9 +651,8 @@
 		err = 0;
 		if (flags&MSG_PROBE)
 			goto out;
-
-		skb = sock_alloc_send_skb(sk, pktlength + 15 +
-					  dev->hard_header_len,
+		/* alloc skb with mtu as we do in the IPv4 stack for IPsec */
+		skb = sock_alloc_send_skb(sk, mtu + LL_RESERVED_SPACE(dev),
 					  flags & MSG_DONTWAIT, &err);
 
 		if (skb == NULL) {
@@ -664,6 +683,8 @@
 		err = getfrag(data, &hdr->saddr,
 			      ((char *) hdr) + (pktlength - length),
 			      0, length);
+		if (!opt || !opt->dst1opt)
+			skb->h.raw = ((char *) hdr) + (pktlength - length);
 
 		if (!err) {
 			IP6_INC_STATS(Ip6OutRequests);
@@ -688,7 +709,9 @@
 	 *	cleanup
 	 */
 out:
-	ip6_dst_store(sk, dst, fl->nl_u.ip6_u.daddr == &np->daddr ? &np->daddr : NULL);
+	ip6_dst_store(sk, dst,
+		      !ipv6_addr_cmp(&fl->fl6_dst, &np->daddr) ?
+		      &np->daddr : NULL);
 	if (err > 0)
 		err = np->recverr ? net_xmit_errno(err) : 0;
 	return err;
@@ -723,7 +746,7 @@
 
 static inline int ip6_forward_finish(struct sk_buff *skb)
 {
-	return skb->dst->output(skb);
+	return dst_output(skb);
 }
 
 int ip6_forward(struct sk_buff *skb)
@@ -735,6 +758,9 @@
 	if (ipv6_devconf.forwarding == 0)
 		goto error;
 
+	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb))
+		goto drop;
+
 	skb->ip_summed = CHECKSUM_NONE;
 
 	/*
@@ -769,6 +795,9 @@
 		return -ETIMEDOUT;
 	}
 
+	if (!xfrm6_route_forward(skb))
+		goto drop;
+
 	/* IPv6 specs say nothing about it, but it is clear that we cannot
 	   send redirects to source routed frames.
 	 */
@@ -799,10 +828,10 @@
 		goto error;
 	}
 
-	if (skb->len > dst->pmtu) {
+	if (skb->len > dst_pmtu(dst)) {
 		/* Again, force OUTPUT device used as source address */
 		skb->dev = dst->dev;
-		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst->pmtu, skb->dev);
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_pmtu(dst), skb->dev);
 		IP6_INC_STATS_BH(Ip6InTooBigErrors);
 		kfree_skb(skb);
 		return -EMSGSIZE;
@@ -826,3 +855,653 @@
 	kfree_skb(skb);
 	return -EINVAL;
 }
+
+static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
+{
+	to->pkt_type = from->pkt_type;
+	to->priority = from->priority;
+	to->protocol = from->protocol;
+	to->security = from->security;
+	to->dst = dst_clone(from->dst);
+	to->dev = from->dev;
+
+#ifdef CONFIG_NET_SCHED
+	to->tc_index = from->tc_index;
+#endif
+#ifdef CONFIG_NETFILTER
+	to->nfmark = from->nfmark;
+	/* Connection association is same as pre-frag packet */
+	to->nfct = from->nfct;
+	nf_conntrack_get(to->nfct);
+#ifdef CONFIG_NETFILTER_DEBUG
+	to->nf_debug = from->nf_debug;
+#endif
+#endif
+}
+
+int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
+{
+	u16 offset = sizeof(struct ipv6hdr);
+	struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
+	unsigned int packet_len = skb->tail - skb->nh.raw;
+	int found_rhdr = 0;
+	*nexthdr = &skb->nh.ipv6h->nexthdr;
+
+	while (offset + 1 <= packet_len) {
+
+		switch (**nexthdr) {
+
+		case NEXTHDR_HOP:
+		case NEXTHDR_ROUTING:
+		case NEXTHDR_DEST:
+			if (**nexthdr == NEXTHDR_ROUTING) found_rhdr = 1;
+			if (**nexthdr == NEXTHDR_DEST && found_rhdr) return offset;
+			offset += ipv6_optlen(exthdr);
+			*nexthdr = &exthdr->nexthdr;
+			exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+			break;
+		default :
+			return offset;
+		}
+	}
+
+	return offset;
+}
+
+static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
+{
+	struct net_device *dev;
+	struct rt6_info *rt = (struct rt6_info*)skb->dst;
+	struct sk_buff *frag;
+	struct ipv6hdr *tmp_hdr;
+	struct frag_hdr *fh;
+	unsigned int mtu, hlen, left, len;
+	u32 frag_id = 0;
+	int ptr, offset = 0, err=0;
+	u8 *prevhdr, nexthdr = 0;
+	/* Split skb to fit the path MTU and hand each fragment to output(). */
+	dev = rt->u.dst.dev;
+	hlen = ip6_find_1stfragopt(skb, &prevhdr);
+	nexthdr = *prevhdr;
+
+	mtu = dst_pmtu(&rt->u.dst) - hlen - sizeof(struct frag_hdr);
+
+	if (skb_shinfo(skb)->frag_list) {
+		int first_len = skb_pagelen(skb);
+
+		if (first_len - hlen > mtu ||
+		    ((first_len - hlen) & 7) ||
+		    skb_cloned(skb))
+			goto slow_path;
+
+		for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
+			/* Correct geometry. */
+			if (frag->len > mtu ||
+			    ((frag->len & 7) && frag->next) ||
+			    skb_headroom(frag) < hlen)
+			    goto slow_path;
+
+			/* Correct socket ownership. */
+			if (frag->sk == NULL)
+				goto slow_path;
+
+			/* Partially cloned skb? */
+			if (skb_shared(frag))
+				goto slow_path;
+		}
+
+		err = 0;
+		offset = 0;
+		frag = skb_shinfo(skb)->frag_list;
+		skb_shinfo(skb)->frag_list = 0;
+		/* BUILD HEADER */
+
+		tmp_hdr = kmalloc(hlen, GFP_ATOMIC);
+		if (!tmp_hdr) {
+			IP6_INC_STATS(Ip6FragFails);
+			return -ENOMEM;
+		}
+
+		*prevhdr = NEXTHDR_FRAGMENT;
+		memcpy(tmp_hdr, skb->nh.raw, hlen);
+		__skb_pull(skb, hlen);
+		fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
+		skb->nh.raw = __skb_push(skb, hlen);
+		memcpy(skb->nh.raw, tmp_hdr, hlen);
+
+		ipv6_select_ident(skb, fh);
+		fh->nexthdr = nexthdr;
+		fh->reserved = 0;
+		fh->frag_off = htons(IP6_MF);
+		frag_id = fh->identification;
+
+		first_len = skb_pagelen(skb);
+		skb->data_len = first_len - skb_headlen(skb);
+		skb->len = first_len;
+		skb->nh.ipv6h->payload_len = htons(first_len - sizeof(struct ipv6hdr));
+ 
+
+		for (;;) {
+			/* Prepare header of the next frame,
+			 * before previous one went down. */
+			if (frag) {
+				frag->h.raw = frag->data;
+				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
+				frag->nh.raw = __skb_push(frag, hlen);
+				memcpy(frag->nh.raw, tmp_hdr, hlen);
+				offset += skb->len - hlen - sizeof(struct frag_hdr);
+				fh->nexthdr = nexthdr;
+				fh->reserved = 0;
+				fh->frag_off = htons(offset);
+				if (frag->next != NULL)
+					fh->frag_off |= htons(IP6_MF);
+				fh->identification = frag_id;
+				frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
+				ip6_copy_metadata(frag, skb);
+			}
+			err = output(skb);
+
+			if (err || !frag)
+				break;
+
+			skb = frag;
+			frag = skb->next;
+			skb->next = NULL;
+		}
+
+		if (tmp_hdr)
+			kfree(tmp_hdr);
+
+		if (err == 0) {
+			IP6_INC_STATS(Ip6FragOKs);
+			return 0;
+		}
+
+		while (frag) {
+			skb = frag->next;
+			kfree_skb(frag);
+			frag = skb;
+		}
+
+		IP6_INC_STATS(Ip6FragFails);
+		return err;
+	}
+
+slow_path:
+	left = skb->len - hlen;		/* Payload bytes still to send */
+	ptr = hlen;			/* Where to start from */
+
+	/*
+	 *	Fragment the datagram.
+	 */
+
+	*prevhdr = NEXTHDR_FRAGMENT;
+
+	/*
+	 *	Keep copying data until we run out.
+	 */
+	while(left > 0)	{
+		len = left;
+		/* IF: it doesn't fit, use 'mtu' - the data space left */
+		if (len > mtu)
+			len = mtu;
+		/* IF: we are not sending upto and including the packet end
+		   then align the next start on an eight byte boundary */
+		if (len < left)	{
+			len &= ~7;
+		}
+		/*
+		 *	Allocate buffer.
+		 */
+
+		if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
+			NETDEBUG(printk(KERN_INFO "IPv6: frag: no memory for new fragment!\n"));
+			err = -ENOMEM;
+			goto fail;
+		}
+
+		/*
+		 *	Set up data on packet
+		 */
+
+		ip6_copy_metadata(frag, skb);
+		skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
+		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
+		frag->nh.raw = frag->data;
+		fh = (struct frag_hdr*)(frag->data + hlen);
+		frag->h.raw = frag->data + hlen + sizeof(struct frag_hdr);
+
+		/*
+		 *	Charge the memory for the fragment to any owner
+		 *	it might possess
+		 */
+		if (skb->sk)
+			skb_set_owner_w(frag, skb->sk);
+
+		/*
+		 *	Copy the packet header into the new buffer.
+		 */
+		memcpy(frag->nh.raw, skb->data, hlen);
+
+		/*
+		 *	Build fragment header; every fragment reuses one id.
+		 */
+		fh->nexthdr = nexthdr;
+		fh->reserved = 0;
+		if (!frag_id) {	/* no id chosen yet: select one and remember it */
+			ipv6_select_ident(skb, fh);
+			frag_id = fh->identification;
+		} else
+			fh->identification = frag_id;
+
+		/*
+		 *	Copy a block of the IP datagram.
+		 */
+		if (skb_copy_bits(skb, ptr, frag->h.raw, len))
+			BUG();
+		left -= len;
+
+		fh->frag_off = htons(offset);
+		if (left > 0)
+			fh->frag_off |= htons(IP6_MF);
+		frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
+
+		ptr += len;
+		offset += len;
+
+		/*
+		 *	Put this fragment into the sending queue.
+		 */
+
+		IP6_INC_STATS(Ip6FragCreates);
+
+		err = output(frag);
+		if (err)
+			goto fail;
+	}
+	kfree_skb(skb);
+	IP6_INC_STATS(Ip6FragOKs);
+	return err;
+
+fail:
+	kfree_skb(skb); 
+	IP6_INC_STATS(Ip6FragFails);
+	return err;
+}
+
+int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
+{
+	int err = 0;
+
+	if (sk) {
+		struct ipv6_pinfo *np = inet6_sk(sk);
+	
+		*dst = __sk_dst_check(sk, np->dst_cookie);
+		if (*dst) {
+			struct rt6_info *rt = (struct rt6_info*)*dst;
+	
+				/* Yes, checking route validity in not connected
+				   case is not very simple. Take into account,
+				   that we do not support routing by source, TOS,
+				   and MSG_DONTROUTE 		--ANK (980726)
+	
+				   1. If route was host route, check that
+				      cached destination is current.
+				      If it is network route, we still may
+				      check its validity using saved pointer
+				      to the last used address: daddr_cache.
+				      We do not want to save whole address now,
+				      (because main consumer of this service
+				       is tcp, which has not this problem),
+				      so that the last trick works only on connected
+				      sockets.
+				   2. oif also should be the same.
+				 */
+	
+			if (((rt->rt6i_dst.plen != 128 ||
+			      ipv6_addr_cmp(&fl->fl6_dst, &rt->rt6i_dst.addr))
+			     && (np->daddr_cache == NULL ||
+				 ipv6_addr_cmp(&fl->fl6_dst, np->daddr_cache)))
+			    || (fl->oif && fl->oif != (*dst)->dev->ifindex)) {
+				*dst = NULL;
+			} else
+				dst_hold(*dst);
+		}
+	}
+
+	if (*dst == NULL)
+		*dst = ip6_route_output(sk, fl);
+
+	if ((err = (*dst)->error))
+		goto out_err_release;
+
+	if (ipv6_addr_any(&fl->fl6_src)) {
+		err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
+
+		if (err) {
+#if IP6_DEBUG >= 2
+			printk(KERN_DEBUG "ip6_dst_lookup: "
+			       "no availiable source address\n");
+#endif
+			goto out_err_release;
+		}
+	}
+	if ((err = xfrm_lookup(dst, fl, sk, 0)) < 0) {
+		err = -ENETUNREACH;
+		goto out_err_release;
+        }
+
+	return 0;
+
+out_err_release:
+	dst_release(*dst);
+	*dst = NULL;
+	return err;
+}
+
+int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb),
+		    void *from, int length, int transhdrlen,
+		    int hlimit, struct ipv6_txoptions *opt, struct flowi *fl, struct rt6_info *rt,
+		    unsigned int flags)
+{
+	struct inet_opt *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct sk_buff *skb;
+	unsigned int maxfraglen, fragheaderlen;
+	int exthdrlen;
+	int hh_len;
+	int mtu;
+	int copy = 0;
+	int err;
+	int offset = 0;
+	int csummode = CHECKSUM_NONE;
+	/* Append user data to the skbs queued on sk->write_queue (corking). */
+	if (flags&MSG_PROBE)
+		return 0;
+	if (skb_queue_empty(&sk->write_queue)) {
+		/*
+		 * setup for corking
+		 */
+		if (opt) {
+			if (np->cork.opt == NULL &&
+			    !(np->cork.opt = kmalloc(opt->tot_len, sk->allocation)))
+				return -ENOBUFS; /* nothing corked yet, safe to bail */
+			memcpy(np->cork.opt, opt, opt->tot_len);
+			inet->cork.flags |= IPCORK_OPT;
+		}
+		dst_hold(&rt->u.dst);
+		np->cork.rt = rt;
+		np->cork.fl = *fl;
+		np->cork.hop_limit = hlimit;
+		inet->cork.fragsize = mtu = dst_pmtu(&rt->u.dst);
+		inet->cork.length = 0;
+		inet->sndmsg_page = NULL;
+		inet->sndmsg_off = 0;
+		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0);
+		length += exthdrlen;
+		transhdrlen += exthdrlen;
+	} else {
+		rt = np->cork.rt;
+		if (inet->cork.flags & IPCORK_OPT)
+			opt = np->cork.opt;
+		transhdrlen = 0;
+		exthdrlen = 0;
+		mtu = inet->cork.fragsize;
+	}
+
+	hh_len = (rt->u.dst.dev->hard_header_len&~15) + 16;
+
+	fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0);
+	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
+
+	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
+		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
+			ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
+			return -EMSGSIZE;
+		}
+	}
+
+	inet->cork.length += length;
+
+	if ((skb = skb_peek_tail(&sk->write_queue)) == NULL)
+		goto alloc_new_skb;
+
+	while (length > 0) {
+		if ((copy = maxfraglen - skb->len) <= 0) {
+			char *data;
+			unsigned int datalen;
+			unsigned int fraglen;
+			unsigned int alloclen;
+			BUG_TRAP(copy == 0);
+alloc_new_skb:
+			datalen = maxfraglen - fragheaderlen;
+			if (datalen > length)
+				datalen = length;
+			fraglen = datalen + fragheaderlen;
+			if ((flags & MSG_MORE) &&
+			    !(rt->u.dst.dev->features&NETIF_F_SG))
+				alloclen = maxfraglen;
+			else
+				alloclen = fraglen;
+			alloclen += sizeof(struct frag_hdr);
+			if (transhdrlen) {
+				skb = sock_alloc_send_skb(sk,
+						alloclen + hh_len + 15,
+						(flags & MSG_DONTWAIT), &err);
+			} else {
+				skb = NULL;
+				if (atomic_read(&sk->wmem_alloc) <= 2*sk->sndbuf)
+					skb = sock_wmalloc(sk,
+							   alloclen + hh_len + 15, 1,
+							   sk->allocation);
+				if (unlikely(skb == NULL))
+					err = -ENOBUFS;
+			}
+			if (skb == NULL)
+				goto error;
+			/*
+			 *	Fill in the control structures
+			 */
+			skb->ip_summed = csummode;
+			skb->csum = 0;
+			/* reserve 8 byte for fragmentation */
+			skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
+
+			/*
+			 *	Find where to start putting bytes
+			 */
+			data = skb_put(skb, fraglen);
+			skb->nh.raw = data + exthdrlen;
+			data += fragheaderlen;
+			skb->h.raw = data + exthdrlen;
+			copy = datalen - transhdrlen;
+			if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, 0, skb) < 0) {
+				err = -EFAULT;
+				kfree_skb(skb);
+				goto error;
+			}
+
+			offset += copy;
+			length -= datalen;
+			transhdrlen = 0;
+			exthdrlen = 0;
+			csummode = CHECKSUM_NONE;
+
+			/*
+			 * Put the packet on the pending queue
+			 */
+			__skb_queue_tail(&sk->write_queue, skb);
+			continue;
+		}
+
+		if (copy > length)
+			copy = length;
+
+		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
+			unsigned int off;
+
+			off = skb->len;
+			if (getfrag(from, skb_put(skb, copy),
+						offset, copy, off, skb) < 0) {
+				__skb_trim(skb, off);
+				err = -EFAULT;
+				goto error;
+			}
+		} else {
+			int i = skb_shinfo(skb)->nr_frags;
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
+			struct page *page = inet->sndmsg_page;
+			int off = inet->sndmsg_off;
+			unsigned int left;
+
+			if (page && (left = PAGE_SIZE - off) > 0) {
+				if (copy >= left)
+					copy = left;
+				if (page != frag->page) {
+					if (i == MAX_SKB_FRAGS) {
+						err = -EMSGSIZE;
+						goto error;
+					}
+					get_page(page);
+					skb_fill_page_desc(skb, i, page, inet->sndmsg_off, 0);
+					frag = &skb_shinfo(skb)->frags[i];
+				}
+			} else if(i < MAX_SKB_FRAGS) {
+				if (copy > PAGE_SIZE)
+					copy = PAGE_SIZE;
+				page = alloc_pages(sk->allocation, 0);
+				if (page == NULL) {
+					err = -ENOMEM;
+					goto error;
+				}
+				inet->sndmsg_page = page;
+				inet->sndmsg_off = 0;
+
+				skb_fill_page_desc(skb, i, page, 0, 0);
+				frag = &skb_shinfo(skb)->frags[i];
+				skb->truesize += PAGE_SIZE;
+				atomic_add(PAGE_SIZE, &sk->wmem_alloc);
+			} else {
+				err = -EMSGSIZE;
+				goto error;
+			}
+			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
+				err = -EFAULT;
+				goto error;
+			}
+			inet->sndmsg_off += copy;
+			frag->size += copy;
+			skb->len += copy;
+			skb->data_len += copy;
+		}
+		offset += copy;
+		length -= copy;
+	}
+	return 0;
+error:
+	inet->cork.length -= length;
+	IP6_INC_STATS(Ip6OutDiscards);
+	return err;
+}
+
+int ip6_push_pending_frames(struct sock *sk)
+{
+	struct sk_buff *skb, *tmp_skb;
+	struct sk_buff **tail_skb;
+	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
+	struct inet_opt *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct ipv6hdr *hdr;
+	struct ipv6_txoptions *opt = np->cork.opt;
+	struct rt6_info *rt = np->cork.rt;
+	struct flowi *fl = &np->cork.fl;
+	unsigned char proto = fl->proto;
+	int err = 0;
+	/* Chain the queued skbs into one packet, send it, reset cork state. */
+	if ((skb = __skb_dequeue(&sk->write_queue)) == NULL)
+		goto out;
+	tail_skb = &(skb_shinfo(skb)->frag_list);
+
+	/* move skb->data to ip header from ext header */
+	if (skb->data < skb->nh.raw)
+		__skb_pull(skb, skb->nh.raw - skb->data);
+	while ((tmp_skb = __skb_dequeue(&sk->write_queue)) != NULL) {
+		__skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
+		*tail_skb = tmp_skb;
+		tail_skb = &(tmp_skb->next);
+		skb->len += tmp_skb->len;
+		skb->data_len += tmp_skb->len;
+#if 0 /* Logically correct, but useless work, ip_fragment() will have to undo */
+		skb->truesize += tmp_skb->truesize;
+		__sock_put(tmp_skb->sk);
+		tmp_skb->destructor = NULL;
+		tmp_skb->sk = NULL;
+#endif
+	}
+
+	ipv6_addr_copy(final_dst, &fl->fl6_dst);
+	__skb_pull(skb, skb->h.raw - skb->nh.raw);
+	if (opt && opt->opt_flen)
+		ipv6_push_frag_opts(skb, opt, &proto);
+	if (opt && opt->opt_nflen)
+		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
+
+	skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr));
+	
+	*(u32*)hdr = fl->fl6_flowlabel | htonl(0x60000000);
+
+	if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
+		hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+	else
+		hdr->payload_len = 0;
+	hdr->hop_limit = np->cork.hop_limit;
+	hdr->nexthdr = proto;
+	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
+	ipv6_addr_copy(&hdr->daddr, final_dst);
+
+	skb->dst = dst_clone(&rt->u.dst);
+	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
+	if (err) {
+		if (err > 0)
+			err = inet->recverr ? net_xmit_errno(err) : 0;
+		if (err)
+			goto error;
+	}
+
+out:
+	inet->cork.flags &= ~IPCORK_OPT;
+	if (np->cork.opt) {
+		kfree(np->cork.opt);
+		np->cork.opt = NULL;
+	}
+	if (np->cork.rt)	/* drop the reference the cork holds on the route */
+		dst_release(&np->cork.rt->u.dst);
+	np->cork.rt = NULL;
+	memset(&np->cork.fl, 0, sizeof(np->cork.fl));
+	return err;
+error:
+	goto out;
+}
+
+void ip6_flush_pending_frames(struct sock *sk)
+{
+	struct inet_opt *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct sk_buff *skb;
+	/* Discard every queued skb and reset the socket's cork state. */
+	while ((skb = __skb_dequeue_tail(&sk->write_queue)) != NULL)
+		kfree_skb(skb);
+
+	inet->cork.flags &= ~IPCORK_OPT;
+
+	if (np->cork.opt) {
+		kfree(np->cork.opt);
+		np->cork.opt = NULL;
+	}
+	if (np->cork.rt) {
+		/* the cork holds exactly one reference; release it once */
+		dst_release(&np->cork.rt->u.dst);
+		np->cork.rt = NULL;
+	}
+	memset(&np->cork.fl, 0, sizeof(np->cork.fl));
+}
Index: net/ipv6/ip6_tunnel.c
===================================================================
RCS file: net/ipv6/ip6_tunnel.c
diff -N net/ipv6/ip6_tunnel.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ b/net/ipv6/ip6_tunnel.c	16 Apr 2004 13:16:25 -0000	1.6.12.1
@@ -0,0 +1,1260 @@
+/*
+ *	IPv6 over IPv6 tunnel device
+ *	Linux INET6 implementation
+ *
+ *	Authors:
+ *	Ville Nuorvala		<vnuorval@tcs.hut.fi>	
+ *
+ *	$Id$
+ *
+ *      Based on:
+ *      linux/net/ipv6/sit.c
+ *
+ *      RFC 2473
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/if.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/if_tunnel.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/icmpv6.h>
+#include <linux/init.h>
+#include <linux/route.h>
+#include <linux/rtnetlink.h>
+
+#include <asm/uaccess.h>
+#include <asm/atomic.h>
+
+#include <net/ip.h>
+#include <net/sock.h>
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/ip6_tunnel.h>
+#include <net/xfrm.h>
+
+MODULE_AUTHOR("Ville Nuorvala");
+MODULE_DESCRIPTION("IPv6-in-IPv6 tunnel");
+MODULE_LICENSE("GPL");
+
+#define IPV6_TLV_TEL_DST_SIZE 8
+
+#ifdef IP6_TNL_DEBUG
+#define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __FUNCTION__)
+#else
+#define IP6_TNL_TRACE(x...) do {;} while(0)
+#endif
+
+#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
+
+/* socket(s) used by ip6ip6_tnl_xmit() for resending packets */
+static struct socket *__ip6_socket[NR_CPUS];
+#define ip6_socket __ip6_socket[smp_processor_id()]
+
+static void ip6_xmit_lock(void)
+{
+	local_bh_disable();
+	if (unlikely(!spin_trylock(&ip6_socket->sk->lock.slock)))
+		BUG();
+}
+
+static void ip6_xmit_unlock(void)
+{
+	spin_unlock_bh(&ip6_socket->sk->lock.slock);
+}
+
+#define HASH_SIZE  32
+
+#define HASH(addr) (((addr)->s6_addr32[0] ^ (addr)->s6_addr32[1] ^ \
+	             (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \
+                    (HASH_SIZE - 1))
+
+static int ip6ip6_fb_tnl_dev_init(struct net_device *dev);
+static int ip6ip6_tnl_dev_init(struct net_device *dev);
+
+/* the IPv6 tunnel fallback device */
+static struct net_device ip6ip6_fb_tnl_dev = {
+	.name = "ip6tnl0",
+	.init = ip6ip6_fb_tnl_dev_init
+};
+
+/* the IPv6 fallback tunnel */
+static struct ip6_tnl ip6ip6_fb_tnl = {
+	.dev = &ip6ip6_fb_tnl_dev,
+	.parms ={.name = "ip6tnl0", .proto = IPPROTO_IPV6}
+};
+
+/* lists for storing tunnels in use */
+static struct ip6_tnl *tnls_r_l[HASH_SIZE];
+static struct ip6_tnl *tnls_wc[1];
+static struct ip6_tnl **tnls[2] = { tnls_wc, tnls_r_l };
+
+/* lock for the tunnel lists */
+static rwlock_t ip6ip6_lock = RW_LOCK_UNLOCKED;
+
+/**
+ * ip6ip6_tnl_lookup - fetch tunnel matching the end-point addresses
+ *   @remote: the address of the tunnel exit-point 
+ *   @local: the address of the tunnel entry-point 
+ *
+ * Return:  
+ *   tunnel matching given end-points if found,
+ *   else fallback tunnel if its device is up, 
+ *   else %NULL
+ **/
+
+struct ip6_tnl *
+ip6ip6_tnl_lookup(struct in6_addr *remote, struct in6_addr *local)
+{
+	unsigned h0 = HASH(remote);
+	unsigned h1 = HASH(local);
+	struct ip6_tnl *t;
+
+	for (t = tnls_r_l[h0 ^ h1]; t; t = t->next) {
+		if (!ipv6_addr_cmp(local, &t->parms.laddr) &&
+		    !ipv6_addr_cmp(remote, &t->parms.raddr) &&
+		    (t->dev->flags & IFF_UP))
+			return t;
+	}
+	if ((t = tnls_wc[0]) != NULL && (t->dev->flags & IFF_UP))
+		return t;
+
+	return NULL;
+}
+
+/**
+ * ip6ip6_bucket - get head of list matching given tunnel parameters
+ *   @p: parameters containing tunnel end-points 
+ *
+ * Description:
+ *   ip6ip6_bucket() returns the head of the list matching the 
+ *   &struct in6_addr entries laddr and raddr in @p.
+ *
+ * Return: head of IPv6 tunnel list 
+ **/
+
+static struct ip6_tnl **
+ip6ip6_bucket(struct ip6_tnl_parm *p)
+{
+	struct in6_addr *remote = &p->raddr;
+	struct in6_addr *local = &p->laddr;
+	unsigned h = 0;
+	int prio = 0;
+
+	if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
+		prio = 1;
+		h = HASH(remote) ^ HASH(local);
+	}
+	return &tnls[prio][h];
+}
+
+/**
+ * ip6ip6_tnl_link - add tunnel to hash table
+ *   @t: tunnel to be added
+ **/
+
+static void
+ip6ip6_tnl_link(struct ip6_tnl *t)
+{
+	struct ip6_tnl **tp = ip6ip6_bucket(&t->parms);
+
+	write_lock_bh(&ip6ip6_lock);
+	t->next = *tp;
+	*tp = t;	/* publish the new head while still holding the lock */
+	write_unlock_bh(&ip6ip6_lock);
+}
+
+/**
+ * ip6ip6_tnl_unlink - remove tunnel from hash table
+ *   @t: tunnel to be removed
+ **/
+
+static void
+ip6ip6_tnl_unlink(struct ip6_tnl *t)
+{
+	struct ip6_tnl **tp;
+
+	for (tp = ip6ip6_bucket(&t->parms); *tp; tp = &(*tp)->next) {
+		if (t == *tp) {
+			write_lock_bh(&ip6ip6_lock);
+			*tp = t->next;
+			write_unlock_bh(&ip6ip6_lock);
+			break;
+		}
+	}
+}
+
+/**
+ * ip6_tnl_create() - create a new tunnel
+ *   @p: tunnel parameters
+ *   @pt: pointer to new tunnel
+ *
+ * Description:
+ *   Create tunnel matching given parameters.
+ * 
+ * Return: 
+ *   0 on success
+ **/
+
+static int
+ip6_tnl_create(struct ip6_tnl_parm *p, struct ip6_tnl **pt)
+{
+	struct net_device *dev;
+	int err = -ENOBUFS;
+	struct ip6_tnl *t;
+
+	dev = kmalloc(sizeof (*dev) + sizeof (*t), GFP_KERNEL);
+	if (!dev)
+		return err;
+
+	memset(dev, 0, sizeof (*dev) + sizeof (*t));
+	dev->priv = (void *) (dev + 1);
+	t = (struct ip6_tnl *) dev->priv;
+	t->dev = dev;
+	dev->init = ip6ip6_tnl_dev_init;
+	memcpy(&t->parms, p, sizeof (*p));
+	t->parms.name[IFNAMSIZ - 1] = '\0';
+	strcpy(dev->name, t->parms.name);
+	if (!dev->name[0]) {
+		int i = 0;
+		int exists = 0;
+
+		do {
+			sprintf(dev->name, "ip6tnl%d", ++i);
+			exists = (__dev_get_by_name(dev->name) != NULL);
+		} while (i < IP6_TNL_MAX && exists);
+
+		if (i == IP6_TNL_MAX) {
+			goto failed;
+		}
+		memcpy(t->parms.name, dev->name, IFNAMSIZ);
+	}
+	SET_MODULE_OWNER(dev);
+	if ((err = register_netdevice(dev)) < 0) {
+		goto failed;
+	}
+	ip6ip6_tnl_link(t);
+	*pt = t;
+	return 0;
+failed:
+	kfree(dev);
+	return err;
+}
+
+/**
+ * ip6_tnl_destroy() - destroy old tunnel
+ *   @t: tunnel to be destroyed
+ *
+ * Return:
+ *   whatever unregister_netdevice() returns
+ **/
+
+static inline int
+ip6_tnl_destroy(struct ip6_tnl *t)
+{
+	return unregister_netdevice(t->dev);
+}
+
+/**
+ * ip6ip6_tnl_locate - find or create tunnel matching given parameters
+ *   @p: tunnel parameters 
+ *   @create: != 0 if allowed to create new tunnel if no match found
+ *
+ * Description:
+ *   ip6ip6_tnl_locate() first tries to locate an existing tunnel
+ *   based on @p. If this is unsuccessful, but @create is set a new
+ *   tunnel device is created and registered for use.
+ *
+ * Return:
+ *   0 if tunnel located or created,
+ *   -EINVAL if parameters incorrect,
+ *   -ENODEV if no matching tunnel available
+ **/
+
+static int
+ip6ip6_tnl_locate(struct ip6_tnl_parm *p, struct ip6_tnl **pt, int create)
+{
+	struct in6_addr *remote = &p->raddr;
+	struct in6_addr *local = &p->laddr;
+	struct ip6_tnl *t;
+
+	if (p->proto != IPPROTO_IPV6)
+		return -EINVAL;
+
+	for (t = *ip6ip6_bucket(p); t; t = t->next) {
+		if (!ipv6_addr_cmp(local, &t->parms.laddr) &&
+		    !ipv6_addr_cmp(remote, &t->parms.raddr)) {
+			*pt = t;
+			return (create ? -EEXIST : 0);
+		}
+	}
+	if (!create) {
+		return -ENODEV;
+	}
+	return ip6_tnl_create(p, pt);
+}
+
+/**
+ * ip6ip6_tnl_dev_destructor - free the memory of a tunnel device
+ *   @dev: the device to be freed with kfree()
+ **/
+
+static void
+ip6ip6_tnl_dev_destructor(struct net_device *dev)
+{
+	kfree(dev);
+}
+
+/**
+ * ip6ip6_tnl_dev_uninit - tunnel device uninitializer
+ *   @dev: the device being torn down
+ *
+ * Description:
+ *   Clears the tnls_wc[0] slot when @dev is the fallback device;
+ *   any other device has its tunnel unlinked from the tunnel list.
+ **/
+
+static void
+ip6ip6_tnl_dev_uninit(struct net_device *dev)
+{
+	if (dev != &ip6ip6_fb_tnl_dev) {
+		ip6ip6_tnl_unlink((struct ip6_tnl *) dev->priv);
+		return;
+	}
+	write_lock_bh(&ip6ip6_lock);
+	tnls_wc[0] = NULL;
+	write_unlock_bh(&ip6ip6_lock);
+}
+
+/**
+ * parse_tlv_tnl_enc_lim - find the tunnel encapsulation limit option
+ *   @skb: socket buffer holding the packet
+ *   @raw: start of the IPv6 header whose extension headers are walked
+ *
+ * Return:
+ *   0 if none was found, else index of the option relative to @raw
+ **/
+
+static __u16
+parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
+{
+	struct ipv6hdr *ipv6h = (struct ipv6hdr *) raw;
+	__u8 nexthdr = ipv6h->nexthdr;
+	__u16 off = sizeof (*ipv6h);
+
+	while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) {
+		__u16 optlen = 0;
+		struct ipv6_opt_hdr *hdr;
+		if (raw + off + sizeof (*hdr) > skb->data &&
+		    !pskb_may_pull(skb, raw - skb->data + off + sizeof (*hdr)))
+			break;
+		/* NOTE(review): confirm raw is still valid after pskb_may_pull() */
+		hdr = (struct ipv6_opt_hdr *) (raw + off);
+		if (nexthdr == NEXTHDR_FRAGMENT) {
+			struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr;
+			if (frag_hdr->frag_off)
+				break;
+			optlen = 8;	/* fragment header is treated as 8 bytes */
+		} else if (nexthdr == NEXTHDR_AUTH) {
+			optlen = (hdr->hdrlen + 2) << 2;	/* 4-byte units */
+		} else {
+			optlen = ipv6_optlen(hdr);
+		}
+		if (nexthdr == NEXTHDR_DEST) {
+			__u16 i = off + 2;	/* first option after the 2-byte header */
+			while (1) {
+				struct ipv6_tlv_tnl_enc_lim *tel;
+
+				/* No more room for encapsulation limit */
+				if (i + sizeof (*tel) > off + optlen)
+					break;
+
+				tel = (struct ipv6_tlv_tnl_enc_lim *) &raw[i];
+				/* return index of option if found and valid */
+				if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT &&
+				    tel->length == 1)
+					return i;
+				/* else jump to next option */
+				if (tel->type)
+					i += tel->length + 2;
+				else
+					i++;	/* type 0: single byte, no length field */
+			}
+		}
+		nexthdr = hdr->nexthdr;
+		off += optlen;
+	}
+	return 0;
+}
+
+/**
+ * ip6ip6_err - tunnel error handler
+ *   @skb: buffer whose data starts at the tunnel packet's IPv6 header
+ *   @opt: unused
+ *   @type, @code: ICMPv6 type and code of the error
+ *   @offset: offset from skb->data to the encapsulated packet
+ *   @info: ICMPv6 info word in network byte order (MTU or pointer)
+ *
+ * Should handle errors in the tunnel according to RFC 2473.
+ **/
+
+void ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+		   int type, int code, int offset, __u32 info)
+{
+	struct ipv6hdr *ipv6h = (struct ipv6hdr *) skb->data;
+	struct ip6_tnl *t;
+	int rel_msg = 0;
+	int rel_type = ICMPV6_DEST_UNREACH;
+	int rel_code = ICMPV6_ADDR_UNREACH;
+	__u32 rel_info = 0;
+
+	/* If the packet doesn't contain the original IPv6 header we are
+	   in trouble since we might need the source address for further
+	   processing of the error. */
+
+	read_lock(&ip6ip6_lock);
+	if ((t = ip6ip6_tnl_lookup(&ipv6h->daddr, &ipv6h->saddr)) == NULL)
+		goto out;
+
+	switch (type) {
+		__u32 teli;
+		struct ipv6_tlv_tnl_enc_lim *tel;
+		__u32 mtu;
+	case ICMPV6_DEST_UNREACH:
+		if (net_ratelimit())
+			printk(KERN_WARNING
+			       "%s: Path to destination invalid "
+			       "or inactive!\n", t->parms.name);
+		rel_msg = 1;
+		break;
+	case ICMPV6_TIME_EXCEED:
+		if (code == ICMPV6_EXC_HOPLIMIT) {
+			if (net_ratelimit())
+				printk(KERN_WARNING
+				       "%s: Too small hop limit or "
+				       "routing loop in tunnel!\n",
+				       t->parms.name);
+			rel_msg = 1;
+		}
+		break;
+	case ICMPV6_PARAMPROB:
+		/* ignore if parameter problem not caused by a tunnel
+		   encapsulation limit sub-option */
+		if (code != ICMPV6_HDR_FIELD)
+			break;
+		teli = parse_tlv_tnl_enc_lim(skb, skb->data);
+		if (teli && teli == ntohl(info) - 2) {
+			tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
+			if (tel->encap_limit == 0) {
+				if (net_ratelimit())
+					printk(KERN_WARNING
+					       "%s: Too small encapsulation "
+					       "limit or routing loop in "
+					       "tunnel!\n", t->parms.name);
+				rel_msg = 1;
+			}
+		}
+		break;
+	case ICMPV6_PKT_TOOBIG:
+		mtu = ntohl(info) - offset;
+		if (mtu < IPV6_MIN_MTU)
+			mtu = IPV6_MIN_MTU;
+		t->dev->mtu = mtu;
+		/* payload_len is in network byte order; compare untruncated */
+		if (sizeof (*ipv6h) + ntohs(ipv6h->payload_len) > mtu) {
+			rel_type = ICMPV6_PKT_TOOBIG;
+			rel_code = 0;
+			rel_info = mtu;
+			rel_msg = 1;
+		}
+		break;
+	}
+	if (rel_msg &&  pskb_may_pull(skb, offset + sizeof (*ipv6h))) {
+		struct rt6_info *rt;
+		struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
+		if (!skb2)
+			goto out;
+
+		dst_release(skb2->dst);
+		skb2->dst = NULL;
+		skb_pull(skb2, offset);
+		skb2->nh.raw = skb2->data;
+
+		/* Try to guess incoming interface */
+		rt = rt6_lookup(&skb2->nh.ipv6h->saddr, NULL, 0, 0);
+
+		if (rt && rt->rt6i_dev)
+			skb2->dev = rt->rt6i_dev;
+
+		icmpv6_send(skb2, rel_type, rel_code, rel_info, skb2->dev);
+
+		if (rt)
+			dst_release(&rt->u.dst);
+
+		kfree_skb(skb2);
+	}
+out:
+	read_unlock(&ip6ip6_lock);
+}
+
+/**
+ * ip6ip6_rcv - decapsulate IPv6 packet and retransmit it locally
+ *   @pskb: pointer to the received socket buffer
+ *   @nhoffp: unused here
+ * Return: 0 in every case; undeliverable packets are freed
+ **/
+
+int ip6ip6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+{
+	struct sk_buff *skb = *pskb;
+	struct ipv6hdr *ipv6h;
+	struct ip6_tnl *t;
+
+	if (!pskb_may_pull(skb, sizeof (*ipv6h)))
+		goto discard;
+	ipv6h = skb->nh.ipv6h;
+	read_lock(&ip6ip6_lock);
+	t = ip6ip6_tnl_lookup(&ipv6h->saddr, &ipv6h->daddr);
+	if (t == NULL) {
+		read_unlock(&ip6ip6_lock);
+		icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0,
+			    skb->dev);
+		goto discard;
+	}
+	if (!(t->parms.flags & IP6_TNL_F_CAP_RCV)) {
+		t->stat.rx_dropped++;
+		read_unlock(&ip6ip6_lock);
+		goto discard;
+	}
+	secpath_reset(skb);
+	skb->mac.raw = skb->nh.raw;
+	skb->nh.raw = skb->data;
+	skb->protocol = htons(ETH_P_IPV6);
+	skb->pkt_type = PACKET_HOST;
+	memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
+	skb->dev = t->dev;
+	dst_release(skb->dst);
+	skb->dst = NULL;
+	t->stat.rx_packets++;
+	t->stat.rx_bytes += skb->len;
+	netif_rx(skb);
+	read_unlock(&ip6ip6_lock);
+	return 0;
+discard:
+	kfree_skb(skb);
+	return 0;
+}
+
+/**
+ * txopt_len - get necessary size for new &struct ipv6_txoptions
+ *   @orig_opt: old options, may be NULL
+ * Return:
+ *   sizeof the struct + 8 bytes for the new option header, plus the
+ *   length of @orig_opt's dst0opt header if there is one
+ **/
+
+static inline int
+txopt_len(struct ipv6_txoptions *orig_opt)
+{
+	int extra = 0;
+
+	if (orig_opt != NULL && orig_opt->dst0opt != NULL)
+		extra = ipv6_optlen(orig_opt->dst0opt);
+	return (int) (sizeof (*orig_opt) + 8) + extra;
+}
+
+/**
+ * merge_options - add encapsulation limit to original options
+ *   @sk: socket passed to sock_kmalloc() for the allocation
+ *   @encap_limit: value for the tunnel encapsulation limit option
+ *   @orig_opt: original options, may be NULL
+ *
+ * Return: new &struct ipv6_txoptions containing the tunnel
+ *   encapsulation limit, or NULL if allocation fails
+ **/
+
+static struct ipv6_txoptions *
+merge_options(struct sock *sk, __u8 encap_limit,
+	      struct ipv6_txoptions *orig_opt)
+{
+	struct ipv6_tlv_tnl_enc_lim *tel;
+	struct ipv6_txoptions *opt;
+	__u8 *raw;
+	__u8 pad_to = 8;
+	int opt_len = txopt_len(orig_opt);
+
+	if (!(opt = sock_kmalloc(sk, opt_len, GFP_ATOMIC))) {
+		return NULL;
+	}
+
+	memset(opt, 0, opt_len);
+	opt->tot_len = opt_len;
+	opt->dst0opt = (struct ipv6_opt_hdr *) (opt + 1);
+	opt->opt_nflen = 8;
+
+	raw = (__u8 *) opt->dst0opt;
+	/* the encapsulation limit option goes directly behind the header */
+	tel = (struct ipv6_tlv_tnl_enc_lim *) (opt->dst0opt + 1);
+	tel->type = IPV6_TLV_TNL_ENCAP_LIMIT;
+	tel->length = 1;
+	tel->encap_limit = encap_limit;
+
+	if (orig_opt) {
+		__u8 *orig_raw;
+
+		opt->hopopt = orig_opt->hopopt;
+
+		/* Keep the original destination options properly
+		   aligned and merge possible old paddings to the
+		   new padding option */
+		if ((orig_raw = (__u8 *) orig_opt->dst0opt) != NULL) {
+			__u8 type;
+			int i = sizeof (struct ipv6_opt_hdr);
+			pad_to += sizeof (struct ipv6_opt_hdr);
+			while (i < ipv6_optlen(orig_opt->dst0opt)) {
+				type = orig_raw[i++];
+				if (type == IPV6_TLV_PAD0)
+					pad_to++;
+				else if (type == IPV6_TLV_PADN) {
+					int len = orig_raw[i++];
+					i += len;
+					pad_to += len + 2;
+				} else {
+					break;
+				}
+			}
+			opt->dst0opt->hdrlen = orig_opt->dst0opt->hdrlen + 1;
+			memcpy(raw + pad_to, orig_raw + pad_to - 8,
+			       opt_len - sizeof (*opt) - pad_to);
+		}
+		opt->srcrt = orig_opt->srcrt;
+		opt->opt_nflen += orig_opt->opt_nflen;
+
+		opt->dst1opt = orig_opt->dst1opt;
+		opt->auth = orig_opt->auth;
+		opt->opt_flen = orig_opt->opt_flen;
+	}
+	raw[5] = IPV6_TLV_PADN;
+
+	/* subtract lengths of destination suboption header,
+	   tunnel encapsulation limit and pad N header */
+	raw[6] = pad_to - 7;
+
+	return opt;
+}
+
+/* ip6_append_data() callback: copy @len bytes at @offset of @from to @to */
+static int ip6ip6_getfrag(void *from, char *to, int offset, int len, int odd,
+			  struct sk_buff *skb)
+{
+	memcpy(to, (const char *) from + offset, len);
+	return 0;
+}
+
+/**
+ * ip6ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
+ *   @t: the outgoing tunnel device
+ *   @hdr: IPv6 header of the packet about to be tunnelled
+ *
+ * Description:
+ *   Guards against a trivial tunnelling loop: a packet whose source
+ *   address equals the tunnel's remote end must not be sent into it.
+ *
+ * Return:
+ *   1 if the addresses are equal (conflict),
+ *   0 otherwise
+ **/
+
+static inline int
+ip6ip6_tnl_addr_conflict(struct ip6_tnl *t, struct ipv6hdr *hdr)
+{
+	return ipv6_addr_cmp(&t->parms.raddr, &hdr->saddr) == 0;
+}
+
+/**
+ * ip6ip6_tnl_xmit - encapsulate packet and send
+ *   @skb: the outgoing socket buffer
+ *   @dev: the outgoing tunnel device
+ *
+ * Description:
+ *   Build new header and do some sanity checks on the packet before
+ *   queueing it with ip6_append_data() and ip6_push_pending_frames().
+ *
+ * Return:
+ *   0 always; @skb is freed on both the success and the error paths
+ **/
+
+int ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ip6_tnl *t = (struct ip6_tnl *) dev->priv;
+	struct net_device_stats *stats = &t->stat;
+	struct ipv6hdr *ipv6h = skb->nh.ipv6h;
+	struct ipv6_txoptions *orig_opt = NULL;
+	struct ipv6_txoptions *opt = NULL;
+	int encap_limit = -1;
+	__u16 offset;
+	struct flowi fl;
+	struct ip6_flowlabel *fl_lbl = NULL;
+	int err = 0;
+	struct dst_entry *dst;
+	int link_failure = 0;
+	struct sock *sk = ip6_socket->sk;
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	int mtu;
+
+	if (t->recursion++) {	/* re-entered while a transmit is in progress */
+		stats->collisions++;
+		goto tx_err;
+	}
+	if (skb->protocol != htons(ETH_P_IPV6) ||
+	    !(t->parms.flags & IP6_TNL_F_CAP_XMIT) ||
+	    ip6ip6_tnl_addr_conflict(t, ipv6h)) {
+		goto tx_err;
+	}
+	if ((offset = parse_tlv_tnl_enc_lim(skb, skb->nh.raw)) > 0) {
+		struct ipv6_tlv_tnl_enc_lim *tel;
+		tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->nh.raw[offset];
+		if (tel->encap_limit == 0) {
+			icmpv6_send(skb, ICMPV6_PARAMPROB,
+				    ICMPV6_HDR_FIELD, offset + 2, skb->dev);
+			goto tx_err;
+		}
+		encap_limit = tel->encap_limit - 1;
+	} else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) {
+		encap_limit = t->parms.encap_limit;
+	}
+	ip6_xmit_lock();
+
+	memcpy(&fl, &t->fl, sizeof (fl));
+	/* optionally take traffic class / flow label from the inner header */
+	if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
+		fl.fl6_flowlabel |= (*(__u32 *) ipv6h & IPV6_TCLASS_MASK);
+	if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL))
+		fl.fl6_flowlabel |= (*(__u32 *) ipv6h & IPV6_FLOWLABEL_MASK);
+
+	if (fl.fl6_flowlabel) {
+		fl_lbl = fl6_sock_lookup(sk, fl.fl6_flowlabel);
+		if (fl_lbl)
+			orig_opt = fl_lbl->opt;
+	}
+	if (encap_limit >= 0) {
+		if (!(opt = merge_options(sk, encap_limit, orig_opt))) {
+			goto tx_err_free_fl_lbl;
+		}
+	} else {
+		opt = orig_opt;
+	}
+	dst = __sk_dst_check(sk, np->dst_cookie);
+	/* NOTE(review): confirm a cached dst may be dst_release()d below */
+	if (dst) {
+		if (np->daddr_cache == NULL ||
+		    ipv6_addr_cmp(&fl.fl6_dst, np->daddr_cache) ||
+		    (fl.oif && fl.oif != dst->dev->ifindex)) {
+			dst = NULL;
+		}
+	}
+	if (dst == NULL) {
+		dst = ip6_route_output(sk, &fl);
+		if (dst->error) {
+			stats->tx_carrier_errors++;
+			link_failure = 1;
+			goto tx_err_dst_release;
+		}
+		/* local routing loop */
+		if (dst->dev == dev) {
+			stats->collisions++;
+			if (net_ratelimit())
+				printk(KERN_WARNING 
+				       "%s: Local routing loop detected!\n",
+				       t->parms.name);
+			goto tx_err_dst_release;
+		}
+		ipv6_addr_copy(&np->daddr, &fl.fl6_dst);
+		ipv6_addr_copy(&np->saddr, &fl.fl6_src);
+	}
+	mtu = dst_pmtu(dst) - sizeof (*ipv6h);
+	if (opt) {
+		mtu -= (opt->opt_nflen + opt->opt_flen);
+	}
+	if (mtu < IPV6_MIN_MTU)
+		mtu = IPV6_MIN_MTU;
+	if (skb->dst && mtu < dst_pmtu(skb->dst)) {
+		struct rt6_info *rt = (struct rt6_info *) skb->dst;
+		rt->rt6i_flags |= RTF_MODIFIED;
+		rt->u.dst.metrics[RTAX_MTU-1] = mtu;
+	}
+	if (skb->len > mtu) {
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
+		goto tx_err_dst_release;
+	}
+	err = ip6_append_data(sk, ip6ip6_getfrag, skb->nh.raw, skb->len, 0,
+			      t->parms.hop_limit, opt, &fl, 
+			      (struct rt6_info *)dst, MSG_DONTWAIT);
+
+	if (err) {
+		ip6_flush_pending_frames(sk);
+	} else {
+		err = ip6_push_pending_frames(sk);
+		err = (err < 0 ? err : 0);
+	}
+	if (!err) {
+		stats->tx_bytes += skb->len;
+		stats->tx_packets++;
+	} else {
+		stats->tx_errors++;
+		stats->tx_aborted_errors++;
+	}
+	if (opt && opt != orig_opt)	/* free only what merge_options() built */
+		sock_kfree_s(sk, opt, opt->tot_len);
+
+	fl6_sock_release(fl_lbl);
+	ip6_dst_store(sk, dst, &np->daddr);
+	ip6_xmit_unlock();
+	kfree_skb(skb);
+	t->recursion--;
+	return 0;
+tx_err_dst_release:
+	dst_release(dst);
+	if (opt && opt != orig_opt)
+		sock_kfree_s(sk, opt, opt->tot_len);
+tx_err_free_fl_lbl:
+	fl6_sock_release(fl_lbl);
+	ip6_xmit_unlock();
+	if (link_failure)
+		dst_link_failure(skb);
+tx_err:
+	stats->tx_errors++;
+	stats->tx_dropped++;
+	kfree_skb(skb);
+	t->recursion--;
+	return 0;
+}
+
+/*
+ * Recompute the CAP_XMIT/CAP_RCV flags of @t from its two addresses.
+ */
+static void ip6_tnl_set_cap(struct ip6_tnl *t)
+{
+	struct ip6_tnl_parm *p = &t->parms;
+	int ltype = ipv6_addr_type(&p->laddr);
+	int rtype = ipv6_addr_type(&p->raddr);
+	const int unwanted = IPV6_ADDR_LOOPBACK | IPV6_ADDR_LINKLOCAL |
+			     IPV6_ADDR_MAPPED | IPV6_ADDR_RESERVED;
+	struct net_device *ldev = NULL;
+	int l_ok, r_ok;
+
+	p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV);
+
+	if (ltype == IPV6_ADDR_ANY || rtype == IPV6_ADDR_ANY)
+		return;
+	/* at least one end unicast, neither end of an unwanted kind */
+	if (((ltype|rtype) & (IPV6_ADDR_UNICAST|unwanted)) != IPV6_ADDR_UNICAST)
+		return;
+
+	if (p->link)
+		ldev = dev_get_by_index(p->link);
+
+	/* local: if unicast, ipv6_chk_addr() must accept it (on ldev if set) */
+	l_ok = !(ltype & IPV6_ADDR_UNICAST) || ipv6_chk_addr(&p->laddr, ldev);
+	/* remote: if unicast, ipv6_chk_addr() must reject it */
+	r_ok = !(rtype & IPV6_ADDR_UNICAST) || !ipv6_chk_addr(&p->raddr, NULL);
+
+	if (l_ok && r_ok) {
+		if (ltype & IPV6_ADDR_UNICAST)
+			p->flags |= IP6_TNL_F_CAP_XMIT;
+		if (rtype & IPV6_ADDR_UNICAST)
+			p->flags |= IP6_TNL_F_CAP_RCV;
+	}
+	if (ldev)
+		dev_put(ldev);
+}
+
+/* Rebuild the flowi template and the device link/MTU settings of @t. */
+static void ip6ip6_tnl_link_config(struct ip6_tnl *t)
+{
+	struct net_device *dev = t->dev;
+	struct ip6_tnl_parm *p = &t->parms;
+	struct flowi *fl;
+	/* Set up flowi template */
+	fl = &t->fl;
+	ipv6_addr_copy(&fl->fl6_src, &p->laddr);
+	ipv6_addr_copy(&fl->fl6_dst, &p->raddr);
+	fl->oif = p->link;
+	fl->fl6_flowlabel = 0;
+
+	if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
+		fl->fl6_flowlabel |= IPV6_TCLASS_MASK & htonl(p->flowinfo);
+	if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
+		fl->fl6_flowlabel |= IPV6_FLOWLABEL_MASK & htonl(p->flowinfo);
+
+	ip6_tnl_set_cap(t);
+
+	if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV)
+		dev->flags |= IFF_POINTOPOINT;
+	else
+		dev->flags &= ~IFF_POINTOPOINT;
+	/* with a usable route, size the device after the underlying one */
+	if (p->flags & IP6_TNL_F_CAP_XMIT) {
+		struct rt6_info *rt = rt6_lookup(&p->raddr, &p->laddr,
+						 p->link, 0);
+		if (rt) {
+			struct net_device *rtdev;
+			if (!(rtdev = rt->rt6i_dev) ||
+			    rtdev->type == ARPHRD_TUNNEL6) {
+				/* as long as tunnels use the same socket 
+				   for transmission, locally nested tunnels 
+				   won't work */
+				dst_release(&rt->u.dst);
+				goto no_link;
+			} else {
+				dev->iflink = rtdev->ifindex;
+				dev->hard_header_len = rtdev->hard_header_len +
+					sizeof (struct ipv6hdr);
+				dev->mtu = rtdev->mtu - sizeof (struct ipv6hdr);
+				if (dev->mtu < IPV6_MIN_MTU)
+					dev->mtu = IPV6_MIN_MTU;
+				
+				dst_release(&rt->u.dst);
+			}
+		}
+	} else {
+	no_link:	/* also entered by goto from the branch above */
+		dev->iflink = 0;
+		dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr);
+		dev->mtu = ETH_DATA_LEN - sizeof (struct ipv6hdr);
+	}
+}
+
+/**
+ * ip6ip6_tnl_change - update the tunnel parameters
+ *   @t: tunnel to be changed
+ *   @p: tunnel configuration parameters to copy from
+ *
+ * Return: 0, after copying the settings and reconfiguring the link
+ **/
+
+static int
+ip6ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
+{
+	struct ip6_tnl_parm *cur = &t->parms;
+
+	ipv6_addr_copy(&cur->laddr, &p->laddr);
+	ipv6_addr_copy(&cur->raddr, &p->raddr);
+	cur->flowinfo = p->flowinfo;
+	cur->encap_limit = p->encap_limit;
+	cur->hop_limit = p->hop_limit;
+	cur->flags = p->flags;
+	ip6ip6_tnl_link_config(t);
+	return 0;
+}
+
+/**
+ * ip6ip6_tnl_ioctl - configure ipv6 tunnels from userspace
+ *   @dev: virtual device associated with tunnel
+ *   @ifr: parameters passed from userspace
+ *   @cmd: command to be performed
+ *
+ * Description:
+ *   ip6ip6_tnl_ioctl() is used for managing IPv6 tunnels
+ *   from userspace.
+ *
+ *   The possible commands are the following:
+ *     %SIOCGETTUNNEL: get tunnel parameters for device
+ *     %SIOCADDTUNNEL: add tunnel matching given tunnel parameters
+ *     %SIOCCHGTUNNEL: change tunnel parameters to those given
+ *     %SIOCDELTUNNEL: delete tunnel
+ *
+ *   On the fallback device the tunnel is looked up from the
+ *   parameters passed in; otherwise the tunnel of @dev is used.
+ *
+ * Return:
+ *   0 on success,
+ *   %-EFAULT if unable to copy data to or from userspace,
+ *   %-EPERM if current process hasn't %CAP_NET_ADMIN set,
+ *   %-EINVAL if @cmd is unknown or the tunnel parameters are invalid,
+ *   %-EEXIST if adding a tunnel that exists, or changing another device's,
+ *   %-ENODEV if attempting to change or delete a nonexisting device
+ **/
+
+static int
+ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+	int err = 0;
+	int create;
+	struct ip6_tnl_parm p;
+	struct ip6_tnl *t = NULL;
+
+	switch (cmd) {
+	case SIOCGETTUNNEL:
+		if (dev == &ip6ip6_fb_tnl_dev) {
+			if (copy_from_user(&p,
+					   ifr->ifr_ifru.ifru_data,
+					   sizeof (p))) {
+				err = -EFAULT;
+				break;
+			}
+			if ((err = ip6ip6_tnl_locate(&p, &t, 0)) == -ENODEV)
+				t = (struct ip6_tnl *) dev->priv;
+			else if (err)
+				break;
+		} else
+			t = (struct ip6_tnl *) dev->priv;
+
+		memcpy(&p, &t->parms, sizeof (p));
+		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) {
+			err = -EFAULT;
+		}
+		break;
+	case SIOCADDTUNNEL:
+	case SIOCCHGTUNNEL:
+		err = -EPERM;
+		create = (cmd == SIOCADDTUNNEL);
+		if (!capable(CAP_NET_ADMIN))
+			break;
+		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) {
+			err = -EFAULT;
+			break;
+		}
+		if (!create && dev != &ip6ip6_fb_tnl_dev) {
+			t = (struct ip6_tnl *) dev->priv;
+		}
+		if (!t && (err = ip6ip6_tnl_locate(&p, &t, create))) {
+			break;
+		}
+		if (cmd == SIOCCHGTUNNEL) {
+			if (t->dev != dev) {
+				err = -EEXIST;
+				break;
+			}
+			ip6ip6_tnl_unlink(t);
+			err = ip6ip6_tnl_change(t, &p);
+			ip6ip6_tnl_link(t);
+			netdev_state_change(dev);
+		}
+		if (copy_to_user(ifr->ifr_ifru.ifru_data,
+				 &t->parms, sizeof (p))) {
+			err = -EFAULT;
+		} else {
+			err = 0;
+		}
+		break;
+	case SIOCDELTUNNEL:
+		err = -EPERM;
+		if (!capable(CAP_NET_ADMIN))
+			break;
+
+		if (dev == &ip6ip6_fb_tnl_dev) {
+			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data,
+					   sizeof (p))) {
+				err = -EFAULT;
+				break;
+			}
+			err = ip6ip6_tnl_locate(&p, &t, 0);
+			if (err)
+				break;
+			if (t == &ip6ip6_fb_tnl) {	/* never delete the fallback */
+				err = -EPERM;
+				break;
+			}
+		} else {
+			t = (struct ip6_tnl *) dev->priv;
+		}
+		err = ip6_tnl_destroy(t);
+		break;
+	default:
+		err = -EINVAL;
+	}
+	return err;
+}
+
+/**
+ * ip6ip6_tnl_get_stats - return the stats for tunnel device
+ *   @dev: virtual device associated with tunnel
+ * Return: pointer to the statistics kept in the tunnel behind @dev
+ **/
+
+static struct net_device_stats *
+ip6ip6_tnl_get_stats(struct net_device *dev)
+{
+	struct ip6_tnl *t = (struct ip6_tnl *) dev->priv;
+	return &t->stat;
+}
+
+/**
+ * ip6ip6_tnl_change_mtu - change mtu manually for tunnel device
+ *   @dev: virtual device associated with tunnel
+ *   @new_mtu: the new mtu, at least %IPV6_MIN_MTU
+ *
+ * Return:
+ *   0 on success,
+ *   %-EINVAL if @new_mtu is below %IPV6_MIN_MTU
+ **/
+
+static int
+ip6ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
+{
+	if (new_mtu >= IPV6_MIN_MTU) {
+		dev->mtu = new_mtu;
+		return 0;
+	}
+	return -EINVAL;
+}
+
+/**
+ * ip6ip6_tnl_dev_init_gen - general initializer for all tunnel devices
+ *   @dev: virtual device associated with tunnel
+ *
+ * Description:
+ *   Clears the tunnel's &struct flowi template, installs the device
+ *   callbacks and sets the device type and flags.
+ **/
+
+static void
+ip6ip6_tnl_dev_init_gen(struct net_device *dev)
+{
+	struct ip6_tnl *t = (struct ip6_tnl *) dev->priv;
+
+	/* flow template starts out empty except for the protocol */
+	memset(&t->fl, 0, sizeof (t->fl));
+	t->fl.proto = IPPROTO_IPV6;
+
+	/* device callbacks */
+	dev->hard_start_xmit = ip6ip6_tnl_xmit;
+	dev->get_stats = ip6ip6_tnl_get_stats;
+	dev->do_ioctl = ip6ip6_tnl_ioctl;
+	dev->change_mtu = ip6ip6_tnl_change_mtu;
+	dev->uninit = ip6ip6_tnl_dev_uninit;
+	dev->destructor = ip6ip6_tnl_dev_destructor;
+
+	dev->type = ARPHRD_TUNNEL6;
+	dev->flags |= IFF_NOARP;
+	if ((ipv6_addr_type(&t->parms.raddr) & IPV6_ADDR_UNICAST) &&
+	    (ipv6_addr_type(&t->parms.laddr) & IPV6_ADDR_UNICAST))
+		dev->flags |= IFF_POINTOPOINT;
+	/* MAX_ADDR_LEN is 8, too short for an IPv6 address, so unlike
+	   ipip.c, ip_gre.c and sit.c no address is stored in the device. */
+	dev->addr_len = 0;
+}
+
+/**
+ * ip6ip6_tnl_dev_init - initializer for all non fallback tunnel devices
+ *   @dev: virtual device associated with tunnel
+ * Return: 0
+ **/
+
+static int
+ip6ip6_tnl_dev_init(struct net_device *dev)
+{
+	ip6ip6_tnl_dev_init_gen(dev);
+	ip6ip6_tnl_link_config((struct ip6_tnl *) dev->priv);
+	return 0;
+}
+
+/**
+ * ip6ip6_fb_tnl_dev_init - initializer for fallback tunnel device
+ *   @dev: fallback device
+ *
+ * Return: 0, after publishing the fallback tunnel in tnls_wc[0]
+ **/
+
+int ip6ip6_fb_tnl_dev_init(struct net_device *dev)
+{
+	ip6ip6_tnl_dev_init_gen(dev);
+	tnls_wc[0] = &ip6ip6_fb_tnl;
+	return 0;
+}
+
+static struct inet6_protocol ip6ip6_protocol = {
+	.handler = ip6ip6_rcv,		/* decapsulates received packets */
+	.err_handler = ip6ip6_err,	/* ICMPv6 errors for tunnel packets */
+	.flags = INET6_PROTO_FINAL
+};
+
+/**
+ * ip6_tunnel_init - register protocol and reserve needed resources
+ *
+ * Return: 0 on success, else the error of the step that failed
+ **/
+
+int __init ip6_tunnel_init(void)
+{
+	int i, j, err;
+	struct sock *sk;
+	struct ipv6_pinfo *np;
+
+	ip6ip6_fb_tnl_dev.priv = (void *) &ip6ip6_fb_tnl;
+
+	for (i = 0; i < NR_CPUS; i++) {
+		err = sock_create(PF_INET6, SOCK_RAW, IPPROTO_IPV6,
+				  &__ip6_socket[i]);
+		if (err < 0) {
+			printk(KERN_ERR
+			       "Failed to create the IPv6 tunnel socket "
+			       "(err %d).\n", err);
+			goto fail;
+		}
+		sk = __ip6_socket[i]->sk;
+		sk->allocation = GFP_ATOMIC;
+
+		np = inet6_sk(sk);
+		np->hop_limit = 255;
+		np->mc_loop = 0;
+		sk->prot->unhash(sk);
+	}
+	if ((err = inet6_add_protocol(&ip6ip6_protocol, IPPROTO_IPV6)) < 0) {
+		printk(KERN_ERR "Failed to register IPv6 protocol\n");
+		goto fail;
+	}
+	SET_MODULE_OWNER(&ip6ip6_fb_tnl_dev);
+	/* registration can fail: undo the protocol hook and the sockets */
+	if ((err = register_netdev(&ip6ip6_fb_tnl_dev)) < 0) {
+		inet6_del_protocol(&ip6ip6_protocol, IPPROTO_IPV6);
+		goto fail;
+	}
+	return 0;
+fail:
+	for (j = 0; j < i; j++) {
+		sock_release(__ip6_socket[j]);
+		__ip6_socket[j] = NULL;
+	}
+	return err;
+}
+
+/**
+ * ip6_tunnel_cleanup - unregister device and protocol, release sockets
+ **/
+
+void ip6_tunnel_cleanup(void)
+{
+	int cpu = 0;
+
+	/* tear down in the reverse order of ip6_tunnel_init() */
+	unregister_netdev(&ip6ip6_fb_tnl_dev);
+	inet6_del_protocol(&ip6ip6_protocol, IPPROTO_IPV6);
+
+	while (cpu < NR_CPUS) {
+		sock_release(__ip6_socket[cpu]);
+		__ip6_socket[cpu++] = NULL;
+	}
+}
+
+#ifdef MODULE
+module_init(ip6_tunnel_init);
+module_exit(ip6_tunnel_cleanup);
+#endif
Index: net/ipv6/ipcomp6.c
===================================================================
RCS file: net/ipv6/ipcomp6.c
diff -N net/ipv6/ipcomp6.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ b/net/ipv6/ipcomp6.c	16 Apr 2004 13:16:25 -0000	1.6.12.1
@@ -0,0 +1,378 @@
+/*
+ * IP Payload Compression Protocol (IPComp) for IPv6 - RFC3173
+ *
+ * Copyright (C)2003 USAGI/WIDE Project
+ *
+ * Author	Mitsuru KANDA  <mk@linux-ipv6.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+/* 
+ * [Memo]
+ *
+ * Outbound:
+ *  The compression of IP datagram MUST be done before AH/ESP processing, 
+ *  fragmentation, and the addition of Hop-by-Hop/Routing header. 
+ *
+ * Inbound:
+ *  The decompression of IP datagram MUST be done after the reassembly, 
+ *  AH/ESP processing.
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <net/inet_ecn.h>
+#include <net/ip.h>
+#include <net/xfrm.h>
+#include <net/ipcomp.h>
+#include <asm/scatterlist.h>
+#include <linux/crypto.h>
+#include <linux/pfkeyv2.h>
+#include <linux/random.h>
+#include <net/icmp.h>
+#include <net/ipv6.h>
+#include <linux/ipv6.h>
+#include <linux/icmpv6.h>
+
+/* XXX not ipcomp6-specific: the eight 16-bit words of addr, via ntohs() */
+#define NIP6(addr) \
+	ntohs((addr).s6_addr16[0]),\
+	ntohs((addr).s6_addr16[1]),\
+	ntohs((addr).s6_addr16[2]),\
+	ntohs((addr).s6_addr16[3]),\
+	ntohs((addr).s6_addr16[4]),\
+	ntohs((addr).s6_addr16[5]),\
+	ntohs((addr).s6_addr16[6]),\
+	ntohs((addr).s6_addr16[7])
+
+/*
+ * Remove the IPComp header from @skb and decompress its payload.
+ * Returns the next header value of the IPComp header, or -errno.
+ */
+static int ipcomp6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
+{
+	int err = 0;
+	u8 nexthdr = 0;
+	u8 *prevhdr;
+	int hdr_len = skb->h.raw - skb->nh.raw;
+	unsigned char *tmp_hdr = NULL;
+	struct ipv6hdr *iph;
+	int plen, dlen;
+	struct ipcomp_data *ipcd = x->data;
+	u8 *start, *scratch = ipcd->scratch;
+
+	if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
+		skb_linearize(skb, GFP_ATOMIC) != 0) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	/* Remove ipcomp header and decompress original payload */
+	iph = skb->nh.ipv6h;
+	tmp_hdr = kmalloc(hdr_len, GFP_ATOMIC);
+	if (!tmp_hdr) {
+		err = -ENOMEM;
+		goto out;
+	}
+	memcpy(tmp_hdr, iph, hdr_len);
+	nexthdr = *(u8 *)skb->data;
+	skb_pull(skb, sizeof(struct ipv6_comp_hdr));
+	skb->nh.raw += sizeof(struct ipv6_comp_hdr);
+	memcpy(skb->nh.raw, tmp_hdr, hdr_len);
+	iph = skb->nh.ipv6h;
+	iph->payload_len = htons(ntohs(iph->payload_len) - sizeof(struct ipv6_comp_hdr));
+	skb->h.raw = skb->data;
+
+	/* decompression */
+	plen = skb->len;
+	dlen = IPCOMP_SCRATCH_SIZE;
+	start = skb->data;
+
+	err = crypto_comp_decompress(ipcd->tfm, start, plen, scratch, &dlen);
+	if (err) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (dlen < (plen + sizeof(struct ipv6_comp_hdr))) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	err = pskb_expand_head(skb, 0, dlen - plen, GFP_ATOMIC);
+	if (err)
+		goto out;
+
+	skb_put(skb, dlen - plen);
+	memcpy(skb->data, scratch, dlen);
+
+	iph = skb->nh.ipv6h;
+	iph->payload_len = htons(skb->len);
+	ip6_find_1stfragopt(skb, &prevhdr);
+	*prevhdr = nexthdr;
+out:
+	if (tmp_hdr)
+		kfree(tmp_hdr);
+	return err ? err : nexthdr;
+}
+
+static int ipcomp6_output(struct sk_buff *skb)
+{
+	int err;
+	struct dst_entry *dst = skb->dst;
+	struct xfrm_state *x = dst->xfrm;
+	struct ipv6hdr *tmp_iph = NULL, *iph, *top_iph;
+	int hdr_len = 0;
+	struct ipv6_comp_hdr *ipch;
+	struct ipcomp_data *ipcd = x->data;
+	u8 *prevhdr;
+	u8 nexthdr = 0;
+	int plen, dlen;
+	u8 *start, *scratch = ipcd->scratch;
+
+	if (skb->ip_summed == CHECKSUM_HW && skb_checksum_help(skb) == NULL) {
+		err = -EINVAL;
+		goto error_nolock;
+	}
+	/* x->lock is held from here until out_ok or the error label */
+	spin_lock_bh(&x->lock);
+
+	err = xfrm_check_output(x, skb, AF_INET6);
+	if (err)
+		goto error;
+	/* x->props.mode set: prepend an outer IPv6 header built from x */
+	if (x->props.mode) {
+		hdr_len = sizeof(struct ipv6hdr);
+		nexthdr = IPPROTO_IPV6;
+		iph = skb->nh.ipv6h;
+		top_iph = (struct ipv6hdr *)skb_push(skb, sizeof(struct ipv6hdr));
+		top_iph->version = 6;
+		top_iph->priority = iph->priority;
+		top_iph->flow_lbl[0] = iph->flow_lbl[0];
+		top_iph->flow_lbl[1] = iph->flow_lbl[1];
+		top_iph->flow_lbl[2] = iph->flow_lbl[2];
+		top_iph->nexthdr = IPPROTO_IPV6; /* initial */
+		top_iph->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+		top_iph->hop_limit = iph->hop_limit;
+		memcpy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr, sizeof(struct in6_addr));
+		memcpy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr, sizeof(struct in6_addr));
+		skb->nh.raw = skb->data; /* == top_iph */
+		skb->h.raw = skb->nh.raw + hdr_len;
+	} else {
+		hdr_len = ip6_find_1stfragopt(skb, &prevhdr);
+		nexthdr = *prevhdr;
+	}
+
+	/* check whether datagram len is larger than threshold */
+	if ((skb->len - hdr_len) < ipcd->threshold) {
+		goto out_ok;
+	}
+
+	if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
+		skb_linearize(skb, GFP_ATOMIC) != 0) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	/* compression */
+	plen = skb->len - hdr_len;
+	dlen = IPCOMP_SCRATCH_SIZE;
+	start = skb->data + hdr_len;
+
+	err = crypto_comp_compress(ipcd->tfm, start, plen, scratch, &dlen);
+	if (err) {
+		goto error;
+	}
+	/* send uncompressed when compression would not save any space */
+	if ((dlen + sizeof(struct ipv6_comp_hdr)) >= plen) {
+		goto out_ok;
+	}
+	memcpy(start, scratch, dlen);
+	pskb_trim(skb, hdr_len+dlen);
+
+	/* insert ipcomp header and replace datagram */
+	tmp_iph = kmalloc(hdr_len, GFP_ATOMIC);
+	if (!tmp_iph) {
+		err = -ENOMEM;
+		goto error;
+	}
+	memcpy(tmp_iph, skb->nh.raw, hdr_len);
+	top_iph = (struct ipv6hdr*)skb_push(skb, sizeof(struct ipv6_comp_hdr));
+	memcpy(top_iph, tmp_iph, hdr_len);
+	kfree(tmp_iph);
+
+	if (x->props.mode && (x->props.flags & XFRM_STATE_NOECN))
+		IP6_ECN_clear(top_iph);
+	top_iph->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+	skb->nh.raw = skb->data; /* top_iph */
+	ip6_find_1stfragopt(skb, &prevhdr); 
+	*prevhdr = IPPROTO_COMP;
+
+	ipch = (struct ipv6_comp_hdr *)((unsigned char *)top_iph + hdr_len);
+	ipch->nexthdr = nexthdr;
+	ipch->flags = 0;
+	ipch->cpi = htons((u16 )ntohl(x->id.spi));
+
+	skb->h.raw = (unsigned char*)ipch;
+out_ok:
+	x->curlft.bytes += skb->len;
+	x->curlft.packets++;
+	spin_unlock_bh(&x->lock);
+
+	if ((skb->dst = dst_pop(dst)) == NULL) {
+		err = -EHOSTUNREACH;
+		goto error_nolock;
+	}
+	err = NET_XMIT_BYPASS;
+
+out_exit:
+	return err;
+error:
+	spin_unlock_bh(&x->lock);
+error_nolock:
+	kfree_skb(skb);
+	goto out_exit;
+}
+
+/* ICMPv6 error handler: log unreachable/too-big errors that hit a known SA */
+static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+		                int type, int code, int offset, __u32 info)
+{
+	u32 spi;
+	struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
+	struct ipv6_comp_hdr *ipcomph = (struct ipv6_comp_hdr*)(skb->data+offset);
+	struct xfrm_state *x;
+	/* only these two error types are handled; ignore everything else */
+	if (type != ICMPV6_DEST_UNREACH && type != ICMPV6_PKT_TOOBIG)
+		return;
+
+	spi = ntohl(ntohs(ipcomph->cpi));
+	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6);
+	if (!x)
+		return;
+	printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/"
+			"%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+			spi, NIP6(iph->daddr));
+	xfrm_state_put(x);
+}
+
+static void ipcomp6_free_data(struct ipcomp_data *ipcd)
+{
+	if (ipcd->tfm)			/* release the crypto transform, if any */
+		crypto_free_tfm(ipcd->tfm);
+	if (ipcd->scratch)		/* release the scratch buffer, if any */
+		kfree(ipcd->scratch);
+}
+
+static void ipcomp6_destroy(struct xfrm_state *x)
+{
+	struct ipcomp_data *ipcd = x->data;
+	if (ipcd) {	/* x->data may never have been set */
+		ipcomp6_free_data(ipcd);
+		kfree(ipcd);
+	}
+}
+
+/*
+ * Set up the per-SA IPComp state for @x: header length, scratch
+ * buffer, compression transform and threshold. Returns 0 on success,
+ * -EINVAL if @x has no compression algorithm, -ENOMEM otherwise.
+ */
+static int ipcomp6_init_state(struct xfrm_state *x, void *args)
+{
+	struct xfrm_algo_desc *calg_desc;
+	struct ipcomp_data *ipcd;
+
+	if (!x->calg)
+		return -EINVAL;
+
+	ipcd = kmalloc(sizeof(*ipcd), GFP_KERNEL);
+	if (!ipcd)
+		return -ENOMEM;
+	memset(ipcd, 0, sizeof(*ipcd));
+
+	/* IPComp header, plus an IPv6 header when x->props.mode is set */
+	x->props.header_len = sizeof(struct ipv6_comp_hdr);
+	if (x->props.mode)
+		x->props.header_len += sizeof(struct ipv6hdr);
+
+	/* every failure from here on is reported as -ENOMEM */
+	ipcd->scratch = kmalloc(IPCOMP_SCRATCH_SIZE, GFP_KERNEL);
+	if (!ipcd->scratch)
+		goto free_ipcd;
+
+	ipcd->tfm = crypto_alloc_tfm(x->calg->alg_name, 0);
+	if (!ipcd->tfm)
+		goto free_ipcd;
+
+	calg_desc = xfrm_calg_get_byname(x->calg->alg_name);
+	BUG_ON(!calg_desc);
+	ipcd->threshold = calg_desc->uinfo.comp.threshold;
+	x->data = ipcd;
+	return 0;
+
+free_ipcd:
+	ipcomp6_free_data(ipcd);
+	kfree(ipcd);
+	return -ENOMEM;
+}
+
+static struct xfrm_type ipcomp6_type = 
+{
+	.description	= "IPCOMP6",
+	.owner		= THIS_MODULE,
+	.proto		= IPPROTO_COMP,
+	.init_state	= ipcomp6_init_state,	/* allocates x->data */
+	.destructor	= ipcomp6_destroy,	/* frees x->data */
+	.input		= ipcomp6_input,	/* decompress on receive */
+	.output		= ipcomp6_output,	/* compress on transmit */
+};
+
+static struct inet6_protocol ipcomp6_protocol = 
+{
+	.handler	= xfrm6_rcv,
+	.err_handler	= ipcomp6_err,	/* ICMPv6 errors for IPComp packets */
+	.flags		= INET6_PROTO_NOPOLICY,
+};
+
+static int __init ipcomp6_init(void)
+{
+	if (xfrm_register_type(&ipcomp6_type, AF_INET6) < 0) {
+		printk(KERN_INFO "ipcomp6 init: can't add xfrm type\n");
+		return -EAGAIN;
+	}
+	if (inet6_add_protocol(&ipcomp6_protocol, IPPROTO_COMP) >= 0)
+		return 0;
+	/* protocol hook failed: undo the xfrm type registration */
+	printk(KERN_INFO "ipcomp6 init: can't add protocol\n");
+	xfrm_unregister_type(&ipcomp6_type, AF_INET6);
+	return -EAGAIN;
+}
+
+static void __exit ipcomp6_fini(void)
+{	/* undoes ipcomp6_init() in reverse order; failures are only logged */
+	if (inet6_del_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0) 
+		printk(KERN_INFO "ipv6 ipcomp close: can't remove protocol\n");
+	if (xfrm_unregister_type(&ipcomp6_type, AF_INET6) < 0)
+		printk(KERN_INFO "ipv6 ipcomp close: can't remove xfrm type\n");
+}
+
+module_init(ipcomp6_init);
+module_exit(ipcomp6_fini);
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) for IPv6 - RFC3173");
+MODULE_AUTHOR("Mitsuru KANDA <mk@linux-ipv6.org>");
+
+
Index: net/ipv6/ipv6_sockglue.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv6/ipv6_sockglue.c,v
retrieving revision 1.1.1.19
retrieving revision 1.1.1.19.2.1
diff -u -r1.1.1.19 -r1.1.1.19.2.1
--- a/net/ipv6/ipv6_sockglue.c	14 Apr 2004 13:05:41 -0000	1.1.1.19
+++ b/net/ipv6/ipv6_sockglue.c	16 Apr 2004 13:16:25 -0000	1.1.1.19.2.1
@@ -51,6 +51,7 @@
 #include <net/inet_common.h>
 #include <net/tcp.h>
 #include <net/udp.h>
+#include <net/xfrm.h>
 
 #include <asm/uaccess.h>
 
@@ -517,6 +518,10 @@
 	case IPV6_FLOWLABEL_MGR:
 		retv = ipv6_flowlabel_opt(sk, optval, optlen);
 		break;
+	case IPV6_IPSEC_POLICY:
+	case IPV6_XFRM_POLICY:
+		retv = xfrm_user_policy(sk, optname, optval, optlen);
+		break;
 
 #ifdef CONFIG_NETFILTER
 	default:
@@ -550,6 +555,15 @@
 	if (get_user(len, optlen))
 		return -EFAULT;
 	switch (optname) {
+	case IPV6_ADDRFORM:
+		if (sk->protocol != IPPROTO_UDP &&
+		    sk->protocol != IPPROTO_TCP)
+			return -EINVAL;
+		if (sk->state != TCP_ESTABLISHED)
+			return -ENOTCONN;
+		val = sk->family;
+		break;
+
 	case IPV6_PKTOPTIONS:
 	{
 		struct msghdr msg;
@@ -595,7 +609,7 @@
 		lock_sock(sk);
 		dst = sk_dst_get(sk);
 		if (dst) {
-			val = dst->pmtu;
+			val = dst_pmtu(dst) - dst->header_len;
 			dst_release(dst);
 		}
 		release_sock(sk);
Index: net/ipv6/ipv6_syms.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv6/ipv6_syms.c,v
retrieving revision 1.1.1.11
retrieving revision 1.1.1.11.2.1
diff -u -r1.1.1.11 -r1.1.1.11.2.1
--- a/net/ipv6/ipv6_syms.c	14 Apr 2004 13:05:41 -0000	1.1.1.11
+++ b/net/ipv6/ipv6_syms.c	16 Apr 2004 13:16:25 -0000	1.1.1.11.2.1
@@ -6,6 +6,7 @@
 #include <net/ipv6.h>
 #include <net/addrconf.h>
 #include <net/ip6_route.h>
+#include <net/xfrm.h>
 
 EXPORT_SYMBOL(ipv6_addr_type);
 EXPORT_SYMBOL(icmpv6_send);
@@ -33,5 +34,15 @@
 EXPORT_SYMBOL(ipv6_get_saddr);
 EXPORT_SYMBOL(ipv6_chk_addr);
 EXPORT_SYMBOL(in6_dev_finish_destroy);
+EXPORT_SYMBOL(ip6_find_1stfragopt);
+#ifdef CONFIG_XFRM
+EXPORT_SYMBOL(xfrm6_rcv);
+#endif
+EXPORT_SYMBOL(rt6_lookup);
+EXPORT_SYMBOL(fl6_sock_lookup);
+EXPORT_SYMBOL(ipv6_ext_hdr);
+EXPORT_SYMBOL(ip6_append_data);
+EXPORT_SYMBOL(ip6_flush_pending_frames);
+EXPORT_SYMBOL(ip6_push_pending_frames);
 EXPORT_SYMBOL(ipv6_skip_exthdr);
 
Index: net/ipv6/ndisc.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv6/ndisc.c,v
retrieving revision 1.1.1.28
retrieving revision 1.1.1.28.2.1
diff -u -r1.1.1.28 -r1.1.1.28.2.1
--- a/net/ipv6/ndisc.c	14 Apr 2004 13:05:41 -0000	1.1.1.28
+++ b/net/ipv6/ndisc.c	16 Apr 2004 13:16:25 -0000	1.1.1.28.2.1
@@ -71,6 +71,7 @@
 #include <net/addrconf.h>
 #include <net/icmp.h>
 
+#include <net/flow.h>
 #include <net/checksum.h>
 #include <linux/proc_fs.h>
 
@@ -138,6 +139,19 @@
 	30*HZ, 128, 512, 1024,
 };
 
+/* ND options */
+struct ndisc_options {
+	struct nd_opt_hdr *nd_opt_array[7];	/* indexed by ND_OPT_* option type; see the nd_opts_* macros */
+	struct nd_opt_hdr *nd_opt_piend;	/* reached as nd_opts_pi_end; presumably the last prefix-info option -- confirm */
+};
+
+#define nd_opts_src_lladdr	nd_opt_array[ND_OPT_SOURCE_LL_ADDR]
+#define nd_opts_tgt_lladdr	nd_opt_array[ND_OPT_TARGET_LL_ADDR]
+#define nd_opts_pi		nd_opt_array[ND_OPT_PREFIX_INFO]
+#define nd_opts_pi_end		nd_opt_piend
+#define nd_opts_rh		nd_opt_array[ND_OPT_REDIRECT_HDR]
+#define nd_opts_mtu		nd_opt_array[ND_OPT_MTU]
+
 #define NDISC_OPT_SPACE(len) (((len)+2+7)&~7)
 
 static u8 *ndisc_fill_option(u8 *opt, int type, void *data, int data_len)
@@ -154,8 +168,8 @@
 	return opt + space;
 }
 
-struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
-				     struct nd_opt_hdr *end)
+static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
+					    struct nd_opt_hdr *end)
 {
 	int type;
 	if (!cur || !end || cur >= end)
@@ -167,8 +181,8 @@
 	return (cur <= end && cur->nd_opt_type == type ? cur : NULL);
 }
 
-struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
-					  struct ndisc_options *ndopts)
+static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
+						 struct ndisc_options *ndopts)
 {
 	struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt;
 
@@ -333,8 +347,6 @@
 	unsigned char ha[MAX_ADDR_LEN];
 	unsigned char *h_dest = NULL;
 
-	skb_reserve(skb, (dev->hard_header_len + 15) & ~15);
-
 	if (dev->hard_header) {
 		if (ipv6_addr_type(daddr) & IPV6_ADDR_MULTICAST) {
 			ndisc_mc_map(daddr, ha, dev, 1);
@@ -371,11 +383,38 @@
  *	Send a Neighbour Advertisement
  */
 
+static int ndisc_output(struct sk_buff *skb)	/* handed to ndisc_dst_alloc() by the ndisc_send_*() callers */
+{
+	if (skb) {
+		struct neighbour *neigh = (skb->dst ? skb->dst->neighbour : NULL);	/* no dst: no neighbour hint */
+		if (ndisc_build_ll_hdr(skb, skb->dev, &skb->nh.ipv6h->daddr, neigh, skb->len) == 0) {
+			kfree_skb(skb);	/* 0 from ndisc_build_ll_hdr() is treated as failure: drop */
+			return -EINVAL;
+		}
+		dev_queue_xmit(skb);	/* its return value is ignored; 0 is reported regardless */
+		return 0;
+	}
+	return -EINVAL;	/* NULL skb */
+}
+
+static inline void ndisc_flow_init(struct flowi *fl, u8 type,	/* fill *fl as an ICMPv6 flow of the given type */
+			    struct in6_addr *saddr, struct in6_addr *daddr)
+{
+	memset(fl, 0, sizeof(*fl));	/* every field not set below stays zero */
+	ipv6_addr_copy(&fl->fl6_src, saddr);
+	ipv6_addr_copy(&fl->fl6_dst, daddr);
+	fl->proto	 	= IPPROTO_ICMPV6;
+	fl->fl_icmp_type	= type;
+	fl->fl_icmp_code	= 0;	/* already zero after the memset; kept explicit */
+}
+
 void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
 		   struct in6_addr *daddr, struct in6_addr *solicited_addr,
-		   int router, int solicited, int override, int inc_opt) 
+	 	   int router, int solicited, int override, int inc_opt) 
 {
-	static struct in6_addr tmpaddr;
+	struct flowi fl;
+	struct dst_entry* dst;
+	struct in6_addr tmpaddr;
 	struct inet6_ifaddr *ifp;
         struct sock *sk = ndisc_socket->sk;
 	struct in6_addr *src_addr;
@@ -386,6 +425,29 @@
 
 	len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
 
+	/* for anycast or proxy, solicited_addr != src_addr */
+	ifp = ipv6_get_ifaddr(solicited_addr, dev);
+ 	if (ifp) {
+		src_addr = solicited_addr;
+		in6_ifa_put(ifp);
+	} else {
+		if (ipv6_dev_get_saddr(dev, daddr, &tmpaddr, 0))
+			return;
+		src_addr = &tmpaddr;
+	}
+
+	ndisc_flow_init(&fl, NDISC_NEIGHBOUR_ADVERTISEMENT, src_addr, daddr);
+
+	dst = ndisc_dst_alloc(dev, neigh, ndisc_output);
+	if (!dst)
+		return;
+
+	err = xfrm_lookup(&dst, &fl, NULL, 0);
+	if (err < 0) {
+		dst_release(dst);
+		return;
+	}
+
 	if (inc_opt) {
 		if (dev->addr_len)
 			len += NDISC_OPT_SPACE(dev->addr_len);
@@ -398,27 +460,14 @@
 
 	if (skb == NULL) {
 		ND_PRINTK1("send_na: alloc skb failed\n");
-		return;
-	}
-	/* for anycast or proxy, solicited_addr != src_addr */
-	ifp = ipv6_get_ifaddr(solicited_addr, dev);
-	if (ifp) {
-		src_addr = solicited_addr;
-		in6_ifa_put(ifp);
-	} else {
-		if (ipv6_dev_get_saddr(dev, daddr, &tmpaddr, 0))
-			return;
-		src_addr = &tmpaddr;
-	}
-
-	if (ndisc_build_ll_hdr(skb, dev, daddr, neigh, len) == 0) {
-		kfree_skb(skb);
+		dst_release(dst);
 		return;
 	}
 
+	skb_reserve(skb, (dev->hard_header_len + 15) & ~15);
 	ip6_nd_hdr(sk, skb, dev, src_addr, daddr, IPPROTO_ICMPV6, len);
 
-	msg = (struct nd_msg *) skb_put(skb, len);
+	skb->h.raw = (unsigned char*) (msg = (struct nd_msg *) skb_put(skb, len));
 
         msg->icmph.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
         msg->icmph.icmp6_code = 0;
@@ -441,7 +490,8 @@
 						 csum_partial((__u8 *) msg, 
 							      len, 0));
 
-	dev_queue_xmit(skb);
+	skb->dst = dst;
+	dst_output(skb);
 
 	ICMP6_INC_STATS(Icmp6OutNeighborAdvertisements);
 	ICMP6_INC_STATS(Icmp6OutMsgs);
@@ -451,6 +501,8 @@
 		   struct in6_addr *solicit,
 		   struct in6_addr *daddr, struct in6_addr *saddr) 
 {
+	struct flowi fl;
+	struct dst_entry* dst;
         struct sock *sk = ndisc_socket->sk;
         struct sk_buff *skb;
         struct nd_msg *msg;
@@ -465,6 +517,18 @@
 		saddr = &addr_buf;
 	}
 
+	ndisc_flow_init(&fl, NDISC_NEIGHBOUR_SOLICITATION, saddr, daddr);
+
+	dst = ndisc_dst_alloc(dev, neigh, ndisc_output);
+	if (!dst)
+		return;
+
+	err = xfrm_lookup(&dst, &fl, NULL, 0);
+	if (err < 0) {
+		dst_release(dst);
+		return;
+	}
+
 	len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
 	send_llinfo = dev->addr_len && ipv6_addr_type(saddr) != IPV6_ADDR_ANY;
 	if (send_llinfo)
@@ -474,17 +538,14 @@
 				  1, &err);
 	if (skb == NULL) {
 		ND_PRINTK1("send_ns: alloc skb failed\n");
+		dst_release(dst);
 		return;
 	}
 
-	if (ndisc_build_ll_hdr(skb, dev, daddr, neigh, len) == 0) {
-		kfree_skb(skb);
-		return;
-	}
-
+	skb_reserve(skb, (dev->hard_header_len + 15) & ~15);
 	ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
 
-	msg = (struct nd_msg *)skb_put(skb, len);
+	skb->h.raw = (unsigned char*) (msg = (struct nd_msg *)skb_put(skb, len));
 	msg->icmph.icmp6_type = NDISC_NEIGHBOUR_SOLICITATION;
 	msg->icmph.icmp6_code = 0;
 	msg->icmph.icmp6_cksum = 0;
@@ -503,7 +564,8 @@
 						 csum_partial((__u8 *) msg, 
 							      len, 0));
 	/* send it! */
-	dev_queue_xmit(skb);
+	skb->dst = dst;
+	dst_output(skb);
 
 	ICMP6_INC_STATS(Icmp6OutNeighborSolicits);
 	ICMP6_INC_STATS(Icmp6OutMsgs);
@@ -512,6 +574,8 @@
 void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
 		   struct in6_addr *daddr)
 {
+	struct flowi fl;
+	struct dst_entry* dst;
 	struct sock *sk = ndisc_socket->sk;
         struct sk_buff *skb;
         struct icmp6hdr *hdr;
@@ -519,6 +583,18 @@
         int len;
 	int err;
 
+	ndisc_flow_init(&fl, NDISC_ROUTER_SOLICITATION, saddr, daddr);
+
+	dst = ndisc_dst_alloc(dev, NULL, ndisc_output);
+	if (!dst)
+		return;
+
+	err = xfrm_lookup(&dst, &fl, NULL, 0);
+	if (err < 0) {
+		dst_release(dst);
+		return;
+	}
+
 	len = sizeof(struct icmp6hdr);
 	if (dev->addr_len)
 		len += NDISC_OPT_SPACE(dev->addr_len);
@@ -530,14 +606,11 @@
+		dst_release(dst);	/* alloc failed: drop dst, as ndisc_send_na()/ndisc_send_ns() do here */
 		return;
 	}
 
-	if (ndisc_build_ll_hdr(skb, dev, daddr, NULL, len) == 0) {
-		kfree_skb(skb);
-		return;
-	}
-
+	skb_reserve(skb, (dev->hard_header_len + 15) & ~15);
 	ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
 
-        hdr = (struct icmp6hdr *) skb_put(skb, len);
+        skb->h.raw = (unsigned char*) (hdr = (struct icmp6hdr *) skb_put(skb, len));
         hdr->icmp6_type = NDISC_ROUTER_SOLICITATION;
         hdr->icmp6_code = 0;
         hdr->icmp6_cksum = 0;
@@ -554,7 +626,8 @@
 					   csum_partial((__u8 *) hdr, len, 0));
 
 	/* send it! */
-	dev_queue_xmit(skb);
+	skb->dst = dst;
+	dst_output(skb);
 
 	ICMP6_INC_STATS(Icmp6OutRouterSolicits);
 	ICMP6_INC_STATS(Icmp6OutMsgs);
@@ -598,7 +671,7 @@
 	}
 }
 
-void ndisc_recv_ns(struct sk_buff *skb)
+static void ndisc_recv_ns(struct sk_buff *skb)
 {
 	struct nd_msg *msg = (struct nd_msg *)skb->h.raw;
 	struct in6_addr *saddr = &skb->nh.ipv6h->saddr;
@@ -610,6 +683,7 @@
 	struct net_device *dev = skb->dev;
 	struct inet6_ifaddr *ifp;
 	struct neighbour *neigh;
+	int addr_type = ipv6_addr_type(saddr);
 
 	if (skb->len < sizeof(struct nd_msg)) {
 		if (net_ratelimit())
@@ -623,6 +697,20 @@
 		return;
 	}
 
+	/*
+	 * RFC2461 7.1.1: a DAD probe (unspecified source) must be sent to a
+	 * solicited-node multicast address, i.e. ff02::1:ffXX:XXXX.
+	 */
+	if (addr_type == IPV6_ADDR_ANY &&
+	    !(daddr->s6_addr32[0] == htonl(0xff020000) &&
+	      daddr->s6_addr32[1] == htonl(0x00000000) &&
+	      daddr->s6_addr32[2] == htonl(0x00000001) &&
+	      daddr->s6_addr [12] == 0xff )) {
+		if (net_ratelimit())
+			printk(KERN_DEBUG "ICMP6 NS: bad DAD packet (wrong destination)\n");
+		return;
+	}
+
 	if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
 		if (net_ratelimit())
 			printk(KERN_WARNING "ICMP NS: invalid ND option, ignored.\n");
@@ -637,23 +725,20 @@
 				printk(KERN_WARNING "ICMP NS: bad lladdr length.\n");
 			return;
 		}
-	}
 
-	/* XXX: RFC2461 7.1.1:
-	 * 	If the IP source address is the unspecified address, there
-	 *	MUST NOT be source link-layer address option in the message.
-	 *
-	 *	NOTE! Linux kernel < 2.4.4 broke this rule.
-	 */
-		 	
-	/* XXX: RFC2461 7.1.1:
-	 *	If the IP source address is the unspecified address, the IP
-      	 *	destination address MUST be a solicited-node multicast address.
-	 */
+		/* XXX: RFC2461 7.1.1:
+	 	 *	If the IP source address is the unspecified address, 
+		 *	there MUST NOT be source link-layer address option 
+		 *	in the message.
+		 */
+		if (addr_type == IPV6_ADDR_ANY) {
+			if (net_ratelimit())
+				printk(KERN_WARNING "ICMP6 NS: bad DAD packet (link-layer address option)\n");
+			return;
+		}
+	}
 
 	if ((ifp = ipv6_get_ifaddr(&msg->target, dev)) != NULL) {
-		int addr_type = ipv6_addr_type(saddr);
-
 		if (ifp->flags & IFA_F_TENTATIVE) {
 			/* Address is tentative. If the source
 			   is unspecified address, it is someone
@@ -686,8 +771,7 @@
 			ipv6_addr_all_nodes(&maddr);
 			ndisc_send_na(dev, NULL, &maddr, &ifp->addr, 
 				      ifp->idev->cnf.forwarding, 0, 
-				      ipv6_addr_type(&ifp->addr)&IPV6_ADDR_ANYCAST ? 0 : 1, 
-				      1);
+				      1, 1);
 			in6_ifa_put(ifp);
 			return;
 		}
@@ -710,8 +794,7 @@
 			if (neigh || !dev->hard_header) {
 				ndisc_send_na(dev, neigh, saddr, &ifp->addr, 
 					      ifp->idev->cnf.forwarding, 1, 
-					      ipv6_addr_type(&ifp->addr)&IPV6_ADDR_ANYCAST ? 0 : 1, 
-					      1);
+					      1, 1);
 				if (neigh)
 					neigh_release(neigh);
 			}
@@ -719,7 +802,6 @@
 		in6_ifa_put(ifp);
 	} else if (ipv6_chk_acast_addr(dev, &msg->target)) {
 		struct inet6_dev *idev = in6_dev_get(dev);
-		int addr_type = ipv6_addr_type(saddr);
 
 		/* anycast */
 
@@ -763,10 +845,10 @@
 		in6_dev_put(idev);
 	} else {
 		struct inet6_dev *in6_dev = in6_dev_get(dev);
-		int addr_type = ipv6_addr_type(saddr);
 
 		if (in6_dev && in6_dev->cnf.forwarding &&
-		    (addr_type & IPV6_ADDR_UNICAST) &&
+		    (addr_type & IPV6_ADDR_UNICAST ||
+		     addr_type == IPV6_ADDR_ANY) &&
 		    pneigh_lookup(&nd_tbl, &msg->target, dev, 0)) {
 			int inc = ipv6_addr_type(daddr)&IPV6_ADDR_MULTICAST;
 
@@ -779,12 +861,20 @@
 				else
 					nd_tbl.stats.rcv_probes_ucast++;
 					
-				neigh = neigh_event_ns(&nd_tbl, lladdr, saddr, dev);
+				if (addr_type & IPV6_ADDR_UNICAST) {
+					neigh = neigh_event_ns(&nd_tbl, lladdr, saddr, dev);
 
-				if (neigh) {
-					ndisc_send_na(dev, neigh, saddr, &msg->target,
-						      0, 1, 0, 1);
-					neigh_release(neigh);
+					if (neigh) {
+						ndisc_send_na(dev, neigh, saddr, &msg->target,
+							      0, 1, 0, 1);
+						neigh_release(neigh);
+					}
+				} else {
+					/* proxy should also protect against DAD */
+					struct in6_addr maddr;
+					ipv6_addr_all_nodes(&maddr);
+					ndisc_send_na(dev, NULL, &maddr, &msg->target, 
+						      0, 0, 0, 1);
 				}
 			} else {
 				struct sk_buff *n = skb_clone(skb, GFP_ATOMIC);
@@ -800,7 +890,7 @@
 	return;
 }
 
-void ndisc_recv_na(struct sk_buff *skb)
+static void ndisc_recv_na(struct sk_buff *skb)
 {
 	struct nd_msg *msg = (struct nd_msg *)skb->h.raw;
 	struct in6_addr *saddr = &skb->nh.ipv6h->saddr;
@@ -870,12 +960,8 @@
 				 */
 				struct rt6_info *rt;
 				rt = rt6_get_dflt_router(saddr, dev);
-				if (rt) {
-					/* It is safe only because
-					   we aer in BH */
-					dst_release(&rt->u.dst);
-					ip6_del_rt(rt, NULL);
-				}
+				if (rt)
+					ip6_del_rt(rt, NULL, NULL);
 			}
 		} else {
 			if (msg->icmph.icmp6_router)
@@ -960,7 +1046,7 @@
 	rt = rt6_get_dflt_router(&skb->nh.ipv6h->saddr, skb->dev);
 
 	if (rt && lifetime == 0) {
-		ip6_del_rt(rt, NULL);
+		ip6_del_rt(rt, NULL, NULL);
 		rt = NULL;
 	}
 
@@ -1072,7 +1158,7 @@
 			in6_dev->cnf.mtu6 = mtu;
 
 			if (rt)
-				rt->u.dst.pmtu = mtu;
+				rt->u.dst.metrics[RTAX_MTU-1] = mtu;
 
 			rt6_mtu_change(skb->dev, mtu);
 		}
@@ -1195,27 +1281,44 @@
 	struct in6_addr *addrp;
 	struct net_device *dev;
 	struct rt6_info *rt;
+	struct dst_entry *dst;
+	struct flowi fl;
 	u8 *opt;
 	int rd_len;
 	int err;
 	int hlen;
 
 	dev = skb->dev;
-	rt = rt6_lookup(&skb->nh.ipv6h->saddr, NULL, dev->ifindex, 1);
 
+	if (ipv6_get_lladdr(dev, &saddr_buf)) {
+ 		ND_PRINTK1("redirect: no link_local addr for dev\n");
+ 		return;
+ 	}
+
+	ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &skb->nh.ipv6h->saddr);
+
+	rt = rt6_lookup(&skb->nh.ipv6h->saddr, NULL, dev->ifindex, 1);
 	if (rt == NULL)
 		return;
+	dst = &rt->u.dst;
+
+	err = xfrm_lookup(&dst, &fl, NULL, 0);
+	if (err) {
+		dst_release(dst);
+		return;
+	}
+
+	rt = (struct rt6_info *) dst;
 
 	if (rt->rt6i_flags & RTF_GATEWAY) {
 		ND_PRINTK1("ndisc_send_redirect: not a neighbour\n");
-		dst_release(&rt->u.dst);
+		dst_release(dst);
 		return;
 	}
-	if (!xrlim_allow(&rt->u.dst, 1*HZ)) {
-		dst_release(&rt->u.dst);
+	if (!xrlim_allow(dst, 1*HZ)) {
+		dst_release(dst);
 		return;
 	}
-	dst_release(&rt->u.dst);
 
 	if (dev->addr_len) {
 		if (neigh->nud_state&NUD_VALID) {
@@ -1225,6 +1328,7 @@
 			   We will make it later, when will be sure,
 			   that it is alive.
 			 */
+			dst_release(dst);
 			return;
 		}
 	}
@@ -1234,11 +1338,6 @@
 	rd_len &= ~0x7;
 	len += rd_len;
 
-	if (ipv6_get_lladdr(dev, &saddr_buf)) {
- 		ND_PRINTK1("redirect: no link_local addr for dev\n");
- 		return;
- 	}
-
 	buff = sock_alloc_send_skb(sk, MAX_HEADER + len + dev->hard_header_len + 15,
 				   1, &err);
 	if (buff == NULL) {
@@ -1248,15 +1347,11 @@
 
 	hlen = 0;
 
-	if (ndisc_build_ll_hdr(buff, dev, &skb->nh.ipv6h->saddr, NULL, len) == 0) {
-		kfree_skb(buff);
-		return;
-	}
-
+	skb_reserve(buff, (dev->hard_header_len + 15) & ~15);
 	ip6_nd_hdr(sk, buff, dev, &saddr_buf, &skb->nh.ipv6h->saddr,
 		   IPPROTO_ICMPV6, len);
 
-	icmph = (struct icmp6hdr *) skb_put(buff, len);
+	buff->h.raw = (unsigned char*) (icmph = (struct icmp6hdr *) skb_put(buff, len));
 
 	memset(icmph, 0, sizeof(struct icmp6hdr));
 	icmph->icmp6_type = NDISC_REDIRECT;
@@ -1294,7 +1389,8 @@
 					     len, IPPROTO_ICMPV6,
 					     csum_partial((u8 *) icmph, len, 0));
 
-	dev_queue_xmit(buff);
+	buff->dst = dst;
+	dst_output(buff);
 
 	ICMP6_INC_STATS(Icmp6OutRedirects);
 	ICMP6_INC_STATS(Icmp6OutMsgs);
@@ -1414,6 +1510,9 @@
 
 void ndisc_cleanup(void)
 {
+#ifdef CONFIG_SYSCTL
+	neigh_sysctl_unregister(&nd_tbl.parms);
+#endif
 	neigh_table_clear(&nd_tbl);
 	sock_release(ndisc_socket);
 	ndisc_socket = NULL; /* For safety. */
Index: net/ipv6/protocol.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv6/protocol.c,v
retrieving revision 1.1.1.15
retrieving revision 1.1.1.15.2.1
diff -u -r1.1.1.15 -r1.1.1.15.2.1
--- a/net/ipv6/protocol.c	20 May 2001 00:56:43 -0000	1.1.1.15
+++ b/net/ipv6/protocol.c	16 Apr 2004 13:16:26 -0000	1.1.1.15.2.1
@@ -42,77 +42,42 @@
 
 struct inet6_protocol *inet6_protos[MAX_INET_PROTOS];
 
-void inet6_add_protocol(struct inet6_protocol *prot)
+int inet6_add_protocol(struct inet6_protocol *prot, unsigned char protocol)
 {
-	unsigned char hash;
-	struct inet6_protocol *p2;
+	int ret, hash = protocol & (MAX_INET_PROTOS - 1);
 
-	hash = prot->protocol & (MAX_INET_PROTOS - 1);
 	br_write_lock_bh(BR_NETPROTO_LOCK);
-	prot->next = inet6_protos[hash];
-	inet6_protos[hash] = prot;
-	prot->copy = 0;
-
-	/*
-	 *	Set the copy bit if we need to. 
-	 */
-	 
-	p2 = (struct inet6_protocol *) prot->next;
-	while(p2 != NULL) {
-		if (p2->protocol == prot->protocol) {
-			prot->copy = 1;
-			break;
-		}
-		p2 = (struct inet6_protocol *) p2->next;
+
+	if (inet6_protos[hash]) {	/* one handler per slot: refuse if it is already taken */
+		ret = -1;
+	} else {
+		inet6_protos[hash] = prot;
+		ret = 0;
+	}
+
 	br_write_unlock_bh(BR_NETPROTO_LOCK);
+
+	return ret;
 }
 
 /*
  *	Remove a protocol from the hash tables.
  */
  
-int inet6_del_protocol(struct inet6_protocol *prot)
+int inet6_del_protocol(struct inet6_protocol *prot, unsigned char protocol)
 {
-	struct inet6_protocol *p;
-	struct inet6_protocol *lp = NULL;
-	unsigned char hash;
+	int ret, hash = protocol & (MAX_INET_PROTOS - 1);
 
-	hash = prot->protocol & (MAX_INET_PROTOS - 1);
 	br_write_lock_bh(BR_NETPROTO_LOCK);
-	if (prot == inet6_protos[hash]) {
-		inet6_protos[hash] = (struct inet6_protocol *) inet6_protos[hash]->next;
-		br_write_unlock_bh(BR_NETPROTO_LOCK);
-		return(0);
-	}
-
-	p = (struct inet6_protocol *) inet6_protos[hash];
 
-        if (p != NULL && p->protocol == prot->protocol)
-                lp = p;
-
-	while(p != NULL) {
-		/*
-		 * We have to worry if the protocol being deleted is
-		 * the last one on the list, then we may need to reset
-		 * someone's copied bit.
-		 */
-		if (p->next != NULL && p->next == prot) {
-			/*
-			 * if we are the last one with this protocol and
-			 * there is a previous one, reset its copy bit.
-			 */
-			if (prot->copy == 0 && lp != NULL)
-				lp->copy = 0;
-			p->next = prot->next;
-			br_write_unlock_bh(BR_NETPROTO_LOCK);
-			return(0);
-		}
-		if (p->next != NULL && p->next->protocol == prot->protocol) 
-			lp = p->next;
-
-		p = (struct inet6_protocol *) p->next;
+	if (inet6_protos[hash] != prot) {	/* only the handler currently installed in the slot can be removed */
+		ret = -1;
+	} else {
+		inet6_protos[hash] = NULL;
+		ret = 0;
+	}
+
 	br_write_unlock_bh(BR_NETPROTO_LOCK);
-	return(-1);
+
+	return ret;
 }
Index: net/ipv6/raw.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv6/raw.c,v
retrieving revision 1.1.1.25
retrieving revision 1.1.1.25.2.1
diff -u -r1.1.1.25 -r1.1.1.25.2.1
--- a/net/ipv6/raw.c	28 Nov 2003 18:26:21 -0000	1.1.1.25
+++ b/net/ipv6/raw.c	16 Apr 2004 13:16:26 -0000	1.1.1.25.2.1
@@ -12,6 +12,7 @@
  *	Fixes:
  *	Hideaki YOSHIFUJI	:	sin6_scope_id support
  *	YOSHIFUJI,H.@USAGI	:	raw checksum (RFC2292(bis) compliance) 
+ *	Kazunori MIYAZAWA @USAGI:	change process style to use ip6_append_data
  *
  *	This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
@@ -29,6 +30,8 @@
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
 #include <linux/icmpv6.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
 
@@ -45,6 +48,7 @@
 #include <net/inet_common.h>
 
 #include <net/rawv6.h>
+#include <net/xfrm.h>
 
 struct sock *raw_v6_htable[RAWV6_HTABLE_SIZE];
 rwlock_t raw_v6_lock = RW_LOCK_UNLOCKED;
@@ -133,12 +137,14 @@
  *	demultiplex raw sockets.
  *	(should consider queueing the skb in the sock receive_queue
  *	without calling rawv6.c)
+ *
+ *	Caller owns SKB so we must make clones.
  */
-struct sock * ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
+void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
 {
 	struct in6_addr *saddr;
 	struct in6_addr *daddr;
-	struct sock *sk, *sk2;
+	struct sock *sk;
 	__u8 hash;
 
 	saddr = &skb->nh.ipv6h->saddr;
@@ -159,30 +165,18 @@
 
 	sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr);
 
-	if (sk) {
-		sk2 = sk;
-
-		while ((sk2 = __raw_v6_lookup(sk2->next, nexthdr, daddr, saddr))) {
-			struct sk_buff *buff;
-
-			if (nexthdr == IPPROTO_ICMPV6 &&
-			    icmpv6_filter(sk2, skb))
-				continue;
-
-			buff = skb_clone(skb, GFP_ATOMIC);
-			if (buff)
-				rawv6_rcv(sk2, buff);
+	while (sk) {
+		if (nexthdr != IPPROTO_ICMPV6 || !icmpv6_filter(sk, skb)) {	/* the filter is consulted for ICMPv6 only */
+			struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
+
+			/* Not releasing hash table! */
+			if (clone)	/* a failed clone just skips this socket */
+				rawv6_rcv(sk, clone);
+		}
+		sk = __raw_v6_lookup(sk->next, nexthdr, daddr, saddr);	/* continue the lookup from the next entry */
+	}
-
-	if (sk && nexthdr == IPPROTO_ICMPV6 && icmpv6_filter(sk, skb))
-		sk = NULL;
-
 out:
-	if (sk)
-		sock_hold(sk);
 	read_unlock(&raw_v6_lock);
-	return sk;
 }
 
 /* This cleans up af_inet6 a bit. -DaveM */
@@ -309,6 +303,11 @@
  */
 int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
 {
+        if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
+                kfree_skb(skb);
+                return NET_RX_DROP;
+        }
+
 	if (!sk->tp_pinfo.tp_raw.checksum)
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 
@@ -434,86 +433,114 @@
 	goto out_free;
 }
 
-/*
- *	Sending...
- */
+static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct raw6_opt *opt, int len)
+{
+	struct sk_buff *skb;
+	int err = 0;
+	u16 *csum;
 
-struct rawv6_fakehdr {
-	struct iovec	*iov;
-	struct sock	*sk;
-	__u32		len;
-	__u32		cksum;
-	__u32		proto;
-	struct in6_addr *daddr;
-};
+	if ((skb = skb_peek(&sk->write_queue)) == NULL)
+		goto out;
 
-static int rawv6_getfrag(const void *data, struct in6_addr *saddr, 
-			  char *buff, unsigned int offset, unsigned int len)
-{
-	struct iovec *iov = (struct iovec *) data;
+	if (opt->offset + 1 >= len) {
+		ip6_flush_pending_frames(sk);	/* checksum offset out of range: discard the queued frames */
+		err = -EINVAL;
+		goto out;
+	}
+	csum = (u16 *)(skb->h.raw + opt->offset);
 
-	return memcpy_fromiovecend(buff, iov, offset, len);
+	if (skb_queue_len(&sk->write_queue) == 1) {
+		/*
+		 * Only one fragment on the socket.
+		 */
+		/* should be check HW csum miyazawa */
+		*csum = csum_ipv6_magic(&fl->fl6_src,
+					&fl->fl6_dst,
+					len, fl->proto, skb->csum);
+	} else {
+		u32 tmp_csum = 0;
+
+		skb_queue_walk(&sk->write_queue, skb) {
+			tmp_csum = csum_add(tmp_csum, skb->csum);
+		}
+
+		tmp_csum = csum_ipv6_magic(&fl->fl6_src,
+					   &fl->fl6_dst,
+					   len, fl->proto, tmp_csum);
+		*csum = tmp_csum;
+	}
+	if (*csum == 0)
+		*csum = -1;
+	ip6_push_pending_frames(sk);
+out:
+	return err;
 }
 
-static int rawv6_frag_cksum(const void *data, struct in6_addr *addr,
-			     char *buff, unsigned int offset, 
-			     unsigned int len)
+static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
+			struct flowi *fl, struct rt6_info *rt, 
+			unsigned int flags)
 {
-	struct rawv6_fakehdr *hdr = (struct rawv6_fakehdr *) data;
-	
-	if (csum_partial_copy_fromiovecend(buff, hdr->iov, offset, 
-						    len, &hdr->cksum))
-		return -EFAULT;
-	
-	if (offset == 0) {
-		struct sock *sk;
-		struct raw6_opt *opt;
-		struct in6_addr *daddr;
-		
-		sk = hdr->sk;
-		opt = &sk->tp_pinfo.tp_raw;
+	struct inet_opt *inet = inet_sk(sk);
+	struct ipv6hdr *iph;
+	struct sk_buff *skb;
+	unsigned int hh_len;
+	int err;
 
-		if (hdr->daddr)
-			daddr = hdr->daddr;
-		else
-			daddr = addr + 1;
-		
-		hdr->cksum = csum_ipv6_magic(addr, daddr, hdr->len,
-					     hdr->proto, hdr->cksum);
-		
-		if (opt->offset + 1 < len) {
-			__u16 *csum;
+	if (length > rt->u.dst.dev->mtu) {
+		ipv6_local_error(sk, EMSGSIZE, fl, rt->u.dst.dev->mtu);
+		return -EMSGSIZE;
+	}
+	if (flags&MSG_PROBE)
+		goto out;
 
-			csum = (__u16 *) (buff + opt->offset);
-			if (*csum) {
-				/* in case cksum was not initialized */
-				__u32 sum = hdr->cksum;
-				sum += *csum;
-				*csum = hdr->cksum = (sum + (sum>>16));
-			} else {
-				*csum = hdr->cksum;
-			}
-		} else {
-			if (net_ratelimit())
-				printk(KERN_DEBUG "icmp: cksum offset too big\n");
-			return -EINVAL;
-		}
-	}	
-	return 0; 
-}
+	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
+
+	skb = sock_alloc_send_skb(sk, length+hh_len+15,
+				  flags&MSG_DONTWAIT, &err);
+	if (skb == NULL)
+		goto error; 
+	skb_reserve(skb, hh_len);
+
+	skb->priority = sk->priority;
+	skb->dst = dst_clone(&rt->u.dst);
+
+	skb->nh.ipv6h = iph = (struct ipv6hdr *)skb_put(skb, length);
+
+	skb->ip_summed = CHECKSUM_NONE;
 
+	skb->h.raw = skb->nh.raw;
+	err = memcpy_fromiovecend((void *)iph, from, 0, length);
+	if (err)
+		goto error_fault;
+
+	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
+		      dst_output);
+	if (err > 0)	/* positive result: reported only when recverr is set */
+		err = inet->recverr ? net_xmit_errno(err) : 0;
+	if (err)
+		goto error;	/* error: does not free skb; only error_fault does */
+out:	/* also reached directly for MSG_PROBE, before any skb is allocated */
+	return 0;
 
+error_fault:
+	err = -EFAULT;
+	kfree_skb(skb);
+error:
+	IP6_INC_STATS(Ip6OutDiscards);
+	return err; 
+}
 static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, int len)
 {
 	struct ipv6_txoptions opt_space;
 	struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name;
+	struct in6_addr *daddr;
 	struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
+	struct raw6_opt *raw_opt = raw6_sk(sk);
 	struct ipv6_txoptions *opt = NULL;
 	struct ip6_flowlabel *flowlabel = NULL;
+	struct dst_entry *dst = NULL;
 	struct flowi fl;
 	int addr_len = msg->msg_namelen;
-	struct in6_addr *daddr;
-	struct raw6_opt *raw_opt;
 	int hlimit = -1;
 	u16 proto;
 	int err;
@@ -531,9 +558,7 @@
 	/*
 	 *	Get and verify the address. 
 	 */
-
-	fl.fl6_flowlabel = 0;
-	fl.oif = 0;
+	memset(&fl, 0, sizeof(fl));
 
 	if (sin6) {
 		if (addr_len < SIN6_LEN_RFC2133) 
@@ -547,6 +572,8 @@
 
 		if (!proto)
 			proto = sk->num;
+		else if (proto != sk->num)
+			return(-EINVAL);
 
 		if (proto > 255)
 			return(-EINVAL);
@@ -585,16 +612,17 @@
 		 * unspecfied destination address 
 		 * treated as error... is this correct ?
 		 */
+		fl6_sock_release(flowlabel);
 		return(-EINVAL);
 	}
 
 	if (fl.oif == 0)
 		fl.oif = sk->bound_dev_if;
-	fl.fl6_src = NULL;
 
 	if (msg->msg_controllen) {
 		opt = &opt_space;
 		memset(opt, 0, sizeof(struct ipv6_txoptions));
+		opt->tot_len = sizeof(struct ipv6_txoptions);
 
 		err = datagram_send_ctl(msg, &fl, opt, &hlimit);
 		if (err < 0) {
@@ -614,39 +642,71 @@
 	if (flowlabel)
 		opt = fl6_merge_options(&opt_space, flowlabel, opt);
 
-	raw_opt = &sk->tp_pinfo.tp_raw;
-
 	fl.proto = proto;
-	fl.fl6_dst = daddr;
-	if (fl.fl6_src == NULL && !ipv6_addr_any(&np->saddr))
-		fl.fl6_src = &np->saddr;
-	fl.uli_u.icmpt.type = 0;
-	fl.uli_u.icmpt.code = 0;
-	
-	if (raw_opt->checksum) {
-		struct rawv6_fakehdr hdr;
-		
-		hdr.iov = msg->msg_iov;
-		hdr.sk  = sk;
-		hdr.len = len;
-		hdr.cksum = 0;
-		hdr.proto = proto;
+	ipv6_addr_copy(&fl.fl6_dst, daddr);
+	if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr))
+		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
+
+	/* merge ip6_build_xmit from ip6_output */
+	if (opt && opt->srcrt) {
+		struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
+		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
+	}
+
+	if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
+		fl.oif = np->mcast_oif;
+
+	err = ip6_dst_lookup(sk, &dst, &fl);
+	if (err)
+		goto out;
 
-		if (opt && opt->srcrt)
-			hdr.daddr = daddr;
+	if (hlimit < 0) {
+		if (ipv6_addr_is_multicast(&fl.fl6_dst))
+			hlimit = np->mcast_hops;
 		else
-			hdr.daddr = NULL;
+			hlimit = np->hop_limit;
+		if (hlimit < 0)
+			hlimit = dst_metric(dst, RTAX_HOPLIMIT);
+	}
+
+	if (msg->msg_flags&MSG_CONFIRM)
+		goto do_confirm;
 
-		err = ip6_build_xmit(sk, rawv6_frag_cksum, &hdr, &fl, len,
-				     opt, hlimit, msg->msg_flags);
+back_from_confirm:
+	if (sk->protinfo.af_inet.hdrincl) {
+		err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, (struct rt6_info*)dst, msg->msg_flags);
 	} else {
-		err = ip6_build_xmit(sk, rawv6_getfrag, msg->msg_iov, &fl, len,
-				     opt, hlimit, msg->msg_flags);
+		lock_sock(sk);
+		err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0,
+					hlimit, opt, &fl, (struct rt6_info*)dst, msg->msg_flags);
+
+		if (err)
+			ip6_flush_pending_frames(sk);
+		else if (!(msg->msg_flags & MSG_MORE)) {
+			if (raw_opt->checksum) {
+				err = rawv6_push_pending_frames(sk, &fl, raw_opt, len);
+			} else {
+				err = ip6_push_pending_frames(sk);
+			}
+		}
+		release_sock(sk);	/* pairs with lock_sock() above; the hdrincl and confirm-only paths never lock */
 	}
+done:
+	ip6_dst_store(sk, dst,
+		      !ipv6_addr_cmp(&fl.fl6_dst, &np->daddr) ?
+		      &np->daddr : NULL);
+	if (err > 0)
+		err = np->recverr ? net_xmit_errno(err) : 0;
 
+out:
 	fl6_sock_release(flowlabel);
-
 	return err<0?err:len;
+do_confirm:
+	dst_confirm(dst);
+	if (!(msg->msg_flags & MSG_PROBE) || len)
+		goto back_from_confirm;
+	err = 0;
+	goto done;
 }
 
 static int rawv6_seticmpfilter(struct sock *sk, int level, int optname, 
Index: net/ipv6/reassembly.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv6/reassembly.c,v
retrieving revision 1.1.1.21
retrieving revision 1.1.1.21.2.1
diff -u -r1.1.1.21 -r1.1.1.21.2.1
--- a/net/ipv6/reassembly.c	25 Aug 2003 11:44:44 -0000	1.1.1.21
+++ b/net/ipv6/reassembly.c	16 Apr 2004 13:16:26 -0000	1.1.1.21.2.1
@@ -23,6 +23,10 @@
  *      Horst von Brand Add missing #include <linux/string.h>
  *	Alexey Kuznetsov	SMP races, threading, cleanup.
  *	Patrick McHardy		LRU queue of frag heads for evictor.
+ *	Mitsuru KANDA @USAGI	Register inet6_protocol{}.
+ *	David Stevens and
+ *	YOSHIFUJI,H. @USAGI	Always remove fragment header to
+ *				calculate ICV correctly.
  */
 #include <linux/config.h>
 #include <linux/errno.h>
@@ -421,7 +425,7 @@
 	end = offset + (ntohs(skb->nh.ipv6h->payload_len) -
 			((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1)));
 
-	if ((unsigned int)end >= 65536) {
+	if ((unsigned int)end > IPV6_MAXPLEN) {
  		icmpv6_param_prob(skb,ICMPV6_HDR_FIELD, (u8*)&fhdr->frag_off - skb->nh.raw);
  		return;
 	}
@@ -431,7 +435,7 @@
  				     csum_partial(skb->nh.raw, (u8*)(fhdr+1)-skb->nh.raw, 0));
 
 	/* Is this the final fragment? */
-	if (!(fhdr->frag_off & htons(0x0001))) {
+	if (!(fhdr->frag_off & htons(IP6_MF))) {
 		/* If we already have some bits beyond end
 		 * or have different end, the segment is corrupted.
 		 */
@@ -579,12 +583,12 @@
  *	the last and the first frames arrived and all the bits are here.
  */
 static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
+			  unsigned int *nhoffp,
 			  struct net_device *dev)
 {
 	struct sk_buff *fp, *head = fq->fragments;
-	int    remove_fraghdr = 0;
 	int    payload_len;
-	int    nhoff;
+	unsigned int nhoff;
 
 	fq_kill(fq);
 
@@ -592,15 +596,9 @@
 	BUG_TRAP(FRAG6_CB(head)->offset == 0);
 
 	/* Unfragmented part is taken from the first segment. */
-	payload_len = (head->data - head->nh.raw) - sizeof(struct ipv6hdr) + fq->len;
-	nhoff = head->h.raw - head->nh.raw;
-
-	if (payload_len > 65535) {
-		payload_len -= 8;
-		if (payload_len > 65535)
-			goto out_oversize;
-		remove_fraghdr = 1;
-	}
+	payload_len = (head->data - head->nh.raw) - sizeof(struct ipv6hdr) + fq->len - sizeof(struct frag_hdr);
+	if (payload_len > IPV6_MAXPLEN)
+		goto out_oversize;
 
 	/* Head of list must not be cloned. */
 	if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
@@ -629,18 +627,14 @@
 		atomic_add(clone->truesize, &ip6_frag_mem);
 	}
 
-	/* Normally we do not remove frag header from datagram, but
-	 * we have to do this and to relocate header, when payload
-	 * is > 65535-8. */
-	if (remove_fraghdr) {
-		nhoff = fq->nhoffset;
-		head->nh.raw[nhoff] = head->h.raw[0];
-		memmove(head->head+8, head->head, (head->data-head->head)-8);
-		head->mac.raw += 8;
-		head->nh.raw += 8;
-	} else {
-		((struct frag_hdr*)head->h.raw)->frag_off = 0;
-	}
+	/* Always strip the fragment header from the datagram and relocate
+	 * the headers in front of it, so the ICV is calculated correctly. */
+	nhoff = fq->nhoffset;
+	head->nh.raw[nhoff] = head->h.raw[0];
+	memmove(head->head + sizeof(struct frag_hdr), head->head, 
+		(head->data - head->head) - sizeof(struct frag_hdr));
+	head->mac.raw += sizeof(struct frag_hdr);
+	head->nh.raw += sizeof(struct frag_hdr);
 
 	skb_shinfo(head)->frag_list = head->next;
 	head->h.raw = head->data;
@@ -671,7 +665,8 @@
 
 	IP6_INC_STATS_BH(Ip6ReasmOKs);
 	fq->fragments = NULL;
-	return nhoff;
+	*nhoffp = nhoff;
+	return 1;
 
 out_oversize:
 	if (net_ratelimit())
@@ -685,7 +680,7 @@
 	return -1;
 }
 
-int ipv6_reassembly(struct sk_buff **skbp, int nhoff)
+static int ipv6_frag_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
 {
 	struct sk_buff *skb = *skbp; 
 	struct net_device *dev = skb->dev;
@@ -715,7 +710,8 @@
 		skb->h.raw += sizeof(struct frag_hdr);
 		IP6_INC_STATS_BH(Ip6ReasmOKs);
 
-		return (u8*)fhdr - skb->nh.raw;
+		*nhoffp = (u8*)fhdr - skb->nh.raw;
+		return 1;
 	}
 
 	if (atomic_read(&ip6_frag_mem) > sysctl_ip6frag_high_thresh)
@@ -726,11 +722,11 @@
 
 		spin_lock(&fq->lock);
 
-		ip6_frag_queue(fq, skb, fhdr, nhoff);
+		ip6_frag_queue(fq, skb, fhdr, *nhoffp);
 
 		if (fq->last_in == (FIRST_IN|LAST_IN) &&
 		    fq->meat == fq->len)
-			ret = ip6_frag_reasm(fq, skbp, dev);
+			ret = ip6_frag_reasm(fq, skbp, nhoffp, dev);
 
 		spin_unlock(&fq->lock);
 		fq_put(fq);
@@ -742,8 +738,17 @@
 	return -1;
 }
 
+static struct inet6_protocol frag_protocol =
+{	/* registered for IPPROTO_FRAGMENT by ipv6_frag_init() */
+	.handler	=	ipv6_frag_rcv,
+	.flags		=	INET6_PROTO_NOPOLICY,
+};
+
 void __init ipv6_frag_init(void)
 {
+	if (inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT) < 0)
+		printk(KERN_ERR "ipv6_frag_init: Could not register protocol\n");
+
 	ip6_frag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
 				   (jiffies ^ (jiffies >> 6)));
 
Index: net/ipv6/route.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv6/route.c,v
retrieving revision 1.1.1.27
retrieving revision 1.1.1.27.2.1
diff -u -r1.1.1.27 -r1.1.1.27.2.1
--- a/net/ipv6/route.c	18 Feb 2004 13:36:32 -0000	1.1.1.27
+++ b/net/ipv6/route.c	16 Apr 2004 13:16:26 -0000	1.1.1.27.2.1
@@ -49,6 +49,8 @@
 #include <net/addrconf.h>
 #include <net/tcp.h>
 #include <linux/rtnetlink.h>
+#include <net/dst.h>
+#include <net/xfrm.h>
 
 #include <asm/uaccess.h>
 
@@ -56,8 +58,6 @@
 #include <linux/sysctl.h>
 #endif
 
-#undef CONFIG_RT6_POLICY
-
 /* Set to 3 to get tracing. */
 #define RT6_DEBUG 2
 
@@ -80,39 +80,43 @@
 
 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
-static struct dst_entry	*ip6_dst_reroute(struct dst_entry *dst,
-					 struct sk_buff *skb);
 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
 static int		 ip6_dst_gc(void);
 
 static int		ip6_pkt_discard(struct sk_buff *skb);
 static void		ip6_link_failure(struct sk_buff *skb);
+static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
 
 struct dst_ops ip6_dst_ops = {
-	AF_INET6,
-	__constant_htons(ETH_P_IPV6),
-	1024,
-
-        ip6_dst_gc,
-	ip6_dst_check,
-	ip6_dst_reroute,
-	NULL,
-	ip6_negative_advice,
-	ip6_link_failure,
-	sizeof(struct rt6_info),
+	.family			=	AF_INET6,
+	.protocol		=	__constant_htons(ETH_P_IPV6),
+	.gc			=	ip6_dst_gc,
+	.gc_thresh		=	1024,
+	.check			=	ip6_dst_check,
+	.negative_advice	=	ip6_negative_advice,
+	.link_failure		=	ip6_link_failure,
+	.update_pmtu		=	ip6_rt_update_pmtu,
+	.entry_size		=	sizeof(struct rt6_info),
 };
 
 struct rt6_info ip6_null_entry = {
-	{{NULL, ATOMIC_INIT(1), 1, &loopback_dev,
-	  -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	  -ENETUNREACH, NULL, NULL,
-	  ip6_pkt_discard, ip6_pkt_discard,
-#ifdef CONFIG_NET_CLS_ROUTE
-	  0,
-#endif
-	  &ip6_dst_ops}},
-	NULL, {{{0}}}, RTF_REJECT|RTF_NONEXTHOP, ~0U,
-	255, ATOMIC_INIT(1), {NULL}, {{{{0}}}, 0}, {{{{0}}}, 0}
+	.u = {
+		.dst = {
+			.__refcnt	= ATOMIC_INIT(1),
+			.__use		= 1,
+			.dev		= &loopback_dev,
+			.obsolete	= -1,
+			.error		= -ENETUNREACH,
+			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
+			.input		= ip6_pkt_discard,
+			.output		= ip6_pkt_discard,
+			.ops		= &ip6_dst_ops,
+			.path		= (struct dst_entry*)&ip6_null_entry,
+		}
+	},
+	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
+	.rt6i_metric	= ~(u32) 0,
+	.rt6i_ref	= ATOMIC_INIT(1),
 };
 
 struct fib6_node ip6_routing_table = {
@@ -121,29 +125,17 @@
 	0, RTN_ROOT|RTN_TL_ROOT|RTN_RTINFO, 0
 };
 
-#ifdef CONFIG_RT6_POLICY
-int	ip6_rt_policy = 0;
-
-struct pol_chain *rt6_pol_list = NULL;
-
-
-static int rt6_flow_match_in(struct rt6_info *rt, struct sk_buff *skb);
-static int rt6_flow_match_out(struct rt6_info *rt, struct sock *sk);
-
-static struct rt6_info	*rt6_flow_lookup(struct rt6_info *rt,
-					 struct in6_addr *daddr,
-					 struct in6_addr *saddr,
-					 struct fl_acc_args *args);
-
-#else
-#define ip6_rt_policy (0)
-#endif
-
 /* Protects all the ip6 fib */
 
 rwlock_t rt6_lock = RW_LOCK_UNLOCKED;
 
 
+/* Allocate a dst entry from ip6_dst_ops and hand it back as an rt6_info. */
+static __inline__ struct rt6_info *ip6_dst_alloc(void)
+{
+	return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
+}
+
 /*
  *	Route lookup. Any rt6_lock is implied.
  */
@@ -269,9 +261,12 @@
 		}
 	}
 
-	if (match)
+	if (match) {
+		if (rt6_dflt_pointer != match)
+			RT6_TRACE("changed default router: %p->%p\n",
+				  rt6_dflt_pointer, match);
 		rt6_dflt_pointer = match;
-
+	}
 	spin_unlock(&rt6_dflt_lock);
 
 	if (!match) {
@@ -325,12 +320,12 @@
    be destroyed.
  */
 
-static int rt6_ins(struct rt6_info *rt, struct nlmsghdr *nlh)
+static int rt6_ins(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
 {
 	int err;
 
 	write_lock_bh(&rt6_lock);
-	err = fib6_add(&ip6_routing_table, rt, nlh);
+	err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr);
 	write_unlock_bh(&rt6_lock);
 
 	return err;
@@ -373,7 +368,7 @@
 
 		dst_hold(&rt->u.dst);
 
-		err = rt6_ins(rt, NULL);
+		err = rt6_ins(rt, NULL, NULL);
 		if (err == 0)
 			return rt;
 
@@ -385,38 +380,6 @@
 	return &ip6_null_entry;
 }
 
-#ifdef CONFIG_RT6_POLICY
-static __inline__ struct rt6_info *rt6_flow_lookup_in(struct rt6_info *rt,
-						      struct sk_buff *skb)
-{
-	struct in6_addr *daddr, *saddr;
-	struct fl_acc_args arg;
-
-	arg.type = FL_ARG_FORWARD;
-	arg.fl_u.skb = skb;
-
-	saddr = &skb->nh.ipv6h->saddr;
-	daddr = &skb->nh.ipv6h->daddr;
-
-	return rt6_flow_lookup(rt, daddr, saddr, &arg);
-}
-
-static __inline__ struct rt6_info *rt6_flow_lookup_out(struct rt6_info *rt,
-						       struct sock *sk,
-						       struct flowi *fl)
-{
-	struct fl_acc_args arg;
-
-	arg.type = FL_ARG_ORIGIN;
-	arg.fl_u.fl_o.sk = sk;
-	arg.fl_u.fl_o.flow = fl;
-
-	return rt6_flow_lookup(rt, fl->nl_u.ip6_u.daddr, fl->nl_u.ip6_u.saddr,
-			       &arg);
-}
-
-#endif
-
 #define BACKTRACK() \
 if (rt == &ip6_null_entry && strict) { \
        while ((fn = fn->parent) != NULL) { \
@@ -449,53 +412,30 @@
 	rt = fn->leaf;
 
 	if ((rt->rt6i_flags & RTF_CACHE)) {
-		if (ip6_rt_policy == 0) {
-			rt = rt6_device_match(rt, skb->dev->ifindex, strict);
-			BACKTRACK();
-			dst_hold(&rt->u.dst);
-			goto out;
-		}
-
-#ifdef CONFIG_RT6_POLICY
-		if ((rt->rt6i_flags & RTF_FLOW)) {
-			struct rt6_info *sprt;
-
-			for (sprt = rt; sprt; sprt = sprt->u.next) {
-				if (rt6_flow_match_in(sprt, skb)) {
-					rt = sprt;
-					dst_hold(&rt->u.dst);
-					goto out;
-				}
-			}
-		}
-#endif
+		rt = rt6_device_match(rt, skb->dev->ifindex, strict);
+		BACKTRACK();
+		dst_hold(&rt->u.dst);
+		goto out;
 	}
 
 	rt = rt6_device_match(rt, skb->dev->ifindex, 0);
 	BACKTRACK();
 
-	if (ip6_rt_policy == 0) {
-		if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
-			read_unlock_bh(&rt6_lock);
+	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
+		read_unlock_bh(&rt6_lock);
 
-			rt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
-				     &skb->nh.ipv6h->saddr);
+		rt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
+			     &skb->nh.ipv6h->saddr);
 			
-			if (rt->u.dst.error != -EEXIST || --attempts <= 0)
-				goto out2;
-			/* Race condition! In the gap, when rt6_lock was
-			   released someone could insert this route.  Relookup.
-			 */
-			goto relookup;
-		}
-		dst_hold(&rt->u.dst);
-	} else {
-#ifdef CONFIG_RT6_POLICY
-		rt = rt6_flow_lookup_in(rt, skb);
-#else
-		/* NEVER REACHED */
-#endif
+		if (rt->u.dst.error != -EEXIST || --attempts <= 0)
+			goto out2;
+		/* Race condition! In the gap, when rt6_lock was
+		   released someone could insert this route.  Relookup.
+		*/
+		dst_release(&rt->u.dst);
+		goto relookup;
 	}
+	dst_hold(&rt->u.dst);
 
 out:
 	read_unlock_bh(&rt6_lock);
@@ -512,38 +452,21 @@
 	int strict;
 	int attempts = 3;
 
-	strict = ipv6_addr_type(fl->nl_u.ip6_u.daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
+	strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
 
 relookup:
 	read_lock_bh(&rt6_lock);
 
-	fn = fib6_lookup(&ip6_routing_table, fl->nl_u.ip6_u.daddr,
-			 fl->nl_u.ip6_u.saddr);
+	fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
 
 restart:
 	rt = fn->leaf;
 
 	if ((rt->rt6i_flags & RTF_CACHE)) {
-		if (ip6_rt_policy == 0) {
-			rt = rt6_device_match(rt, fl->oif, strict);
-			BACKTRACK();
-			dst_hold(&rt->u.dst);
-			goto out;
-		}
-
-#ifdef CONFIG_RT6_POLICY
-		if ((rt->rt6i_flags & RTF_FLOW)) {
-			struct rt6_info *sprt;
-
-			for (sprt = rt; sprt; sprt = sprt->u.next) {
-				if (rt6_flow_match_out(sprt, sk)) {
-					rt = sprt;
-					dst_hold(&rt->u.dst);
-					goto out;
-				}
-			}
-		}
-#endif
+		rt = rt6_device_match(rt, fl->oif, strict);
+		BACKTRACK();
+		dst_hold(&rt->u.dst);
+		goto out;
 	}
 	if (rt->rt6i_flags & RTF_DEFAULT) {
 		if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
@@ -553,29 +476,21 @@
 		BACKTRACK();
 	}
 
-	if (ip6_rt_policy == 0) {
-		if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
-			read_unlock_bh(&rt6_lock);
+	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
+		read_unlock_bh(&rt6_lock);
 
-			rt = rt6_cow(rt, fl->nl_u.ip6_u.daddr,
-				     fl->nl_u.ip6_u.saddr);
-			
-			if (rt->u.dst.error != -EEXIST || --attempts <= 0)
-				goto out2;
+		rt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src);
 
-			/* Race condition! In the gap, when rt6_lock was
-			   released someone could insert this route.  Relookup.
-			 */
-			goto relookup;
-		}
-		dst_hold(&rt->u.dst);
-	} else {
-#ifdef CONFIG_RT6_POLICY
-		rt = rt6_flow_lookup_out(rt, sk, fl);
-#else
-		/* NEVER REACHED */
-#endif
+		if (rt->u.dst.error != -EEXIST || --attempts <= 0)
+			goto out2;
+
+		/* Race condition! In the gap, when rt6_lock was
+		   released someone could insert this route.  Relookup.
+		*/
+		dst_release(&rt->u.dst);
+		goto relookup;
 	}
+	dst_hold(&rt->u.dst);
 
 out:
 	read_unlock_bh(&rt6_lock);
@@ -603,23 +518,13 @@
 	return NULL;
 }
 
-static struct dst_entry *ip6_dst_reroute(struct dst_entry *dst, struct sk_buff *skb)
-{
-	/*
-	 *	FIXME
-	 */
-	RDBG(("ip6_dst_reroute(%p,%p)[%p] (AIEEE)\n", dst, skb,
-	      __builtin_return_address(0)));
-	return NULL;
-}
-
 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
 {
 	struct rt6_info *rt = (struct rt6_info *) dst;
 
 	if (rt) {
 		if (rt->rt6i_flags & RTF_CACHE)
-			ip6_del_rt(rt, NULL);
+			ip6_del_rt(rt, NULL, NULL);
 		else
 			dst_release(dst);
 	}
@@ -642,7 +547,76 @@
 	}
 }
 
-static int ip6_dst_gc()
+static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
+{
+	struct rt6_info *route = (struct rt6_info*)dst;
+
+	if (mtu >= dst_pmtu(dst) || route->rt6i_dst.plen != 128)
+		return;	/* never raise the MTU; only /128 routes qualify */
+	route->rt6i_flags |= RTF_MODIFIED;
+	dst->metrics[RTAX_MTU-1] = mtu;
+}
+
+/* Protected by rt6_lock.  */
+static struct dst_entry *ndisc_dst_gc_list;
+
+/* Allocate an RTF_LOCAL|RTF_NDISC dst that transmits through @output and
+ * queue it on ndisc_dst_gc_list; @dev and @neigh are held when non-NULL. */
+struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
+				  struct neighbour *neigh,
+				  int (*output)(struct sk_buff *))
+{
+	struct rt6_info *rt = ip6_dst_alloc();
+
+	if (unlikely(!rt))
+		return NULL;
+
+	if (dev)
+		dev_hold(dev);
+	rt->rt6i_dev	  = dev;
+	if (neigh)
+		neigh_hold(neigh);
+	rt->rt6i_nexthop  = neigh;
+
+	rt->rt6i_expires  = 0;
+	rt->rt6i_metric   = 0;
+	rt->rt6i_flags    = RTF_LOCAL | RTF_NDISC;
+	rt->u.dst.output  = output;
+	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
+	atomic_set(&rt->u.dst.__refcnt, 1);
+
+	write_lock_bh(&rt6_lock);
+	rt->u.dst.next = ndisc_dst_gc_list;
+	ndisc_dst_gc_list = &rt->u.dst;
+	write_unlock_bh(&rt6_lock);
+
+	fib6_force_start_gc();
+	return (struct dst_entry *)rt;
+}
+
+/* Sweep ndisc_dst_gc_list: free each entry whose refcount has dropped to
+ * zero and return how many were freed.  *more is incremented once for
+ * every entry that is still referenced and therefore left on the list. */
+int ndisc_dst_gc(int *more)
+{
+	struct dst_entry *dst, **pprev = &ndisc_dst_gc_list;
+	int freed = 0;
+
+	while ((dst = *pprev) != NULL) {
+		if (!atomic_read(&dst->__refcnt)) {
+			*pprev = dst->next;
+			dst_free(dst);
+			freed++;
+		} else {
+			pprev = &dst->next;
+			(*more)++;
+		}
+	}
+
+	return freed;
+}
+
+static int ip6_dst_gc(void)
 {
 	static unsigned expire = 30*HZ;
 	static unsigned long last_gc;
@@ -669,19 +643,6 @@
    Remove it only when all the things will work!
  */
 
-static void ipv6_addr_prefix(struct in6_addr *pfx,
-			     const struct in6_addr *addr, int plen)
-{
-	int b = plen&0x7;
-	int o = plen>>3;
-
-	memcpy(pfx->s6_addr, addr, o);
-	if (o < 16)
-		memset(pfx->s6_addr + o, 0, 16 - o);
-	if (b != 0)
-		pfx->s6_addr[o] = addr->s6_addr[o]&(0xff00 >> b);
-}
-
 static int ipv6_get_mtu(struct net_device *dev)
 {
 	int mtu = IPV6_MIN_MTU;
@@ -695,6 +656,24 @@
 	return mtu;
 }
 
+/* Advertised MSS for a path MTU: strip IPv6+TCP headers, then clamp. */
+static inline unsigned int ipv6_advmss(unsigned int mtu)
+{
+	unsigned int hdrlen = sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
+
+	/* An MTU smaller than the headers must clamp, not wrap around. */
+	mtu = (mtu > hdrlen) ? mtu - hdrlen : 0;
+	if (mtu < ip6_rt_min_advmss)
+		mtu = ip6_rt_min_advmss;
+	/* Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and the
+	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
+	 * IPV6_MAXPLEN itself means "any MSS, rely only on pmtu discovery".
+	 */
+	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
+		mtu = IPV6_MAXPLEN;
+	return mtu;
+}
+
 static int ipv6_get_hoplimit(struct net_device *dev)
 {
 	int hoplimit = ipv6_devconf.hop_limit;
@@ -712,14 +691,17 @@
  *
  */
 
-int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh)
+int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
 {
 	int err;
 	struct rtmsg *r;
+	struct rtattr **rta;
 	struct rt6_info *rt;
 	struct net_device *dev = NULL;
 	int addr_type;
 
+	rta = (struct rtattr **) _rtattr;
+
 	if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
 		return -EINVAL;
 #ifndef CONFIG_IPV6_SUBTREES
@@ -729,7 +711,7 @@
 	if (rtmsg->rtmsg_metric == 0)
 		rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
 
-	rt = dst_alloc(&ip6_dst_ops);
+	rt = ip6_dst_alloc();
 
 	if (rt == NULL)
 		return -ENOMEM;
@@ -849,23 +831,42 @@
 		}
 	}
 
-	if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr))
-		rt->rt6i_hoplimit = IPV6_DEFAULT_MCASTHOPS;
-	else
-		rt->rt6i_hoplimit = ipv6_get_hoplimit(dev);
-	rt->rt6i_flags = rtmsg->rtmsg_flags;
+	rt->rt6i_flags = rtmsg->rtmsg_flags & ~RTF_NDISC;
 
 install_route:
-	rt->u.dst.pmtu = ipv6_get_mtu(dev);
-	rt->u.dst.advmss = max_t(unsigned int, rt->u.dst.pmtu - 60, ip6_rt_min_advmss);
-	/* Maximal non-jumbo IPv6 payload is 65535 and corresponding
-	   MSS is 65535 - tcp_header_size. 65535 is also valid and
-	   means: "any MSS, rely only on pmtu discovery"
-	 */
-	if (rt->u.dst.advmss > 65535-20)
-		rt->u.dst.advmss = 65535;
+	if (rta && rta[RTA_METRICS-1]) {
+		int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
+		struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
+
+		while (RTA_OK(attr, attrlen)) {
+			unsigned flavor = attr->rta_type;
+			if (flavor) {
+				if (flavor > RTAX_MAX) {
+					err = -EINVAL;
+					goto out;
+				}
+				rt->u.dst.metrics[flavor-1] =
+					*(u32 *)RTA_DATA(attr);
+			}
+			attr = RTA_NEXT(attr, attrlen);
+		}
+	}
+
+	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) {
+		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr))
+			rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
+				IPV6_DEFAULT_MCASTHOPS;
+		else
+			rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
+				ipv6_get_hoplimit(dev);
+	}
+
+	if (!rt->u.dst.metrics[RTAX_MTU-1])
+		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
+	if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
+		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
 	rt->u.dst.dev = dev;
-	return rt6_ins(rt, nlh);
+	return rt6_ins(rt, nlh, _rtattr);
 
 out:
 	if (dev)
@@ -874,7 +875,7 @@
 	return err;
 }
 
-int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh)
+int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
 {
 	int err;
 
@@ -886,13 +887,13 @@
 
 	dst_release(&rt->u.dst);
 
-	err = fib6_del(rt, nlh);
+	err = fib6_del(rt, nlh, _rtattr);
 	write_unlock_bh(&rt6_lock);
 
 	return err;
 }
 
-int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh)
+static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
 {
 	struct fib6_node *fn;
 	struct rt6_info *rt;
@@ -919,7 +920,7 @@
 			dst_hold(&rt->u.dst);
 			read_unlock_bh(&rt6_lock);
 
-			return ip6_del_rt(rt, nlh);
+			return ip6_del_rt(rt, nlh, _rtattr);
 		}
 	}
 	read_unlock_bh(&rt6_lock);
@@ -1015,17 +1016,14 @@
 	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
 	nrt->rt6i_nexthop = neigh_clone(neigh);
 	/* Reset pmtu, it may be better */
-	nrt->u.dst.pmtu = ipv6_get_mtu(neigh->dev);
-	nrt->u.dst.advmss = max_t(unsigned int, nrt->u.dst.pmtu - 60, ip6_rt_min_advmss);
-	if (rt->u.dst.advmss > 65535-20)
-		rt->u.dst.advmss = 65535;
-	nrt->rt6i_hoplimit = ipv6_get_hoplimit(neigh->dev);
+	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
+	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&nrt->u.dst));
 
-	if (rt6_ins(nrt, NULL))
+	if (rt6_ins(nrt, NULL, NULL))
 		goto out;
 
 	if (rt->rt6i_flags&RTF_CACHE) {
-		ip6_del_rt(rt, NULL);
+		ip6_del_rt(rt, NULL, NULL);
 		return;
 	}
 
@@ -1060,7 +1058,7 @@
 	if (rt == NULL)
 		return;
 
-	if (pmtu >= rt->u.dst.pmtu)
+	if (pmtu >= dst_pmtu(&rt->u.dst))
 		goto out;
 
 	/* New mtu received -> path was valid.
@@ -1075,7 +1073,7 @@
 	   would return automatically.
 	 */
 	if (rt->rt6i_flags & RTF_CACHE) {
-		rt->u.dst.pmtu = pmtu;
+		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
 		dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
 		goto out;
@@ -1089,7 +1087,7 @@
 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
 		nrt = rt6_cow(rt, daddr, saddr);
 		if (!nrt->u.dst.error) {
-			nrt->u.dst.pmtu = pmtu;
+			nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
 			/* According to RFC 1981, detecting PMTU increase shouldn't be
 			   happened within 5 mins, the recommended timer is 10 mins.
 			   Here this route expiration time is set to ip6_rt_mtu_expires 
@@ -1098,8 +1096,8 @@
 			 */
 			dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
 			nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
-			dst_release(&nrt->u.dst);
 		}
+		dst_release(&nrt->u.dst);
 	} else {
 		nrt = ip6_rt_copy(rt);
 		if (nrt == NULL)
@@ -1110,8 +1108,8 @@
 		nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
 		dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
-		nrt->u.dst.pmtu = pmtu;
-		rt6_ins(nrt, NULL);
+		nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
+		rt6_ins(nrt, NULL, NULL);
 	}
 
 out:
@@ -1124,20 +1122,19 @@
 
 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
 {
-	struct rt6_info *rt;
+	struct rt6_info *rt = ip6_dst_alloc();
 
-	rt = dst_alloc(&ip6_dst_ops);
+	BUG_ON(ort->rt6i_flags & RTF_NDISC);
 
 	if (rt) {
 		rt->u.dst.input = ort->u.dst.input;
 		rt->u.dst.output = ort->u.dst.output;
 
-		memcpy(&rt->u.dst.mxlock, &ort->u.dst.mxlock, RTAX_MAX*sizeof(unsigned));
+		memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
 		rt->u.dst.dev = ort->u.dst.dev;
 		if (rt->u.dst.dev)
 			dev_hold(rt->u.dst.dev);
 		rt->u.dst.lastuse = jiffies;
-		rt->rt6i_hoplimit = ort->rt6i_hoplimit;
 		rt->rt6i_expires = 0;
 
 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
@@ -1184,7 +1181,7 @@
 
 	rtmsg.rtmsg_ifindex = dev->ifindex;
 
-	ip6_route_add(&rtmsg, NULL);
+	ip6_route_add(&rtmsg, NULL, NULL);
 	return rt6_get_dflt_router(gwaddr, dev);
 }
 
@@ -1210,7 +1207,7 @@
 
 			read_unlock_bh(&rt6_lock);
 
-			ip6_del_rt(rt, NULL);
+			ip6_del_rt(rt, NULL, NULL);
 
 			goto restart;
 		}
@@ -1236,10 +1233,10 @@
 		rtnl_lock();
 		switch (cmd) {
 		case SIOCADDRT:
-			err = ip6_route_add(&rtmsg, NULL);
+			err = ip6_route_add(&rtmsg, NULL, NULL);
 			break;
 		case SIOCDELRT:
-			err = ip6_route_del(&rtmsg, NULL);
+			err = ip6_route_del(&rtmsg, NULL, NULL);
 			break;
 		default:
 			err = -EINVAL;
@@ -1268,11 +1265,10 @@
  *	Add address
  */
 
-int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev)
+int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev, int anycast)
 {
-	struct rt6_info *rt;
+	struct rt6_info *rt = ip6_dst_alloc();
 
-	rt = dst_alloc(&ip6_dst_ops);
 	if (rt == NULL)
 		return -ENOMEM;
 
@@ -1280,14 +1276,14 @@
 	rt->u.dst.input = ip6_input;
 	rt->u.dst.output = ip6_output;
 	rt->rt6i_dev = dev_get_by_name("lo");
-	rt->u.dst.pmtu = ipv6_get_mtu(rt->rt6i_dev);
-	rt->u.dst.advmss = max_t(unsigned int, rt->u.dst.pmtu - 60, ip6_rt_min_advmss);
-	if (rt->u.dst.advmss > 65535-20)
-		rt->u.dst.advmss = 65535;
-	rt->rt6i_hoplimit = ipv6_get_hoplimit(rt->rt6i_dev);
+	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
+	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
+	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ipv6_get_hoplimit(rt->rt6i_dev);
 	rt->u.dst.obsolete = -1;
 
 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
+	if (!anycast)
+		rt->rt6i_flags |= RTF_LOCAL;
 	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
 	if (rt->rt6i_nexthop == NULL) {
 		dst_free((struct dst_entry *) rt);
@@ -1296,7 +1292,7 @@
 
 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
 	rt->rt6i_dst.plen = 128;
-	rt6_ins(rt, NULL);
+	rt6_ins(rt, NULL, NULL);
 
 	return 0;
 }
@@ -1313,129 +1309,13 @@
 	rt = rt6_lookup(addr, NULL, loopback_dev.ifindex, 1);
 	if (rt) {
 		if (rt->rt6i_dst.plen == 128)
-			err = ip6_del_rt(rt, NULL);
+			err = ip6_del_rt(rt, NULL, NULL);
 		else
 			dst_release(&rt->u.dst);
 	}
 
 	return err;
 }
-
-#ifdef CONFIG_RT6_POLICY
-
-static int rt6_flow_match_in(struct rt6_info *rt, struct sk_buff *skb)
-{
-	struct flow_filter *frule;
-	struct pkt_filter *filter;
-	int res = 1;
-
-	if ((frule = rt->rt6i_filter) == NULL)
-		goto out;
-
-	if (frule->type != FLR_INPUT) {
-		res = 0;
-		goto out;
-	}
-
-	for (filter = frule->u.filter; filter; filter = filter->next) {
-		__u32 *word;
-
-		word = (__u32 *) skb->h.raw;
-		word += filter->offset;
-
-		if ((*word ^ filter->value) & filter->mask) {
-			res = 0;
-			break;
-		}
-	}
-
-out:
-	return res;
-}
-
-static int rt6_flow_match_out(struct rt6_info *rt, struct sock *sk)
-{
-	struct flow_filter *frule;
-	int res = 1;
-
-	if ((frule = rt->rt6i_filter) == NULL)
-		goto out;
-
-	if (frule->type != FLR_INPUT) {
-		res = 0;
-		goto out;
-	}
-
-	if (frule->u.sk != sk)
-		res = 0;
-out:
-	return res;
-}
-
-static struct rt6_info *rt6_flow_lookup(struct rt6_info *rt,
-					struct in6_addr *daddr,
-					struct in6_addr *saddr,
-					struct fl_acc_args *args)
-{
-	struct flow_rule *frule;
-	struct rt6_info *nrt = NULL;
-	struct pol_chain *pol;
-
-	for (pol = rt6_pol_list; pol; pol = pol->next) {
-		struct fib6_node *fn;
-		struct rt6_info *sprt;
-
-		fn = fib6_lookup(pol->rules, daddr, saddr);
-
-		do {
-			for (sprt = fn->leaf; sprt; sprt=sprt->u.next) {
-				int res;
-
-				frule = sprt->rt6i_flowr;
-#if RT6_DEBUG >= 2
-				if (frule == NULL) {
-					printk(KERN_DEBUG "NULL flowr\n");
-					goto error;
-				}
-#endif
-				res = frule->ops->accept(rt, sprt, args, &nrt);
-
-				switch (res) {
-				case FLOWR_SELECT:
-					goto found;
-				case FLOWR_CLEAR:
-					goto next_policy;
-				case FLOWR_NODECISION:
-					break;
-				default:
-					goto error;
-				};
-			}
-
-			fn = fn->parent;
-
-		} while ((fn->fn_flags & RTN_TL_ROOT) == 0);
-
-	next_policy:
-	}
-
-error:
-	dst_hold(&ip6_null_entry.u.dst);
-	return &ip6_null_entry;
-
-found:
-	if (nrt == NULL)
-		goto error;
-
-	nrt->rt6i_flags |= RTF_CACHE;
-	dst_hold(&nrt->u.dst);
-	err = rt6_ins(nrt, NULL);
-	if (err)
-		nrt->u.dst.error = err;
-	return nrt;
-}
-#endif
-
 static int fib6_ifdown(struct rt6_info *rt, void *arg)
 {
 	if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
@@ -1487,14 +1367,12 @@
 	   PMTU discouvery. 
 	 */
 	if (rt->rt6i_dev == arg->dev &&
-	    !(rt->u.dst.mxlock&(1<<RTAX_MTU)) &&
-	      (rt->u.dst.pmtu > arg->mtu ||
-	       (rt->u.dst.pmtu < arg->mtu &&
-		rt->u.dst.pmtu == idev->cnf.mtu6)))
-		rt->u.dst.pmtu = arg->mtu;
-	rt->u.dst.advmss = max_t(unsigned int, arg->mtu - 60, ip6_rt_min_advmss);
-	if (rt->u.dst.advmss > 65535-20)
-		rt->u.dst.advmss = 65535;
+	    !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
+            (dst_pmtu(&rt->u.dst) > arg->mtu ||
+             (dst_pmtu(&rt->u.dst) < arg->mtu &&
+	      dst_pmtu(&rt->u.dst) == idev->cnf.mtu6)))
+		rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
+	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
 	return 0;
 }
 
@@ -1556,7 +1434,7 @@
 
 	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
 		return -EINVAL;
-	return ip6_route_del(&rtmsg, nlh);
+	return ip6_route_del(&rtmsg, nlh, arg);
 }
 
 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
@@ -1566,7 +1444,7 @@
 
 	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
 		return -EINVAL;
-	return ip6_route_add(&rtmsg, nlh);
+	return ip6_route_add(&rtmsg, nlh, arg);
 }
 
 struct rt6_rtnl_dump_arg
@@ -1642,7 +1520,7 @@
 		if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
 			RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
 	}
-	if (rtnetlink_put_metrics(skb, &rt->u.dst.mxlock) < 0)
+	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
 		goto rtattr_failure;
 	if (rt->u.dst.neighbour)
 		RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
@@ -1798,15 +1676,13 @@
 	skb->mac.raw = skb->data;
 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
 
-	fl.proto = 0;
-	fl.nl_u.ip6_u.daddr = NULL;
-	fl.nl_u.ip6_u.saddr = NULL;
-	fl.uli_u.icmpt.type = 0;
-	fl.uli_u.icmpt.code = 0;
+	memset(&fl, 0, sizeof(fl));
 	if (rta[RTA_SRC-1])
-		fl.nl_u.ip6_u.saddr = (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]);
+		ipv6_addr_copy(&fl.fl6_src,
+			       (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
 	if (rta[RTA_DST-1])
-		fl.nl_u.ip6_u.daddr = (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]);
+		ipv6_addr_copy(&fl.fl6_dst,
+			       (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
 
 	if (rta[RTA_IIF-1])
 		memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
@@ -1830,8 +1706,7 @@
 
 	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
 	err = rt6_fill_node(skb, rt, 
-			    fl.nl_u.ip6_u.daddr,
-			    fl.nl_u.ip6_u.saddr,
+			    &fl.fl6_dst, &fl.fl6_src,
 			    iif,
 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
 			    nlh->nlmsg_seq, nlh, 0);
@@ -2043,7 +1918,6 @@
 
 #endif
 
-
 void __init ip6_route_init(void)
 {
 	ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
@@ -2055,6 +1929,9 @@
 	proc_net_create("ipv6_route", 0, rt6_proc_info);
 	proc_net_create("rt6_stats", 0, rt6_proc_stats);
 #endif
+#ifdef CONFIG_XFRM
+	xfrm6_init();
+#endif
 }
 
 #ifdef MODULE
@@ -2064,8 +1941,11 @@
 	proc_net_remove("ipv6_route");
 	proc_net_remove("rt6_stats");
 #endif
-
+#ifdef CONFIG_XFRM
+	xfrm6_fini();
+#endif
 	rt6_ifdown(NULL);
 	fib6_gc_cleanup();
+	kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
 }
 #endif	/* MODULE */
Index: net/ipv6/sit.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv6/sit.c,v
retrieving revision 1.1.1.24
retrieving revision 1.1.1.24.2.1
diff -u -r1.1.1.24 -r1.1.1.24.2.1
--- a/net/ipv6/sit.c	28 Nov 2003 18:26:21 -0000	1.1.1.24
+++ b/net/ipv6/sit.c	16 Apr 2004 13:16:26 -0000	1.1.1.24.2.1
@@ -49,6 +49,7 @@
 #include <net/icmp.h>
 #include <net/ipip.h>
 #include <net/inet_ecn.h>
+#include <net/xfrm.h>
 
 /*
    This version of net/ipv6/sit.c is cloned of net/ipv4/ip_gre.c
@@ -392,6 +393,7 @@
 
 	read_lock(&ipip6_lock);
 	if ((tunnel = ipip6_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
+		secpath_reset(skb);
 		skb->mac.raw = skb->nh.raw;
 		skb->nh.raw = skb->data;
 		memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
@@ -422,13 +424,6 @@
 	return 0;
 }
 
-/* Need this wrapper because NF_HOOK takes the function address */
-static inline int do_ip_send(struct sk_buff *skb)
-{
-	return ip_send(skb);
-}
-
-
 /* Returns the embedded IPv4 address if the IPv6 address
    comes from 6to4 (draft-ietf-ngtrans-6to4-04) addr space */
 
@@ -501,9 +496,17 @@
 		dst = addr6->s6_addr32[3];
 	}
 
-	if (ip_route_output(&rt, dst, tiph->saddr, RT_TOS(tos), tunnel->parms.link)) {
-		tunnel->stat.tx_carrier_errors++;
-		goto tx_error_icmp;
+	{
+		struct flowi fl = { .nl_u = { .ip4_u =
+					      { .daddr = dst,
+						.saddr = tiph->saddr,
+						.tos = RT_TOS(tos) } },
+				    .oif = tunnel->parms.link,
+				    .proto = IPPROTO_IPV6 };
+		if (ip_route_output_key(&rt, &fl)) {
+			tunnel->stat.tx_carrier_errors++;
+			goto tx_error_icmp;
+		}
 	}
 	if (rt->rt_type != RTN_UNICAST) {
 		tunnel->stat.tx_carrier_errors++;
@@ -518,9 +521,9 @@
 	}
 
 	if (tiph->frag_off)
-		mtu = rt->u.dst.pmtu - sizeof(struct iphdr);
+		mtu = dst_pmtu(&rt->u.dst) - sizeof(struct iphdr);
 	else
-		mtu = skb->dst ? skb->dst->pmtu : dev->mtu;
+		mtu = skb->dst ? dst_pmtu(skb->dst) : dev->mtu;
 
 	if (mtu < 68) {
 		tunnel->stat.collisions++;
@@ -529,15 +532,9 @@
 	}
 	if (mtu < IPV6_MIN_MTU)
 		mtu = IPV6_MIN_MTU;
-	if (skb->dst && mtu < skb->dst->pmtu) {
-		struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
-		if (mtu < rt6->u.dst.pmtu) {
-			if (tunnel->parms.iph.daddr || rt6->rt6i_dst.plen == 128) {
-				rt6->rt6i_flags |= RTF_MODIFIED;
-				rt6->u.dst.pmtu = mtu;
-			}
-		}
-	}
+	if (tunnel->parms.iph.daddr && skb->dst)
+		skb->dst->ops->update_pmtu(skb->dst, mtu);
+
 	if (skb->len > mtu) {
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
 		ip_rt_put(rt);
@@ -555,7 +552,7 @@
 	/*
 	 * Okay, now see if we can stuff it in the buffer as-is.
 	 */
-	max_headroom = (((tdev->hard_header_len+15)&~15)+sizeof(struct iphdr));
+	max_headroom = LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr);
 
 	if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
 		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
@@ -776,8 +773,14 @@
 	ipip6_tunnel_init_gen(dev);
 
 	if (iph->daddr) {
+		struct flowi fl = { .nl_u = { .ip4_u =
+					      { .daddr = iph->daddr,
+						.saddr = iph->saddr,
+						.tos = RT_TOS(iph->tos) } },
+				    .oif = tunnel->parms.link,
+				    .proto = IPPROTO_IPV6 };
 		struct rtable *rt;
-		if (!ip_route_output(&rt, iph->daddr, iph->saddr, RT_TOS(iph->tos), tunnel->parms.link)) {
+		if (!ip_route_output_key(&rt, &fl)) {
 			tdev = rt->u.dst.dev;
 			ip_rt_put(rt);
 		}
@@ -834,19 +837,14 @@
 }
 
 static struct inet_protocol sit_protocol = {
-	ipip6_rcv,
-	ipip6_err,
-	0,
-	IPPROTO_IPV6,
-	0,
-	NULL,
-	"IPv6"
+	.handler	=	ipip6_rcv,
+	.err_handler	=	ipip6_err,
 };
 
 #ifdef MODULE
 void sit_cleanup(void)
 {
-	inet_del_protocol(&sit_protocol);
+	inet_del_protocol(&sit_protocol, IPPROTO_IPV6);
 	unregister_netdev(&ipip6_fb_tunnel_dev);
 }
 #endif
@@ -855,9 +853,13 @@
 {
 	printk(KERN_INFO "IPv6 over IPv4 tunneling driver\n");
 
+	if (inet_add_protocol(&sit_protocol, IPPROTO_IPV6) < 0) {
+		printk(KERN_INFO "sit init: Can't add protocol\n");
+		return -EAGAIN;
+	}
+
 	ipip6_fb_tunnel_dev.priv = (void*)&ipip6_fb_tunnel;
 	strcpy(ipip6_fb_tunnel_dev.name, ipip6_fb_tunnel.parms.name);
 	register_netdev(&ipip6_fb_tunnel_dev);
-	inet_add_protocol(&sit_protocol);
 	return 0;
 }
Index: net/ipv6/tcp_ipv6.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv6/tcp_ipv6.c,v
retrieving revision 1.1.1.30
retrieving revision 1.1.1.30.2.1
diff -u -r1.1.1.30 -r1.1.1.30.2.1
--- a/net/ipv6/tcp_ipv6.c	14 Apr 2004 13:05:41 -0000	1.1.1.30
+++ b/net/ipv6/tcp_ipv6.c	16 Apr 2004 13:16:26 -0000	1.1.1.30.2.1
@@ -38,6 +38,7 @@
 #include <linux/init.h>
 #include <linux/jhash.h>
 #include <linux/ipsec.h>
+#include <net/xfrm.h>
 
 #include <linux/ipv6.h>
 #include <linux/icmpv6.h>
@@ -553,7 +554,6 @@
 	struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
 	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
 	struct in6_addr *saddr = NULL;
-	struct in6_addr saddr_buf;
 	struct flowi fl;
 	struct dst_entry *dst;
 	int addr_type;
@@ -565,7 +565,8 @@
 	if (usin->sin6_family != AF_INET6) 
 		return(-EAFNOSUPPORT);
 
-	fl.fl6_flowlabel = 0;
+	memset(&fl, 0, sizeof(fl));
+
 	if (np->sndflow) {
 		fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
 		IP6_ECN_flow_init(fl.fl6_flowlabel);
@@ -659,43 +660,45 @@
 		saddr = &np->rcv_saddr;
 
 	fl.proto = IPPROTO_TCP;
-	fl.fl6_dst = &np->daddr;
-	fl.fl6_src = saddr;
+	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+	ipv6_addr_copy(&fl.fl6_src,
+		       (saddr ? saddr : &np->saddr));
 	fl.oif = sk->bound_dev_if;
-	fl.uli_u.ports.dport = usin->sin6_port;
-	fl.uli_u.ports.sport = sk->sport;
+	fl.fl_ip_dport = usin->sin6_port;
+	fl.fl_ip_sport = sk->sport;
 
 	if (np->opt && np->opt->srcrt) {
 		struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
-		fl.nl_u.ip6_u.daddr = rt0->addr;
+		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
 	}
 
-	dst = ip6_route_output(sk, &fl);
+	err = ip6_dst_lookup(sk, &dst, &fl);
 
-	if ((err = dst->error) != 0) {
-		dst_release(dst);
+	if (err)
 		goto failure;
-	}
-
-	ip6_dst_store(sk, dst, NULL);
-	sk->route_caps = dst->dev->features&~NETIF_F_IP_CSUM;
 
 	if (saddr == NULL) {
-		err = ipv6_get_saddr(dst, &np->daddr, &saddr_buf);
-		if (err)
-			goto failure;
-
-		saddr = &saddr_buf;
+		saddr = &fl.fl6_src;
+		ipv6_addr_copy(&np->rcv_saddr, saddr);
 	}
 
 	/* set the source address */
-	ipv6_addr_copy(&np->rcv_saddr, saddr);
 	ipv6_addr_copy(&np->saddr, saddr);
 	sk->rcv_saddr= LOOPBACK4_IPV6;
 
+	ip6_dst_store(sk, dst, NULL);
+	sk->route_caps = dst->dev->features &
+		~(NETIF_F_IP_CSUM
+#ifdef NETIF_F_TSO
+		  | NETIF_F_TSO
+#endif
+			);
+
 	tp->ext_header_len = 0;
 	if (np->opt)
 		tp->ext_header_len = np->opt->opt_flen+np->opt->opt_nflen;
+	tp->ext2_header_len = dst->header_len;
+
 	tp->mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
 
 	sk->dport = usin->sin6_port;
@@ -717,8 +720,8 @@
 
 late_failure:
 	tcp_set_state(sk, TCP_CLOSE); 
-failure:
 	__sk_dst_reset(sk);
+failure:
 	sk->dport = 0;
 	sk->route_caps = 0;
 	return err;
@@ -781,21 +784,23 @@
 			   to handle rthdr case. Ignore this complexity
 			   for now.
 			 */
+			memset(&fl, 0, sizeof(fl));
 			fl.proto = IPPROTO_TCP;
-			fl.nl_u.ip6_u.daddr = &np->daddr;
-			fl.nl_u.ip6_u.saddr = &np->saddr;
+			ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
 			fl.oif = sk->bound_dev_if;
-			fl.uli_u.ports.dport = sk->dport;
-			fl.uli_u.ports.sport = sk->sport;
+			fl.fl_ip_dport = sk->dport;
+			fl.fl_ip_sport = sk->sport;
 
-			dst = ip6_route_output(sk, &fl);
+			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
+				sk->err_soft = -err;
+				goto out;
+			}
 		} else
 			dst_hold(dst);
 
-		if (dst->error) {
-			sk->err_soft = -dst->error;
-		} else if (tp->pmtu_cookie > dst->pmtu) {
-			tcp_sync_mss(sk, dst->pmtu);
+		if (tp->pmtu_cookie > dst_pmtu(dst)) {
+			tcp_sync_mss(sk, dst_pmtu(dst));
 			tcp_simple_retransmit(sk);
 		} /* else let the usual retransmit timer handle it */
 		dst_release(dst);
@@ -865,13 +870,14 @@
 	struct flowi fl;
 	int err = -1;
 
+	memset(&fl, 0, sizeof(fl));
 	fl.proto = IPPROTO_TCP;
-	fl.nl_u.ip6_u.daddr = &req->af.v6_req.rmt_addr;
-	fl.nl_u.ip6_u.saddr = &req->af.v6_req.loc_addr;
+	ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
+	ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
 	fl.fl6_flowlabel = 0;
 	fl.oif = req->af.v6_req.iif;
-	fl.uli_u.ports.dport = req->rmt_port;
-	fl.uli_u.ports.sport = sk->sport;
+	fl.fl_ip_dport = req->rmt_port;
+	fl.fl_ip_sport = sk->sport;
 
 	if (dst == NULL) {
 		opt = sk->net_pinfo.af_inet6.opt;
@@ -886,11 +892,11 @@
 
 		if (opt && opt->srcrt) {
 			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
-			fl.nl_u.ip6_u.daddr = rt0->addr;
+			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
 		}
 
-		dst = ip6_route_output(sk, &fl);
-		if (dst->error)
+		err = ip6_dst_lookup(sk, &dst, &fl);
+		if (err)
 			goto done;
 	}
 
@@ -902,7 +908,7 @@
 					 &req->af.v6_req.loc_addr, &req->af.v6_req.rmt_addr,
 					 csum_partial((char *)th, skb->len, skb->csum));
 
-		fl.nl_u.ip6_u.daddr = &req->af.v6_req.rmt_addr;
+		ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
 		err = ip6_xmit(sk, skb, &fl, opt);
 		if (err == NET_XMIT_CN)
 			err = 0;
@@ -970,7 +976,7 @@
 	if (th->rst)
 		return;
 
-	if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr))
+	if (!ipv6_unicast_destination(skb))
 		return; 
 
 	/*
@@ -1003,24 +1009,21 @@
 
 	buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
 
-	fl.nl_u.ip6_u.daddr = &skb->nh.ipv6h->saddr;
-	fl.nl_u.ip6_u.saddr = &skb->nh.ipv6h->daddr;
-	fl.fl6_flowlabel = 0;
+	memset(&fl, 0, sizeof(fl));
+	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
+	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
 
-	t1->check = csum_ipv6_magic(fl.nl_u.ip6_u.saddr,
-				    fl.nl_u.ip6_u.daddr, 
+	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
 				    sizeof(*t1), IPPROTO_TCP,
 				    buff->csum);
 
 	fl.proto = IPPROTO_TCP;
 	fl.oif = tcp_v6_iif(skb);
-	fl.uli_u.ports.dport = t1->dest;
-	fl.uli_u.ports.sport = t1->source;
+	fl.fl_ip_dport = t1->dest;
+	fl.fl_ip_sport = t1->source;
 
 	/* sk = NULL, but it is safe for now. RST socket required. */
-	buff->dst = ip6_route_output(NULL, &fl);
-
-	if (buff->dst->error == 0) {
+	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
 		ip6_xmit(NULL, buff, &fl, NULL);
 		TCP_INC_STATS_BH(TcpOutSegs);
 		TCP_INC_STATS_BH(TcpOutRsts);
@@ -1070,23 +1073,20 @@
 
 	buff->csum = csum_partial((char *)t1, tot_len, 0);
 
-	fl.nl_u.ip6_u.daddr = &skb->nh.ipv6h->saddr;
-	fl.nl_u.ip6_u.saddr = &skb->nh.ipv6h->daddr;
-	fl.fl6_flowlabel = 0;
+	memset(&fl, 0, sizeof(fl));
+	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
+	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
 
-	t1->check = csum_ipv6_magic(fl.nl_u.ip6_u.saddr,
-				    fl.nl_u.ip6_u.daddr, 
+	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
 				    tot_len, IPPROTO_TCP,
 				    buff->csum);
 
 	fl.proto = IPPROTO_TCP;
 	fl.oif = tcp_v6_iif(skb);
-	fl.uli_u.ports.dport = t1->dest;
-	fl.uli_u.ports.sport = t1->source;
+	fl.fl_ip_dport = t1->dest;
+	fl.fl_ip_sport = t1->source;
 
-	buff->dst = ip6_route_output(NULL, &fl);
-
-	if (buff->dst->error == 0) {
+	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
 		ip6_xmit(NULL, buff, &fl, NULL);
 		TCP_INC_STATS_BH(TcpOutSegs);
 		return;
@@ -1177,8 +1177,7 @@
 	if (skb->protocol == htons(ETH_P_IP))
 		return tcp_v4_conn_request(sk, skb);
 
-	/* FIXME: do the same check for anycast */
-	if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr))
+	if (!ipv6_unicast_destination(skb))
 		goto drop; 
 
 	/*
@@ -1248,7 +1247,6 @@
 					  struct dst_entry *dst)
 {
 	struct ipv6_pinfo *np;
-	struct flowi fl;
 	struct tcp_opt *newtp;
 	struct sock *newsk;
 	struct ipv6_txoptions *opt;
@@ -1310,23 +1308,23 @@
 	}
 
 	if (dst == NULL) {
+		struct flowi fl;
+
+		memset(&fl, 0, sizeof(fl));
 		fl.proto = IPPROTO_TCP;
-		fl.nl_u.ip6_u.daddr = &req->af.v6_req.rmt_addr;
+		ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
 		if (opt && opt->srcrt) {
 			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
-			fl.nl_u.ip6_u.daddr = rt0->addr;
+			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
 		}
-		fl.nl_u.ip6_u.saddr = &req->af.v6_req.loc_addr;
-		fl.fl6_flowlabel = 0;
+		ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
 		fl.oif = sk->bound_dev_if;
-		fl.uli_u.ports.dport = req->rmt_port;
-		fl.uli_u.ports.sport = sk->sport;
-
-		dst = ip6_route_output(sk, &fl);
-	}
+		fl.fl_ip_dport = req->rmt_port;
+		fl.fl_ip_sport = sk->sport;
 
-	if (dst->error)
-		goto out;
+		if (ip6_dst_lookup(sk, &dst, &fl))
+			goto out;
+	} 
 
 	newsk = tcp_create_openreq_child(sk, req, skb);
 	if (newsk == NULL)
@@ -1339,7 +1337,12 @@
 	MOD_INC_USE_COUNT;
 
 	ip6_dst_store(newsk, dst, NULL);
-	sk->route_caps = dst->dev->features&~NETIF_F_IP_CSUM;
+	newsk->route_caps = dst->dev->features&
+		~(NETIF_F_IP_CSUM
+#ifdef NETIF_F_TSO
+		  | NETIF_F_TSO
+#endif
+		  );
 
 	newtp = &(newsk->tp_pinfo.af_tcp);
 
@@ -1387,8 +1390,10 @@
 	if (np->opt)
 		newtp->ext_header_len = np->opt->opt_nflen + np->opt->opt_flen;
 
-	tcp_sync_mss(newsk, dst->pmtu);
-	newtp->advmss = dst->advmss;
+	newtp->ext2_header_len = dst->header_len;
+
+	tcp_sync_mss(newsk, dst_pmtu(dst));
+	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
 	tcp_initialize_rcv_mss(newsk);
 
 	newsk->daddr	= LOOPBACK4_IPV6;
@@ -1557,8 +1562,9 @@
 	return 0;
 }
 
-int tcp_v6_rcv(struct sk_buff *skb)
+static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
 {
+	struct sk_buff *skb = *pskb;
 	struct tcphdr *th;	
 	struct sock *sk;
 	int ret;
@@ -1601,11 +1607,12 @@
 		goto no_tcp_socket;
 
 process:
-	if(!ipsec_sk_policy(sk,skb))
-		goto discard_and_relse;
 	if(sk->state == TCP_TIME_WAIT)
 		goto do_time_wait;
 
+	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
+		goto discard_and_relse;
+
 	if (sk_filter(sk, skb, 0))
 		goto discard_and_relse;
 		
@@ -1621,9 +1628,12 @@
 	bh_unlock_sock(sk);
 
 	sock_put(sk);
-	return ret;
+	return ret ? -1 : 0;
 
 no_tcp_socket:
+	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
+		goto discard_it;	/* no socket on this path, so nothing to sock_put() */
+
 	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
 bad_packet:
 		TCP_INC_STATS_BH(TcpInErrs);
@@ -1645,6 +1655,10 @@
 	goto discard_it;
 
 do_time_wait:
+ 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+		sock_put(sk);
+ 		goto discard_it;
+	} 
 	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
 		TCP_INC_STATS_BH(TcpInErrs);
 		tcp_tw_put((struct tcp_tw_bucket *) sk);	
@@ -1688,30 +1702,35 @@
 	if (dst == NULL) {
 		struct flowi fl;
 
+		memset(&fl, 0, sizeof(fl));
 		fl.proto = IPPROTO_TCP;
-		fl.nl_u.ip6_u.daddr = &np->daddr;
-		fl.nl_u.ip6_u.saddr = &np->saddr;
+		ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
 		fl.fl6_flowlabel = np->flow_label;
 		fl.oif = sk->bound_dev_if;
-		fl.uli_u.ports.dport = sk->dport;
-		fl.uli_u.ports.sport = sk->sport;
+		fl.fl_ip_dport = sk->dport;
+		fl.fl_ip_sport = sk->sport;
 
 		if (np->opt && np->opt->srcrt) {
 			struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
-			fl.nl_u.ip6_u.daddr = rt0->addr;
+			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
 		}
 
-		dst = ip6_route_output(sk, &fl);
+		err = ip6_dst_lookup(sk, &dst, &fl);
 
-		if (dst->error) {
-			err = dst->error;
-			dst_release(dst);
+		if (err) {
 			sk->route_caps = 0;
 			return err;
 		}
 
 		ip6_dst_store(sk, dst, NULL);
-		sk->route_caps = dst->dev->features&~NETIF_F_IP_CSUM;
+		sk->route_caps = dst->dev->features&
+			~(NETIF_F_IP_CSUM
+#ifdef NETIF_F_TSO
+			  | NETIF_F_TSO
+#endif
+				);
+		tcp_sk(sk)->ext2_header_len = dst->header_len;
 	}
 
 	return 0;
@@ -1724,38 +1743,45 @@
 	struct flowi fl;
 	struct dst_entry *dst;
 
+	memset(&fl, 0, sizeof(fl));
 	fl.proto = IPPROTO_TCP;
-	fl.fl6_dst = &np->daddr;
-	fl.fl6_src = &np->saddr;
+	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+	ipv6_addr_copy(&fl.fl6_src, &np->saddr);
 	fl.fl6_flowlabel = np->flow_label;
 	IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
 	fl.oif = sk->bound_dev_if;
-	fl.uli_u.ports.sport = sk->sport;
-	fl.uli_u.ports.dport = sk->dport;
+	fl.fl_ip_sport = sk->sport;
+	fl.fl_ip_dport = sk->dport;
 
 	if (np->opt && np->opt->srcrt) {
 		struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
-		fl.nl_u.ip6_u.daddr = rt0->addr;
+		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
 	}
 
 	dst = __sk_dst_check(sk, np->dst_cookie);
 
 	if (dst == NULL) {
-		dst = ip6_route_output(sk, &fl);
+		int err = ip6_dst_lookup(sk, &dst, &fl);
 
-		if (dst->error) {
-			sk->err_soft = -dst->error;
-			dst_release(dst);
-			return -sk->err_soft;
+		if (err) {
+			sk->err_soft = -err;
+			return err;
 		}
 
 		ip6_dst_store(sk, dst, NULL);
+		sk->route_caps = dst->dev->features &
+			~(NETIF_F_IP_CSUM
+#ifdef NETIF_F_TSO
+			  | NETIF_F_TSO
+#endif
+				);
+		tcp_sk(sk)->ext2_header_len = dst->header_len;
 	}
 
 	skb->dst = dst_clone(dst);
 
 	/* Restore final destination back after routing done */
-	fl.nl_u.ip6_u.daddr = &np->daddr;
+	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
 
 	return ip6_xmit(sk, skb, &fl, np->opt);
 }
@@ -1865,6 +1891,7 @@
 static int tcp_v6_destroy_sock(struct sock *sk)
 {
 	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+	struct inet_opt *inet = inet_sk(sk);
 
 	tcp_clear_xmit_timers(sk);
 
@@ -1882,8 +1909,8 @@
 		tcp_put_port(sk);
 
 	/* If sendmsg cached page exists, toss it. */
-	if (tp->sndmsg_page != NULL)
-		__free_page(tp->sndmsg_page);
+	if (inet->sndmsg_page != NULL)
+		__free_page(inet->sndmsg_page);
 
 	atomic_dec(&tcp_sockets_allocated);
 
@@ -2143,15 +2170,10 @@
 	get_port:	tcp_v6_get_port,
 };
 
-static struct inet6_protocol tcpv6_protocol =
-{
-	tcp_v6_rcv,		/* TCP handler		*/
-	tcp_v6_err,		/* TCP error control	*/
-	NULL,			/* next			*/
-	IPPROTO_TCP,		/* protocol ID		*/
-	0,			/* copy			*/
-	NULL,			/* data			*/
-	"TCPv6"			/* name			*/
+static struct inet6_protocol tcpv6_protocol = {
+	.handler	=	tcp_v6_rcv,
+	.err_handler	=	tcp_v6_err,
+	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
 };
 
 extern struct proto_ops inet6_stream_ops;
@@ -2169,6 +2191,7 @@
 void __init tcpv6_init(void)
 {
 	/* register inet6 protocol */
-	inet6_add_protocol(&tcpv6_protocol);
+	if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
+		printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
 	inet6_register_protosw(&tcpv6_protosw);
 }
Index: net/ipv6/udp.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv6/udp.c,v
retrieving revision 1.1.1.26
retrieving revision 1.1.1.26.2.1
diff -u -r1.1.1.26 -r1.1.1.26.2.1
--- a/net/ipv6/udp.c	14 Apr 2004 13:05:41 -0000	1.1.1.26
+++ b/net/ipv6/udp.c	16 Apr 2004 13:16:26 -0000	1.1.1.26.2.1
@@ -14,6 +14,7 @@
  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
  *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
  *					a single port at the same time.
+ *      Kazunori MIYAZAWA @USAGI:       change process style to use ip6_append_data
  *
  *	This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
@@ -50,6 +51,7 @@
 #include <net/inet_common.h>
 
 #include <net/checksum.h>
+#include <net/xfrm.h>
 
 struct udp_mib udp_stats_in6[NR_CPUS*2];
 
@@ -226,7 +228,6 @@
 	struct sockaddr_in6	*usin = (struct sockaddr_in6 *) uaddr;
 	struct ipv6_pinfo      	*np = &sk->net_pinfo.af_inet6;
 	struct in6_addr		*daddr;
-	struct in6_addr		saddr;
 	struct dst_entry	*dst;
 	struct flowi		fl;
 	struct ip6_flowlabel	*flowlabel = NULL;
@@ -246,7 +247,7 @@
 	if (usin->sin6_family != AF_INET6) 
 	  	return -EAFNOSUPPORT;
 
-	fl.fl6_flowlabel = 0;
+	memset(&fl, 0, sizeof(fl));
 	if (np->sndflow) {
 		fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
 		if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
@@ -271,9 +272,10 @@
 	if (addr_type == IPV6_ADDR_MAPPED) {
 		struct sockaddr_in sin;
 
-		if (__ipv6_only_sock(sk))
-			return -ENETUNREACH;
-
+		if (__ipv6_only_sock(sk)) {
+			err = -ENETUNREACH;
+			goto out;
+		}
 		sin.sin_family = AF_INET;
 		sin.sin_addr.s_addr = daddr->s6_addr32[3];
 		sin.sin_port = usin->sin6_port;
@@ -281,8 +283,8 @@
 		err = udp_connect(sk, (struct sockaddr*) &sin, sizeof(sin));
 
 ipv4_connected:
-		if (err < 0)
-			return err;
+		if (err)
+			goto out;
 		
 		ipv6_addr_set(&np->daddr, 0, 0, 
 			      htonl(0x0000ffff),
@@ -299,15 +301,15 @@
 				      htonl(0x0000ffff),
 				      sk->rcv_saddr);
 		}
-		return 0;
+		goto out;
 	}
 
 	if (addr_type&IPV6_ADDR_LINKLOCAL) {
 		if (addr_len >= sizeof(struct sockaddr_in6) &&
 		    usin->sin6_scope_id) {
 			if (sk->bound_dev_if && sk->bound_dev_if != usin->sin6_scope_id) {
-				fl6_sock_release(flowlabel);
-				return -EINVAL;
+				err = -EINVAL;
+				goto out;
 			}
 			sk->bound_dev_if = usin->sin6_scope_id;
 			if (!sk->bound_dev_if && (addr_type&IPV6_ADDR_MULTICAST))
@@ -315,8 +317,10 @@
 		}
 
 		/* Connect to link-local address requires an interface */
-		if (sk->bound_dev_if == 0)
-			return -EINVAL;
+		if (sk->bound_dev_if == 0) {
+			err = -EINVAL;
+			goto out;
+		}
 	}
 
 	ipv6_addr_copy(&np->daddr, daddr);
@@ -330,11 +334,11 @@
 	 */
 
 	fl.proto = IPPROTO_UDP;
-	fl.fl6_dst = &np->daddr;
-	fl.fl6_src = &saddr;
+	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+	ipv6_addr_copy(&fl.fl6_src, &np->saddr);
 	fl.oif = sk->bound_dev_if;
-	fl.uli_u.ports.dport = sk->dport;
-	fl.uli_u.ports.sport = sk->sport;
+	fl.fl_ip_dport = sk->dport;
+	fl.fl_ip_sport = sk->sport;
 
 	if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST))
 		fl.oif = np->mcast_oif;
@@ -342,37 +346,33 @@
 	if (flowlabel) {
 		if (flowlabel->opt && flowlabel->opt->srcrt) {
 			struct rt0_hdr *rt0 = (struct rt0_hdr *) flowlabel->opt->srcrt;
-			fl.fl6_dst = rt0->addr;
+			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
 		}
 	} else if (np->opt && np->opt->srcrt) {
 		struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
-		fl.fl6_dst = rt0->addr;
+		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
 	}
 
-	dst = ip6_route_output(sk, &fl);
-
-	if ((err = dst->error) != 0) {
-		dst_release(dst);
-		fl6_sock_release(flowlabel);
-		return err;
-	}
+	err = ip6_dst_lookup(sk, &dst, &fl);
+	if (err)
+		goto out;
 
-	ip6_dst_store(sk, dst, fl.fl6_dst);
+	/* source address lookup done in ip6_dst_lookup */
 
-	/* get the source adddress used in the apropriate device */
+	if (ipv6_addr_any(&np->saddr))
+		ipv6_addr_copy(&np->saddr, &fl.fl6_src);
 
-	err = ipv6_get_saddr(dst, daddr, &saddr);
+	if (ipv6_addr_any(&np->rcv_saddr)) {
+		ipv6_addr_copy(&np->rcv_saddr, &fl.fl6_src);
+		sk->rcv_saddr = LOOPBACK4_IPV6;
+	}
 
-	if (err == 0) {
-		if(ipv6_addr_any(&np->saddr))
-			ipv6_addr_copy(&np->saddr, &saddr);
+	ip6_dst_store(sk, dst,
+		      !ipv6_addr_cmp(&fl.fl6_dst, &np->daddr) ?
+		      &np->daddr : NULL);
 
-		if(ipv6_addr_any(&np->rcv_saddr)) {
-			ipv6_addr_copy(&np->rcv_saddr, &saddr);
-			sk->rcv_saddr = LOOPBACK4_IPV6;
-		}
-		sk->state = TCP_ESTABLISHED;
-	}
+	sk->state = TCP_ESTABLISHED;
+out:
 	fl6_sock_release(flowlabel);
 
 	return err;
@@ -521,6 +521,11 @@
 
 static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 {
+	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
+		kfree_skb(skb);
+		return -1;
+	}
+
 #if defined(CONFIG_FILTER)
 	if (sk->filter && skb->ip_summed != CHECKSUM_UNNECESSARY) {
 		if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) {
@@ -617,8 +622,9 @@
 	read_unlock(&udp_hash_lock);
 }
 
-int udpv6_rcv(struct sk_buff *skb)
+static int udpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
 {
+	struct sk_buff *skb = *pskb;
 	struct sock *sk;
   	struct udphdr *uh;
 	struct net_device *dev = skb->dev;
@@ -685,6 +691,9 @@
 	sk = udp_v6_lookup(saddr, uh->source, daddr, uh->dest, dev->ifindex);
 
 	if (sk == NULL) {
+		if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
+			goto discard;
+
 		if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
 		    (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)))
 			goto discard;
@@ -711,103 +720,126 @@
 	kfree_skb(skb);
 	return(0);	
 }
-
 /*
- *	Sending
+ * Throw away all pending data and cancel the corking. Socket is locked.
  */
-
-struct udpv6fakehdr 
+static void udp_v6_flush_pending_frames(struct sock *sk)
 {
-	struct udphdr	uh;
-	struct iovec	*iov;
-	__u32		wcheck;
-	__u32		pl_len;
-	struct in6_addr *daddr;
-};
+	struct udp_opt *up = udp_sk(sk);
+
+	if (up->pending) {
+		up->len = 0;
+		up->pending = 0;
+		ip6_flush_pending_frames(sk);
+        }
+}
 
 /*
- *	with checksum
+ *	Sending
  */
 
-static int udpv6_getfrag(const void *data, struct in6_addr *addr,
-			 char *buff, unsigned int offset, unsigned int len)
+static int udp_v6_push_pending_frames(struct sock *sk, struct udp_opt *up)
 {
-	struct udpv6fakehdr *udh = (struct udpv6fakehdr *) data;
-	char *dst;
-	int final = 0;
-	int clen = len;
+	struct sk_buff *skb;
+	struct udphdr *uh;
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct flowi *fl = &np->cork.fl;
+	int err = 0;
 
-	dst = buff;
+	/* Grab the skbuff where UDP header space exists. */
+	if ((skb = skb_peek(&sk->write_queue)) == NULL)
+		goto out;
 
-	if (offset) {
-		offset -= sizeof(struct udphdr);
+	/*
+	 * Create a UDP header
+	 */
+	uh = skb->h.uh;
+	uh->source = fl->fl_ip_sport;
+	uh->dest = fl->fl_ip_dport;
+	uh->len = htons(up->len);
+	uh->check = 0;
+
+	if (sk->no_check == UDP_CSUM_NOXMIT) {
+		skb->ip_summed = CHECKSUM_NONE;
+		goto send;
+	}
+
+	if (skb_queue_len(&sk->write_queue) == 1) {
+		skb->csum = csum_partial((char *)uh,
+				sizeof(struct udphdr), skb->csum);
+		uh->check = csum_ipv6_magic(&fl->fl6_src,
+					    &fl->fl6_dst,
+					    up->len, fl->proto, skb->csum);
 	} else {
-		dst += sizeof(struct udphdr);
-		final = 1;
-		clen -= sizeof(struct udphdr);
-	}
+		u32 tmp_csum = 0;
 
-	if (csum_partial_copy_fromiovecend(dst, udh->iov, offset,
-					   clen, &udh->wcheck))
-		return -EFAULT;
-
-	if (final) {
-		struct in6_addr *daddr;
-		
-		udh->wcheck = csum_partial((char *)udh, sizeof(struct udphdr),
-					   udh->wcheck);
-
-		if (udh->daddr) {
-			daddr = udh->daddr;
-		} else {
-			/*
-			 *	use packet destination address
-			 *	this should improve cache locality
-			 */
-			daddr = addr + 1;
-		}
-		udh->uh.check = csum_ipv6_magic(addr, daddr,
-						udh->pl_len, IPPROTO_UDP,
-						udh->wcheck);
-		if (udh->uh.check == 0)
-			udh->uh.check = -1;
+		skb_queue_walk(&sk->write_queue, skb) {
+			tmp_csum = csum_add(tmp_csum, skb->csum);
+		}
+		tmp_csum = csum_partial((char *)uh,
+				sizeof(struct udphdr), tmp_csum);
+                tmp_csum = csum_ipv6_magic(&fl->fl6_src,
+					   &fl->fl6_dst,
+					   up->len, fl->proto, tmp_csum);
+                uh->check = tmp_csum;
 
-		memcpy(buff, udh, sizeof(struct udphdr));
 	}
-	return 0;
+	if (uh->check == 0)
+		uh->check = -1;
+
+send:
+	err = ip6_push_pending_frames(sk);
+out:
+	up->len = 0;
+	up->pending = 0;
+	return err;
 }
 
-static int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, int ulen)
+static int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, int len)
 {
 	struct ipv6_txoptions opt_space;
-	struct udpv6fakehdr udh;
+	struct udp_opt *up = udp_sk(sk);
 	struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name;
+	struct in6_addr *daddr;
 	struct ipv6_txoptions *opt = NULL;
 	struct ip6_flowlabel *flowlabel = NULL;
-	struct flowi fl;
+	struct flowi *fl = &np->cork.fl;
+	struct dst_entry *dst;
 	int addr_len = msg->msg_namelen;
-	struct in6_addr *daddr;
-	int len = ulen + sizeof(struct udphdr);
+	int ulen = len;
 	int addr_type;
 	int hlimit = -1;
-	
+	int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
 	int err;
 	
 	/* Rough check on arithmetic overflow,
 	   better check is made in ip6_build_xmit
 	   */
-	if (ulen < 0 || ulen > INT_MAX - sizeof(struct udphdr))
+	if (len < 0 || len > INT_MAX - sizeof(struct udphdr))
 		return -EMSGSIZE;
 	
-	fl.fl6_flowlabel = 0;
-	fl.oif = 0;
+	if (up->pending) {
+		/*
+		 * There are pending frames.
+		 * The socket lock must be held while it's corked.
+		 */
+		lock_sock(sk);
+		if (likely(up->pending)) {
+			dst = NULL;
+			goto do_append_data;
+		}
+		release_sock(sk);
+	}
+	ulen += sizeof(struct udphdr);
+
+	memset(fl, 0, sizeof(*fl));	/* fl is a pointer into np->cork: clear the whole struct flowi */
 
 	if (sin6) {
 		if (sin6->sin6_family == AF_INET) {
 			if (__ipv6_only_sock(sk))
 				return -ENETUNREACH;
-			return udp_sendmsg(sk, msg, ulen);
+			return udp_sendmsg(sk, msg, len);
 		}
 
 		if (addr_len < SIN6_LEN_RFC2133)
@@ -819,13 +851,13 @@
 		if (sin6->sin6_port == 0)
 			return -EINVAL;
 
-		udh.uh.dest = sin6->sin6_port;
+		up->dport = sin6->sin6_port;
 		daddr = &sin6->sin6_addr;
 
 		if (np->sndflow) {
-			fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
-			if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
-				flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
+			fl->fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
+			if (fl->fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
+				flowlabel = fl6_sock_lookup(sk, fl->fl6_flowlabel);
 				if (flowlabel == NULL)
 					return -EINVAL;
 				daddr = &flowlabel->dst;
@@ -840,14 +872,14 @@
 		if (addr_len >= sizeof(struct sockaddr_in6) &&
 		    sin6->sin6_scope_id &&
 		    ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL)
-			fl.oif = sin6->sin6_scope_id;
+			fl->oif = sin6->sin6_scope_id;
 	} else {
 		if (sk->state != TCP_ESTABLISHED)
 			return -EDESTADDRREQ;
 
-		udh.uh.dest = sk->dport;
+		up->dport = sk->dport;
 		daddr = &sk->net_pinfo.af_inet6.daddr;
-		fl.fl6_flowlabel = np->flow_label;
+		fl->fl6_flowlabel = np->flow_label;
 	}
 
 	addr_type = ipv6_addr_type(daddr);
@@ -860,30 +892,28 @@
 
 		sin.sin_family = AF_INET;
 		sin.sin_addr.s_addr = daddr->s6_addr32[3];
-		sin.sin_port = udh.uh.dest;
+		sin.sin_port = up->dport;
 		msg->msg_name = (struct sockaddr *)(&sin);
 		msg->msg_namelen = sizeof(sin);
 		fl6_sock_release(flowlabel);
 
-		return udp_sendmsg(sk, msg, ulen);
+		return udp_sendmsg(sk, msg, len);
 	}
 
-	udh.daddr = NULL;
-	if (!fl.oif)
-		fl.oif = sk->bound_dev_if;
-	fl.fl6_src = NULL;
+	if (!fl->oif)
+		fl->oif = sk->bound_dev_if;
 
 	if (msg->msg_controllen) {
 		opt = &opt_space;
 		memset(opt, 0, sizeof(struct ipv6_txoptions));
 
-		err = datagram_send_ctl(msg, &fl, opt, &hlimit);
+		err = datagram_send_ctl(msg, fl, opt, &hlimit);
 		if (err < 0) {
 			fl6_sock_release(flowlabel);
 			return err;
 		}
-		if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
-			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
+		if ((fl->fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
+			flowlabel = fl6_sock_lookup(sk, fl->fl6_flowlabel);
 			if (flowlabel == NULL)
 				return -EINVAL;
 		}
@@ -894,44 +924,181 @@
 		opt = np->opt;
 	if (flowlabel)
 		opt = fl6_merge_options(&opt_space, flowlabel, opt);
-	if (opt && opt->srcrt)
-		udh.daddr = daddr;
 
-	udh.uh.source = sk->sport;
-	udh.uh.len = len < 0x10000 ? htons(len) : 0;
-	udh.uh.check = 0;
-	udh.iov = msg->msg_iov;
-	udh.wcheck = 0;
-	udh.pl_len = len;
+	fl->proto = IPPROTO_UDP;
+	ipv6_addr_copy(&fl->fl6_dst, daddr);
+	if (ipv6_addr_any(&fl->fl6_src) && !ipv6_addr_any(&np->saddr))
+		ipv6_addr_copy(&fl->fl6_src, &np->saddr);
+	fl->fl_ip_dport = up->dport;
+	fl->fl_ip_sport = sk->sport;
+	
+	/* merge ip6_build_xmit from ip6_output */
+	if (opt && opt->srcrt) {
+		struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
+		ipv6_addr_copy(&fl->fl6_dst, rt0->addr);
+	}
 
-	fl.proto = IPPROTO_UDP;
-	fl.fl6_dst = daddr;
-	if (fl.fl6_src == NULL && !ipv6_addr_any(&np->saddr))
-		fl.fl6_src = &np->saddr;
-	fl.uli_u.ports.dport = udh.uh.dest;
-	fl.uli_u.ports.sport = udh.uh.source;
+	if (!fl->oif && ipv6_addr_is_multicast(&fl->fl6_dst))
+		fl->oif = np->mcast_oif;
+
+	err = ip6_dst_lookup(sk, &dst, fl);
+	if (err)
+		goto out;
 
-	err = ip6_build_xmit(sk, udpv6_getfrag, &udh, &fl, len, opt, hlimit,
-			     msg->msg_flags);
+	if (hlimit < 0) {
+		if (ipv6_addr_is_multicast(&fl->fl6_dst))
+			hlimit = np->mcast_hops;
+		else
+			hlimit = np->hop_limit;
+		if (hlimit < 0)
+			hlimit = dst_metric(dst, RTAX_HOPLIMIT);
+	}
+
+	if (msg->msg_flags&MSG_CONFIRM)
+		goto do_confirm;
+back_from_confirm:
+
+	lock_sock(sk);
+	if (unlikely(up->pending)) {
+		/* The socket is already corked while preparing it. */
+		/* ... which is an evident application bug. --ANK */
+		release_sock(sk);
 
+		NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp cork app bug 2\n"));
+		err = -EINVAL;
+		goto out;
+	}
+
+	up->pending = 1;
+
+do_append_data:
+	up->len += ulen;
+	err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen, sizeof(struct udphdr),
+			      hlimit, opt, fl, (struct rt6_info*)dst,
+			      corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
+	if (err)
+		udp_v6_flush_pending_frames(sk);
+	else if (!corkreq)
+		err = udp_v6_push_pending_frames(sk, up);
+
+	if (dst)
+		ip6_dst_store(sk, dst,
+			      !ipv6_addr_cmp(&fl->fl6_dst, &np->daddr) ?
+			      &np->daddr : NULL);
+	if (err > 0)
+		err = np->recverr ? net_xmit_errno(err) : 0;
+	release_sock(sk);
+out:
 	fl6_sock_release(flowlabel);
+	if (!err) {
+		UDP6_INC_STATS_USER(UdpOutDatagrams);
+		return len;
+	}
+	return err;
+
+do_confirm:
+	dst_confirm(dst);
+	if (!(msg->msg_flags&MSG_PROBE) || len)
+		goto back_from_confirm;
+	err = 0;
+	goto out;
+}
+
+static int udpv6_destroy_sock(struct sock *sk)	/* always returns 0 */
+{
+	lock_sock(sk);	/* the flush helper is documented as running with the socket locked */
+	udp_v6_flush_pending_frames(sk);	/* throw away corked data that was never pushed */
+	release_sock(sk);
 
-	if (err < 0)
-		return err;
+	inet6_destroy_sock(sk);
 
-	UDP6_INC_STATS_USER(UdpOutDatagrams);
-	return ulen;
+	return 0;
 }
 
-static struct inet6_protocol udpv6_protocol = 
+/*
+ *	Socket option code for UDP: handles the SOL_UDP options UDP_CORK and UDP_ENCAP
+ */
+static int udpv6_setsockopt(struct sock *sk, int level, int optname, 
+			  char *optval, int optlen)
 {
-	udpv6_rcv,		/* UDP handler		*/
-	udpv6_err,		/* UDP error control	*/
-	NULL,			/* next			*/
-	IPPROTO_UDP,		/* protocol ID		*/
-	0,			/* copy			*/
-	NULL,			/* data			*/
-	"UDPv6"			/* name			*/
+	struct udp_opt *up = udp_sk(sk);
+	int val;
+	int err = 0;
+
+	if (level != SOL_UDP)
+		return ipv6_setsockopt(sk, level, optname, optval, optlen);	/* other levels are delegated */
+
+	if(optlen<sizeof(int))
+		return -EINVAL;
+
+	if (get_user(val, (int *)optval))
+		return -EFAULT;
+
+	switch(optname) {
+	case UDP_CORK:
+		if (val != 0) {
+			up->corkflag = 1;
+		} else {
+			up->corkflag = 0;
+			lock_sock(sk);
+			udp_v6_push_pending_frames(sk, up);	/* uncork: push pending frames; the result is not propagated */
+			release_sock(sk);
+		}
+		break;
+		
+	case UDP_ENCAP:
+		up->encap_type = val;	/* stored as given; no range check is done here */
+		break;
+
+	default:
+		err = -ENOPROTOOPT;
+		break;
+	};
+
+	return err;
+}
+
+static int udpv6_getsockopt(struct sock *sk, int level, int optname, 
+			  char *optval, int *optlen)
+{
+	struct udp_opt *up = udp_sk(sk);
+	int val, len;
+
+	if (level != SOL_UDP)
+		return ipv6_getsockopt(sk, level, optname, optval, optlen);	/* other levels are delegated */
+
+	if(get_user(len,optlen))
+		return -EFAULT;
+
+	/* Reject negative lengths before the unsigned clamp below can hide them. */
+	if(len < 0)
+		return -EINVAL;
+	len = min_t(unsigned int, len, sizeof(int));
+
+	switch(optname) {
+	case UDP_CORK:
+		val = up->corkflag;
+		break;
+
+	case UDP_ENCAP:
+		val = up->encap_type;
+		break;
+
+	default:
+		return -ENOPROTOOPT;
+	};
+
+	if(put_user(len, optlen))	/* report how many bytes are actually returned */
+		return -EFAULT;
+	if(copy_to_user(optval, &val,len))
+		return -EFAULT;
+	return 0;
+}
+
+static struct inet6_protocol udpv6_protocol = {
+	.handler	=	udpv6_rcv,
+	.err_handler	=	udpv6_err,
+	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
 };
 
 #define LINE_LEN 190
@@ -1008,20 +1175,20 @@
 }
 
 struct proto udpv6_prot = {
-	name:		"UDP",
-	close:		udpv6_close,
-	connect:	udpv6_connect,
-	disconnect:	udp_disconnect,
-	ioctl:		udp_ioctl,
-	destroy:	inet6_destroy_sock,
-	setsockopt:	ipv6_setsockopt,
-	getsockopt:	ipv6_getsockopt,
-	sendmsg:	udpv6_sendmsg,
-	recvmsg:	udpv6_recvmsg,
-	backlog_rcv:	udpv6_queue_rcv_skb,
-	hash:		udp_v6_hash,
-	unhash:		udp_v6_unhash,
-	get_port:	udp_v6_get_port,
+	.name		=	"UDP",
+	.close		=	udpv6_close,
+	.connect	=	udpv6_connect,
+	.disconnect	=	udp_disconnect,
+	.ioctl		=	udp_ioctl,
+	.destroy	=	udpv6_destroy_sock,
+	.setsockopt	=	udpv6_setsockopt,
+	.getsockopt	=	udpv6_getsockopt,
+	.sendmsg	=	udpv6_sendmsg,
+	.recvmsg	=	udpv6_recvmsg,
+	.backlog_rcv	=	udpv6_queue_rcv_skb,
+	.hash		=	udp_v6_hash,
+	.unhash		=	udp_v6_unhash,
+	.get_port	=	udp_v6_get_port,
 };
 
 extern struct proto_ops inet6_dgram_ops;
@@ -1039,6 +1206,7 @@
 
 void __init udpv6_init(void)
 {
-	inet6_add_protocol(&udpv6_protocol);
+	if (inet6_add_protocol(&udpv6_protocol, IPPROTO_UDP) < 0)
+		printk(KERN_ERR "udpv6_init: Could not register protocol\n");
 	inet6_register_protosw(&udpv6_protosw);
 }
Index: net/ipv6/xfrm6_input.c
===================================================================
RCS file: net/ipv6/xfrm6_input.c
diff -N net/ipv6/xfrm6_input.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ b/net/ipv6/xfrm6_input.c	16 Apr 2004 13:16:26 -0000	1.7.14.1
@@ -0,0 +1,142 @@
+/*
+ * xfrm6_input.c: based on net/ipv4/xfrm4_input.c
+ *
+ * Authors:
+ *	Mitsuru KANDA @USAGI
+ * 	Kazunori MIYAZAWA @USAGI
+ * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
+ *	YOSHIFUJI Hideaki @USAGI
+ *		IPv6 support
+ */
+
+#include <linux/string.h>
+#include <net/inet_ecn.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/xfrm.h>
+
+static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
+{
+	struct ipv6hdr *inner = skb->h.ipv6h;
+	/* Carry a CE mark from the outer header (skb->nh) over to the inner one. */
+	if (!INET_ECN_is_ce(ip6_get_dsfield(skb->nh.ipv6h)))
+		return;
+	if (INET_ECN_is_not_ce(ip6_get_dsfield(inner)))
+		IP6_ECN_set_ce(inner);
+}
+
+int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+{
+	struct sk_buff *skb = *pskb;
+	int err;
+	u32 spi, seq;
+	struct sec_decap_state xfrm_vec[XFRM_MAX_DEPTH];
+	struct xfrm_state *x;
+	int xfrm_nr = 0;	/* entries of xfrm_vec[] filled in so far */
+	int decaps = 0;		/* set once a state with props.mode != 0 is applied */
+	int nexthdr = 0;
+	u8 *prevhdr = NULL;
+	int hhlen;		/* distance from mac.raw to nh.raw */
+	/* Returns 1 after in-place processing, -1 if the skb was requeued or dropped. */
+	ip6_find_1stfragopt(skb, &prevhdr);
+	nexthdr = *prevhdr;
+	*nhoffp = prevhdr - skb->nh.raw;
+	hhlen = skb->nh.raw - skb->mac.raw;
+
+	if ((err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0)
+		goto drop;
+	/* Each pass looks up and applies one state; ends when xfrm_parse_spi() != 0. */
+	do {
+		struct ipv6hdr *iph = skb->nh.ipv6h;
+
+		if (xfrm_nr == XFRM_MAX_DEPTH)	/* xfrm_vec[] is full */
+			goto drop_put;
+
+		x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi, nexthdr, AF_INET6);
+		if (x == NULL)
+			goto drop_put;
+		spin_lock(&x->lock);	/* held across the checks and the input call */
+		if (unlikely(x->km.state != XFRM_STATE_VALID))
+			goto drop_unlock;
+
+		if (x->props.replay_window && xfrm_replay_check(x, seq))
+			goto drop_unlock;
+
+		if (xfrm_state_check_expire(x))
+			goto drop_unlock;
+
+		nexthdr = x->type->input(x, &(xfrm_vec[xfrm_nr].decap), skb);
+		if (nexthdr <= 0)	/* the transform did not yield a next header */
+			goto drop_unlock;
+
+		if (x->props.replay_window)
+			xfrm_replay_advance(x, seq);
+
+		x->curlft.bytes += skb->len;
+		x->curlft.packets++;
+
+		spin_unlock(&x->lock);
+
+		xfrm_vec[xfrm_nr++].xvec = x;	/* from here on drop_put releases x */
+
+		if (x->props.mode) { /* XXX */
+			if (nexthdr != IPPROTO_IPV6)
+				goto drop_put;
+			decaps = 1;
+			break;
+		}
+
+		if ((err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) < 0)
+			goto drop_put;
+	} while (!err);
+
+	/* Allocate new secpath or COW existing one. */
+	if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
+		struct sec_path *sp;
+		sp = secpath_dup(skb->sp);
+		if (!sp)
+			goto drop_put;
+		if (skb->sp)
+			secpath_put(skb->sp);
+		skb->sp = sp;
+	}
+
+	if (xfrm_nr + skb->sp->len > XFRM_MAX_DEPTH)
+		goto drop_put;
+
+	memcpy(skb->sp->x+skb->sp->len, xfrm_vec, xfrm_nr*sizeof(struct sec_decap_state));
+	skb->sp->len += xfrm_nr;	/* the states now live in skb->sp, hence plain "drop" below */
+	skb->ip_summed = CHECKSUM_NONE;
+
+	if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+		goto drop;
+
+	if (decaps) {
+		skb->mac.raw = memmove(skb->data - hhlen, skb->mac.raw, hhlen);
+		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+			goto drop;
+		if (!(x->props.flags & XFRM_STATE_NOECN))	/* x is the last state applied */
+			ipip6_ecn_decapsulate(skb);
+		skb->nh.raw = skb->data;
+		if (!(skb->dev->flags&IFF_LOOPBACK)) {
+			dst_release(skb->dst);
+			skb->dst = NULL;
+		}
+		netif_rx(skb);	/* hand the inner packet back to the receive path */
+		return -1;
+	} else {
+		skb->mac.raw = memmove(skb->nh.raw - hhlen, skb->mac.raw,
+				       hhlen);
+		return 1;
+	}
+
+drop_unlock:	/* x is locked and not yet recorded in xfrm_vec[] */
+	spin_unlock(&x->lock);
+	xfrm_state_put(x);
+drop_put:	/* release every state recorded in xfrm_vec[] */
+	while (--xfrm_nr >= 0)
+		xfrm_state_put(xfrm_vec[xfrm_nr].xvec);
+drop:
+	kfree_skb(skb);
+	return -1;
+}
Index: net/ipv6/xfrm6_policy.c
===================================================================
RCS file: net/ipv6/xfrm6_policy.c
diff -N net/ipv6/xfrm6_policy.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ b/net/ipv6/xfrm6_policy.c	16 Apr 2004 13:16:26 -0000	1.8.12.1
@@ -0,0 +1,296 @@
+/*
+ * xfrm6_policy.c: based on xfrm4_policy.c
+ *
+ * Authors:
+ *	Mitsuru KANDA @USAGI
+ * 	Kazunori MIYAZAWA @USAGI
+ * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
+ * 		IPv6 support
+ * 	YOSHIFUJI Hideaki
+ * 		Split up af-specific portion
+ * 
+ */
+
+#include <linux/config.h>
+#include <net/xfrm.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+
+extern struct dst_ops xfrm6_dst_ops;
+extern struct xfrm_policy_afinfo xfrm6_policy_afinfo;
+
+static struct xfrm_type_map xfrm6_type_map = { .lock = RW_LOCK_UNLOCKED };
+
+/* Route-lookup hook: store ip6_route_output()'s result for @fl in *@dst. */
+int xfrm6_dst_lookup(struct xfrm_dst **dst, struct flowi *fl)
+{
+	*dst = (struct xfrm_dst *)ip6_route_output(NULL, fl);
+
+	/* A NULL result is reported as -ENETUNREACH. */
+	return *dst ? 0 : -ENETUNREACH;
+}
+
+/* Check that the bundle accepts the flow and its components are
+ * still valid.
+ */
+
+static int __xfrm6_bundle_ok(struct xfrm_dst *xdst, struct flowi *fl)
+{
+	while (xdst->u.dst.ops == &xfrm6_dst_ops) {	/* still an xfrm6 dst */
+		struct xfrm_state *st = xdst->u.dst.xfrm;
+
+		if (!xfrm_selector_match(&st->sel, fl, AF_INET6) ||
+		    st->km.state != XFRM_STATE_VALID ||
+		    xdst->u.dst.path->obsolete > 0)
+			return 0;
+		xdst = (struct xfrm_dst *)xdst->u.dst.child;
+		if (!xdst)
+			return 0;	/* chain ended on an xfrm6 dst */
+	}
+	return 1;	/* reached a dst that is not an xfrm6 one */
+}
+
+static struct dst_entry *
+__xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
+{
+	struct dst_entry *dst;
+	u32 ndisc_bit = 0;	/* RTF_NDISC when fl is one of the ICMPv6 types below */
+	/* Search policy->bundles for an entry usable for fl; NULL when none fits. */
+	if (fl->proto == IPPROTO_ICMPV6 &&
+	    (fl->fl_icmp_type == NDISC_NEIGHBOUR_ADVERTISEMENT ||
+	     fl->fl_icmp_type == NDISC_NEIGHBOUR_SOLICITATION  ||
+	     fl->fl_icmp_type == NDISC_ROUTER_SOLICITATION))
+		ndisc_bit = RTF_NDISC;
+
+	/* Still not clear if we should set fl->fl6_{src,dst}... */
+	read_lock_bh(&policy->lock);
+	for (dst = policy->bundles; dst; dst = dst->next) {
+		struct xfrm_dst *xdst = (struct xfrm_dst*)dst;
+		struct in6_addr fl_dst_prefix, fl_src_prefix;
+
+		if ((xdst->u.rt6.rt6i_flags & RTF_NDISC) != ndisc_bit)	/* RTF_NDISC must agree */
+			continue;
+
+		ipv6_addr_prefix(&fl_dst_prefix,
+				 &fl->fl6_dst,
+				 xdst->u.rt6.rt6i_dst.plen);
+		ipv6_addr_prefix(&fl_src_prefix,
+				 &fl->fl6_src,
+				 xdst->u.rt6.rt6i_src.plen);
+		if (!ipv6_addr_cmp(&xdst->u.rt6.rt6i_dst.addr, &fl_dst_prefix) &&
+		    !ipv6_addr_cmp(&xdst->u.rt6.rt6i_src.addr, &fl_src_prefix) &&
+		    __xfrm6_bundle_ok(xdst, fl)) {
+			dst_clone(dst);	/* the match is returned after dst_clone() */
+			break;
+		}
+	}
+	read_unlock_bh(&policy->lock);
+	return dst;	/* NULL if the loop ran off the end of the list */
+}
+
+/* Allocate chain of dst_entry's, attach known xfrm's, calculate
+ * all the metrics... Shortly, bundle a bundle.
+ */
+
+static int
+__xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
+		      struct flowi *fl, struct dst_entry **dst_p)
+{
+	struct dst_entry *dst, *dst_prev;
+	struct rt6_info *rt0 = (struct rt6_info*)(*dst_p);	/* route passed in by the caller */
+	struct rt6_info *rt  = rt0;	/* replaced below if a separate endpoint route is looked up */
+	struct in6_addr *remote = &fl->fl6_dst;
+	struct in6_addr *local  = &fl->fl6_src;
+	int i;
+	int err = 0;
+	int header_len = 0;	/* running sums over all xfrm[i]->props */
+	int trailer_len = 0;
+
+	dst = dst_prev = NULL;
+	/* Pass 1: one dst per state, linked through ->child; dst is the head. */
+	for (i = 0; i < nx; i++) {
+		struct dst_entry *dst1 = dst_alloc(&xfrm6_dst_ops);
+
+		if (unlikely(dst1 == NULL)) {
+			err = -ENOBUFS;
+			goto error;
+		}
+
+		dst1->xfrm = xfrm[i];
+		if (!dst)
+			dst = dst1;
+		else {
+			dst_prev->child = dst1;
+			dst1->flags |= DST_NOHASH;
+			dst_clone(dst1);
+		}
+		dst_prev = dst1;
+		if (xfrm[i]->props.mode) {	/* remember the endpoints of the last such state */
+			remote = (struct in6_addr*)&xfrm[i]->id.daddr;
+			local  = (struct in6_addr*)&xfrm[i]->props.saddr;
+		}
+		header_len += xfrm[i]->props.header_len;
+		trailer_len += xfrm[i]->props.trailer_len;
+	}
+	/* NOTE(review): assumes nx >= 1, dst_prev is dereferenced below - confirm callers. */
+	if (ipv6_addr_cmp(remote, &fl->fl6_dst)) {
+		struct flowi fl_tunnel;
+
+		memset(&fl_tunnel, 0, sizeof(fl_tunnel));
+		ipv6_addr_copy(&fl_tunnel.fl6_dst, remote);
+		ipv6_addr_copy(&fl_tunnel.fl6_src, local);
+
+		err = xfrm_dst_lookup((struct xfrm_dst **) &rt,
+				      &fl_tunnel, AF_INET6);
+		if (err)
+			goto error;
+	} else {
+		dst_hold(&rt->u.dst);	/* same endpoint: keep using the caller's route */
+	}
+	dst_prev->child = &rt->u.dst;	/* the route terminates the chain */
+	for (dst_prev = dst; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) {
+		struct xfrm_dst *x = (struct xfrm_dst*)dst_prev;
+
+		dst_prev->dev = rt->u.dst.dev;
+		if (rt->u.dst.dev)
+			dev_hold(rt->u.dst.dev);
+		dst_prev->obsolete	= -1;
+		dst_prev->flags	       |= DST_HOST;
+		dst_prev->lastuse	= jiffies;
+		dst_prev->header_len	= header_len;	/* what remains from this entry onwards */
+		dst_prev->trailer_len	= trailer_len;
+		memcpy(&dst_prev->metrics, &rt->u.dst.metrics, sizeof(dst_prev->metrics));
+		dst_prev->path		= &rt->u.dst;
+
+		/* Copy neighbour for reachability confirmation */
+		dst_prev->neighbour	= neigh_clone(rt->u.dst.neighbour);
+		dst_prev->input		= rt->u.dst.input;
+		dst_prev->output	= dst_prev->xfrm->type->output;
+		/* Sheit... I remember I did this right. Apparently,
+		 * it was magically lost, so this code needs audit */
+		x->u.rt6.rt6i_flags    = rt0->rt6i_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL|RTF_NDISC);
+		x->u.rt6.rt6i_metric   = rt0->rt6i_metric;
+		x->u.rt6.rt6i_node     = rt0->rt6i_node;
+		x->u.rt6.rt6i_gateway  = rt0->rt6i_gateway;
+		memcpy(&x->u.rt6.rt6i_gateway, &rt0->rt6i_gateway, sizeof(x->u.rt6.rt6i_gateway)); /* NOTE(review): repeats the assignment above */
+		x->u.rt6.rt6i_dst      = rt0->rt6i_dst;
+		x->u.rt6.rt6i_src      = rt0->rt6i_src;	
+		header_len -= x->u.dst.xfrm->props.header_len;
+		trailer_len -= x->u.dst.xfrm->props.trailer_len;
+	}
+	*dst_p = dst;	/* hand the head of the new chain back to the caller */
+	return 0;
+
+error:
+	if (dst)
+		dst_free(dst);
+	return err;
+}
+
+static inline void
+_decode_session6(struct sk_buff *skb, struct flowi *fl)
+{
+	u16 offset = sizeof(struct ipv6hdr);
+	struct ipv6hdr *hdr = skb->nh.ipv6h;
+	struct ipv6_opt_hdr *exthdr;
+	u8 nexthdr = skb->nh.ipv6h->nexthdr;
+
+	memset(fl, 0, sizeof(struct flowi));
+	ipv6_addr_copy(&fl->fl6_dst, &hdr->daddr);
+	ipv6_addr_copy(&fl->fl6_src, &hdr->saddr);
+	/* Skip hop/routing/dest options; record proto (and ports) of what follows. */
+	while (pskb_may_pull(skb, skb->nh.raw + offset + 1 - skb->data)) {
+		exthdr = (struct ipv6_opt_hdr *)(skb->nh.raw + offset);	/* re-derive: a pull may move skb->nh.raw */
+		switch (nexthdr) {
+		case NEXTHDR_ROUTING:
+		case NEXTHDR_HOP:
+		case NEXTHDR_DEST:
+			offset += ipv6_optlen(exthdr);
+			nexthdr = exthdr->nexthdr;
+			break;
+
+		case IPPROTO_UDP:
+		case IPPROTO_TCP:
+		case IPPROTO_SCTP:
+			if (pskb_may_pull(skb, skb->nh.raw + offset + 4 - skb->data)) {
+				u16 *ports = (u16 *)(skb->nh.raw + offset);	/* not the pre-pull pointer */
+
+				fl->fl_ip_sport = ports[0];
+				fl->fl_ip_dport = ports[1];
+			}
+			fl->proto = nexthdr;
+			return;
+
+		/* XXX Why are there these headers? */
+		case IPPROTO_AH:
+		case IPPROTO_ESP:
+		case IPPROTO_COMP:
+		default:
+			fl->fl_ipsec_spi = 0;
+			fl->proto = nexthdr;
+			return;
+		};
+	}
+}
+
+static inline int xfrm6_garbage_collect(void)
+{
+	read_lock(&xfrm6_policy_afinfo.lock);
+	xfrm6_policy_afinfo.garbage_collect();	/* member is not set in this file's initializer */
+	read_unlock(&xfrm6_policy_afinfo.lock);
+	return (atomic_read(&xfrm6_dst_ops.entries) > xfrm6_dst_ops.gc_thresh*2);	/* nonzero: still above 2*gc_thresh */
+}
+
+static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu)
+{
+	struct dst_entry *path = dst->path;
+	/* Forward the new MTU to the underlying path's update_pmtu handler. */
+	if (mtu >= 1280 && mtu < dst_pmtu(dst))	/* NOTE(review): returns for valid smaller MTUs - confirm intent */
+		return;
+
+	path->ops->update_pmtu(path, mtu);
+}
+
+struct dst_ops xfrm6_dst_ops = {	/* ops attached to dsts from __xfrm6_bundle_create() */
+	.family =		AF_INET6,
+	.protocol =		__constant_htons(ETH_P_IPV6),
+	.gc =			xfrm6_garbage_collect,
+	.update_pmtu =		xfrm6_update_pmtu,
+	.gc_thresh =		1024,	/* xfrm6_garbage_collect() compares entries with twice this */
+	.entry_size =		sizeof(struct xfrm_dst),
+};
+
+struct xfrm_policy_afinfo xfrm6_policy_afinfo = {	/* AF_INET6 policy hooks, see xfrm6_policy_init() */
+	.family =		AF_INET6,
+	.lock = 		RW_LOCK_UNLOCKED,
+	.type_map = 		&xfrm6_type_map,
+	.dst_ops =		&xfrm6_dst_ops,
+	.dst_lookup =		xfrm6_dst_lookup,
+	.find_bundle =		__xfrm6_find_bundle,
+	.bundle_create =	__xfrm6_bundle_create,
+	.decode_session =	_decode_session6,
+};
+
+void __init xfrm6_policy_init(void)
+{
+	xfrm_policy_register_afinfo(&xfrm6_policy_afinfo);	/* any result is not checked here */
+}
+
+void __exit xfrm6_policy_fini(void)
+{
+	xfrm_policy_unregister_afinfo(&xfrm6_policy_afinfo);	/* counterpart of xfrm6_policy_init() */
+}
+
+void __init xfrm6_init(void)
+{
+	xfrm6_policy_init();	/* policy side first ... */
+	xfrm6_state_init();	/* ... then the state side */
+}
+
+void __exit xfrm6_fini(void)
+{
+	/* xfrm6_input_fini() is deliberately left disabled here. */
+	xfrm6_policy_fini();
+	xfrm6_state_fini();
+}
Index: net/ipv6/xfrm6_state.c
===================================================================
RCS file: net/ipv6/xfrm6_state.c
diff -N net/ipv6/xfrm6_state.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ b/net/ipv6/xfrm6_state.c	16 Apr 2004 13:16:26 -0000	1.5.18.1
@@ -0,0 +1,134 @@
+/*
+ * xfrm6_state.c: based on xfrm4_state.c
+ *
+ * Authors:
+ *	Mitsuru KANDA @USAGI
+ * 	Kazunori MIYAZAWA @USAGI
+ * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
+ * 		IPv6 support
+ * 	YOSHIFUJI Hideaki @USAGI
+ * 		Split up af-specific portion
+ * 	
+ */
+
+#include <net/xfrm.h>
+#include <linux/pfkeyv2.h>
+#include <linux/ipsec.h>
+#include <net/ipv6.h>
+
+static struct xfrm_state_afinfo xfrm6_state_afinfo;	/* forward declaration; linkage must match the static definition below */
+
+static void
+__xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl,
+		     struct xfrm_tmpl *tmpl,
+		     xfrm_address_t *daddr, xfrm_address_t *saddr)
+{
+	/* Initialize temporary selector matching only
+	 * to current session. */
+	ipv6_addr_copy((struct in6_addr *)&x->sel.daddr, &fl->fl6_dst);
+	ipv6_addr_copy((struct in6_addr *)&x->sel.saddr, &fl->fl6_src);
+	x->sel.dport = fl->fl_ip_dport;
+	x->sel.dport_mask = ~0;	/* all-ones masks: exact port match */
+	x->sel.sport = fl->fl_ip_sport;
+	x->sel.sport_mask = ~0;
+	x->sel.prefixlen_d = 128;	/* full-length IPv6 prefixes */
+	x->sel.prefixlen_s = 128;
+	x->sel.proto = fl->proto;
+	x->sel.ifindex = fl->oif;
+	x->id = tmpl->id;
+	if (ipv6_addr_any((struct in6_addr*)&x->id.daddr))	/* template left daddr unset: use the argument */
+		memcpy(&x->id.daddr, daddr, sizeof(x->sel.daddr));
+	memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr));
+	if (ipv6_addr_any((struct in6_addr*)&x->props.saddr))	/* likewise for the source address */
+		memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr));
+	x->props.mode = tmpl->mode;
+	x->props.reqid = tmpl->reqid;
+	x->props.family = AF_INET6;
+}
+
+static struct xfrm_state *
+__xfrm6_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto)
+{
+	struct xfrm_state *x;
+	unsigned h = __xfrm6_spi_hash(daddr, spi, proto);
+
+	list_for_each_entry(x, xfrm6_state_afinfo.state_byspi+h, byspi) {
+		if (x->props.family != AF_INET6 || spi != x->id.spi)
+			continue;	/* wrong family or SPI */
+		if (ipv6_addr_cmp((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) ||
+		    proto != x->id.proto)
+			continue;	/* wrong destination or protocol */
+		xfrm_state_hold(x);	/* the match is returned after xfrm_state_hold() */
+		return x;
+	}
+	return NULL;
+}
+
+static struct xfrm_state *
+__xfrm6_find_acq(u8 mode, u32 reqid, u8 proto, 
+		 xfrm_address_t *daddr, xfrm_address_t *saddr, 
+		 int create)
+{
+	struct xfrm_state *x, *x0;	/* x0: best XFRM_STATE_ACQ candidate so far */
+	unsigned h = __xfrm6_dst_hash(daddr);
+
+	x0 = NULL;
+	/* Find a matching ACQ state in the bydst bucket; optionally create one. */
+	list_for_each_entry(x, xfrm6_state_afinfo.state_bydst+h, bydst) {
+		if (x->props.family == AF_INET6 &&
+		    !ipv6_addr_cmp((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) &&
+		    mode == x->props.mode &&
+		    proto == x->id.proto &&
+		    !ipv6_addr_cmp((struct in6_addr *)saddr, (struct in6_addr *)x->props.saddr.a6) &&
+		    reqid == x->props.reqid &&
+		    x->km.state == XFRM_STATE_ACQ) {
+			    if (!x0)
+				    x0 = x;	/* first match is the fallback */
+			    if (x->id.spi)
+				    continue;
+			    x0 = x;	/* a match without an SPI wins; stop looking */
+			    break;
+		    }
+	}
+	if (x0) {
+		xfrm_state_hold(x0);
+	} else if (create && (x0 = xfrm_state_alloc()) != NULL) {
+		memcpy(x0->sel.daddr.a6, daddr, sizeof(struct in6_addr));
+		memcpy(x0->sel.saddr.a6, saddr, sizeof(struct in6_addr));
+		x0->sel.prefixlen_d = 128;
+		x0->sel.prefixlen_s = 128;
+		memcpy(x0->props.saddr.a6, saddr, sizeof(struct in6_addr));
+		x0->km.state = XFRM_STATE_ACQ;
+		memcpy(x0->id.daddr.a6, daddr, sizeof(struct in6_addr));
+		x0->id.proto = proto;
+		x0->props.family = AF_INET6;
+		x0->props.mode = mode;
+		x0->props.reqid = reqid;
+		x0->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
+		xfrm_state_hold(x0);	/* presumably the timer's reference - TODO confirm */
+		mod_timer(&x0->timer, jiffies + XFRM_ACQ_EXPIRES*HZ);
+		xfrm_state_hold(x0);	/* presumably the bydst list's reference - TODO confirm */
+		list_add_tail(&x0->bydst, xfrm6_state_afinfo.state_bydst+h);
+		wake_up(&km_waitq);
+	}
+	return x0;	/* NULL if nothing matched and nothing was created */
+}
+
+static struct xfrm_state_afinfo xfrm6_state_afinfo = {	/* AF_INET6 state hooks, see xfrm6_state_init() */
+	.family			= AF_INET6,
+	.lock			= RW_LOCK_UNLOCKED,
+	.init_tempsel		= __xfrm6_init_tempsel,
+	.state_lookup		= __xfrm6_state_lookup,
+	.find_acq		= __xfrm6_find_acq,
+};
+
+void __init xfrm6_state_init(void)
+{
+	xfrm_state_register_afinfo(&xfrm6_state_afinfo);	/* any result is not checked here */
+}
+
+void __exit xfrm6_state_fini(void)
+{
+	xfrm_state_unregister_afinfo(&xfrm6_state_afinfo);	/* counterpart of xfrm6_state_init() */
+}
+
Index: net/ipv6/netfilter/ip6t_multiport.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/ipv6/netfilter/ip6t_multiport.c,v
retrieving revision 1.1.1.12
retrieving revision 1.1.1.12.2.1
diff -u -r1.1.1.12 -r1.1.1.12.2.1
--- a/net/ipv6/netfilter/ip6t_multiport.c	30 Oct 2001 23:08:12 -0000	1.1.1.12
+++ b/net/ipv6/netfilter/ip6t_multiport.c	16 Apr 2004 13:16:26 -0000	1.1.1.12.2.1
@@ -5,6 +5,7 @@
 #include <linux/udp.h>
 #include <linux/skbuff.h>
 #include <linux/in.h>
+#include <linux/socket.h>
 
 #include <linux/netfilter_ipv6/ip6t_multiport.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
Index: net/key/Makefile
===================================================================
RCS file: net/key/Makefile
diff -N net/key/Makefile
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ b/net/key/Makefile	16 Apr 2004 13:16:26 -0000	1.2.18.1
@@ -0,0 +1,9 @@
+#
+# Makefile for the key AF.
+#
+
+O_TARGET := key.o
+
+obj-$(CONFIG_NET_KEY) += af_key.o
+
+include $(TOPDIR)/Rules.make
Index: net/key/af_key.c
===================================================================
RCS file: net/key/af_key.c
diff -N net/key/af_key.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ b/net/key/af_key.c	16 Apr 2004 13:16:26 -0000	1.6.18.1
@@ -0,0 +1,2852 @@
+/*
+ * net/key/af_key.c	An implementation of PF_KEYv2 sockets.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Maxim Giryaev	<gem@asplinux.ru>
+ *		David S. Miller	<davem@redhat.com>
+ *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
+ *		Kunihiro Ishiguro <kunihiro@ipinfusion.com>
+ *		Kazunori MIYAZAWA / USAGI Project <miyazawa@linux-ipv6.org>
+ *		Derek Atkins <derek@ihtfp.com>
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/socket.h>
+#include <linux/pfkeyv2.h>
+#include <linux/ipsec.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <net/xfrm.h>
+
+#include <net/sock.h>
+
+#define _X2KEY(x) ((x) == XFRM_INF ? 0 : (x))
+#define _KEY2X(x) ((x) == 0 ? XFRM_INF : (x))
+
+
+/* List of all pfkey sockets. */
+static struct sock * pfkey_table;
+static DECLARE_WAIT_QUEUE_HEAD(pfkey_table_wait);
+static rwlock_t pfkey_table_lock = RW_LOCK_UNLOCKED;
+static atomic_t pfkey_table_users = ATOMIC_INIT(0);
+
+static atomic_t pfkey_socks_nr = ATOMIC_INIT(0);
+
+static void pfkey_sock_destruct(struct sock *sk)
+{
+	skb_queue_purge(&sk->receive_queue);
+	/* Installed as sk->destruct by pfkey_create(). */
+	if (!sk->dead) {
+		printk("Attempt to release alive pfkey socket: %p\n", sk);
+		return;	/* nothing below is released in this case */
+	}
+
+	BUG_TRAP(atomic_read(&sk->rmem_alloc)==0);
+	BUG_TRAP(atomic_read(&sk->wmem_alloc)==0);
+
+	kfree(pfkey_sk(sk));	/* the pfkey_opt allocated in pfkey_create() */
+
+	atomic_dec(&pfkey_socks_nr);
+
+	MOD_DEC_USE_COUNT;	/* pairs with MOD_INC_USE_COUNT in pfkey_create() */
+}
+
+static void pfkey_table_grab(void)
+{
+	write_lock_bh(&pfkey_table_lock);
+	/* Returns with the write lock held and pfkey_table_users == 0. */
+	if (atomic_read(&pfkey_table_users)) {
+		DECLARE_WAITQUEUE(wait, current);
+
+		add_wait_queue_exclusive(&pfkey_table_wait, &wait);
+		for(;;) {
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			if (atomic_read(&pfkey_table_users) == 0)
+				break;
+			write_unlock_bh(&pfkey_table_lock);	/* drop the lock while sleeping */
+			schedule();
+			write_lock_bh(&pfkey_table_lock);
+		}
+
+		__set_current_state(TASK_RUNNING);
+		remove_wait_queue(&pfkey_table_wait, &wait);
+	}
+}
+
+static __inline__ void pfkey_table_ungrab(void)
+{
+	write_unlock_bh(&pfkey_table_lock);	/* releases what pfkey_table_grab() took */
+	wake_up(&pfkey_table_wait);
+}
+
+static __inline__ void pfkey_lock_table(void)
+{
+	/* read_lock() synchronizes us to pfkey_table_grab */
+	/* The lock is only held while the user count is raised. */
+	read_lock(&pfkey_table_lock);
+	atomic_inc(&pfkey_table_users);
+	read_unlock(&pfkey_table_lock);
+}
+
+static __inline__ void pfkey_unlock_table(void)
+{
+	if (atomic_dec_and_test(&pfkey_table_users))	/* last user gone */
+		wake_up(&pfkey_table_wait);	/* pfkey_table_grab() sleeps on this queue */
+}
+
+
+static struct proto_ops pfkey_ops;
+
+static void pfkey_insert(struct sock *sk)
+{
+	pfkey_table_grab();
+	sock_hold(sk);			/* reference kept for the table */
+	sk->next = pfkey_table;		/* push on the list head */
+	pfkey_table = sk;
+	pfkey_table_ungrab();
+}
+
+static void pfkey_remove(struct sock *sk)
+{
+	struct sock **link = &pfkey_table;
+
+	pfkey_table_grab();
+	/* Find the link that points at sk, if there is one. */
+	while (*link && *link != sk)
+		link = &(*link)->next;
+	if (*link) {
+		*link = sk->next;
+		__sock_put(sk);	/* drop the reference pfkey_insert() took */
+	}
+	pfkey_table_ungrab();
+}
+
+static int pfkey_create(struct socket *sock, int protocol)
+{
+	struct sock *sk;
+	struct pfkey_opt *pfk;
+	int err;
+	/* Create a PF_KEY socket: CAP_NET_ADMIN, SOCK_RAW and PF_KEY_V2 only. */
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+	if (sock->type != SOCK_RAW)
+		return -ESOCKTNOSUPPORT;
+	if (protocol != PF_KEY_V2)
+		return -EPROTONOSUPPORT;
+
+	MOD_INC_USE_COUNT;	/* undone at "out" or in pfkey_sock_destruct() */
+
+	err = -ENOMEM;		/* covers both allocations below */
+	sk = sk_alloc(PF_KEY, GFP_KERNEL, 1);
+	if (sk == NULL)
+		goto out;
+
+	sock->ops = &pfkey_ops;
+	sock_init_data(sock, sk);
+
+	pfk = pfkey_sk(sk) = kmalloc(sizeof(*pfk), GFP_KERNEL);
+	if (!pfk) {
+		sock->sk = NULL;	/* pfkey_release() must not see the freed sk */
+		sk_free(sk);
+		goto out;
+	}
+	memset(pfk, 0, sizeof(*pfk));
+
+	sk->family = PF_KEY;
+	sk->destruct = pfkey_sock_destruct;
+
+	atomic_inc(&pfkey_socks_nr);
+
+	pfkey_insert(sk);
+
+	return 0;
+
+out:
+	MOD_DEC_USE_COUNT;
+	return err;
+}
+
+static int pfkey_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+	/* Detach sk from the socket and the pfkey table; always returns 0. */
+	if (!sk)
+		return 0;
+
+	pfkey_remove(sk);	/* drops the table's reference */
+
+	sock_orphan(sk);
+	sock->sk = NULL;
+	skb_queue_purge(&sk->write_queue);
+	sock_put(sk);
+
+	return 0;
+}
+
+static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2,
+			       int allocation, struct sock *sk)
+{
+	int err = -ENOBUFS;	/* stays set unless a buffer is actually queued */
+
+	sock_hold(sk);
+	if (*skb2 == NULL) {	/* no buffer left over from an earlier call */
+		if (atomic_read(&skb->users) != 1) {
+			*skb2 = skb_clone(skb, allocation);
+		} else {
+			*skb2 = skb;	/* sole user: share skb itself with one more reference */
+			atomic_inc(&skb->users);
+		}
+	}
+	if (*skb2 != NULL) {
+		if (atomic_read(&sk->rmem_alloc) <= sk->rcvbuf) {	/* queue only within rcvbuf */
+			skb_orphan(*skb2);
+			skb_set_owner_r(*skb2, sk);
+			skb_queue_tail(&sk->receive_queue, *skb2);
+			sk->data_ready(sk, (*skb2)->len);
+			*skb2 = NULL;	/* consumed: the next call must obtain a new one */
+			err = 0;
+		}
+	}
+	sock_put(sk);
+	return err;
+}
+
+/* Send SKB to all pfkey sockets matching selected criteria.  */
+#define BROADCAST_ALL		0
+#define BROADCAST_ONE		1
+#define BROADCAST_REGISTERED	2
+#define BROADCAST_PROMISC_ONLY	4
+static int pfkey_broadcast(struct sk_buff *skb, int allocation,
+			   int broadcast_flags, struct sock *one_sk)
+{
+	struct sock *sk;
+	struct sk_buff *skb2 = NULL;	/* buffer handed between pfkey_broadcast_one() calls */
+	int err = -ESRCH;
+
+	/* XXX Do we need something like netlink_overrun?  I think
+	 * XXX PF_KEY socket apps will not mind current behavior.
+	 */
+	if (!skb)
+		return -ENOMEM;
+
+	pfkey_lock_table();
+	for (sk = pfkey_table; sk; sk = sk->next) {
+		struct pfkey_opt *pfk = pfkey_sk(sk);
+		int err2;
+
+		/* Yes, it means that if you are meant to receive this
+		 * pfkey message you receive it twice as promiscuous
+		 * socket.
+		 */
+		if (pfk->promisc)
+			pfkey_broadcast_one(skb, &skb2, allocation, sk);
+
+		/* the exact target will be processed later */
+		if (sk == one_sk)
+			continue;
+		if (broadcast_flags != BROADCAST_ALL) {
+			if (broadcast_flags & BROADCAST_PROMISC_ONLY)
+				continue;
+			if ((broadcast_flags & BROADCAST_REGISTERED) &&
+			    !pfk->registered)
+				continue;
+			if (broadcast_flags & BROADCAST_ONE)
+				continue;
+		}
+
+		err2 = pfkey_broadcast_one(skb, &skb2, allocation, sk);
+
+		/* Error is cleared after successful sending to at least one
+		 * registered KM */
+		if ((broadcast_flags & BROADCAST_REGISTERED) && err)
+			err = err2;
+	}
+	pfkey_unlock_table();
+
+	if (one_sk != NULL)	/* its result replaces whatever the loop left in err */
+		err = pfkey_broadcast_one(skb, &skb2, allocation, one_sk);
+
+	if (skb2)
+		kfree_skb(skb2);
+	kfree_skb(skb);	/* skb is always released here */
+	return err;
+}
+
+static inline void pfkey_hdr_dup(struct sadb_msg *new, struct sadb_msg *orig)
+{
+	*new = *orig;	/* plain structure copy of the base message header */
+}
+
+static int pfkey_error(struct sadb_msg *orig, int err, struct sock *sk)
+{
+	struct sk_buff *skb = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_KERNEL);
+	struct sadb_msg *hdr;
+	/* Send a copy of orig with sadb_msg_errno set to -err to sk. */
+	if (!skb)
+		return -ENOBUFS;
+
+	/* Woe be to the platform trying to support PFKEY yet
+	 * having normal errnos outside the 1-255 range, inclusive.
+	 */
+	err = -err;	/* callers pass a negative errno */
+	if (err == ERESTARTSYS ||
+	    err == ERESTARTNOHAND ||
+	    err == ERESTARTNOINTR)
+		err = EINTR;
+	if (err >= 512)
+		err = EINVAL;
+	if (err <= 0 || err >= 256)	/* must fit the uint8_t errno field */
+		BUG();
+
+	hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg));
+	pfkey_hdr_dup(hdr, orig);
+	hdr->sadb_msg_errno = (uint8_t) err;
+	hdr->sadb_msg_len = (sizeof(struct sadb_msg) /
+			     sizeof(uint64_t));
+
+	pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ONE, sk);	/* its result is not propagated */
+
+	return 0;
+}
+
+static u8 sadb_ext_min_len[] = {	/* minimum byte length per extension type, checked in parse_exthdrs() */
+	[SADB_EXT_RESERVED]		= (u8) 0,
+	[SADB_EXT_SA]			= (u8) sizeof(struct sadb_sa),
+	[SADB_EXT_LIFETIME_CURRENT]	= (u8) sizeof(struct sadb_lifetime),
+	[SADB_EXT_LIFETIME_HARD]	= (u8) sizeof(struct sadb_lifetime),
+	[SADB_EXT_LIFETIME_SOFT]	= (u8) sizeof(struct sadb_lifetime),
+	[SADB_EXT_ADDRESS_SRC]		= (u8) sizeof(struct sadb_address),
+	[SADB_EXT_ADDRESS_DST]		= (u8) sizeof(struct sadb_address),
+	[SADB_EXT_ADDRESS_PROXY]	= (u8) sizeof(struct sadb_address),
+	[SADB_EXT_KEY_AUTH]		= (u8) sizeof(struct sadb_key),
+	[SADB_EXT_KEY_ENCRYPT]		= (u8) sizeof(struct sadb_key),
+	[SADB_EXT_IDENTITY_SRC]		= (u8) sizeof(struct sadb_ident),
+	[SADB_EXT_IDENTITY_DST]		= (u8) sizeof(struct sadb_ident),
+	[SADB_EXT_SENSITIVITY]		= (u8) sizeof(struct sadb_sens),
+	[SADB_EXT_PROPOSAL]		= (u8) sizeof(struct sadb_prop),
+	[SADB_EXT_SUPPORTED_AUTH]	= (u8) sizeof(struct sadb_supported),
+	[SADB_EXT_SUPPORTED_ENCRYPT]	= (u8) sizeof(struct sadb_supported),
+	[SADB_EXT_SPIRANGE]		= (u8) sizeof(struct sadb_spirange),
+	[SADB_X_EXT_KMPRIVATE]		= (u8) sizeof(struct sadb_x_kmprivate),
+	[SADB_X_EXT_POLICY]		= (u8) sizeof(struct sadb_x_policy),
+	[SADB_X_EXT_SA2]		= (u8) sizeof(struct sadb_x_sa2),
+	[SADB_X_EXT_NAT_T_TYPE]		= (u8) sizeof(struct sadb_x_nat_t_type),
+	[SADB_X_EXT_NAT_T_SPORT]	= (u8) sizeof(struct sadb_x_nat_t_port),
+	[SADB_X_EXT_NAT_T_DPORT]	= (u8) sizeof(struct sadb_x_nat_t_port),
+	[SADB_X_EXT_NAT_T_OA]		= (u8) sizeof(struct sadb_address),
+};
+
+/* Verify sadb_address_{len,prefixlen} against sa_family.  */
+static int verify_address_len(void *p)
+{
+	struct sadb_address *sp = p;
+	struct sockaddr *addr = (struct sockaddr *)(sp + 1);
+	struct sockaddr_in *sin;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct sockaddr_in6 *sin6;
+#endif
+	int len;
+
+	/* parse_exthdrs() only guarantees sizeof(*sp) bytes; make sure the
+	 * extension is long enough to hold sa_family before reading it.
+	 */
+	if (sp->sadb_address_len * sizeof(uint64_t) <
+	    (size_t)((char *)(&addr->sa_family + 1) - (char *)sp))
+		return -EINVAL;
+
+	switch (addr->sa_family) {
+	case AF_INET:
+		len  = sizeof(*sp) + sizeof(*sin) + (sizeof(uint64_t) - 1);
+		len /= sizeof(uint64_t);
+		if (sp->sadb_address_len != len ||
+		    sp->sadb_address_prefixlen > 32)
+			return -EINVAL;
+		break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case AF_INET6:
+		len  = sizeof(*sp) + sizeof(*sin6) + (sizeof(uint64_t) - 1);
+		len /= sizeof(uint64_t);
+		if (sp->sadb_address_len != len ||
+		    sp->sadb_address_prefixlen > 128)
+			return -EINVAL;
+		break;
+#endif
+	default:
+		/* XXX The association/policy database cannot yet cope with
+		 * XXX arbitrary sockaddr families (e.g. for OSPF/RSVP/RIPV2/
+		 * XXX MIP users); when it can, remove this -EINVAL.  -DaveM
+		 */
+		return -EINVAL;
+	};
+	return 0;
+}
+
+static int present_and_same_family(struct sadb_address *src,
+				   struct sadb_address *dst)
+{
+	sa_family_t family;
+
+	if (src == NULL || dst == NULL)
+		return 0;
+
+	/* The sockaddr sits right behind each sadb_address header. */
+	family = ((struct sockaddr *)(src + 1))->sa_family;
+	if (family != ((struct sockaddr *)(dst + 1))->sa_family)
+		return 0;
+	switch (family) {
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case AF_INET6:
+#endif
+	case AF_INET:
+		return 1;
+	}
+	return 0;
+}
+
+static int parse_exthdrs(struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+{
+	char *p = (char *) hdr;
+	int len = skb->len;
+	/* Index the extensions after hdr into ext_hdrs[type-1]; 0 or -EINVAL. */
+	len -= sizeof(*hdr);
+	p += sizeof(*hdr);
+	while (len > 0) {
+		struct sadb_ext *ehdr = (struct sadb_ext *) p;
+		uint16_t ext_type;
+		int ext_len;
+
+		ext_len  = ehdr->sadb_ext_len;
+		ext_len *= sizeof(uint64_t);	/* sadb_ext_len is scaled by 8 to get bytes */
+		ext_type = ehdr->sadb_ext_type;
+		if (ext_len < sizeof(uint64_t) ||
+		    ext_len > len ||
+		    ext_type == SADB_EXT_RESERVED)
+			return -EINVAL;
+
+		if (ext_type <= SADB_EXT_MAX) {	/* larger types are skipped, not recorded */
+			int min = (int) sadb_ext_min_len[ext_type];
+			if (ext_len < min)
+				return -EINVAL;
+			if (ext_hdrs[ext_type-1] != NULL)	/* same extension type seen twice */
+				return -EINVAL;
+			if (ext_type == SADB_EXT_ADDRESS_SRC ||
+			    ext_type == SADB_EXT_ADDRESS_DST ||
+			    ext_type == SADB_EXT_ADDRESS_PROXY ||
+			    ext_type == SADB_X_EXT_NAT_T_OA) {
+				if (verify_address_len(p))
+					return -EINVAL;
+			}				
+			ext_hdrs[ext_type-1] = p;
+		}
+		p   += ext_len;
+		len -= ext_len;
+	}
+
+	return 0;
+}
+
+static uint16_t
+pfkey_satype2proto(uint8_t satype)
+{
+	uint16_t proto = 0;	/* 0: SA type not handled here */
+
+	/* Translate a PF_KEY SA type into the matching protocol
+	 * number; SADB_SATYPE_UNSPEC becomes IPSEC_PROTO_ANY. */
+	if (satype == SADB_SATYPE_UNSPEC)
+		proto = IPSEC_PROTO_ANY;
+	else if (satype == SADB_SATYPE_AH)
+		proto = IPPROTO_AH;
+	else if (satype == SADB_SATYPE_ESP)
+		proto = IPPROTO_ESP;
+	else if (satype == SADB_X_SATYPE_IPCOMP)
+		proto = IPPROTO_COMP;
+
+	return proto;
+}
+
+static uint8_t
+pfkey_proto2satype(uint16_t proto)
+{
+	/*
+	 * Only AH, ESP and IPComp have an SA type; every other
+	 * protocol number yields 0.
+	 */
+	if (proto == IPPROTO_AH)
+		return SADB_SATYPE_AH;
+	if (proto == IPPROTO_ESP)
+		return SADB_SATYPE_ESP;
+	if (proto == IPPROTO_COMP)
+		return SADB_X_SATYPE_IPCOMP;
+
+	return 0;
+}
+
+/* BTW, this scheme means that there is no way with PFKEY2 sockets to
+ * say specifically 'just raw sockets' as we encode them as 255.
+ */
+
+static uint8_t pfkey_proto_to_xfrm(uint8_t proto)
+{
+	return proto != IPSEC_PROTO_ANY ? proto : 0;	/* "any" is 0 on the xfrm side */
+}
+
+static uint8_t pfkey_proto_from_xfrm(uint8_t proto)
+{
+	return proto == 0 ? IPSEC_PROTO_ANY : proto;	/* 0 maps back to "any" */
+}
+
+static int pfkey_sadb_addr2xfrm_addr(struct sadb_address *addr,
+				     xfrm_address_t *xaddr)
+{
+	struct sockaddr *sa = (struct sockaddr *)(addr + 1);
+
+	/* Copy the raw address out of the sockaddr; return its family or 0. */
+	switch (sa->sa_family) {
+	case AF_INET:
+		xaddr->a4 = ((struct sockaddr_in *)sa)->sin_addr.s_addr;
+		return AF_INET;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case AF_INET6:
+		memcpy(xaddr->a6, &((struct sockaddr_in6 *)sa)->sin6_addr,
+		       sizeof(struct in6_addr));
+		return AF_INET6;
+#endif
+	default:
+		return 0;
+	}
+}
+
+static struct xfrm_state *pfkey_xfrm_state_lookup(struct sadb_msg *hdr, void **ext_hdrs)
+{
+	struct sadb_sa *sa = (struct sadb_sa *) ext_hdrs[SADB_EXT_SA-1];
+	struct sadb_address *addr;
+	struct sockaddr *dst;
+	xfrm_address_t *xaddr;
+	uint16_t proto;
+
+	/* An SA extension carrying the SPI is mandatory. */
+	if (sa == NULL)
+		return NULL;
+
+	/* The message's SA type must map to a protocol. */
+	proto = pfkey_satype2proto(hdr->sadb_msg_satype);
+	if (proto == 0)
+		return NULL;
+
+	/* sadb_address_len should be checked by caller */
+	addr = (struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_DST-1];
+	if (addr == NULL)
+		return NULL;
+
+	/* Point xaddr at the raw destination address inside the sockaddr. */
+	dst = (struct sockaddr *)(addr + 1);
+	switch (dst->sa_family) {
+	case AF_INET:
+		xaddr = (xfrm_address_t *)&((struct sockaddr_in *)dst)->sin_addr;
+		break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case AF_INET6:
+		xaddr = (xfrm_address_t *)&((struct sockaddr_in6 *)dst)->sin6_addr;
+		break;
+#endif
+	default:
+		return NULL;
+	}
+
+	return xfrm_state_lookup(xaddr, sa->sadb_sa_spi, proto,
+				 dst->sa_family);
+}
+
+/* Round a up to a multiple of 8 (PF_KEY lengths count 64-bit words). */
+#define PFKEY_ALIGN8(a) (1 + (((a) - 1) | (8 - 1)))
+
+/* Size of the sockaddr stored for an address family, padded to a multiple
+ * of 8 bytes.  Returns 0 for families this code does not handle. */
+static int
+pfkey_sockaddr_size(sa_family_t family)
+{
+	if (family == AF_INET)
+		return PFKEY_ALIGN8(sizeof(struct sockaddr_in));
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	if (family == AF_INET6)
+		return PFKEY_ALIGN8(sizeof(struct sockaddr_in6));
+#endif
+	return 0;
+}
+
+/* Build the PF_KEY message describing x.  add_keys: append key extensions;
+ * hsc: bit 0 adds the soft, bit 1 the hard lifetime.  ERR_PTR() on failure. */
+static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys, int hsc)
+{
+	struct sk_buff *skb;
+	struct sadb_msg *hdr;
+	struct sadb_sa *sa;
+	struct sadb_lifetime *lifetime;
+	struct sadb_address *addr;
+	struct sadb_key *key;
+	struct sadb_x_sa2 *sa2;
+	struct sockaddr_in *sin;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct sockaddr_in6 *sin6;
+#endif
+	int size;
+	int auth_key_size = 0;
+	int encrypt_key_size = 0;
+	int sockaddr_size;
+	struct xfrm_encap_tmpl *natt = x->encap;
+
+	/* address family check: bail out instead of hitting BUG() below */
+	sockaddr_size = pfkey_sockaddr_size(x->props.family);
+	if (!sockaddr_size)
+		return ERR_PTR(-EINVAL);
+
+	/* base, SA, (lifetime (HSC),) address(SD), (address(P),)
+	   key(AE), (identity(SD),) (sensitivity)> */
+	size = sizeof(struct sadb_msg) +sizeof(struct sadb_sa) +
+		sizeof(struct sadb_lifetime) +
+		((hsc & 1) ? sizeof(struct sadb_lifetime) : 0) +
+		((hsc & 2) ? sizeof(struct sadb_lifetime) : 0) +
+			sizeof(struct sadb_address)*2 +
+				sockaddr_size*2 +
+					sizeof(struct sadb_x_sa2);
+	/* identity & sensitivity */
+
+	if ((x->props.family == AF_INET &&
+	     x->sel.saddr.a4 != x->props.saddr.a4)
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	    || (x->props.family == AF_INET6 &&
+		memcmp (x->sel.saddr.a6, x->props.saddr.a6, sizeof (struct in6_addr)))
+#endif
+		)
+		size += sizeof(struct sadb_address) + sockaddr_size;
+
+	if (add_keys) {
+		if (x->aalg && x->aalg->alg_key_len) {
+			auth_key_size =
+				PFKEY_ALIGN8((x->aalg->alg_key_len + 7) / 8);
+			size += sizeof(struct sadb_key) + auth_key_size;
+		}
+		if (x->ealg && x->ealg->alg_key_len) {
+			encrypt_key_size =
+				PFKEY_ALIGN8((x->ealg->alg_key_len+7) / 8);
+			size += sizeof(struct sadb_key) + encrypt_key_size;
+		}
+	}
+
+	if (natt && natt->encap_type) {
+		size += sizeof(struct sadb_x_nat_t_type);
+		size += sizeof(struct sadb_x_nat_t_port);
+		size += sizeof(struct sadb_x_nat_t_port);
+	}
+
+	skb =  alloc_skb(size + 16, GFP_ATOMIC);
+	if (skb == NULL)
+		return ERR_PTR(-ENOBUFS);
+
+	/* call should fill header later */
+	hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg));
+	memset(hdr, 0, size);	/* clears all size bytes, not only hdr; XXX do we need this ? */
+	hdr->sadb_msg_len = size / sizeof(uint64_t);
+
+	/* sa */
+	sa = (struct sadb_sa *)  skb_put(skb, sizeof(struct sadb_sa));
+	sa->sadb_sa_len = sizeof(struct sadb_sa)/sizeof(uint64_t);
+	sa->sadb_sa_exttype = SADB_EXT_SA;
+	sa->sadb_sa_spi = x->id.spi;
+	sa->sadb_sa_replay = x->props.replay_window;
+	sa->sadb_sa_state = SADB_SASTATE_DYING;
+	if (x->km.state == XFRM_STATE_VALID && !x->km.dying)
+		sa->sadb_sa_state = SADB_SASTATE_MATURE;
+	else if (x->km.state == XFRM_STATE_ACQ)
+		sa->sadb_sa_state = SADB_SASTATE_LARVAL;
+	else if (x->km.state == XFRM_STATE_EXPIRED)
+		sa->sadb_sa_state = SADB_SASTATE_DEAD;
+	sa->sadb_sa_auth = 0;
+	if (x->aalg) {
+		struct xfrm_algo_desc *a = xfrm_aalg_get_byname(x->aalg->alg_name);
+		sa->sadb_sa_auth = a ? a->desc.sadb_alg_id : 0;
+	}
+	sa->sadb_sa_encrypt = 0;
+	BUG_ON(x->ealg && x->calg);
+	if (x->ealg) {
+		struct xfrm_algo_desc *a = xfrm_ealg_get_byname(x->ealg->alg_name);
+		sa->sadb_sa_encrypt = a ? a->desc.sadb_alg_id : 0;
+	}
+	/* KAME compatible: sadb_sa_encrypt is overloaded with calg id */
+	if (x->calg) {
+		struct xfrm_algo_desc *a = xfrm_calg_get_byname(x->calg->alg_name);
+		sa->sadb_sa_encrypt = a ? a->desc.sadb_alg_id : 0;
+	}
+
+	sa->sadb_sa_flags = 0;
+	if (x->props.flags & XFRM_STATE_NOECN)
+		sa->sadb_sa_flags |= SADB_SAFLAGS_NOECN;
+
+	/* hard time */
+	if (hsc & 2) {
+		lifetime = (struct sadb_lifetime *)  skb_put(skb,
+							     sizeof(struct sadb_lifetime));
+		lifetime->sadb_lifetime_len =
+			sizeof(struct sadb_lifetime)/sizeof(uint64_t);
+		lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
+		lifetime->sadb_lifetime_allocations =  _X2KEY(x->lft.hard_packet_limit);
+		lifetime->sadb_lifetime_bytes = _X2KEY(x->lft.hard_byte_limit);
+		lifetime->sadb_lifetime_addtime = x->lft.hard_add_expires_seconds;
+		lifetime->sadb_lifetime_usetime = x->lft.hard_use_expires_seconds;
+	}
+	/* soft time */
+	if (hsc & 1) {
+		lifetime = (struct sadb_lifetime *)  skb_put(skb,
+							     sizeof(struct sadb_lifetime));
+		lifetime->sadb_lifetime_len =
+			sizeof(struct sadb_lifetime)/sizeof(uint64_t);
+		lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
+		lifetime->sadb_lifetime_allocations =  _X2KEY(x->lft.soft_packet_limit);
+		lifetime->sadb_lifetime_bytes = _X2KEY(x->lft.soft_byte_limit);
+		lifetime->sadb_lifetime_addtime = x->lft.soft_add_expires_seconds;
+		lifetime->sadb_lifetime_usetime = x->lft.soft_use_expires_seconds;
+	}
+	/* current time: always present, whatever hsc says */
+	lifetime = (struct sadb_lifetime *)  skb_put(skb,
+						     sizeof(struct sadb_lifetime));
+	lifetime->sadb_lifetime_len =
+		sizeof(struct sadb_lifetime)/sizeof(uint64_t);
+	lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
+	lifetime->sadb_lifetime_allocations = x->curlft.packets;
+	lifetime->sadb_lifetime_bytes = x->curlft.bytes;
+	lifetime->sadb_lifetime_addtime = x->curlft.add_time;
+	lifetime->sadb_lifetime_usetime = x->curlft.use_time;
+	/* src address */
+	addr = (struct sadb_address*) skb_put(skb,
+					      sizeof(struct sadb_address)+sockaddr_size);
+	addr->sadb_address_len =
+		(sizeof(struct sadb_address)+sockaddr_size)/
+			sizeof(uint64_t);
+	addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC;
+	/* "if the ports are non-zero, then the sadb_address_proto field,
+	   normally zero, MUST be filled in with the transport
+	   protocol's number." - RFC2367 */
+	addr->sadb_address_proto = 0;
+	addr->sadb_address_reserved = 0;
+	if (x->props.family == AF_INET) {
+		addr->sadb_address_prefixlen = 32;
+
+		sin = (struct sockaddr_in *) (addr + 1);
+		sin->sin_family = AF_INET;
+		sin->sin_addr.s_addr = x->props.saddr.a4;
+		sin->sin_port = 0;
+		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+	}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	else if (x->props.family == AF_INET6) {
+		addr->sadb_address_prefixlen = 128;
+
+		sin6 = (struct sockaddr_in6 *) (addr + 1);
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_port = 0;
+		sin6->sin6_flowinfo = 0;
+		memcpy(&sin6->sin6_addr, x->props.saddr.a6,
+		       sizeof(struct in6_addr));
+		sin6->sin6_scope_id = 0;
+	}
+#endif
+	else
+		BUG();
+
+	/* dst address */
+	addr = (struct sadb_address*) skb_put(skb,
+					      sizeof(struct sadb_address)+sockaddr_size);
+	addr->sadb_address_len =
+		(sizeof(struct sadb_address)+sockaddr_size)/
+			sizeof(uint64_t);
+	addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST;
+	addr->sadb_address_proto = 0;
+	addr->sadb_address_prefixlen = 32; /* XXX: overwritten with 128 in the AF_INET6 branch */
+	addr->sadb_address_reserved = 0;
+	if (x->props.family == AF_INET) {
+		sin = (struct sockaddr_in *) (addr + 1);
+		sin->sin_family = AF_INET;
+		sin->sin_addr.s_addr = x->id.daddr.a4;
+		sin->sin_port = 0;
+		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+
+		if (x->sel.saddr.a4 != x->props.saddr.a4) {
+			addr = (struct sadb_address*) skb_put(skb,
+				sizeof(struct sadb_address)+sockaddr_size);
+			addr->sadb_address_len =
+				(sizeof(struct sadb_address)+sockaddr_size)/
+				sizeof(uint64_t);
+			addr->sadb_address_exttype = SADB_EXT_ADDRESS_PROXY;
+			addr->sadb_address_proto =
+				pfkey_proto_from_xfrm(x->sel.proto);
+			addr->sadb_address_prefixlen = x->sel.prefixlen_s;
+			addr->sadb_address_reserved = 0;
+
+			sin = (struct sockaddr_in *) (addr + 1);
+			sin->sin_family = AF_INET;
+			sin->sin_addr.s_addr = x->sel.saddr.a4;
+			sin->sin_port = x->sel.sport;
+			memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+		}
+	}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	else if (x->props.family == AF_INET6) {
+		addr->sadb_address_prefixlen = 128;
+
+		sin6 = (struct sockaddr_in6 *) (addr + 1);
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_port = 0;
+		sin6->sin6_flowinfo = 0;
+		memcpy(&sin6->sin6_addr, x->id.daddr.a6, sizeof(struct in6_addr));
+		sin6->sin6_scope_id = 0;
+
+		if (memcmp (x->sel.saddr.a6, x->props.saddr.a6,
+			    sizeof(struct in6_addr))) {
+			addr = (struct sadb_address *) skb_put(skb,
+				sizeof(struct sadb_address)+sockaddr_size);
+			addr->sadb_address_len =
+				(sizeof(struct sadb_address)+sockaddr_size)/
+				sizeof(uint64_t);
+			addr->sadb_address_exttype = SADB_EXT_ADDRESS_PROXY;
+			addr->sadb_address_proto =
+				pfkey_proto_from_xfrm(x->sel.proto);
+			addr->sadb_address_prefixlen = x->sel.prefixlen_s;
+			addr->sadb_address_reserved = 0;
+
+			sin6 = (struct sockaddr_in6 *) (addr + 1);
+			sin6->sin6_family = AF_INET6;
+			sin6->sin6_port = x->sel.sport;
+			sin6->sin6_flowinfo = 0;
+			memcpy(&sin6->sin6_addr, x->sel.saddr.a6,
+			       sizeof(struct in6_addr));
+			sin6->sin6_scope_id = 0;
+		}
+	}
+#endif
+	else
+		BUG();
+
+	/* auth key */
+	if (add_keys && auth_key_size) {
+		key = (struct sadb_key *) skb_put(skb,
+						  sizeof(struct sadb_key)+auth_key_size);
+		key->sadb_key_len = (sizeof(struct sadb_key) + auth_key_size) /
+			sizeof(uint64_t);
+		key->sadb_key_exttype = SADB_EXT_KEY_AUTH;
+		key->sadb_key_bits = x->aalg->alg_key_len;
+		key->sadb_key_reserved = 0;
+		memcpy(key + 1, x->aalg->alg_key, (x->aalg->alg_key_len+7)/8);
+	}
+	/* encrypt key */
+	if (add_keys && encrypt_key_size) {
+		key = (struct sadb_key *) skb_put(skb,
+						  sizeof(struct sadb_key)+encrypt_key_size);
+		key->sadb_key_len = (sizeof(struct sadb_key) +
+				     encrypt_key_size) / sizeof(uint64_t);
+		key->sadb_key_exttype = SADB_EXT_KEY_ENCRYPT;
+		key->sadb_key_bits = x->ealg->alg_key_len;
+		key->sadb_key_reserved = 0;
+		memcpy(key + 1, x->ealg->alg_key,
+		       (x->ealg->alg_key_len+7)/8);
+	}
+
+	/* sa2: mode (stored off by one) and reqid */
+	sa2 = (struct sadb_x_sa2 *)  skb_put(skb, sizeof(struct sadb_x_sa2));
+	sa2->sadb_x_sa2_len = sizeof(struct sadb_x_sa2)/sizeof(uint64_t);
+	sa2->sadb_x_sa2_exttype = SADB_X_EXT_SA2;
+	sa2->sadb_x_sa2_mode = x->props.mode + 1;
+	sa2->sadb_x_sa2_reserved1 = 0;
+	sa2->sadb_x_sa2_reserved2 = 0;
+	sa2->sadb_x_sa2_sequence = 0;
+	sa2->sadb_x_sa2_reqid = x->props.reqid;
+
+	if (natt && natt->encap_type) {
+		struct sadb_x_nat_t_type *n_type;
+		struct sadb_x_nat_t_port *n_port;
+
+		/* type */
+		n_type = (struct sadb_x_nat_t_type*) skb_put(skb, sizeof(*n_type));
+		n_type->sadb_x_nat_t_type_len = sizeof(*n_type)/sizeof(uint64_t);
+		n_type->sadb_x_nat_t_type_exttype = SADB_X_EXT_NAT_T_TYPE;
+		n_type->sadb_x_nat_t_type_type = natt->encap_type;
+		n_type->sadb_x_nat_t_type_reserved[0] = 0;
+		n_type->sadb_x_nat_t_type_reserved[1] = 0;
+		n_type->sadb_x_nat_t_type_reserved[2] = 0;
+
+		/* source port */
+		n_port = (struct sadb_x_nat_t_port*) skb_put(skb, sizeof (*n_port));
+		n_port->sadb_x_nat_t_port_len = sizeof(*n_port)/sizeof(uint64_t);
+		n_port->sadb_x_nat_t_port_exttype = SADB_X_EXT_NAT_T_SPORT;
+		n_port->sadb_x_nat_t_port_port = natt->encap_sport;
+		n_port->sadb_x_nat_t_port_reserved = 0;
+
+		/* dest port */
+		n_port = (struct sadb_x_nat_t_port*) skb_put(skb, sizeof (*n_port));
+		n_port->sadb_x_nat_t_port_len = sizeof(*n_port)/sizeof(uint64_t);
+		n_port->sadb_x_nat_t_port_exttype = SADB_X_EXT_NAT_T_DPORT;
+		n_port->sadb_x_nat_t_port_port = natt->encap_dport;
+		n_port->sadb_x_nat_t_port_reserved = 0;
+	}
+
+	return skb;
+}
+
+/* Build a new xfrm_state from a parsed PF_KEY message.  Returns the state or
+ * ERR_PTR(): -EINVAL for a rejected message, -ENOBUFS for every later failure. */
+static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr,
+						void **ext_hdrs)
+{
+	struct xfrm_state *x;
+	struct sadb_lifetime *lifetime;
+	struct sadb_sa *sa;
+	struct sadb_key *key;
+	uint16_t proto;
+
+	sa = (struct sadb_sa *) ext_hdrs[SADB_EXT_SA-1];
+	if (!sa ||
+	    !present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
+				     ext_hdrs[SADB_EXT_ADDRESS_DST-1]))
+		return ERR_PTR(-EINVAL);
+	if (hdr->sadb_msg_satype == SADB_SATYPE_ESP &&
+	    !ext_hdrs[SADB_EXT_KEY_ENCRYPT-1])
+		return ERR_PTR(-EINVAL);
+	if (hdr->sadb_msg_satype == SADB_SATYPE_AH &&
+	    !ext_hdrs[SADB_EXT_KEY_AUTH-1])
+		return ERR_PTR(-EINVAL);
+	if (!!ext_hdrs[SADB_EXT_LIFETIME_HARD-1] !=
+	    !!ext_hdrs[SADB_EXT_LIFETIME_SOFT-1])
+		return ERR_PTR(-EINVAL);
+
+	proto = pfkey_satype2proto(hdr->sadb_msg_satype);
+	if (proto == 0)
+		return ERR_PTR(-EINVAL);
+
+	/* RFC2367:
+
+   Only SADB_SASTATE_MATURE SAs may be submitted in an SADB_ADD message.
+   SADB_SASTATE_LARVAL SAs are created by SADB_GETSPI and it is not
+   sensible to add a new SA in the DYING or SADB_SASTATE_DEAD state.
+   Therefore, the sadb_sa_state field of all submitted SAs MUST be
+   SADB_SASTATE_MATURE and the kernel MUST return an error if this is
+   not true.
+
+           However, KAME setkey always uses SADB_SASTATE_LARVAL.
+	   Hence, we have to _ignore_ sadb_sa_state, which is also reasonable.
+	 */
+	if (sa->sadb_sa_auth > SADB_AALG_MAX ||
+	    (hdr->sadb_msg_satype == SADB_X_SATYPE_IPCOMP &&
+	     sa->sadb_sa_encrypt > SADB_X_CALG_MAX) ||
+	    sa->sadb_sa_encrypt > SADB_EALG_MAX)
+		return ERR_PTR(-EINVAL);
+	key = (struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1];
+	if (key != NULL &&
+	    sa->sadb_sa_auth != SADB_X_AALG_NULL &&
+	    ((key->sadb_key_bits+7) / 8 == 0 ||
+	     (key->sadb_key_bits+7) / 8 > key->sadb_key_len * sizeof(uint64_t)))
+		return ERR_PTR(-EINVAL);
+	key = ext_hdrs[SADB_EXT_KEY_ENCRYPT-1];
+	if (key != NULL &&
+	    sa->sadb_sa_encrypt != SADB_EALG_NULL &&
+	    ((key->sadb_key_bits+7) / 8 == 0 ||
+	     (key->sadb_key_bits+7) / 8 > key->sadb_key_len * sizeof(uint64_t)))
+		return ERR_PTR(-EINVAL);
+
+	x = xfrm_state_alloc();
+	if (x == NULL)
+		return ERR_PTR(-ENOBUFS);
+
+	x->id.proto = proto;
+	x->id.spi = sa->sadb_sa_spi;
+	x->props.replay_window = sa->sadb_sa_replay;
+	if (sa->sadb_sa_flags & SADB_SAFLAGS_NOECN)
+		x->props.flags |= XFRM_STATE_NOECN;
+
+	lifetime = (struct sadb_lifetime*) ext_hdrs[SADB_EXT_LIFETIME_HARD-1];
+	if (lifetime != NULL) {
+		x->lft.hard_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations);
+		x->lft.hard_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes);
+		x->lft.hard_add_expires_seconds = lifetime->sadb_lifetime_addtime;
+		x->lft.hard_use_expires_seconds = lifetime->sadb_lifetime_usetime;
+	}
+	lifetime = (struct sadb_lifetime*) ext_hdrs[SADB_EXT_LIFETIME_SOFT-1];
+	if (lifetime != NULL) {
+		x->lft.soft_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations);
+		x->lft.soft_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes);
+		x->lft.soft_add_expires_seconds = lifetime->sadb_lifetime_addtime;
+		x->lft.soft_use_expires_seconds = lifetime->sadb_lifetime_usetime;
+	}
+	key = (struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1];
+	if (sa->sadb_sa_auth) {
+		int keysize = 0;
+		struct xfrm_algo_desc *a = xfrm_aalg_get_byid(sa->sadb_sa_auth);
+		if (!a)
+			goto out;
+		if (key)
+			keysize = (key->sadb_key_bits + 7) / 8;
+		x->aalg = kmalloc(sizeof(*x->aalg) + keysize, GFP_KERNEL);
+		if (!x->aalg)
+			goto out;
+		strcpy(x->aalg->alg_name, a->name);
+		x->aalg->alg_key_len = 0;
+		if (key) {
+			x->aalg->alg_key_len = key->sadb_key_bits;
+			memcpy(x->aalg->alg_key, key+1, keysize);
+		}
+		x->props.aalgo = sa->sadb_sa_auth;
+	}
+	if (sa->sadb_sa_encrypt) {
+		if (hdr->sadb_msg_satype == SADB_X_SATYPE_IPCOMP) {
+			struct xfrm_algo_desc *a = xfrm_calg_get_byid(sa->sadb_sa_encrypt);
+			if (!a)
+				goto out;
+			x->calg = kmalloc(sizeof(*x->calg), GFP_KERNEL);
+			if (!x->calg)
+				goto out;
+			strcpy(x->calg->alg_name, a->name);
+			x->props.calgo = sa->sadb_sa_encrypt;
+		} else {
+			int keysize = 0;
+			struct xfrm_algo_desc *a = xfrm_ealg_get_byid(sa->sadb_sa_encrypt);
+			if (!a)
+				goto out;
+			key = (struct sadb_key*) ext_hdrs[SADB_EXT_KEY_ENCRYPT-1];
+			if (key)
+				keysize = (key->sadb_key_bits + 7) / 8;
+			x->ealg = kmalloc(sizeof(*x->ealg) + keysize, GFP_KERNEL);
+			if (!x->ealg)
+				goto out;
+			strcpy(x->ealg->alg_name, a->name);
+			x->ealg->alg_key_len = 0;
+			if (key) {
+				x->ealg->alg_key_len = key->sadb_key_bits;
+				memcpy(x->ealg->alg_key, key+1, keysize);
+			}
+			x->props.ealgo = sa->sadb_sa_encrypt;
+		}
+	}
+
+	x->props.family = pfkey_sadb_addr2xfrm_addr((struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
+						    &x->props.saddr);
+	if (!x->props.family)
+		goto out;
+	pfkey_sadb_addr2xfrm_addr((struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_DST-1],
+				  &x->id.daddr);
+
+	if (ext_hdrs[SADB_X_EXT_SA2-1]) {
+		struct sadb_x_sa2 *sa2 = (void*)ext_hdrs[SADB_X_EXT_SA2-1];
+		x->props.mode = sa2->sadb_x_sa2_mode;
+		if (x->props.mode)
+			x->props.mode--;
+		x->props.reqid = sa2->sadb_x_sa2_reqid;
+	}
+
+	if (ext_hdrs[SADB_EXT_ADDRESS_PROXY-1]) {
+		struct sadb_address *addr = ext_hdrs[SADB_EXT_ADDRESS_PROXY-1];
+
+		/* Nobody uses this, but we try. */
+		pfkey_sadb_addr2xfrm_addr(addr, &x->sel.saddr);
+		x->sel.prefixlen_s = addr->sadb_address_prefixlen;
+	}
+
+	if (ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1]) {
+		struct sadb_x_nat_t_type* n_type;
+		struct xfrm_encap_tmpl *natt;
+
+		x->encap = kmalloc(sizeof(*x->encap), GFP_KERNEL);
+		if (!x->encap)
+			goto out;
+
+		natt = x->encap;
+		memset(natt, 0, sizeof(*natt));	/* ports stay 0 when their extensions are absent */
+		n_type = ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1];
+		natt->encap_type = n_type->sadb_x_nat_t_type_type;
+
+		if (ext_hdrs[SADB_X_EXT_NAT_T_SPORT-1]) {
+			struct sadb_x_nat_t_port* n_port =
+				ext_hdrs[SADB_X_EXT_NAT_T_SPORT-1];
+			natt->encap_sport = n_port->sadb_x_nat_t_port_port;
+		}
+		if (ext_hdrs[SADB_X_EXT_NAT_T_DPORT-1]) {
+			struct sadb_x_nat_t_port* n_port =
+				ext_hdrs[SADB_X_EXT_NAT_T_DPORT-1];
+			natt->encap_dport = n_port->sadb_x_nat_t_port_port;
+		}
+	}
+
+	x->type = xfrm_get_type(proto, x->props.family);
+	if (x->type == NULL)
+		goto out;
+	if (x->type->init_state(x, NULL))
+		goto out;
+	x->km.seq = hdr->sadb_msg_seq;
+	x->km.state = XFRM_STATE_VALID;
+	return x;
+
+out:
+	x->km.state = XFRM_STATE_DEAD;
+	xfrm_state_put(x);
+	return ERR_PTR(-ENOBUFS);
+}
+
+static int pfkey_reserved(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+{
+	return -EOPNOTSUPP;	/* ignores all arguments: every request is refused */
+}
+
+/* Pick an SPI for the state xfrm_find_acq() returns for this request, using
+ * the SPIRANGE extension or a default range, and send the result to sk. */
+static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+{
+	struct sk_buff *resp_skb;
+	struct sadb_x_sa2 *sa2;
+	struct sadb_address *saddr, *daddr;
+	struct sadb_msg *out_hdr;
+	struct xfrm_state *x = NULL;
+	u8 mode = 0;
+	u32 reqid = 0;
+	u8 proto;
+	unsigned short family;
+	xfrm_address_t *xsaddr = NULL, *xdaddr = NULL;
+
+	if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
+				     ext_hdrs[SADB_EXT_ADDRESS_DST-1]))
+		return -EINVAL;
+
+	proto = pfkey_satype2proto(hdr->sadb_msg_satype);
+	if (proto == 0)
+		return -EINVAL;
+
+	if ((sa2 = ext_hdrs[SADB_X_EXT_SA2-1]) != NULL) {
+		/* mode 0 stays 0 instead of wrapping to 255 (cf. pfkey_msg2xfrm_state) */
+		mode = sa2->sadb_x_sa2_mode ? sa2->sadb_x_sa2_mode - 1 : 0;
+		reqid = sa2->sadb_x_sa2_reqid;
+	}
+
+	saddr = ext_hdrs[SADB_EXT_ADDRESS_SRC-1];
+	daddr = ext_hdrs[SADB_EXT_ADDRESS_DST-1];
+
+	family = ((struct sockaddr *)(saddr + 1))->sa_family;
+	switch (family) {
+	case AF_INET:
+		xdaddr = (xfrm_address_t *)&((struct sockaddr_in *)(daddr + 1))->sin_addr.s_addr;
+		xsaddr = (xfrm_address_t *)&((struct sockaddr_in *)(saddr + 1))->sin_addr.s_addr;
+		break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case AF_INET6:
+		xdaddr = (xfrm_address_t *)&((struct sockaddr_in6 *)(daddr + 1))->sin6_addr;
+		xsaddr = (xfrm_address_t *)&((struct sockaddr_in6 *)(saddr + 1))->sin6_addr;
+		break;
+#endif
+	}
+	if (xdaddr)
+		x = xfrm_find_acq(mode, reqid, proto, xdaddr, xsaddr, 1, family);
+
+	if (x == NULL)
+		return -ENOENT;
+
+	resp_skb = ERR_PTR(-ENOENT);
+
+	spin_lock_bh(&x->lock);
+	if (x->km.state != XFRM_STATE_DEAD) {
+		struct sadb_spirange *range = ext_hdrs[SADB_EXT_SPIRANGE-1];
+		u32 min_spi, max_spi;
+
+		if (range != NULL) {
+			min_spi = range->sadb_spirange_min;
+			max_spi = range->sadb_spirange_max;
+		} else {
+			min_spi = htonl(0x100);
+			max_spi = htonl(0x0fffffff);
+		}
+		xfrm_alloc_spi(x, min_spi, max_spi);
+		if (x->id.spi)
+			resp_skb = pfkey_xfrm_state2msg(x, 0, 3);
+	}
+	spin_unlock_bh(&x->lock);
+
+	if (IS_ERR(resp_skb)) {
+		xfrm_state_put(x);
+		return  PTR_ERR(resp_skb);
+	}
+
+	out_hdr = (struct sadb_msg *) resp_skb->data;
+	out_hdr->sadb_msg_version = hdr->sadb_msg_version;
+	out_hdr->sadb_msg_type = SADB_GETSPI;
+	out_hdr->sadb_msg_satype = pfkey_proto2satype(proto);
+	out_hdr->sadb_msg_errno = 0;
+	out_hdr->sadb_msg_reserved = 0;
+	out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
+	out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
+
+	xfrm_state_put(x);
+
+	pfkey_broadcast(resp_skb, GFP_KERNEL, BROADCAST_ONE, sk);
+
+	return 0;
+}
+
+/* A header-only message carrying both a sequence number and an errno moves
+ * the state with that sequence from ACQ to ERROR and wakes km_waitq. */
+static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+{
+	struct xfrm_state *acq;
+
+	if (hdr->sadb_msg_len != sizeof(struct sadb_msg)/8)
+		return -EOPNOTSUPP;
+	if (!hdr->sadb_msg_seq || !hdr->sadb_msg_errno)
+		return 0;
+
+	acq = xfrm_find_acq_byseq(hdr->sadb_msg_seq);
+	if (acq) {
+		spin_lock_bh(&acq->lock);
+		if (acq->km.state == XFRM_STATE_ACQ) {
+			acq->km.state = XFRM_STATE_ERROR;
+			wake_up(&km_waitq);
+		}
+		spin_unlock_bh(&acq->lock);
+		xfrm_state_put(acq);
+	}
+	return 0;
+}
+
+
+/* Install the SA described by a PF_KEY message: SADB_ADD adds a new state,
+ * any other message type dispatched here goes through xfrm_state_update().
+ * On success the SA (without keys) is sent out with BROADCAST_ALL. */
+static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+{
+	struct xfrm_state *x;
+	struct sk_buff *reply;
+	struct sadb_msg *rhdr;
+	int err;
+
+	xfrm_probe_algs();
+	x = pfkey_msg2xfrm_state(hdr, ext_hdrs);
+	if (IS_ERR(x))
+		return PTR_ERR(x);
+
+	err = (hdr->sadb_msg_type == SADB_ADD) ? xfrm_state_add(x)
+					       : xfrm_state_update(x);
+	if (err < 0) {
+		/* add/update failed: mark the new state dead and drop it */
+		x->km.state = XFRM_STATE_DEAD;
+		xfrm_state_put(x);
+		return err;
+	}
+
+	reply = pfkey_xfrm_state2msg(x, 0, 3);
+	if (IS_ERR(reply))
+		return  PTR_ERR(reply); /* XXX Should we return 0 here ? */
+
+	rhdr = (struct sadb_msg *) reply->data;
+	rhdr->sadb_msg_version = hdr->sadb_msg_version;
+	rhdr->sadb_msg_type = hdr->sadb_msg_type;
+	rhdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto);
+	rhdr->sadb_msg_errno = 0;
+	rhdr->sadb_msg_reserved = 0;
+	rhdr->sadb_msg_seq = hdr->sadb_msg_seq;
+	rhdr->sadb_msg_pid = hdr->sadb_msg_pid;
+
+	pfkey_broadcast(reply, GFP_ATOMIC, BROADCAST_ALL, sk);
+
+	return 0;
+}
+
+/* Delete the SA named by the message, then rebroadcast a clone of the request.
+ * -ESRCH if no SA matches, -EPERM if xfrm_state_kern() is true for it. */
+static int pfkey_delete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+{
+	struct xfrm_state *victim;
+
+	if (ext_hdrs[SADB_EXT_SA-1] == NULL)
+		return -EINVAL;
+	if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
+				     ext_hdrs[SADB_EXT_ADDRESS_DST-1]))
+		return -EINVAL;
+
+	victim = pfkey_xfrm_state_lookup(hdr, ext_hdrs);
+	if (!victim)
+		return -ESRCH;
+	if (xfrm_state_kern(victim)) {
+		xfrm_state_put(victim);
+		return -EPERM;
+	}
+
+	xfrm_state_delete(victim);
+	xfrm_state_put(victim);
+	pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL,
+			BROADCAST_ALL, sk);
+	return 0;
+}
+
+/* Look up one SA and send it (keys included) to sk with BROADCAST_ONE. */
+static int pfkey_get(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+{
+	struct sk_buff *out_skb;
+	struct sadb_msg *out_hdr;
+	struct xfrm_state *x;
+	uint8_t satype;
+
+	if (!ext_hdrs[SADB_EXT_SA-1] ||
+	    !present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
+				     ext_hdrs[SADB_EXT_ADDRESS_DST-1]))
+		return -EINVAL;
+	x = pfkey_xfrm_state_lookup(hdr, ext_hdrs);
+	if (x == NULL)
+		return -ESRCH;
+
+	out_skb = pfkey_xfrm_state2msg(x, 1, 3);
+	satype = pfkey_proto2satype(x->id.proto);	/* x must not be touched after the put below */
+	xfrm_state_put(x);
+	if (IS_ERR(out_skb))
+		return  PTR_ERR(out_skb);
+	out_hdr = (struct sadb_msg *) out_skb->data;
+	out_hdr->sadb_msg_version = hdr->sadb_msg_version;
+	out_hdr->sadb_msg_type = SADB_DUMP;
+	out_hdr->sadb_msg_satype = satype;
+	out_hdr->sadb_msg_errno = 0;
+	out_hdr->sadb_msg_reserved = 0;
+	out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
+	out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
+	pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk);
+	return 0;
+}
+
+/* Compose the message pfkey_register() broadcasts: a copy of the request
+ * header followed by up to two "supported" extensions, authentication
+ * first, then encryption.  A class whose count is zero gets no extension.
+ * allocation is passed straight to alloc_skb().
+ * Returns NULL when the skb cannot be allocated. */
+static struct sk_buff *compose_sadb_supported(struct sadb_msg *orig, int allocation)
+{
+	struct sk_buff *skb;
+	struct sadb_msg *hdr;
+	struct xfrm_algo_desc *alg;
+	int len, auth_len, enc_len, i;
+
+	/* a non-empty class needs one header plus one sadb_alg per algorithm */
+	auth_len = xfrm_count_auth_supported();
+	if (auth_len)
+		auth_len = auth_len * sizeof(struct sadb_alg) +
+			   sizeof(struct sadb_supported);
+
+	enc_len = xfrm_count_enc_supported();
+	if (enc_len)
+		enc_len = enc_len * sizeof(struct sadb_alg) +
+			  sizeof(struct sadb_supported);
+
+	len = enc_len + auth_len + sizeof(struct sadb_msg);
+
+	/* 16 bytes are requested beyond the message itself */
+	skb = alloc_skb(len + 16, allocation);
+	if (!skb)
+		return NULL;
+
+	hdr = (struct sadb_msg *) skb_put(skb, sizeof(*hdr));
+	pfkey_hdr_dup(hdr, orig);
+	hdr->sadb_msg_errno = 0;
+	/* PF_KEY lengths are counted in 64-bit words */
+	hdr->sadb_msg_len = len / sizeof(uint64_t);
+
+	if (auth_len) {
+		struct sadb_supported *sp;
+		struct sadb_alg *ap;
+
+		sp = (struct sadb_supported *) skb_put(skb, auth_len);
+		sp->sadb_supported_len = auth_len / sizeof(uint64_t);
+		sp->sadb_supported_exttype = SADB_EXT_SUPPORTED_AUTH;
+
+		/* only algorithms flagged available are listed */
+		ap = (struct sadb_alg *) (sp + 1);
+		for (i = 0; (alg = xfrm_aalg_get_byidx(i)) != NULL; i++) {
+			if (alg->available)
+				*ap++ = alg->desc;
+		}
+	}
+
+	if (enc_len) {
+		struct sadb_supported *sp;
+		struct sadb_alg *ap;
+
+		sp = (struct sadb_supported *) skb_put(skb, enc_len);
+		sp->sadb_supported_len = enc_len / sizeof(uint64_t);
+		sp->sadb_supported_exttype = SADB_EXT_SUPPORTED_ENCRYPT;
+
+		/* only algorithms flagged available are listed */
+		ap = (struct sadb_alg *) (sp + 1);
+		for (i = 0; (alg = xfrm_ealg_get_byidx(i)) != NULL; i++) {
+			if (alg->available)
+				*ap++ = alg->desc;
+		}
+	}
+
+	return skb;
+}
+
+/* Mark sk as registered for the SA type (-EEXIST if it already is; UNSPEC is
+ * not tracked) and broadcast the supported-algorithm list to registered sockets. */
+static int pfkey_register(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+{
+	struct pfkey_opt *pfk = pfkey_sk(sk);
+	int satype = hdr->sadb_msg_satype;
+	struct sk_buff *reply;
+
+	if (satype > SADB_SATYPE_MAX)
+		return -EINVAL;
+
+	if (satype != SADB_SATYPE_UNSPEC) {
+		if (pfk->registered & (1<<satype))
+			return -EEXIST;
+		pfk->registered |= (1<<satype);
+	}
+
+	xfrm_probe_algs();
+	reply = compose_sadb_supported(hdr, GFP_KERNEL);
+	if (!reply) {
+		if (satype != SADB_SATYPE_UNSPEC)
+			pfk->registered &= ~(1<<satype);	/* undo the registration */
+		return -ENOBUFS;
+	}
+
+	pfkey_broadcast(reply, GFP_KERNEL, BROADCAST_REGISTERED, sk);
+	return 0;
+}
+
+/* Call xfrm_state_flush() for the requested SA type and broadcast a
+ * header-only notice.  The notice skb is allocated before anything is flushed. */
+static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+{
+	unsigned proto = pfkey_satype2proto(hdr->sadb_msg_satype);
+	struct sk_buff *notice;
+	struct sadb_msg *nhdr;
+
+	if (!proto)
+		return -EINVAL;
+
+	notice = alloc_skb(sizeof(*nhdr) + 16, GFP_KERNEL);
+	if (notice == NULL)
+		return -ENOBUFS;
+
+	xfrm_state_flush(proto);
+
+	nhdr = (struct sadb_msg *) skb_put(notice, sizeof(*nhdr));
+	pfkey_hdr_dup(nhdr, hdr);
+	nhdr->sadb_msg_errno = 0;
+	nhdr->sadb_msg_len = sizeof(*nhdr) / sizeof(uint64_t);
+
+	pfkey_broadcast(notice, GFP_KERNEL, BROADCAST_ALL, NULL);
+	return 0;
+}
+
+struct pfkey_dump_data	/* context pfkey_dump() hands to dump_sa() through xfrm_state_walk() */
+{
+	struct sk_buff *skb;	/* the request skb */
+	struct sadb_msg *hdr;	/* request header; dump_sa() copies its version and pid */
+	struct sock *sk;	/* socket each reply is sent to */
+};
+
+/* xfrm_state_walk() callback: send SA x, keys included, to the dumping socket. */
+static int dump_sa(struct xfrm_state *x, int count, void *ptr)
+{
+	struct pfkey_dump_data *ctx = ptr;
+	struct sk_buff *reply = pfkey_xfrm_state2msg(x, 1, 3);
+	struct sadb_msg *rhdr;
+
+	if (IS_ERR(reply))
+		return PTR_ERR(reply);
+
+	rhdr = (struct sadb_msg *) reply->data;
+	rhdr->sadb_msg_version = ctx->hdr->sadb_msg_version;
+	rhdr->sadb_msg_type = SADB_DUMP;
+	rhdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto);
+	rhdr->sadb_msg_errno = 0;
+	rhdr->sadb_msg_reserved = 0;
+	rhdr->sadb_msg_seq = count;
+	rhdr->sadb_msg_pid = ctx->hdr->sadb_msg_pid;
+	pfkey_broadcast(reply, GFP_ATOMIC, BROADCAST_ONE, ctx->sk);
+	return 0;
+}
+
+/* Run dump_sa() over the states xfrm_state_walk() yields for the SA type. */
+static int pfkey_dump(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+{
+	struct pfkey_dump_data ctx = { .skb = skb, .hdr = hdr, .sk = sk };
+	u8 proto = pfkey_satype2proto(hdr->sadb_msg_satype);
+
+	if (!proto)
+		return -EINVAL;
+
+	return xfrm_state_walk(proto, dump_sa, &ctx);
+}
+
+/* Header-only message: set promisc from satype (0 or 1); then rebroadcast it. */
+static int pfkey_promisc(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+{
+	struct pfkey_opt *pfk = pfkey_sk(sk);
+	int on = hdr->sadb_msg_satype;
+
+	if (hdr->sadb_msg_len == sizeof(*hdr) / sizeof(uint64_t)) {
+		hdr->sadb_msg_errno = 0;	/* XXX we mangle packet... */
+		if (on < 0 || on > 1)
+			return -EINVAL;
+		pfk->promisc = on;
+	}
+	pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, BROADCAST_ALL, NULL);
+	return 0;
+}
+
+/* Policy-walk callback: -EEXIST if any template of xp uses *ptr as reqid. */
+static int check_reqid(struct xfrm_policy *xp, int dir, int count, void *ptr)
+{
+	const u32 wanted = *(u32*)ptr;
+	int idx = xp->xfrm_nr;
+
+	while (--idx >= 0)
+		if (xp->xfrm_vec[idx].reqid == wanted)
+			return -EEXIST;
+	return 0;
+}
+
+/* Try successive reqids and return the first for which the check_reqid walk
+ * does not report -EEXIST; 0 if the counter comes back to where it started. */
+static u32 gen_reqid(void)
+{
+	static u32 reqid = IPSEC_MANUAL_REQID_MAX;
+	const u32 start = reqid;
+
+	do {
+		if (++reqid == 0)
+			reqid = IPSEC_MANUAL_REQID_MAX+1;
+		if (xfrm_policy_walk(check_reqid, (void*)&reqid) != -EEXIST)
+			return reqid;
+	} while (reqid != start);
+	return 0;
+}
+
+/* Fill the next free template of xp from one ipsecrequest.  Returns -ELOOP
+ * if xp is full, -EINVAL for mode 0 or a bad/unhandled tunnel address family,
+ * -ENOBUFS if no reqid could be generated, and 0 on success. */
+static int
+parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq)
+{
+	struct xfrm_tmpl *tmpl = xp->xfrm_vec + xp->xfrm_nr;
+	struct sockaddr_in *sin;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct sockaddr_in6 *sin6;
+#endif
+
+	if (xp->xfrm_nr >= XFRM_MAX_DEPTH)
+		return -ELOOP;
+	if (!rq->sadb_x_ipsecrequest_mode)
+		return -EINVAL;
+
+	tmpl->id.proto = rq->sadb_x_ipsecrequest_proto; /* XXX check proto */
+	tmpl->mode = rq->sadb_x_ipsecrequest_mode-1;
+	if (rq->sadb_x_ipsecrequest_level == IPSEC_LEVEL_USE) {
+		tmpl->optional = 1;
+	} else if (rq->sadb_x_ipsecrequest_level == IPSEC_LEVEL_UNIQUE) {
+		tmpl->reqid = rq->sadb_x_ipsecrequest_reqid;
+		if (tmpl->reqid > IPSEC_MANUAL_REQID_MAX)
+			tmpl->reqid = 0;
+		if (!tmpl->reqid && !(tmpl->reqid = gen_reqid()))
+			return -ENOBUFS;
+	}
+
+	/* addresses present only in tunnel mode: source first, then destination */
+	if (tmpl->mode) {
+		switch (xp->family) {
+		case AF_INET:
+			sin = (void*)(rq+1);
+			if (sin[0].sin_family != AF_INET)
+				return -EINVAL;
+			tmpl->saddr.a4 = sin[0].sin_addr.s_addr;
+			if (sin[1].sin_family != AF_INET)
+				return -EINVAL;
+			tmpl->id.daddr.a4 = sin[1].sin_addr.s_addr;
+			break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		case AF_INET6:
+			sin6 = (void *)(rq+1);
+			if (sin6[0].sin6_family != AF_INET6)
+				return -EINVAL;
+			memcpy(tmpl->saddr.a6, &sin6[0].sin6_addr, sizeof(struct in6_addr));
+			if (sin6[1].sin6_family != AF_INET6)
+				return -EINVAL;
+			memcpy(tmpl->id.daddr.a6, &sin6[1].sin6_addr, sizeof(struct in6_addr));
+			break;
+#endif
+		default:
+			return -EINVAL;
+		}
+	}
+	/* No way to set this via kame pfkey */
+	tmpl->aalgos = tmpl->ealgos = tmpl->calgos = ~0;
+	xp->xfrm_nr++;
+	return 0;
+}
+
+/* Walk the ipsecrequests after *pol; -EINVAL on a short or overlong request. */
+static int parse_ipsecrequests(struct xfrm_policy *xp, struct sadb_x_policy *pol)
+{
+	int err, rqlen, len = pol->sadb_x_policy_len*8 - sizeof(struct sadb_x_policy);
+	struct sadb_x_ipsecrequest *rq = (void*)(pol+1);
+
+	for (; len >= (int)sizeof(*rq); len -= rqlen, rq = (void*)((u8*)rq + rqlen)) {
+		rqlen = rq->sadb_x_ipsecrequest_len;
+		if (rqlen < (int)sizeof(*rq) || rqlen > len)
+			return -EINVAL;	/* would loop forever or run past the extension */
+		if ((err = parse_ipsecrequest(xp, rq)) < 0)
+			return err;
+	}
+	return 0;
+}
+
+/* Worst-case byte size of the PF_KEY message describing @xp: every
+ * template is assumed to carry a pair of tunnel endpoint sockaddrs. */
+static int pfkey_xfrm_policy2msg_size(struct xfrm_policy *xp)
+{
+	int tmpl_size = sizeof(struct sadb_x_ipsecrequest);
+	int total = sizeof(struct sadb_msg) + sizeof(struct sadb_x_policy);
+
+	if (xp->family == AF_INET)
+		tmpl_size += 2 * sizeof(struct sockaddr_in);
+	else
+		tmpl_size += 2 * sizeof(struct sockaddr_in6);
+	total += 3 * sizeof(struct sadb_lifetime);
+	total += 2 * (sizeof(struct sadb_address) + pfkey_sockaddr_size(xp->family));
+	return total + xp->xfrm_nr * tmpl_size;
+}
+
+/*
+ * Allocate an empty skb big enough for the PF_KEY rendering of @xp
+ * (computed size plus 16 spare bytes).  Uses GFP_ATOMIC and returns
+ * ERR_PTR(-ENOBUFS) rather than NULL when the allocation fails.
+ */
+static struct sk_buff * pfkey_xfrm_policy2msg_prep(struct xfrm_policy *xp)
+{
+	struct sk_buff *skb;
+
+	skb = alloc_skb(pfkey_xfrm_policy2msg_size(xp) + 16, GFP_ATOMIC);
+
+	return skb ? skb : ERR_PTR(-ENOBUFS);
+}
+
+static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, int dir)
+{
+	struct sadb_msg *hdr;
+	struct sadb_address *addr;
+	struct sadb_lifetime *lifetime;
+	struct sadb_x_policy *pol;
+	struct sockaddr_in   *sin;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct sockaddr_in6  *sin6;
+#endif
+	int i;
+	int size;
+	int sockaddr_size = pfkey_sockaddr_size(xp->family);	/* bytes put after each sadb_address header */
+	int socklen = (xp->family == AF_INET ?
+		       sizeof(struct sockaddr_in) :
+		       sizeof(struct sockaddr_in6));	/* raw sockaddr size used for tunnel endpoints */
+	/* Serialise @xp into @skb: header, src/dst address, three lifetimes, policy + one request per template. */
+	size = pfkey_xfrm_policy2msg_size(xp);
+
+	/* only sadb_msg_len and sadb_msg_reserved are set here (at the end); callers fill in the rest */
+	hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg));
+	memset(hdr, 0, size);	/* clears `size' bytes, i.e. past what skb_put() has reserved so far; XXX do we need this ? */
+
+	/* src address extension header */
+	addr = (struct sadb_address*) skb_put(skb, 
+					      sizeof(struct sadb_address)+sockaddr_size);
+	addr->sadb_address_len = 
+		(sizeof(struct sadb_address)+sockaddr_size)/
+			sizeof(uint64_t);
+	addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC;
+	addr->sadb_address_proto = pfkey_proto_from_xfrm(xp->selector.proto);
+	addr->sadb_address_prefixlen = xp->selector.prefixlen_s;
+	addr->sadb_address_reserved = 0;
+	/* sockaddr that follows the src header: selector source address and port */
+	if (xp->family == AF_INET) {
+		sin = (struct sockaddr_in *) (addr + 1);
+		sin->sin_family = AF_INET;
+		sin->sin_addr.s_addr = xp->selector.saddr.a4;
+		sin->sin_port = xp->selector.sport;
+		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+	}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	else if (xp->family == AF_INET6) {
+		sin6 = (struct sockaddr_in6 *) (addr + 1);
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_port = xp->selector.sport;
+		sin6->sin6_flowinfo = 0;
+		memcpy(&sin6->sin6_addr, xp->selector.saddr.a6,
+		       sizeof(struct in6_addr));;
+		sin6->sin6_scope_id = 0;
+	}
+#endif
+	else
+		BUG();	/* only AF_INET and (when configured) AF_INET6 are handled */
+
+	/* dst address */
+	addr = (struct sadb_address*) skb_put(skb, 
+					      sizeof(struct sadb_address)+sockaddr_size);
+	addr->sadb_address_len =
+		(sizeof(struct sadb_address)+sockaddr_size)/
+			sizeof(uint64_t);
+	addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST;
+	addr->sadb_address_proto = pfkey_proto_from_xfrm(xp->selector.proto);
+	addr->sadb_address_prefixlen = xp->selector.prefixlen_d; 
+	addr->sadb_address_reserved = 0;
+	if (xp->family == AF_INET) {
+		sin = (struct sockaddr_in *) (addr + 1);
+		sin->sin_family = AF_INET;
+		sin->sin_addr.s_addr = xp->selector.daddr.a4;
+		sin->sin_port = xp->selector.dport;
+		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+	}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	else if (xp->family == AF_INET6) {
+		sin6 = (struct sockaddr_in6 *) (addr + 1);
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_port = xp->selector.dport;
+		sin6->sin6_flowinfo = 0;
+		memcpy(&sin6->sin6_addr, xp->selector.daddr.a6,
+		       sizeof(struct in6_addr));
+		sin6->sin6_scope_id = 0;
+	}
+#endif
+	else
+		BUG();	/* only AF_INET and (when configured) AF_INET6 are handled */
+
+	/* hard time */
+	lifetime = (struct sadb_lifetime *)  skb_put(skb, 
+						     sizeof(struct sadb_lifetime));
+	lifetime->sadb_lifetime_len =
+		sizeof(struct sadb_lifetime)/sizeof(uint64_t);
+	lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
+	lifetime->sadb_lifetime_allocations =  _X2KEY(xp->lft.hard_packet_limit);
+	lifetime->sadb_lifetime_bytes = _X2KEY(xp->lft.hard_byte_limit);
+	lifetime->sadb_lifetime_addtime = xp->lft.hard_add_expires_seconds;
+	lifetime->sadb_lifetime_usetime = xp->lft.hard_use_expires_seconds;
+	/* soft time */
+	lifetime = (struct sadb_lifetime *)  skb_put(skb, 
+						     sizeof(struct sadb_lifetime));
+	lifetime->sadb_lifetime_len =
+		sizeof(struct sadb_lifetime)/sizeof(uint64_t);
+	lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
+	lifetime->sadb_lifetime_allocations =  _X2KEY(xp->lft.soft_packet_limit);
+	lifetime->sadb_lifetime_bytes = _X2KEY(xp->lft.soft_byte_limit);
+	lifetime->sadb_lifetime_addtime = xp->lft.soft_add_expires_seconds;
+	lifetime->sadb_lifetime_usetime = xp->lft.soft_use_expires_seconds;
+	/* current time: usage counters copied as-is, without _X2KEY() */
+	lifetime = (struct sadb_lifetime *)  skb_put(skb, 
+						     sizeof(struct sadb_lifetime));
+	lifetime->sadb_lifetime_len =
+		sizeof(struct sadb_lifetime)/sizeof(uint64_t);
+	lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
+	lifetime->sadb_lifetime_allocations = xp->curlft.packets;
+	lifetime->sadb_lifetime_bytes = xp->curlft.bytes;
+	lifetime->sadb_lifetime_addtime = xp->curlft.add_time;
+	lifetime->sadb_lifetime_usetime = xp->curlft.use_time;
+	/* policy extension; its length grows below as requests are appended */
+	pol = (struct sadb_x_policy *)  skb_put(skb, sizeof(struct sadb_x_policy));
+	pol->sadb_x_policy_len = sizeof(struct sadb_x_policy)/sizeof(uint64_t);
+	pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY;
+	pol->sadb_x_policy_type = IPSEC_POLICY_DISCARD;	/* default; overridden for ALLOW policies */
+	if (xp->action == XFRM_POLICY_ALLOW) {
+		if (xp->xfrm_nr)
+			pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC;
+		else
+			pol->sadb_x_policy_type = IPSEC_POLICY_NONE;
+	}
+	pol->sadb_x_policy_dir = dir+1;	/* PF_KEY direction is @dir plus one */
+	pol->sadb_x_policy_id = xp->index;
+	/* one sadb_x_ipsecrequest per template, with endpoint addresses only when t->mode is set */
+	for (i=0; i<xp->xfrm_nr; i++) {
+		struct sadb_x_ipsecrequest *rq;
+		struct xfrm_tmpl *t = xp->xfrm_vec + i;
+		int req_size;
+
+		req_size = sizeof(struct sadb_x_ipsecrequest);
+		if (t->mode)
+			req_size += 2*socklen;
+		else
+			size -= 2*socklen;	/* size assumed an address pair per template; give it back */
+		rq = (void*)skb_put(skb, req_size);
+		pol->sadb_x_policy_len += req_size/8;	/* extension lengths are in 8-byte units */
+		memset(rq, 0, sizeof(*rq));
+		rq->sadb_x_ipsecrequest_len = req_size;	/* in bytes, unlike the extension lengths */
+		rq->sadb_x_ipsecrequest_proto = t->id.proto;
+		rq->sadb_x_ipsecrequest_mode = t->mode+1;
+		rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_REQUIRE;	/* precedence: USE over UNIQUE over REQUIRE */
+		if (t->reqid)
+			rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_UNIQUE;
+		if (t->optional)
+			rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_USE;
+		rq->sadb_x_ipsecrequest_reqid = t->reqid;
+		if (t->mode) {
+			switch (xp->family) {
+			case AF_INET:
+				sin = (void*)(rq+1);
+				sin->sin_family = AF_INET;
+				sin->sin_addr.s_addr = t->saddr.a4;
+				sin->sin_port = 0;
+				memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+				sin++;
+				sin->sin_family = AF_INET;
+				sin->sin_addr.s_addr = t->id.daddr.a4;
+				sin->sin_port = 0;
+				memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+				break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+			case AF_INET6:
+				sin6 = (void*)(rq+1);
+				sin6->sin6_family = AF_INET6;
+				sin6->sin6_port = 0;
+				sin6->sin6_flowinfo = 0;
+				memcpy(&sin6->sin6_addr, t->saddr.a6,
+				       sizeof(struct in6_addr));
+				sin6->sin6_scope_id = 0;
+
+				sin6++;
+				sin6->sin6_family = AF_INET6;
+				sin6->sin6_port = 0;
+				sin6->sin6_flowinfo = 0;
+				memcpy(&sin6->sin6_addr, t->id.daddr.a6,
+				       sizeof(struct in6_addr));
+				sin6->sin6_scope_id = 0;
+				break;
+#endif
+			default:
+				break;	/* other families already hit BUG() above */
+			}
+		}
+	}
+	hdr->sadb_msg_len = size / sizeof(uint64_t);	/* total length in 64-bit words */
+	hdr->sadb_msg_reserved = atomic_read(&xp->refcnt);	/* reserved field carries the policy refcount */
+}
+
+/* SADB_X_SPDADD / SPDUPDATE / SPDSETIDX: build a policy from the address,
+ * lifetime and policy extensions, insert it and broadcast the result.
+ * Returns 0 or a negative errno; a policy that is not inserted is freed. */
+static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+{
+	struct sadb_address *src = ext_hdrs[SADB_EXT_ADDRESS_SRC-1];
+	struct sadb_address *dst = ext_hdrs[SADB_EXT_ADDRESS_DST-1];
+	struct sadb_x_policy *pol = ext_hdrs[SADB_X_EXT_POLICY-1];
+	struct sadb_lifetime *lft;
+	struct xfrm_policy *xp;
+	struct sk_buff *reply_skb;
+	struct sadb_msg *reply;
+	int dir, err;
+
+	if (!present_and_same_family(src, dst) || !pol)
+		return -EINVAL;
+	if (pol->sadb_x_policy_type > IPSEC_POLICY_IPSEC ||
+	    !pol->sadb_x_policy_dir || pol->sadb_x_policy_dir >= IPSEC_DIR_MAX)
+		return -EINVAL;
+	dir = pol->sadb_x_policy_dir-1;
+
+	xp = xfrm_policy_alloc(GFP_KERNEL);
+	if (xp == NULL)
+		return -ENOBUFS;
+
+	xp->action = (pol->sadb_x_policy_type == IPSEC_POLICY_DISCARD ?
+		      XFRM_POLICY_BLOCK : XFRM_POLICY_ALLOW);
+
+	/* source half of the selector; it also determines the family */
+	xp->family = pfkey_sadb_addr2xfrm_addr(src, &xp->selector.saddr);
+	if (!xp->family) {
+		err = -EINVAL;
+		goto out;
+	}
+	xp->selector.family = xp->family;
+	xp->selector.prefixlen_s = src->sadb_address_prefixlen;
+	xp->selector.proto = pfkey_proto_to_xfrm(src->sadb_address_proto);
+	xp->selector.sport = ((struct sockaddr_in *)(src+1))->sin_port;
+	if (xp->selector.sport)
+		xp->selector.sport_mask = ~0;
+
+	/* destination half; proto is set a second time here because KAME
+	 * apps appear to put the same value in both addresses */
+	pfkey_sadb_addr2xfrm_addr(dst, &xp->selector.daddr);
+	xp->selector.prefixlen_d = dst->sadb_address_prefixlen;
+	xp->selector.proto = pfkey_proto_to_xfrm(dst->sadb_address_proto);
+	xp->selector.dport = ((struct sockaddr_in *)(dst+1))->sin_port;
+	if (xp->selector.dport)
+		xp->selector.dport_mask = ~0;
+
+	/* no limits unless a lifetime extension supplies them */
+	xp->lft.soft_byte_limit = XFRM_INF;
+	xp->lft.hard_byte_limit = XFRM_INF;
+	xp->lft.soft_packet_limit = XFRM_INF;
+	xp->lft.hard_packet_limit = XFRM_INF;
+	lft = ext_hdrs[SADB_EXT_LIFETIME_HARD-1];
+	if (lft != NULL) {
+		xp->lft.hard_packet_limit = _KEY2X(lft->sadb_lifetime_allocations);
+		xp->lft.hard_byte_limit = _KEY2X(lft->sadb_lifetime_bytes);
+		xp->lft.hard_add_expires_seconds = lft->sadb_lifetime_addtime;
+		xp->lft.hard_use_expires_seconds = lft->sadb_lifetime_usetime;
+	}
+	lft = ext_hdrs[SADB_EXT_LIFETIME_SOFT-1];
+	if (lft != NULL) {
+		xp->lft.soft_packet_limit = _KEY2X(lft->sadb_lifetime_allocations);
+		xp->lft.soft_byte_limit = _KEY2X(lft->sadb_lifetime_bytes);
+		xp->lft.soft_add_expires_seconds = lft->sadb_lifetime_addtime;
+		xp->lft.soft_use_expires_seconds = lft->sadb_lifetime_usetime;
+	}
+
+	xp->xfrm_nr = 0;
+	if (pol->sadb_x_policy_type == IPSEC_POLICY_IPSEC) {
+		err = parse_ipsecrequests(xp, pol);
+		if (err < 0)
+			goto out;
+	}
+
+	/* allocate the reply first: a failure here leaves the SPD untouched */
+	reply_skb = pfkey_xfrm_policy2msg_prep(xp);
+	if (IS_ERR(reply_skb)) {
+		err = PTR_ERR(reply_skb);
+		goto out;
+	}
+
+	err = xfrm_policy_insert(dir, xp, hdr->sadb_msg_type != SADB_X_SPDUPDATE);
+	if (err) {
+		kfree_skb(reply_skb);
+		goto out;
+	}
+	pfkey_xfrm_policy2msg(reply_skb, xp, dir);
+	xfrm_pol_put(xp);
+
+	reply = (struct sadb_msg *) reply_skb->data;
+	reply->sadb_msg_version = hdr->sadb_msg_version;
+	reply->sadb_msg_type = hdr->sadb_msg_type;
+	reply->sadb_msg_satype = 0;
+	reply->sadb_msg_errno = 0;
+	reply->sadb_msg_seq = hdr->sadb_msg_seq;
+	reply->sadb_msg_pid = hdr->sadb_msg_pid;
+	pfkey_broadcast(reply_skb, GFP_ATOMIC, BROADCAST_ALL, sk);
+	return 0;
+
+out:
+	kfree(xp);
+	return err;
+}
+
+/* SADB_X_SPDDELETE: unlink the policy matching the SRC/DST selector in the
+ * POLICY extension's direction and broadcast it.  Returns 0 or -errno. */
+static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+{
+	int err = 0;
+	struct sadb_address *sa;
+	struct sadb_x_policy *pol;
+	struct xfrm_policy *xp;
+	struct sk_buff *out_skb;
+	struct sadb_msg *out_hdr;
+	struct xfrm_selector sel;
+
+	if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
+				     ext_hdrs[SADB_EXT_ADDRESS_DST-1]) ||
+	    !ext_hdrs[SADB_X_EXT_POLICY-1])
+		return -EINVAL;
+
+	pol = ext_hdrs[SADB_X_EXT_POLICY-1];
+	if (!pol->sadb_x_policy_dir || pol->sadb_x_policy_dir >= IPSEC_DIR_MAX)
+		return -EINVAL;
+
+	memset(&sel, 0, sizeof(sel));
+	/* pfkey_spdadd() stores the family in the selector; the lookup key must
+	 * carry it too (xfrm_policy_bysel() presumably compares whole selectors) */
+	sa = ext_hdrs[SADB_EXT_ADDRESS_SRC-1];
+	sel.family = pfkey_sadb_addr2xfrm_addr(sa, &sel.saddr);
+	sel.prefixlen_s = sa->sadb_address_prefixlen;
+	sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto);
+	sel.sport = ((struct sockaddr_in *)(sa+1))->sin_port;
+	if (sel.sport)
+		sel.sport_mask = ~0;
+
+	sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1];
+	pfkey_sadb_addr2xfrm_addr(sa, &sel.daddr);
+	sel.prefixlen_d = sa->sadb_address_prefixlen;
+	sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto);
+	sel.dport = ((struct sockaddr_in *)(sa+1))->sin_port;
+	if (sel.dport)
+		sel.dport_mask = ~0;
+
+	xp = xfrm_policy_bysel(pol->sadb_x_policy_dir-1, &sel, 1);
+	if (xp == NULL)
+		return -ENOENT;
+
+	out_skb = pfkey_xfrm_policy2msg_prep(xp);
+	if (IS_ERR(out_skb)) {
+		err =  PTR_ERR(out_skb);
+		goto out;
+	}
+	pfkey_xfrm_policy2msg(out_skb, xp, pol->sadb_x_policy_dir-1);
+
+	out_hdr = (struct sadb_msg *) out_skb->data;
+	out_hdr->sadb_msg_version = hdr->sadb_msg_version;
+	out_hdr->sadb_msg_type = SADB_X_SPDDELETE;
+	out_hdr->sadb_msg_satype = 0;
+	out_hdr->sadb_msg_errno = 0;
+	out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
+	out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
+	pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, sk);
+
+out:
+	xfrm_pol_put(xp);
+	return err;
+}
+
+/* SADB_X_SPDGET / SADB_X_SPDDELETE2: find a policy by the id in the POLICY
+ * extension and broadcast it.  For SPDDELETE2 the last lookup argument is
+ * set (presumably "delete").  Returns 0, -EINVAL, -ENOENT or an skb error. */
+static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+{
+	struct sadb_x_policy *pol = ext_hdrs[SADB_X_EXT_POLICY-1];
+	struct xfrm_policy *xp;
+	struct sk_buff *reply_skb;
+	struct sadb_msg *reply;
+	int err = 0;
+
+	if (pol == NULL)
+		return -EINVAL;
+
+	xp = xfrm_policy_byid(0, pol->sadb_x_policy_id,
+			      hdr->sadb_msg_type == SADB_X_SPDDELETE2);
+	if (xp == NULL)
+		return -ENOENT;
+
+	reply_skb = pfkey_xfrm_policy2msg_prep(xp);
+	if (IS_ERR(reply_skb)) {
+		err = PTR_ERR(reply_skb);
+		goto out;
+	}
+	pfkey_xfrm_policy2msg(reply_skb, xp, pol->sadb_x_policy_dir-1);
+
+	reply = (struct sadb_msg *) reply_skb->data;
+	reply->sadb_msg_version = hdr->sadb_msg_version;
+	reply->sadb_msg_type = hdr->sadb_msg_type;
+	reply->sadb_msg_satype = 0;
+	reply->sadb_msg_errno = 0;
+	reply->sadb_msg_seq = hdr->sadb_msg_seq;
+	reply->sadb_msg_pid = hdr->sadb_msg_pid;
+	pfkey_broadcast(reply_skb, GFP_ATOMIC, BROADCAST_ALL, sk);
+
+out:
+	xfrm_pol_put(xp);
+	return err;
+}
+
+/* xfrm_policy_walk() callback used by SADB_X_SPDDUMP: render one policy and
+ * send it to the requesting socket only; @count becomes the message seq. */
+static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr)
+{
+	struct pfkey_dump_data *data = ptr;
+	struct sadb_msg *reply;
+	struct sk_buff *reply_skb = pfkey_xfrm_policy2msg_prep(xp);
+
+	if (IS_ERR(reply_skb))
+		return PTR_ERR(reply_skb);
+	pfkey_xfrm_policy2msg(reply_skb, xp, dir);
+
+	reply = (struct sadb_msg *) reply_skb->data;
+	reply->sadb_msg_version = data->hdr->sadb_msg_version;
+	reply->sadb_msg_type = SADB_X_SPDDUMP;
+	reply->sadb_msg_satype = SADB_SATYPE_UNSPEC;
+	reply->sadb_msg_errno = 0;
+	reply->sadb_msg_seq = count;
+	reply->sadb_msg_pid = data->hdr->sadb_msg_pid;
+	pfkey_broadcast(reply_skb, GFP_ATOMIC, BROADCAST_ONE, data->sk);
+	return 0;
+}
+
+/* SADB_X_SPDDUMP: hand dump_sp() to xfrm_policy_walk(), replying to @sk. */
+static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+{
+	struct pfkey_dump_data ctx = { .sk = sk, .hdr = hdr, .skb = skb };
+	return xfrm_policy_walk(dump_sp, &ctx);
+}
+
+/* SADB_X_SPDFLUSH: call xfrm_policy_flush() and broadcast a header-only reply.
+ * The reply skb is allocated first, so -ENOBUFS is returned before flushing. */
+static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+{
+	struct sadb_msg *reply;
+	struct sk_buff *reply_skb = alloc_skb(sizeof(*reply) + 16, GFP_KERNEL);
+
+	if (!reply_skb)
+		return -ENOBUFS;
+	xfrm_policy_flush();
+
+	reply = (struct sadb_msg *) skb_put(reply_skb, sizeof(*reply));
+	pfkey_hdr_dup(reply, hdr);
+	reply->sadb_msg_errno = (uint8_t) 0;
+	reply->sadb_msg_len = sizeof(*reply) / sizeof(uint64_t);
+	pfkey_broadcast(reply_skb, GFP_KERNEL, BROADCAST_ALL, NULL);
+
+	return 0;
+}
+
+/* Request handlers indexed by sadb_msg_type.  Types without an entry
+ * (SADB_EXPIRE, SADB_X_PCHANGE, SADB_X_SPDACQUIRE) stay NULL, which
+ * pfkey_process() reports as -EOPNOTSUPP. */
+typedef int (*pfkey_handler)(struct sock *sk, struct sk_buff *skb,
+			     struct sadb_msg *hdr, void **ext_hdrs);
+static pfkey_handler pfkey_funcs[SADB_MAX + 1] = {
+	[SADB_RESERVED]		= pfkey_reserved,
+	[SADB_GETSPI]		= pfkey_getspi,
+	[SADB_UPDATE]		= pfkey_add,
+	[SADB_ADD]		= pfkey_add,
+	[SADB_DELETE]		= pfkey_delete,
+	[SADB_GET]		= pfkey_get,
+	[SADB_ACQUIRE]		= pfkey_acquire,
+	[SADB_REGISTER]		= pfkey_register,
+	[SADB_FLUSH]		= pfkey_flush,
+	[SADB_DUMP]		= pfkey_dump,
+	[SADB_X_PROMISC]	= pfkey_promisc,
+	[SADB_X_SPDUPDATE]	= pfkey_spdadd,
+	[SADB_X_SPDADD]		= pfkey_spdadd,
+	[SADB_X_SPDDELETE]	= pfkey_spddelete,
+	[SADB_X_SPDGET]		= pfkey_spdget,
+	[SADB_X_SPDDUMP]	= pfkey_spddump,
+	[SADB_X_SPDFLUSH]	= pfkey_spdflush,
+	[SADB_X_SPDSETIDX]	= pfkey_spdadd,
+	[SADB_X_SPDDELETE2]	= pfkey_spdget,
+};
+
+/* Copy the request to promiscuous listeners, parse its extensions and run
+ * the handler for its type; -EOPNOTSUPP if the type has no handler. */
+static int pfkey_process(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr)
+{
+	void *ext_hdrs[SADB_EXT_MAX];
+	pfkey_handler handler;
+	int err;
+
+	pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL,
+			BROADCAST_PROMISC_ONLY, NULL);
+	memset(ext_hdrs, 0, sizeof(ext_hdrs));
+	err = parse_exthdrs(skb, hdr, ext_hdrs);
+	if (err)
+		return err;
+	handler = pfkey_funcs[hdr->sadb_msg_type];
+	return handler ? handler(sk, skb, hdr, ext_hdrs) : -EOPNOTSUPP;
+}
+
+/*
+ * Sanity-check the fixed PF_KEY header at the start of @skb.  Returns the
+ * header with *errp = 0, or NULL with *errp = -EMSGSIZE (length problems)
+ * or -EINVAL (wrong version, non-zero reserved field, type out of range).
+ */
+static struct sadb_msg *pfkey_get_base_msg(struct sk_buff *skb, int *errp)
+{
+	struct sadb_msg *hdr = (struct sadb_msg *) skb->data;
+
+	*errp = -EMSGSIZE;
+	if (skb->len < sizeof(*hdr))
+		return NULL;
+	*errp = -EINVAL;
+	if (hdr->sadb_msg_version != PF_KEY_V2 || hdr->sadb_msg_reserved != 0)
+		return NULL;
+	if (hdr->sadb_msg_type <= SADB_RESERVED || hdr->sadb_msg_type > SADB_MAX)
+		return NULL;
+
+	*errp = -EMSGSIZE;
+	if (hdr->sadb_msg_len != skb->len / sizeof(uint64_t) ||
+	    hdr->sadb_msg_len < sizeof(struct sadb_msg) / sizeof(uint64_t))
+		return NULL;
+
+	*errp = 0;
+	return hdr;
+}
+
+/* Test bit sadb_alg_id of a template mask; ids past the mask width are unset (shifting by them is UB). */
+static inline int aalg_tmpl_set(struct xfrm_tmpl *t, struct xfrm_algo_desc *d)
+{
+	return d->desc.sadb_alg_id < 8*sizeof(t->aalgos) && ((t->aalgos >> d->desc.sadb_alg_id) & 1);
+}
+static inline int ealg_tmpl_set(struct xfrm_tmpl *t, struct xfrm_algo_desc *d)
+{
+	return d->desc.sadb_alg_id < 8*sizeof(t->ealgos) && ((t->ealgos >> d->desc.sadb_alg_id) & 1);
+}
+
+/* Bytes needed for an AH proposal: the sadb_prop header plus one sadb_comb
+ * per available auth algorithm that the template allows. */
+static int count_ah_combs(struct xfrm_tmpl *t)
+{
+	struct xfrm_algo_desc *aalg;
+	int idx, total = sizeof(struct sadb_prop);
+
+	for (idx = 0; (aalg = xfrm_aalg_get_byidx(idx)) != NULL; idx++)
+		if (aalg_tmpl_set(t, aalg) && aalg->available)
+			total += sizeof(struct sadb_comb);
+
+	return total;
+}
+
+/*
+ * Bytes needed for an ESP proposal: the sadb_prop header plus one sadb_comb
+ * for every usable (encryption, authentication) pair.  Authentication
+ * entries are scanned from index 1, so the entry at index 0 is never paired.
+ */
+static int count_esp_combs(struct xfrm_tmpl *t)
+{
+	struct xfrm_algo_desc *ealg, *aalg;
+	int total = sizeof(struct sadb_prop);
+	int e, a;
+
+	for (e = 0; (ealg = xfrm_ealg_get_byidx(e)) != NULL; e++) {
+		if (!ealg_tmpl_set(t, ealg) || !ealg->available)
+			continue;
+
+		for (a = 1; (aalg = xfrm_aalg_get_byidx(a)) != NULL; a++) {
+			if (aalg_tmpl_set(t, aalg) && aalg->available)
+				total += sizeof(struct sadb_comb);
+		}
+	}
+
+	return total;
+}
+
+/* Append an SADB_EXT_PROPOSAL to @skb with one sadb_comb (fixed default
+ * lifetimes) per available auth algorithm that @t allows; mirrors the
+ * sizing done by count_ah_combs(). */
+static void dump_ah_combs(struct sk_buff *skb, struct xfrm_tmpl *t)
+{
+	struct sadb_prop *prop;
+	struct xfrm_algo_desc *aalg;
+	struct sadb_comb *comb;
+	int idx;
+
+	prop = (struct sadb_prop*)skb_put(skb, sizeof(*prop));
+	prop->sadb_prop_len = sizeof(*prop)/8;
+	prop->sadb_prop_exttype = SADB_EXT_PROPOSAL;
+	prop->sadb_prop_replay = 32;
+	memset(prop->sadb_prop_reserved, 0, sizeof(prop->sadb_prop_reserved));
+
+	for (idx = 0; (aalg = xfrm_aalg_get_byidx(idx)) != NULL; idx++) {
+		if (!aalg_tmpl_set(t, aalg) || !aalg->available)
+			continue;
+		comb = (struct sadb_comb*)skb_put(skb, sizeof(*comb));
+		memset(comb, 0, sizeof(*comb));
+		prop->sadb_prop_len += sizeof(*comb)/8;
+		comb->sadb_comb_auth = aalg->desc.sadb_alg_id;
+		comb->sadb_comb_auth_minbits = aalg->desc.sadb_alg_minbits;
+		comb->sadb_comb_auth_maxbits = aalg->desc.sadb_alg_maxbits;
+		comb->sadb_comb_hard_addtime = 24*60*60;
+		comb->sadb_comb_soft_addtime = 20*60*60;
+		comb->sadb_comb_hard_usetime = 8*60*60;
+		comb->sadb_comb_soft_usetime = 7*60*60;
+	}
+}
+
+/*
+ * Append an SADB_EXT_PROPOSAL to @skb with one sadb_comb (fixed default
+ * lifetimes) for every usable (encryption, authentication) pair allowed by
+ * @t.  Authentication entries are scanned from index 1, matching the
+ * sizing done by count_esp_combs().
+ */
+static void dump_esp_combs(struct sk_buff *skb, struct xfrm_tmpl *t)
+{
+	struct sadb_prop *prop;
+	struct xfrm_algo_desc *ealg, *aalg;
+	struct sadb_comb *comb;
+	int e, a;
+
+	prop = (struct sadb_prop*)skb_put(skb, sizeof(*prop));
+	prop->sadb_prop_len = sizeof(*prop)/8;
+	prop->sadb_prop_exttype = SADB_EXT_PROPOSAL;
+	prop->sadb_prop_replay = 32;
+	memset(prop->sadb_prop_reserved, 0, sizeof(prop->sadb_prop_reserved));
+
+	for (e = 0; (ealg = xfrm_ealg_get_byidx(e)) != NULL; e++) {
+		if (!ealg_tmpl_set(t, ealg) || !ealg->available)
+			continue;
+
+		for (a = 1; (aalg = xfrm_aalg_get_byidx(a)) != NULL; a++) {
+			if (!aalg_tmpl_set(t, aalg) || !aalg->available)
+				continue;
+			comb = (struct sadb_comb*)skb_put(skb, sizeof(*comb));
+			memset(comb, 0, sizeof(*comb));
+			prop->sadb_prop_len += sizeof(*comb)/8;
+			comb->sadb_comb_auth = aalg->desc.sadb_alg_id;
+			comb->sadb_comb_auth_minbits = aalg->desc.sadb_alg_minbits;
+			comb->sadb_comb_auth_maxbits = aalg->desc.sadb_alg_maxbits;
+			comb->sadb_comb_encrypt = ealg->desc.sadb_alg_id;
+			comb->sadb_comb_encrypt_minbits = ealg->desc.sadb_alg_minbits;
+			comb->sadb_comb_encrypt_maxbits = ealg->desc.sadb_alg_maxbits;
+			comb->sadb_comb_hard_addtime = 24*60*60;
+			comb->sadb_comb_soft_addtime = 20*60*60;
+			comb->sadb_comb_hard_usetime = 8*60*60;
+			comb->sadb_comb_soft_usetime = 7*60*60;
+		}
+	}
+}
+
+/* Broadcast an SADB_EXPIRE for @x to registered listeners; @hard selects
+ * 2, otherwise 1, as the last argument of pfkey_xfrm_state2msg(). */
+static int pfkey_send_notify(struct xfrm_state *x, int hard)
+{
+	struct sadb_msg *msg;
+	struct sk_buff *msg_skb = pfkey_xfrm_state2msg(x, 0, hard ? 2 : 1);
+
+	if (IS_ERR(msg_skb))
+		return PTR_ERR(msg_skb);
+
+	msg = (struct sadb_msg *) msg_skb->data;
+	msg->sadb_msg_version = PF_KEY_V2;
+	msg->sadb_msg_type = SADB_EXPIRE;
+	msg->sadb_msg_satype = pfkey_proto2satype(x->id.proto);
+	msg->sadb_msg_errno = 0;
+	msg->sadb_msg_reserved = 0;
+	msg->sadb_msg_seq = 0;
+	msg->sadb_msg_pid = 0;
+
+	pfkey_broadcast(msg_skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL);
+	return 0;
+}
+
+/* Next acquire sequence number, taken under acqseq_lock; 0 is skipped on wrap. */
+static u32 get_acqseq(void)
+{
+	static spinlock_t acqseq_lock = SPIN_LOCK_UNLOCKED;
+	static u32 acqseq;
+	u32 res;
+	spin_lock_bh(&acqseq_lock);
+	res = ++acqseq ? acqseq : ++acqseq;
+	spin_unlock_bh(&acqseq_lock);
+	return res;
+}
+
+static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *xp, int dir)
+{
+	struct sk_buff *skb;
+	struct sadb_msg *hdr;
+	struct sadb_address *addr;
+	struct sadb_x_policy *pol;
+	struct sockaddr_in *sin;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct sockaddr_in6 *sin6;
+#endif
+	int sockaddr_size;
+	int size;
+	/* Build an SADB_ACQUIRE for state @x and broadcast it to registered listeners. */
+	sockaddr_size = pfkey_sockaddr_size(x->props.family);
+	if (!sockaddr_size)
+		return -EINVAL;
+	/* total size: header, two address extensions, policy, plus the proposal below */
+	size = sizeof(struct sadb_msg) +
+		(sizeof(struct sadb_address) * 2) +
+		(sockaddr_size * 2) +
+		sizeof(struct sadb_x_policy);
+	/* a proposal is only added for AH and ESP; other protocols get none */
+	if (x->id.proto == IPPROTO_AH)
+		size += count_ah_combs(t);
+	else if (x->id.proto == IPPROTO_ESP)
+		size += count_esp_combs(t);
+
+	skb =  alloc_skb(size + 16, GFP_ATOMIC);
+	if (skb == NULL)
+		return -ENOMEM;
+	
+	hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg));
+	hdr->sadb_msg_version = PF_KEY_V2;
+	hdr->sadb_msg_type = SADB_ACQUIRE;
+	hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto);
+	hdr->sadb_msg_len = size / sizeof(uint64_t);	/* length in 64-bit words */
+	hdr->sadb_msg_errno = 0;
+	hdr->sadb_msg_reserved = 0;
+	hdr->sadb_msg_seq = x->km.seq = get_acqseq();	/* the sequence number is also stored in the state */
+	hdr->sadb_msg_pid = 0;
+
+	/* src address */
+	addr = (struct sadb_address*) skb_put(skb, 
+					      sizeof(struct sadb_address)+sockaddr_size);
+	addr->sadb_address_len = 
+		(sizeof(struct sadb_address)+sockaddr_size)/
+			sizeof(uint64_t);
+	addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC;
+	addr->sadb_address_proto = 0;
+	addr->sadb_address_reserved = 0;
+	if (x->props.family == AF_INET) {
+		addr->sadb_address_prefixlen = 32;	/* full-length prefix: a single host */
+
+		sin = (struct sockaddr_in *) (addr + 1);
+		sin->sin_family = AF_INET;
+		sin->sin_addr.s_addr = x->props.saddr.a4;
+		sin->sin_port = 0;
+		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+	}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	else if (x->props.family == AF_INET6) {
+		addr->sadb_address_prefixlen = 128;
+
+		sin6 = (struct sockaddr_in6 *) (addr + 1);
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_port = 0;
+		sin6->sin6_flowinfo = 0;
+		memcpy(&sin6->sin6_addr,
+		       x->props.saddr.a6, sizeof(struct in6_addr));
+		sin6->sin6_scope_id = 0;
+	}
+#endif
+	else
+		BUG();	/* only AF_INET and (when configured) AF_INET6 are handled */
+	
+	/* dst address */
+	addr = (struct sadb_address*) skb_put(skb, 
+					      sizeof(struct sadb_address)+sockaddr_size);
+	addr->sadb_address_len =
+		(sizeof(struct sadb_address)+sockaddr_size)/
+			sizeof(uint64_t);
+	addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST;
+	addr->sadb_address_proto = 0;
+	addr->sadb_address_reserved = 0;
+	if (x->props.family == AF_INET) {
+		addr->sadb_address_prefixlen = 32; 
+
+		sin = (struct sockaddr_in *) (addr + 1);
+		sin->sin_family = AF_INET;
+		sin->sin_addr.s_addr = x->id.daddr.a4;
+		sin->sin_port = 0;
+		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+	}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	else if (x->props.family == AF_INET6) {
+		addr->sadb_address_prefixlen = 128; 
+
+		sin6 = (struct sockaddr_in6 *) (addr + 1);
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_port = 0;
+		sin6->sin6_flowinfo = 0;
+		memcpy(&sin6->sin6_addr,
+		       x->id.daddr.a6, sizeof(struct in6_addr));
+		sin6->sin6_scope_id = 0;
+	}
+#endif
+	else
+		BUG();	/* only AF_INET and (when configured) AF_INET6 are handled */
+	/* policy extension: carries @dir plus one and the index of @xp */
+	pol = (struct sadb_x_policy *)  skb_put(skb, sizeof(struct sadb_x_policy));
+	pol->sadb_x_policy_len = sizeof(struct sadb_x_policy)/sizeof(uint64_t);
+	pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY;
+	pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC;
+	pol->sadb_x_policy_dir = dir+1;
+	pol->sadb_x_policy_id = xp->index;
+
+	/* Set sadb_comb's. */
+	if (x->id.proto == IPPROTO_AH)
+		dump_ah_combs(skb, t);
+	else if (x->id.proto == IPPROTO_ESP)
+		dump_esp_combs(skb, t);
+
+	return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL);
+}
+
+/* Build a per-socket policy from the sadb_x_policy blob handed to the
+ * IP_IPSEC_POLICY / IPV6_IPSEC_POLICY socket option.  Returns the policy
+ * with *dir set to the xfrm direction, or NULL with *dir set to a
+ * negative errno. */
+static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt,
+                                                u8 *data, int len, int *dir)
+{
+	struct sadb_x_policy *pol = (struct sadb_x_policy*)data;
+	struct xfrm_policy *xp;
+	int wanted_opt;
+
+	switch (family) {
+	case AF_INET:
+		wanted_opt = IP_IPSEC_POLICY;
+		break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case AF_INET6:
+		wanted_opt = IPV6_IPSEC_POLICY;
+		break;
+#endif
+	default:
+		*dir = -EINVAL;
+		return NULL;
+	}
+	if (opt != wanted_opt) {
+		*dir = -EOPNOTSUPP;
+		return NULL;
+	}
+
+	*dir = -EINVAL;
+	if (len < sizeof(struct sadb_x_policy) ||
+	    pol->sadb_x_policy_len*8 > len ||
+	    pol->sadb_x_policy_type > IPSEC_POLICY_BYPASS ||
+	    !pol->sadb_x_policy_dir || pol->sadb_x_policy_dir > IPSEC_DIR_OUTBOUND)
+		return NULL;
+
+	xp = xfrm_policy_alloc(GFP_ATOMIC);
+	if (xp == NULL) {
+		*dir = -ENOBUFS;
+		return NULL;
+	}
+
+	xp->action = (pol->sadb_x_policy_type == IPSEC_POLICY_DISCARD ?
+		      XFRM_POLICY_BLOCK : XFRM_POLICY_ALLOW);
+	xp->family = family;
+	xp->xfrm_nr = 0;
+	xp->lft.soft_byte_limit = XFRM_INF;
+	xp->lft.hard_byte_limit = XFRM_INF;
+	xp->lft.soft_packet_limit = XFRM_INF;
+	xp->lft.hard_packet_limit = XFRM_INF;
+	if (pol->sadb_x_policy_type == IPSEC_POLICY_IPSEC) {
+		*dir = parse_ipsecrequests(xp, pol);
+		if (*dir < 0) {
+			kfree(xp);
+			return NULL;
+		}
+	}
+
+	*dir = pol->sadb_x_policy_dir-1;
+	return xp;
+}
+
+static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
+{
+	struct sk_buff *skb;
+	struct sadb_msg *hdr;
+	struct sadb_sa *sa;
+	struct sadb_address *addr;
+	struct sadb_x_nat_t_port *n_port;
+	struct sockaddr_in *sin;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct sockaddr_in6 *sin6;
+#endif
+	int sockaddr_size;
+	int size;
+	__u8 satype = (x->id.proto == IPPROTO_ESP ? SADB_SATYPE_ESP : 0);
+	struct xfrm_encap_tmpl *natt = NULL;
+	/* Report a changed NAT-T peer (@ipaddr, @sport) for ESP state @x; -EINVAL unless ESP with encap. */
+	sockaddr_size = pfkey_sockaddr_size(x->props.family);
+	if (!sockaddr_size)
+		return -EINVAL;
+
+	if (!satype)
+		return -EINVAL;
+
+	if (!x->encap)
+		return -EINVAL;
+
+	natt = x->encap;
+
+	/* Build an SADB_X_NAT_T_NEW_MAPPING message:
+	 *
+	 * HDR | SA | ADDRESS_SRC (old addr) | NAT_T_SPORT (old port) |
+	 * ADDRESS_DST (new addr) | NAT_T_DPORT (new port)
+	 */
+	
+	size = sizeof(struct sadb_msg) +
+		sizeof(struct sadb_sa) +
+		(sizeof(struct sadb_address) * 2) +
+		(sockaddr_size * 2) +
+		(sizeof(struct sadb_x_nat_t_port) * 2);
+	
+	skb =  alloc_skb(size + 16, GFP_ATOMIC);
+	if (skb == NULL)
+		return -ENOMEM;
+	
+	hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg));
+	hdr->sadb_msg_version = PF_KEY_V2;
+	hdr->sadb_msg_type = SADB_X_NAT_T_NEW_MAPPING;
+	hdr->sadb_msg_satype = satype;
+	hdr->sadb_msg_len = size / sizeof(uint64_t);	/* length in 64-bit words */
+	hdr->sadb_msg_errno = 0;
+	hdr->sadb_msg_reserved = 0;
+	hdr->sadb_msg_seq = x->km.seq = get_acqseq();	/* the sequence number is also stored in the state */
+	hdr->sadb_msg_pid = 0;
+
+	/* SA */
+	sa = (struct sadb_sa *) skb_put(skb, sizeof(struct sadb_sa));
+	sa->sadb_sa_len = sizeof(struct sadb_sa)/sizeof(uint64_t);
+	sa->sadb_sa_exttype = SADB_EXT_SA;
+	sa->sadb_sa_spi = x->id.spi;
+	sa->sadb_sa_replay = 0;
+	sa->sadb_sa_state = 0;
+	sa->sadb_sa_auth = 0;
+	sa->sadb_sa_encrypt = 0;
+	sa->sadb_sa_flags = 0;
+
+	/* ADDRESS_SRC (old addr) */
+	addr = (struct sadb_address*)
+		skb_put(skb, sizeof(struct sadb_address)+sockaddr_size);
+	addr->sadb_address_len = 
+		(sizeof(struct sadb_address)+sockaddr_size)/
+			sizeof(uint64_t);
+	addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC;
+	addr->sadb_address_proto = 0;
+	addr->sadb_address_reserved = 0;
+	if (x->props.family == AF_INET) {
+		addr->sadb_address_prefixlen = 32;
+
+		sin = (struct sockaddr_in *) (addr + 1);
+		sin->sin_family = AF_INET;
+		sin->sin_addr.s_addr = x->props.saddr.a4;
+		sin->sin_port = 0;
+		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+	}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	else if (x->props.family == AF_INET6) {
+		addr->sadb_address_prefixlen = 128;
+
+		sin6 = (struct sockaddr_in6 *) (addr + 1);
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_port = 0;
+		sin6->sin6_flowinfo = 0;
+		memcpy(&sin6->sin6_addr,
+		       x->props.saddr.a6, sizeof(struct in6_addr));
+		sin6->sin6_scope_id = 0;
+	}
+#endif
+	else
+		BUG();	/* only AF_INET and (when configured) AF_INET6 are handled */
+
+	/* NAT_T_SPORT (old port) */
+	n_port = (struct sadb_x_nat_t_port*) skb_put(skb, sizeof (*n_port));
+	n_port->sadb_x_nat_t_port_len = sizeof(*n_port)/sizeof(uint64_t);
+	n_port->sadb_x_nat_t_port_exttype = SADB_X_EXT_NAT_T_SPORT;
+	n_port->sadb_x_nat_t_port_port = natt->encap_sport;
+	n_port->sadb_x_nat_t_port_reserved = 0;
+
+	/* ADDRESS_DST (new addr) */
+	addr = (struct sadb_address*)
+		skb_put(skb, sizeof(struct sadb_address)+sockaddr_size);
+	addr->sadb_address_len = 
+		(sizeof(struct sadb_address)+sockaddr_size)/
+			sizeof(uint64_t);
+	addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST;	/* was ADDRESS_SRC: a second SRC, no DST */
+	addr->sadb_address_proto = 0;
+	addr->sadb_address_reserved = 0;
+	if (x->props.family == AF_INET) {
+		addr->sadb_address_prefixlen = 32;
+
+		sin = (struct sockaddr_in *) (addr + 1);
+		sin->sin_family = AF_INET;
+		sin->sin_addr.s_addr = ipaddr->a4;
+		sin->sin_port = 0;
+		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+	}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	else if (x->props.family == AF_INET6) {
+		addr->sadb_address_prefixlen = 128;
+
+		sin6 = (struct sockaddr_in6 *) (addr + 1);
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_port = 0;
+		sin6->sin6_flowinfo = 0;
+		memcpy(&sin6->sin6_addr, &ipaddr->a6, sizeof(struct in6_addr));
+		sin6->sin6_scope_id = 0;
+	}
+#endif
+	else
+		BUG();	/* only AF_INET and (when configured) AF_INET6 are handled */
+
+	/* NAT_T_DPORT (new port) */
+	n_port = (struct sadb_x_nat_t_port*) skb_put(skb, sizeof (*n_port));
+	n_port->sadb_x_nat_t_port_len = sizeof(*n_port)/sizeof(uint64_t);
+	n_port->sadb_x_nat_t_port_exttype = SADB_X_EXT_NAT_T_DPORT;
+	n_port->sadb_x_nat_t_port_port = sport;
+	n_port->sadb_x_nat_t_port_reserved = 0;
+
+	return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL);
+}
+
+static int pfkey_sendmsg(struct socket *sock, struct msghdr *msg, int len,
+			 struct scm_cookie *scm)
+{
+	struct sock *sk = sock->sk;
+	struct sk_buff *skb = NULL;
+	struct sadb_msg *hdr = NULL;
+	int err;
+	/* Copy one PF_KEY message from user space and process it; returns len or -errno. */
+	err = -EOPNOTSUPP;	/* err is preset before each step; "out" reports it */
+	if (msg->msg_flags & MSG_OOB)
+		goto out;
+
+	err = -EMSGSIZE;	/* message must not exceed the send buffer minus 32 bytes */
+	if ((unsigned)len > sk->sndbuf-32)
+		goto out;
+
+	err = -ENOBUFS;
+	skb = alloc_skb(len, GFP_KERNEL);
+	if (skb == NULL)
+		goto out;
+
+	err = -EFAULT;	/* also the value of err handed to pfkey_get_base_msg() below */
+	if (memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len))
+		goto out;
+
+	hdr = pfkey_get_base_msg(skb, &err);
+	if (!hdr)
+		goto out;
+
+	down(&xfrm_cfg_sem);	/* pfkey_process() runs with xfrm_cfg_sem held */
+	err = pfkey_process(sk, skb, hdr);
+	up(&xfrm_cfg_sem);
+
+out:
+	if (err && hdr && pfkey_error(hdr, err, sk) == 0)	/* presumably sends an error reply -- confirm */
+		err = 0;	/* error handed to pfkey_error(); the call itself returns len */
+	if (skb)
+		kfree_skb(skb);
+
+	return err ? : len;	/* GNU "?:" -- err if non-zero, otherwise len */
+}
+
+static int pfkey_recvmsg(struct socket *sock, struct msghdr *msg, int len,
+			 int flags, struct scm_cookie *scm)
+{
+	struct sock *sk = sock->sk;
+	struct sk_buff *skb;
+	int copied, err;
+	/* Dequeue one datagram and copy at most len bytes of it into msg->msg_iov. */
+	err = -EINVAL;	/* only MSG_PEEK, MSG_DONTWAIT and MSG_TRUNC are accepted */
+	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC))
+		goto out;
+
+	msg->msg_namelen = 0;	/* no peer address is returned */
+	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
+	if (skb == NULL)
+		goto out;	/* returns whatever skb_recv_datagram() left in err */
+
+	copied = skb->len;
+	if (copied > len) {
+		msg->msg_flags |= MSG_TRUNC;	/* tell the caller the message was cut short */
+		copied = len;
+	}
+
+	skb->h.raw = skb->data;
+	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	if (err)
+		goto out_free;
+
+	sock_recv_timestamp(msg, sk, skb);
+	/* With MSG_TRUNC requested, report the full message length rather than the copied length. */
+	err = (flags & MSG_TRUNC) ? skb->len : copied;
+
+out_free:
+	skb_free_datagram(sk, skb);
+out:
+	return err;	/* byte count on success, negative errno on failure */
+}
+
+static struct proto_ops pfkey_ops = {	/* socket operations table for PF_KEY sockets */
+	.family		=	PF_KEY,
+
+	/* Operations that make no sense on pfkey sockets: generic sock_no_* stubs. */
+	.bind		=	sock_no_bind,
+	.connect	=	sock_no_connect,
+	.socketpair	=	sock_no_socketpair,
+	.accept		=	sock_no_accept,
+	.getname	=	sock_no_getname,
+	.ioctl		=	sock_no_ioctl,
+	.listen		=	sock_no_listen,
+	.shutdown	=	sock_no_shutdown,
+	.setsockopt	=	sock_no_setsockopt,
+	.getsockopt	=	sock_no_getsockopt,
+	.mmap		=	sock_no_mmap,
+	.sendpage	=	sock_no_sendpage,
+
+	/* Now the operations that really occur: release, poll and message I/O. */
+	.release	=	pfkey_release,
+	.poll		=	datagram_poll,
+	.sendmsg	=	pfkey_sendmsg,
+	.recvmsg	=	pfkey_recvmsg,
+};
+
+static struct net_proto_family pfkey_family_ops = {	/* passed to sock_register() by ipsec_pfkey_init() */
+	.family	=	PF_KEY,
+	.create	=	pfkey_create,
+};
+
+#ifdef CONFIG_PROC_FS
+static int pfkey_read_proc(char *buffer, char **start, off_t offset,
+			   int length, int *eof, void *data)
+{
+	off_t pos = 0;		/* position reached in the full listing */
+	off_t begin = 0;	/* listing position that buffer[0] corresponds to */
+	int len = 0;		/* bytes currently held in buffer */
+	struct sock *s;
+	/* Emit a header line, then one line per socket on pfkey_table. */
+	len += sprintf(buffer,"sk       RefCnt Rmem   Wmem   User   Inode\n");
+
+	read_lock(&pfkey_table_lock);
+
+	for (s = pfkey_table; s; s = s->next) {
+		/* NOTE(review): sprintf() is not bounded by the size of buffer -- confirm the caller leaves slack */
+		len += sprintf(buffer+len,"%p %-6d %-6u %-6u %-6u %-6lu",
+			       s,
+			       atomic_read(&s->refcnt),
+			       atomic_read(&s->rmem_alloc),
+			       atomic_read(&s->wmem_alloc),
+			       sock_i_uid(s),
+			       sock_i_ino(s)
+			       );
+
+		buffer[len++] = '\n';
+		/* Output that ends before offset is discarded so the buffer is reused. */
+		pos = begin + len;
+		if (pos < offset) {
+			len = 0;
+			begin = pos;
+		}
+		if(pos > offset + length)	/* requested window is full: stop without setting *eof */
+			goto done;
+	}
+	*eof = 1;	/* the whole table was walked */
+
+done:
+	read_unlock(&pfkey_table_lock);
+
+	*start = buffer + (offset - begin);	/* first byte of the requested window */
+	len -= (offset - begin);
+
+	if (len > length)
+		len = length;
+	if (len < 0)
+		len = 0;
+
+	return len;	/* number of bytes available at *start, clamped to [0, length] */
+}
+#endif
+
+static struct xfrm_mgr pfkeyv2_mgr =	/* callbacks handed to xfrm_register_km() by ipsec_pfkey_init() */
+{
+	.id		= "pfkeyv2",
+	.notify		= pfkey_send_notify,
+	.acquire	= pfkey_send_acquire,
+	.compile_policy	= pfkey_compile_policy,
+	.new_mapping	= pfkey_send_new_mapping,
+};
+
+static void __exit ipsec_pfkey_exit(void)
+{
+	xfrm_unregister_km(&pfkeyv2_mgr);	/* tear down in the reverse order of ipsec_pfkey_init() */
+	remove_proc_entry("net/pfkey", 0);
+	sock_unregister(PF_KEY);
+}
+
+static int __init ipsec_pfkey_init(void)
+{
+	sock_register(&pfkey_family_ops);	/* NOTE(review): result is ignored; this function always returns 0 */
+#ifdef CONFIG_PROC_FS
+	create_proc_read_entry("net/pfkey", 0, 0, pfkey_read_proc, NULL);	/* socket listing, see pfkey_read_proc() */
+#endif
+	xfrm_register_km(&pfkeyv2_mgr);	/* NOTE(review): result is ignored here as well */
+	return 0;
+}
+
+module_init(ipsec_pfkey_init);
+module_exit(ipsec_pfkey_exit);
+MODULE_LICENSE("GPL");
Index: net/netlink/af_netlink.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/netlink/af_netlink.c,v
retrieving revision 1.1.1.21
retrieving revision 1.1.1.21.2.1
diff -u -r1.1.1.21 -r1.1.1.21.2.1
--- a/net/netlink/af_netlink.c	18 Feb 2004 13:36:32 -0000	1.1.1.21
+++ b/net/netlink/af_netlink.c	16 Apr 2004 13:16:26 -0000	1.1.1.21.2.1
@@ -496,13 +496,13 @@
 	return -1;
 }
 
-void netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
-		       u32 group, int allocation)
+int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
+		      u32 group, int allocation)
 {
 	struct sock *sk;
 	struct sk_buff *skb2 = NULL;
 	int protocol = ssk->protocol;
-	int failure = 0;
+	int failure = 0, delivered = 0;
 
 	/* While we sleep in clone, do not allow to change socket list */
 
@@ -536,8 +536,10 @@
 			failure = 1;
 		} else if (netlink_broadcast_deliver(sk, skb2)) {
 			netlink_overrun(sk);
-		} else
+		} else {
+			delivered = 1;
 			skb2 = NULL;
+		}
 		sock_put(sk);
 	}
 
@@ -546,6 +548,12 @@
 	if (skb2)
 		kfree_skb(skb2);
 	kfree_skb(skb);
+
+	if (delivered)
+		return 0;
+	if (failure)
+		return -ENOBUFS;
+	return -ESRCH;
 }
 
 void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
Index: net/sched/cls_route.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/sched/cls_route.c,v
retrieving revision 1.1.1.16
retrieving revision 1.1.1.16.2.1
diff -u -r1.1.1.16 -r1.1.1.16.2.1
--- a/net/sched/cls_route.c	21 Dec 2001 17:42:06 -0000	1.1.1.16
+++ b/net/sched/cls_route.c	16 Apr 2004 13:16:27 -0000	1.1.1.16.2.1
@@ -154,7 +154,7 @@
 	if (head == NULL)
 		goto old_method;
 
-	iif = ((struct rtable*)dst)->key.iif;
+	iif = ((struct rtable*)dst)->fl.iif;
 
 	h = route4_fastmap_hash(id, iif);
 	if (id == head->fastmap[h].id &&
Index: net/sctp/input.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/sctp/input.c,v
retrieving revision 1.1.1.7
retrieving revision 1.1.1.7.2.1
diff -u -r1.1.1.7 -r1.1.1.7.2.1
--- a/net/sctp/input.c	14 Apr 2004 13:05:41 -0000	1.1.1.7
+++ b/net/sctp/input.c	16 Apr 2004 13:16:27 -0000	1.1.1.7.2.1
@@ -58,6 +58,7 @@
 #include <net/snmp.h>
 #include <net/sock.h>
 #include <linux/ipsec.h>
+#include <net/xfrm.h>
 #include <net/sctp/sctp.h>
 #include <net/sctp/sm.h>
 
@@ -175,7 +176,7 @@
 	rcvr = asoc ? &asoc->base : &ep->base;
 	sk = rcvr->sk;
 
-	if (!ipsec_sk_policy(sk, skb))
+	if (!xfrm_policy_check(sk, XFRM_POLICY_IN, skb, family))
 		goto discard_release;
 
 	ret = sk_filter(sk, skb, 1);
Index: net/sctp/ipv6.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/sctp/ipv6.c,v
retrieving revision 1.1.1.9
retrieving revision 1.1.1.9.2.2
diff -u -r1.1.1.9 -r1.1.1.9.2.2
--- a/net/sctp/ipv6.c	14 Apr 2004 13:05:41 -0000	1.1.1.9
+++ b/net/sctp/ipv6.c	16 Apr 2004 23:32:09 -0000	1.1.1.9.2.2
@@ -82,14 +82,14 @@
 
 /* FIXME: This macro needs to be moved to a common header file. */
 #define NIP6(addr) \
-	ntohs((addr)->s6_addr16[0]), \
-	ntohs((addr)->s6_addr16[1]), \
-	ntohs((addr)->s6_addr16[2]), \
-	ntohs((addr)->s6_addr16[3]), \
-	ntohs((addr)->s6_addr16[4]), \
-	ntohs((addr)->s6_addr16[5]), \
-	ntohs((addr)->s6_addr16[6]), \
-	ntohs((addr)->s6_addr16[7])
+	ntohs((addr).s6_addr16[0]), \
+	ntohs((addr).s6_addr16[1]), \
+	ntohs((addr).s6_addr16[2]), \
+	ntohs((addr).s6_addr16[3]), \
+	ntohs((addr).s6_addr16[4]), \
+	ntohs((addr).s6_addr16[5]), \
+	ntohs((addr).s6_addr16[6]), \
+	ntohs((addr).s6_addr16[7])
 
 /* ICMP error handler. */
 void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
@@ -165,12 +165,12 @@
 	/* Fill in the dest address from the route entry passed with the skb
 	 * and the source address from the transport.
 	 */
-	fl.fl6_dst = &transport->ipaddr.v6.sin6_addr;
-	fl.fl6_src = &transport->saddr.v6.sin6_addr;
+	ipv6_addr_copy(&fl.fl6_dst, &transport->ipaddr.v6.sin6_addr);
+	ipv6_addr_copy(&fl.fl6_src, &transport->saddr.v6.sin6_addr);
 
 	fl.fl6_flowlabel = np->flow_label;
 	IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
-	if (ipv6_addr_type(fl.fl6_src) & IPV6_ADDR_LINKLOCAL)
+	if (ipv6_addr_type(&fl.fl6_src) & IPV6_ADDR_LINKLOCAL)
 		fl.oif = transport->saddr.v6.sin6_scope_id;
 	else
 		fl.oif = sk->sk_bound_dev_if;
@@ -179,7 +179,7 @@
 
 	if (np->opt && np->opt->srcrt) {
 		struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
-		fl.fl6_dst = rt0->addr;
+		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
 	}
 
 	SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, "
@@ -204,7 +204,7 @@
 	struct flowi fl;
 
 	memset(&fl, 0, sizeof(fl));
-	fl.fl6_dst = &daddr->v6.sin6_addr;
+	ipv6_addr_copy(&fl.fl6_dst, &daddr->v6.sin6_addr);
 	if (ipv6_addr_type(&daddr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
 		fl.oif = daddr->v6.sin6_scope_id;
 	
@@ -213,7 +213,7 @@
 			  __FUNCTION__, NIP6(fl.fl6_dst));
 
 	if (saddr) {
-		fl.fl6_src = &saddr->v6.sin6_addr;
+		ipv6_addr_copy(&fl.fl6_src, &saddr->v6.sin6_addr);
 		SCTP_DEBUG_PRINTK(
 			"SRC=%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x - ",
 			NIP6(fl.fl6_src));
@@ -226,7 +226,7 @@
 		SCTP_DEBUG_PRINTK(
 			"rt6_dst:%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x "
 			"rt6_src:%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
-			NIP6(&rt->rt6i_dst.addr), NIP6(&rt->rt6i_src.addr));
+			NIP6(rt->rt6i_dst.addr), NIP6(rt->rt6i_src.addr));
 	} else {
 		SCTP_DEBUG_PRINTK("NO ROUTE\n");
 	}
@@ -273,13 +273,13 @@
 
 	SCTP_DEBUG_PRINTK("%s: asoc:%p dst:%p "
 			  "daddr:%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x ",
-			  __FUNCTION__, asoc, dst, NIP6(&daddr->v6.sin6_addr));
+			  __FUNCTION__, asoc, dst, NIP6(daddr->v6.sin6_addr));
 
 	if (!asoc) {
 		ipv6_get_saddr(dst, &daddr->v6.sin6_addr,&saddr->v6.sin6_addr);
 		SCTP_DEBUG_PRINTK("saddr from ipv6_get_saddr: "
 				  "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
-				  NIP6(&saddr->v6.sin6_addr));
+				  NIP6(saddr->v6.sin6_addr));
 		return;
 	}
 
@@ -308,12 +308,12 @@
 		memcpy(saddr, baddr, sizeof(union sctp_addr));
 		SCTP_DEBUG_PRINTK("saddr: "
 				  "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
-				  NIP6(&saddr->v6.sin6_addr));
+				  NIP6(saddr->v6.sin6_addr));
 	} else {
 		printk(KERN_ERR "%s: asoc:%p Could not find a valid source "
 		       "address for the "
 		       "dest:%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
-		       __FUNCTION__, asoc, NIP6(&daddr->v6.sin6_addr));
+		       __FUNCTION__, asoc, NIP6(daddr->v6.sin6_addr));
 	}
 
 	sctp_read_unlock(addr_lock);
@@ -629,7 +629,7 @@
 	/* Init the ipv4 part of the socket since we can have sockets
 	 * using v6 API for ipv4.
 	 */
-	newinet->ttl = sysctl_ip_default_ttl;
+	newinet->uc_ttl = -1;
 	newinet->mc_loop = 1;
 	newinet->mc_ttl = 1;
 	newinet->mc_index = 0;
@@ -678,7 +678,7 @@
 static void sctp_v6_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr)
 {
 	seq_printf(seq, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x ",
-		   NIP6(&addr->v6.sin6_addr));
+		   NIP6(addr->v6.sin6_addr));
 }
 
 /* Initialize a PF_INET6 socket msg_name. */
@@ -912,14 +912,15 @@
 	.flags         = SCTP_PROTOSW_FLAG,
 };
 
+static int sctp6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+{
+	return sctp_rcv(*pskb) ? -1 : 0;	/* any non-zero sctp_rcv() result becomes -1; nhoffp is not used */
+}
+
 static struct inet6_protocol sctpv6_protocol = {
-	.handler      = sctp_rcv,
+	.handler      = sctp6_rcv,
 	.err_handler  = sctp_v6_err,
-	.next         = NULL,
-	.protocol     = IPPROTO_SCTP,
-	.copy         = 0,
-	.data         = NULL,
-	.name         = "SCTPv6",
+	.flags        = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
 };
 
 static struct sctp_af sctp_ipv6_specific = {
@@ -967,7 +968,8 @@
 int sctp_v6_init(void)
 {
 	/* Register inet6 protocol. */
-	inet6_add_protocol(&sctpv6_protocol);
+	if (inet6_add_protocol(&sctpv6_protocol, IPPROTO_SCTP) < 0)
+		return -EAGAIN;
 
 	/* Add SCTPv6(UDP and TCP style) to inetsw6 linked list. */
 	inet6_register_protosw(&sctpv6_seqpacket_protosw);
@@ -989,7 +991,7 @@
 void sctp_v6_exit(void)
 {
 	list_del(&sctp_ipv6_specific.list);
-	inet6_del_protocol(&sctpv6_protocol);
+	inet6_del_protocol(&sctpv6_protocol, IPPROTO_SCTP);
 	inet6_unregister_protosw(&sctpv6_seqpacket_protosw);
 	inet6_unregister_protosw(&sctpv6_stream_protosw);
 	unregister_inet6addr_notifier(&sctp_inetaddr_notifier);
Index: net/sctp/protocol.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/net/sctp/protocol.c,v
retrieving revision 1.1.1.7
retrieving revision 1.1.1.7.2.1
diff -u -r1.1.1.7 -r1.1.1.7.2.1
--- a/net/sctp/protocol.c	14 Apr 2004 13:05:41 -0000	1.1.1.7
+++ b/net/sctp/protocol.c	16 Apr 2004 13:16:27 -0000	1.1.1.7.2.1
@@ -433,7 +433,7 @@
 				  union sctp_addr *saddr)
 {
 	struct rtable *rt;
-	struct rt_key key;
+	struct flowi fl;
 	struct sctp_bind_addr *bp;
 	rwlock_t *addr_lock;
 	struct sctp_sockaddr_entry *laddr;
@@ -441,21 +441,21 @@
 	struct dst_entry *dst = NULL;
 	union sctp_addr dst_saddr;
 
-	memset(&key, 0x0, sizeof(struct rt_key));
-	key.dst = daddr->v4.sin_addr.s_addr;
-
+	memset(&fl, 0x0, sizeof(struct flowi));
+	fl.fl4_dst = daddr->v4.sin_addr.s_addr;
+	fl.proto = IPPROTO_SCTP;
 	if (asoc) {
-		key.tos = RT_CONN_FLAGS(asoc->base.sk);
-		key.oif = asoc->base.sk->bound_dev_if;
+		fl.fl4_tos = RT_CONN_FLAGS(asoc->base.sk);
+		fl.oif = asoc->base.sk->bound_dev_if;
 	}
 	if (saddr)
-		key.src = saddr->v4.sin_addr.s_addr;
+		fl.fl4_src = saddr->v4.sin_addr.s_addr;
 
 	SCTP_DEBUG_PRINTK("%s: DST:%u.%u.%u.%u, SRC:%u.%u.%u.%u - ",
-			  __FUNCTION__, NIPQUAD(key.dst),
-			  NIPQUAD(key.src));
+			  __FUNCTION__, NIPQUAD(fl.fl4_dst),
+			  NIPQUAD(fl.fl4_src));
 
-	if (!ip_route_output_key(&rt, &key)) {
+	if (!ip_route_output_key(&rt, &fl)) {
 		dst = &rt->u.dst;
 	}
 
@@ -497,8 +497,8 @@
 		laddr = list_entry(pos, struct sctp_sockaddr_entry, list);
 
 		if (AF_INET == laddr->a.sa.sa_family) {
-			key.src = laddr->a.v4.sin_addr.s_addr;
-			if (!ip_route_output_key(&rt, &key)) {
+			fl.fl4_src = laddr->a.v4.sin_addr.s_addr;
+			if (!ip_route_output_key(&rt, &fl)) {
 				dst = &rt->u.dst;
 				goto out_unlock;
 			}
@@ -587,7 +587,7 @@
 	newinet->pmtudisc = inet->pmtudisc;
       	newinet->id = 0;
 
-	newinet->ttl = sysctl_ip_default_ttl;
+	newinet->uc_ttl = -1;
 	newinet->mc_loop = 1;
 	newinet->mc_ttl = 1;
 	newinet->mc_index = 0;
@@ -656,7 +656,7 @@
 		return err;
 	}
 	sctp_ctl_socket->sk->sk_allocation = GFP_ATOMIC;
-	inet_sk(sctp_ctl_socket->sk)->ttl = MAXTTL;
+	inet_sk(sctp_ctl_socket->sk)->uc_ttl = -1;
 
 	return 0;
 }
@@ -872,8 +872,7 @@
 static struct inet_protocol sctp_protocol = {
 	.handler     = sctp_rcv,
 	.err_handler = sctp_v4_err,
-	.protocol    = IPPROTO_SCTP,
-	.name        = "SCTP"
+	.no_policy   = 1,
 };
 
 /* IPv4 address related functions.  */
@@ -960,7 +959,8 @@
 		return -EINVAL;
 
 	/* Add SCTP to inet_protos hash table.  */
-	inet_add_protocol(&sctp_protocol);
+	if (inet_add_protocol(&sctp_protocol, IPPROTO_SCTP) < 0)
+		return -EAGAIN;
 
 	/* Add SCTP(TCP and UDP style) to inetsw linked list.  */
 	inet_register_protosw(&sctp_seqpacket_protosw);
@@ -1148,7 +1148,7 @@
 err_init_mibs:
 	kmem_cache_destroy(sctp_chunk_cachep);
 err_chunk_cachep:
-	inet_del_protocol(&sctp_protocol);
+	inet_del_protocol(&sctp_protocol, IPPROTO_SCTP);
 	inet_unregister_protosw(&sctp_seqpacket_protosw);
 	inet_unregister_protosw(&sctp_stream_protosw);
 	return status;
@@ -1188,7 +1188,7 @@
 	sctp_proc_exit();
 	cleanup_sctp_mibs();
 
-	inet_del_protocol(&sctp_protocol);
+	inet_del_protocol(&sctp_protocol, IPPROTO_SCTP);
 	inet_unregister_protosw(&sctp_seqpacket_protosw);
 	inet_unregister_protosw(&sctp_stream_protosw);
 }
Index: net/xfrm/Config.in
===================================================================
RCS file: net/xfrm/Config.in
diff -N net/xfrm/Config.in
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ b/net/xfrm/Config.in	16 Apr 2004 13:16:27 -0000	1.2.18.1
@@ -0,0 +1,4 @@
+#
+# XFRM configuration
+#
+tristate '  IP: IPsec user configuration interface' CONFIG_XFRM_USER
Index: net/xfrm/Makefile
===================================================================
RCS file: net/xfrm/Makefile
diff -N net/xfrm/Makefile
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ b/net/xfrm/Makefile	16 Apr 2004 13:16:27 -0000	1.3.18.1
@@ -0,0 +1,13 @@
+#
+# Makefile for the XFRM subsystem.
+#
+
+O_TARGET := xfrm.o
+
+export-objs = xfrm_export.o
+
+obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_input.o xfrm_algo.o xfrm_output.o \
+	xfrm_export.o
+obj-$(CONFIG_XFRM_USER) += xfrm_user.o
+
+include $(TOPDIR)/Rules.make
Index: net/xfrm/xfrm_algo.c
===================================================================
RCS file: net/xfrm/xfrm_algo.c
diff -N net/xfrm/xfrm_algo.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ b/net/xfrm/xfrm_algo.c	16 Apr 2004 13:16:27 -0000	1.5.2.1
@@ -0,0 +1,729 @@
+/* 
+ * xfrm algorithm interface
+ *
+ * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) 
+ * any later version.
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/pfkeyv2.h>
+#include <net/xfrm.h>
+#if defined(CONFIG_INET_AH) || defined(CONFIG_INET_AH_MODULE) || defined(CONFIG_INET6_AH) || defined(CONFIG_INET6_AH_MODULE)
+#include <net/ah.h>
+#endif
+#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE)
+#include <net/esp.h>
+#endif
+#include <asm/scatterlist.h>
+
+/*
+ * Algorithms supported by IPsec.  These entries contain properties which
+ * are used in key negotiation and xfrm processing, and are used to verify
+ * that instantiated crypto transforms have correct parameters for IPsec
+ * purposes.
+ */
+static struct xfrm_algo_desc aalg_list[] = {	/* authentication algorithms (.uinfo.auth) */
+{
+	.name = "digest_null",	/* .name is what xfrm_probe_algs() passes to crypto_alg_available() */
+	
+	.uinfo = {
+		.auth = {
+			.icv_truncbits = 0,
+			.icv_fullbits = 0,
+		}
+	},
+	
+	.desc = {
+		.sadb_alg_id = SADB_X_AALG_NULL,
+		.sadb_alg_ivlen = 0,
+		.sadb_alg_minbits = 0,
+		.sadb_alg_maxbits = 0
+	}
+},
+{
+	.name = "md5",
+
+	.uinfo = {
+		.auth = {
+			.icv_truncbits = 96,
+			.icv_fullbits = 128,
+		}
+	},
+	
+	.desc = {
+		.sadb_alg_id = SADB_AALG_MD5HMAC,
+		.sadb_alg_ivlen = 0,
+		.sadb_alg_minbits = 128,
+		.sadb_alg_maxbits = 128
+	}
+},
+{
+	.name = "sha1",
+
+	.uinfo = {
+		.auth = {
+			.icv_truncbits = 96,
+			.icv_fullbits = 160,
+		}
+	},
+
+	.desc = {
+		.sadb_alg_id = SADB_AALG_SHA1HMAC,
+		.sadb_alg_ivlen = 0,
+		.sadb_alg_minbits = 160,
+		.sadb_alg_maxbits = 160
+	}
+},
+{
+	.name = "sha256",
+
+	.uinfo = {
+		.auth = {
+			.icv_truncbits = 96,	/* NOTE(review): RFC 4868 specifies 128 for HMAC-SHA-256 -- confirm interop */
+			.icv_fullbits = 256,
+		}
+	},
+
+	.desc = {
+		.sadb_alg_id = SADB_X_AALG_SHA2_256HMAC,
+		.sadb_alg_ivlen = 0,
+		.sadb_alg_minbits = 256,
+		.sadb_alg_maxbits = 256
+	}
+},
+{
+	.name = "ripemd160",
+
+	.uinfo = {
+		.auth = {
+			.icv_truncbits = 96,
+			.icv_fullbits = 160,
+		}
+	},
+
+	.desc = {
+		.sadb_alg_id = SADB_X_AALG_RIPEMD160HMAC,
+		.sadb_alg_ivlen = 0,
+		.sadb_alg_minbits = 160,
+		.sadb_alg_maxbits = 160
+	}
+},
+};
+
+static struct xfrm_algo_desc ealg_list[] = {	/* encryption algorithms (.uinfo.encr) */
+{
+	.name = "cipher_null",	/* .name is what xfrm_probe_algs() passes to crypto_alg_available() */
+	
+	.uinfo = {
+		.encr = {
+			.blockbits = 8,
+			.defkeybits = 0,
+		}
+	},
+	
+	.desc = {
+		.sadb_alg_id =	SADB_EALG_NULL,
+		.sadb_alg_ivlen = 0,
+		.sadb_alg_minbits = 0,
+		.sadb_alg_maxbits = 0
+	}
+},
+{
+	.name = "des",
+
+	.uinfo = {
+		.encr = {
+			.blockbits = 64,
+			.defkeybits = 64,
+		}
+	},
+
+	.desc = {
+		.sadb_alg_id = SADB_EALG_DESCBC,
+		.sadb_alg_ivlen = 8,
+		.sadb_alg_minbits = 64,
+		.sadb_alg_maxbits = 64
+	}
+},
+{
+	.name = "des3_ede",
+
+	.uinfo = {
+		.encr = {
+			.blockbits = 64,
+			.defkeybits = 192,
+		}
+	},
+
+	.desc = {
+		.sadb_alg_id = SADB_EALG_3DESCBC,
+		.sadb_alg_ivlen = 8,
+		.sadb_alg_minbits = 192,
+		.sadb_alg_maxbits = 192
+	}
+},
+{
+	.name = "cast128",
+
+	.uinfo = {
+		.encr = {
+			.blockbits = 64,
+			.defkeybits = 128,
+		}
+	},
+
+	.desc = {
+		.sadb_alg_id = SADB_X_EALG_CASTCBC,
+		.sadb_alg_ivlen = 8,
+		.sadb_alg_minbits = 40,
+		.sadb_alg_maxbits = 128
+	}
+},
+{
+	.name = "blowfish",
+
+	.uinfo = {
+		.encr = {
+			.blockbits = 64,
+			.defkeybits = 128,
+		}
+	},
+
+	.desc = {
+		.sadb_alg_id = SADB_X_EALG_BLOWFISHCBC,
+		.sadb_alg_ivlen = 8,
+		.sadb_alg_minbits = 40,
+		.sadb_alg_maxbits = 448
+	}
+},
+{
+	.name = "aes",
+
+	.uinfo = {
+		.encr = {
+			.blockbits = 128,
+			.defkeybits = 128,
+		}
+	},
+
+	.desc = {
+		.sadb_alg_id = SADB_X_EALG_AESCBC,
+		.sadb_alg_ivlen = 8,	/* NOTE(review): blockbits is 128 (16 bytes) but ivlen is 8 -- confirm intended */
+		.sadb_alg_minbits = 128,
+		.sadb_alg_maxbits = 256
+	}
+},
+{
+        .name = "serpent",
+
+        .uinfo = {
+                .encr = {
+                        .blockbits = 128,
+                        .defkeybits = 128,
+                }
+        },
+
+        .desc = {
+                .sadb_alg_id = SADB_X_EALG_SERPENTCBC,
+                .sadb_alg_ivlen = 8,	/* NOTE(review): same ivlen/blockbits mismatch as "aes" -- confirm */
+                .sadb_alg_minbits = 128,
+                .sadb_alg_maxbits = 256,
+        }
+},
+{
+        .name = "twofish",
+                 
+        .uinfo = {
+                .encr = {
+                        .blockbits = 128,
+                        .defkeybits = 128,
+                }
+        },
+
+        .desc = {
+                .sadb_alg_id = SADB_X_EALG_TWOFISHCBC,
+                .sadb_alg_ivlen = 8,	/* NOTE(review): same ivlen/blockbits mismatch as "aes" -- confirm */
+                .sadb_alg_minbits = 128,
+                .sadb_alg_maxbits = 256
+        }
+},
+};
+
+static struct xfrm_algo_desc calg_list[] = {	/* compression algorithms (.uinfo.comp) */
+{
+	.name = "deflate",	/* .name is what xfrm_probe_algs() passes to crypto_alg_available() */
+	.uinfo = {
+		.comp = {
+			.threshold = 90,
+		}
+	},
+	.desc = { .sadb_alg_id = SADB_X_CALG_DEFLATE }	/* remaining .desc fields are zero-initialized */
+},
+{
+	.name = "lzs",
+	.uinfo = {
+		.comp = {
+			.threshold = 90,
+		}
+	},
+	.desc = { .sadb_alg_id = SADB_X_CALG_LZS }
+},
+{
+	.name = "lzjh",
+	.uinfo = {
+		.comp = {
+			.threshold = 50,
+		}
+	},
+	.desc = { .sadb_alg_id = SADB_X_CALG_LZJH }
+},
+};
+
+static inline int aalg_entries(void)
+{
+	return (int)(sizeof aalg_list / sizeof *aalg_list);	/* element count of aalg_list */
+}
+
+static inline int ealg_entries(void)
+{
+	return (int)(sizeof ealg_list / sizeof *ealg_list);	/* element count of ealg_list */
+}
+
+static inline int calg_entries(void)
+{
+	return (int)(sizeof calg_list / sizeof *calg_list);	/* element count of calg_list */
+}
+
+/* Todo: generic iterators.  Find an available auth algorithm by SADB id. */
+struct xfrm_algo_desc *xfrm_aalg_get_byid(int alg_id)
+{
+	struct xfrm_algo_desc *alg = aalg_list;
+	const struct xfrm_algo_desc *end = aalg_list + aalg_entries();
+
+	/* The first id match decides: an unavailable match yields NULL. */
+	for (; alg < end; alg++) {
+		if (alg->desc.sadb_alg_id != alg_id)
+			continue;
+		return alg->available ? alg : NULL;
+	}
+
+	return NULL;
+}
+
+struct xfrm_algo_desc *xfrm_ealg_get_byid(int alg_id)
+{
+	struct xfrm_algo_desc *alg = ealg_list;
+	const struct xfrm_algo_desc *end = ealg_list + ealg_entries();
+
+	/* Encryption lookup by SADB id; an unavailable first match yields NULL. */
+	for (; alg < end; alg++) {
+		if (alg->desc.sadb_alg_id != alg_id)
+			continue;
+		return alg->available ? alg : NULL;
+	}
+
+	return NULL;
+}
+
+struct xfrm_algo_desc *xfrm_calg_get_byid(int alg_id)
+{
+	struct xfrm_algo_desc *alg = calg_list;
+	const struct xfrm_algo_desc *end = calg_list + calg_entries();
+
+	/* Compression lookup by SADB id; an unavailable first match yields NULL. */
+	for (; alg < end; alg++) {
+		if (alg->desc.sadb_alg_id != alg_id)
+			continue;
+		return alg->available ? alg : NULL;
+	}
+
+	return NULL;
+}
+
+struct xfrm_algo_desc *xfrm_aalg_get_byname(char *name)
+{
+	struct xfrm_algo_desc *alg = aalg_list;
+	const struct xfrm_algo_desc *end = aalg_list + aalg_entries();
+
+	/* Find an available auth algorithm by name; NULL name finds nothing. */
+	if (name == NULL)
+		return NULL;
+
+	/* The first name match decides, even when it is unavailable. */
+	for (; alg < end; alg++) {
+		if (strcmp(name, alg->name) != 0)
+			continue;
+		return alg->available ? alg : NULL;
+	}
+	return NULL;
+}
+
+struct xfrm_algo_desc *xfrm_ealg_get_byname(char *name)
+{
+	struct xfrm_algo_desc *alg = ealg_list;
+	const struct xfrm_algo_desc *end = ealg_list + ealg_entries();
+
+	/* Find an available encryption algorithm by name; NULL name finds nothing. */
+	if (name == NULL)
+		return NULL;
+
+	/* The first name match decides, even when it is unavailable. */
+	for (; alg < end; alg++) {
+		if (strcmp(name, alg->name) != 0)
+			continue;
+		return alg->available ? alg : NULL;
+	}
+	return NULL;
+}
+
+struct xfrm_algo_desc *xfrm_calg_get_byname(char *name)
+{
+	struct xfrm_algo_desc *alg = calg_list;
+	const struct xfrm_algo_desc *end = calg_list + calg_entries();
+
+	/* Find an available compression algorithm by name; NULL name finds nothing. */
+	if (name == NULL)
+		return NULL;
+
+	/* The first name match decides, even when it is unavailable. */
+	for (; alg < end; alg++) {
+		if (strcmp(name, alg->name) != 0)
+			continue;
+		return alg->available ? alg : NULL;
+	}
+	return NULL;
+}
+
+struct xfrm_algo_desc *xfrm_aalg_get_byidx(unsigned int idx)
+{
+	/* Positional lookup in aalg_list; the "available" flag is not consulted. */
+	if (idx < aalg_entries())
+		return aalg_list + idx;
+	return NULL;
+}
+
+struct xfrm_algo_desc *xfrm_ealg_get_byidx(unsigned int idx)
+{
+	/* Positional lookup in ealg_list; the "available" flag is not consulted. */
+	if (idx < ealg_entries())
+		return ealg_list + idx;
+	return NULL;
+}
+
+struct xfrm_algo_desc *xfrm_calg_get_byidx(unsigned int idx)
+{
+	/* Positional lookup in calg_list; the "available" flag is not consulted. */
+	if (idx < calg_entries())
+		return calg_list + idx;
+	return NULL;
+}
+
+/*
+ * Probe for the availability of crypto algorithms, and set the available
+ * flag for any algorithms found on the system.  This is typically called by
+ * pfkey during userspace SA add, update or register.
+ */
+void xfrm_probe_algs(void)
+{
+#ifdef CONFIG_CRYPTO
+	struct xfrm_algo_desc *alg;
+	int avail;
+
+	BUG_ON(in_softirq());
+
+	/* Refresh each table in turn; a flag is stored only when it changed. */
+	for (alg = aalg_list; alg < aalg_list + aalg_entries(); alg++) {
+		avail = crypto_alg_available(alg->name, 0);
+		if (avail != alg->available)
+			alg->available = avail;
+	}
+	for (alg = ealg_list; alg < ealg_list + ealg_entries(); alg++) {
+		avail = crypto_alg_available(alg->name, 0);
+		if (avail != alg->available)
+			alg->available = avail;
+	}
+	for (alg = calg_list; alg < calg_list + calg_entries(); alg++) {
+		avail = crypto_alg_available(alg->name, 0);
+		if (avail != alg->available)
+			alg->available = avail;
+	}
+#endif
+}
+
+int xfrm_count_auth_supported(void)
+{
+	int count = 0, idx = aalg_entries();
+
+	/* Tally the auth algorithms currently flagged as available. */
+	while (idx-- > 0)
+		count += aalg_list[idx].available ? 1 : 0;
+	return count;
+}
+
+int xfrm_count_enc_supported(void)
+{
+	int count = 0, idx = ealg_entries();
+
+	/* Tally the encryption algorithms currently flagged as available. */
+	while (idx-- > 0)
+		count += ealg_list[idx].available ? 1 : 0;
+	return count;
+}
+
+/* Move to common area: it is shared with AH. */
+
+void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm,
+		  int offset, int len, icv_update_fn_t icv_update)
+{
+	int start = skb->len - skb->data_len;	/* bytes reachable through skb->data */
+	int i, copy = start - offset;
+	struct scatterlist sg;
+	/* Feed len bytes of skb, starting at offset, to icv_update() one piece at a time. */
+	/* Checksum header: the part of the range that lies in skb->data. */
+	if (copy > 0) {
+		if (copy > len)
+			copy = len;
+		
+		sg.page = virt_to_page(skb->data + offset);
+		sg.offset = (unsigned long)(skb->data + offset) % PAGE_SIZE;
+		sg.length = copy;
+		
+		icv_update(tfm, &sg, 1);
+		
+		if ((len -= copy) == 0)
+			return;
+		offset += copy;
+	}
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {	/* page fragments */
+		int end;
+
+		BUG_TRAP(start <= offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;	/* [start, end) is this fragment's byte range */
+		if ((copy = end - offset) > 0) {
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+			if (copy > len)
+				copy = len;
+			
+			sg.page = frag->page;
+			sg.offset = frag->page_offset + offset-start;
+			sg.length = copy;
+			
+			icv_update(tfm, &sg, 1);
+
+			if (!(len -= copy))
+				return;
+			offset += copy;
+		}
+		start = end;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {	/* chained skbs: recurse into each one */
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+		for (; list; list = list->next) {
+			int end;
+
+			BUG_TRAP(start <= offset + len);
+
+			end = start + list->len;
+			if ((copy = end - offset) > 0) {
+				if (copy > len)
+					copy = len;
+				skb_icv_walk(list, tfm, offset-start, copy, icv_update);	/* offset is relative to the child skb */
+				if ((len -= copy) == 0)
+					return;
+				offset += copy;
+			}
+			start = end;
+		}
+	}
+	if (len)	/* the requested range extends past the end of the skb */
+		BUG();
+}
+
+#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE)
+
+/* Although this looks generic, it is not used anywhere else. */
+
+int
+skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
+{
+	int start = skb->len - skb->data_len;	/* bytes reachable through skb->data */
+	int i, copy = start - offset;
+	int elt = 0;	/* number of sg[] entries filled so far; this is the return value */
+	/* NOTE(review): nothing here bounds elt by the size of sg[] -- the caller must size it. */
+	if (copy > 0) {	/* part of the range that lies in skb->data */
+		if (copy > len)
+			copy = len;
+		sg[elt].page = virt_to_page(skb->data + offset);
+		sg[elt].offset = (unsigned long)(skb->data + offset) % PAGE_SIZE;
+		sg[elt].length = copy;
+		elt++;
+		if ((len -= copy) == 0)
+			return elt;
+		offset += copy;
+	}
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {	/* page fragments */
+		int end;
+
+		BUG_TRAP(start <= offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;	/* [start, end) is this fragment's byte range */
+		if ((copy = end - offset) > 0) {
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+			if (copy > len)
+				copy = len;
+			sg[elt].page = frag->page;
+			sg[elt].offset = frag->page_offset+offset-start;
+			sg[elt].length = copy;
+			elt++;
+			if (!(len -= copy))
+				return elt;
+			offset += copy;
+		}
+		start = end;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {	/* chained skbs: recurse into each one */
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+		for (; list; list = list->next) {
+			int end;
+
+			BUG_TRAP(start <= offset + len);
+
+			end = start + list->len;
+			if ((copy = end - offset) > 0) {
+				if (copy > len)
+					copy = len;
+				elt += skb_to_sgvec(list, sg+elt, offset - start, copy);	/* child fills entries after ours */
+				if ((len -= copy) == 0)
+					return elt;
+				offset += copy;
+			}
+			start = end;
+		}
+	}
+	if (len)	/* the requested range extends past the end of the skb */
+		BUG();
+	return elt;
+}
+
+/* Make the data of @skb writable, copying shared, cloned or paged parts
+ * into private buffers.  If @tailbits is non-zero, also make sure that
+ * many bytes of writable tailroom follow the last byte of data.
+ *
+ * Returns the number of scatterlist elements needed for the skb (storing
+ * the skb that owns the writable tail in *@trailer), or -ENOMEM.
+ */
+
+int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
+{
+	int copyflag;
+	int elt;
+	struct sk_buff *skb1, **skb_p;
+
+	/* If skb is cloned or its head is paged, reallocate
+	 * head pulling out all the pages (pages are considered not writable
+	 * at the moment even if they are anonymous).
+	 */
+	if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) &&
+	    __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL)
+		return -ENOMEM;
+
+	/* Easy case. Most packets will go this way. */
+	if (!skb_shinfo(skb)->frag_list) {
+		/* A little trouble: not enough space for the trailer.
+		 * This should not happen when the stack is tuned to generate
+		 * good frames. On a miss we reallocate and reserve even more
+		 * space; 128 extra bytes is fair. */
+
+		if (skb_tailroom(skb) < tailbits &&
+		    pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC))
+			return -ENOMEM;
+
+		/* Linear skb: it is its own trailer and needs one element. */
+		*trailer = skb;
+		return 1;
+	}
+
+	/* Hard case: walk the frag_list and privatize fragments as needed. */
+
+	elt = 1;
+	skb_p = &skb_shinfo(skb)->frag_list;
+	copyflag = 0;
+
+	while ((skb1 = *skb_p) != NULL) {
+		int ntail = 0;
+
+		/* The fragment is partially pulled by someone,
+		 * this can happen on input. Copy it and everything
+		 * after it. */
+
+		if (skb_shared(skb1))
+			copyflag = 1;
+
+		/* If the skb is the last, worry about trailer. */
+
+		if (skb1->next == NULL && tailbits) {
+			if (skb_shinfo(skb1)->nr_frags ||
+			    skb_shinfo(skb1)->frag_list ||
+			    skb_tailroom(skb1) < tailbits)
+				ntail = tailbits + 128;
+		}
+
+		if (copyflag ||
+		    skb_cloned(skb1) ||
+		    ntail ||
+		    skb_shinfo(skb1)->nr_frags ||
+		    skb_shinfo(skb1)->frag_list) {
+			struct sk_buff *skb2;
+
+			/* Slow path: replace this fragment with a private copy. */
+			if (ntail == 0)
+				skb2 = skb_copy(skb1, GFP_ATOMIC);
+			else
+				skb2 = skb_copy_expand(skb1,
+						       skb_headroom(skb1),
+						       ntail,
+						       GFP_ATOMIC);
+			if (unlikely(skb2 == NULL))
+				return -ENOMEM;
+			/* Charge the copy (not the head skb) to the fragment's socket. */
+			if (skb1->sk)
+				skb_set_owner_w(skb2, skb1->sk);
+
+			/* The copy is complete and owned:
+			 * link the new skb in, drop the old one */
+
+			skb2->next = skb1->next;
+			*skb_p = skb2;
+			kfree_skb(skb1);
+			skb1 = skb2;
+		}
+		elt++;
+		*trailer = skb1;
+		skb_p = &skb1->next;
+	}
+
+	return elt;
+}
+
+void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len)
+{
+	if (skb != tail) {	/* a separate tail: the head skb must account for the new bytes too */
+		skb->len += len;
+		skb->data_len += len;
+	}
+	return skb_put(tail, len);	/* extend the tail's data area and return the start of the new space */
+}
+#endif
Index: net/xfrm/xfrm_export.c
===================================================================
RCS file: net/xfrm/xfrm_export.c
diff -N net/xfrm/xfrm_export.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ b/net/xfrm/xfrm_export.c	16 Apr 2004 13:16:27 -0000	1.1.3.1.20.1
@@ -0,0 +1,75 @@
+#include <linux/module.h>
+#include <net/xfrm.h>
+
+EXPORT_SYMBOL(xfrm_user_policy);
+EXPORT_SYMBOL(km_waitq);
+EXPORT_SYMBOL(km_new_mapping);
+EXPORT_SYMBOL(xfrm_cfg_sem);
+EXPORT_SYMBOL(xfrm_policy_alloc);
+EXPORT_SYMBOL(__xfrm_policy_destroy);
+EXPORT_SYMBOL(xfrm_lookup);
+EXPORT_SYMBOL(__xfrm_policy_check);
+EXPORT_SYMBOL(__xfrm_route_forward);
+EXPORT_SYMBOL(xfrm_state_alloc);
+EXPORT_SYMBOL(__xfrm_state_destroy);
+EXPORT_SYMBOL(xfrm_state_find);
+EXPORT_SYMBOL(xfrm_state_insert);
+EXPORT_SYMBOL(xfrm_state_add);
+EXPORT_SYMBOL(xfrm_state_update);
+EXPORT_SYMBOL(xfrm_state_check_expire);
+EXPORT_SYMBOL(xfrm_state_check_space);
+EXPORT_SYMBOL(xfrm_state_lookup);
+EXPORT_SYMBOL(xfrm_state_register_afinfo);
+EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
+EXPORT_SYMBOL(xfrm_state_get_afinfo);
+EXPORT_SYMBOL(xfrm_state_put_afinfo);
+EXPORT_SYMBOL(xfrm_state_delete_tunnel);
+EXPORT_SYMBOL(xfrm_replay_check);
+EXPORT_SYMBOL(xfrm_replay_advance);
+EXPORT_SYMBOL(xfrm_check_selectors);
+EXPORT_SYMBOL(xfrm_check_output);
+EXPORT_SYMBOL(__secpath_destroy);
+EXPORT_SYMBOL(secpath_dup);
+EXPORT_SYMBOL(xfrm_get_acqseq);
+EXPORT_SYMBOL(xfrm_parse_spi);
+EXPORT_SYMBOL(xfrm4_rcv);
+EXPORT_SYMBOL(xfrm4_tunnel_register);
+EXPORT_SYMBOL(xfrm4_tunnel_deregister);
+EXPORT_SYMBOL(xfrm4_tunnel_check_size);
+EXPORT_SYMBOL(xfrm_register_type);
+EXPORT_SYMBOL(xfrm_unregister_type);
+EXPORT_SYMBOL(xfrm_get_type);
+EXPORT_SYMBOL(xfrm_register_km);
+EXPORT_SYMBOL(xfrm_unregister_km);
+EXPORT_SYMBOL(xfrm_state_delete);
+EXPORT_SYMBOL(xfrm_state_walk);
+EXPORT_SYMBOL(xfrm_find_acq_byseq);
+EXPORT_SYMBOL(xfrm_find_acq);
+EXPORT_SYMBOL(xfrm_alloc_spi);
+EXPORT_SYMBOL(xfrm_state_flush);
+EXPORT_SYMBOL(xfrm_policy_kill);
+EXPORT_SYMBOL(xfrm_policy_bysel);
+EXPORT_SYMBOL(xfrm_policy_insert);
+EXPORT_SYMBOL(xfrm_policy_walk);
+EXPORT_SYMBOL(xfrm_policy_flush);
+EXPORT_SYMBOL(xfrm_policy_byid);
+EXPORT_SYMBOL(xfrm_policy_list);
+EXPORT_SYMBOL(xfrm_dst_lookup);
+EXPORT_SYMBOL(xfrm_policy_register_afinfo);
+EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
+EXPORT_SYMBOL(xfrm_policy_get_afinfo);
+EXPORT_SYMBOL(xfrm_policy_put_afinfo);
+
+EXPORT_SYMBOL_GPL(xfrm_probe_algs);
+EXPORT_SYMBOL_GPL(xfrm_count_auth_supported);
+EXPORT_SYMBOL_GPL(xfrm_count_enc_supported);
+EXPORT_SYMBOL_GPL(xfrm_aalg_get_byidx);
+EXPORT_SYMBOL_GPL(xfrm_ealg_get_byidx);
+EXPORT_SYMBOL_GPL(xfrm_calg_get_byidx);
+EXPORT_SYMBOL_GPL(xfrm_aalg_get_byid);
+EXPORT_SYMBOL_GPL(xfrm_ealg_get_byid);
+EXPORT_SYMBOL_GPL(xfrm_calg_get_byid);
+EXPORT_SYMBOL_GPL(xfrm_aalg_get_byname);
+EXPORT_SYMBOL_GPL(xfrm_ealg_get_byname);
+EXPORT_SYMBOL_GPL(xfrm_calg_get_byname);
+EXPORT_SYMBOL_GPL(skb_icv_walk);
Index: net/xfrm/xfrm_input.c
===================================================================
RCS file: net/xfrm/xfrm_input.c
diff -N net/xfrm/xfrm_input.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ b/net/xfrm/xfrm_input.c	16 Apr 2004 13:16:27 -0000	1.4.18.1
@@ -0,0 +1,85 @@
+/*
+ * xfrm_input.c
+ *
+ * Changes:
+ * 	YOSHIFUJI Hideaki @USAGI
+ * 		Split up af-specific portion
+ * 	
+ */
+
+#include <linux/slab.h>
+#include <net/ip.h>
+#include <net/xfrm.h>
+
+static kmem_cache_t *secpath_cachep;
+/* Put every state referenced by the path, then free it to secpath_cachep. */
+void __secpath_destroy(struct sec_path *sp)
+{
+	int slot;
+	for (slot = 0; slot < sp->len; slot++)
+		xfrm_state_put(sp->x[slot].xvec);
+	kmem_cache_free(secpath_cachep, sp);
+}
+/* New sec_path, refcnt 1: empty, or a copy of src with its states held. */
+struct sec_path *secpath_dup(struct sec_path *src)
+{
+	struct sec_path *copy = kmem_cache_alloc(secpath_cachep, SLAB_ATOMIC);
+	int n;
+
+	if (copy == NULL)
+		return NULL;
+
+	if (src == NULL) {
+		copy->len = 0;
+	} else {
+		/* Take over src's contents, then hold each referenced state. */
+		memcpy(copy, src, sizeof(*copy));
+		for (n = 0; n < copy->len; n++)
+			xfrm_state_hold(copy->x[n].xvec);
+	}
+	atomic_set(&copy->refcnt, 1);
+	return copy;
+}
+
+/* Fetch spi and seq from ipsec header at skb->h.raw.  Returns 0 on success,
+ * -EINVAL if pskb_may_pull() fails, 1 if nexthdr is not AH, ESP or COMP. */
+int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq)
+{
+	int offset, offset_seq;
+
+	switch (nexthdr) {
+	case IPPROTO_AH:
+		offset = offsetof(struct ip_auth_hdr, spi);
+		offset_seq = offsetof(struct ip_auth_hdr, seq_no);
+		break;
+	case IPPROTO_ESP:
+		offset = offsetof(struct ip_esp_hdr, spi);
+		offset_seq = offsetof(struct ip_esp_hdr, seq_no);
+		break;
+	case IPPROTO_COMP:
+		if (!pskb_may_pull(skb, sizeof(struct ip_comp_hdr)))
+			return -EINVAL;
+		*spi = ntohl(ntohs(*(u16*)(skb->h.raw + 2)));	/* 16-bit field at offset 2 */
+		*seq = 0;	/* no sequence number is read for COMP */
+		return 0;
+	default:
+		return 1;
+	}
+	/* NOTE(review): 16 presumably covers spi and seq_no in both layouts - confirm. */
+	if (!pskb_may_pull(skb, 16))
+		return -EINVAL;
+	/* AH/ESP values are copied as stored; no byte-order conversion here. */
+	*spi = *(u32*)(skb->h.raw + offset);
+	*seq = *(u32*)(skb->h.raw + offset_seq);
+	return 0;
+}
+/* Create the slab cache for struct sec_path; panic if that fails. */
+void __init xfrm_input_init(void)
+{
+	kmem_cache_t *cache = kmem_cache_create("secpath_cache",
+						sizeof(struct sec_path), 0,
+						SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (cache == NULL)
+		panic("XFRM: failed to allocate secpath_cache\n");
+	secpath_cachep = cache;
+}
Index: net/xfrm/xfrm_output.c
===================================================================
RCS file: net/xfrm/xfrm_output.c
diff -N net/xfrm/xfrm_output.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ b/net/xfrm/xfrm_output.c	16 Apr 2004 13:16:27 -0000	1.2.18.1
@@ -0,0 +1,46 @@
+/* 
+ * generic xfrm output routines
+ *
+ * Copyright (c) 2003 James Morris <jmorris@intercode.com.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) 
+ * any later version.
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <net/xfrm.h>
+/*
+ * Output-path checks for state x and skb, in this order:
+ *  - xfrm_state_check_expire(x);
+ *  - if x->props.mode is set, the tunnel size check of the family
+ *    (-EINVAL for a family other than AF_INET and AF_INET6);
+ *  - xfrm_state_check_space(x, skb).
+ * Returns the first non-zero result, or the result of the last check.
+ */
+int xfrm_check_output(struct xfrm_state *x,
+                      struct sk_buff *skb, unsigned short family)
+{
+	int rc = xfrm_state_check_expire(x);
+
+	if (rc)
+		return rc;
+
+	/* The size check only applies to states with a non-zero mode. */
+	if (x->props.mode) {
+		if (family == AF_INET)
+			rc = xfrm4_tunnel_check_size(skb);
+		else if (family == AF_INET6)
+			rc = xfrm6_tunnel_check_size(skb);
+		else
+			rc = -EINVAL;
+
+		if (rc)
+			return rc;
+	}
+
+	/* Last check; its result is the function's result. */
+	return xfrm_state_check_space(x, skb);
+}
Index: net/xfrm/xfrm_policy.c
===================================================================
RCS file: net/xfrm/xfrm_policy.c
diff -N net/xfrm/xfrm_policy.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ b/net/xfrm/xfrm_policy.c	16 Apr 2004 13:16:27 -0000	1.12.2.1
@@ -0,0 +1,1250 @@
+/* 
+ * xfrm_policy.c
+ *
+ * Changes:
+ *	Mitsuru KANDA @USAGI
+ * 	Kazunori MIYAZAWA @USAGI
+ * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
+ * 		IPv6 support
+ * 	Kazunori MIYAZAWA @USAGI
+ * 	YOSHIFUJI Hideaki
+ * 		Split up af-specific portion
+ *	Derek Atkins <derek@ihtfp.com>		Add the post_input processor
+ * 	
+ */
+
+#include <linux/config.h>
+#include <linux/slab.h>
+#include <linux/kmod.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/tqueue.h>
+#include <linux/notifier.h>
+#include <linux/netdevice.h>
+#include <net/xfrm.h>
+#include <net/ip.h>
+
+DECLARE_MUTEX(xfrm_cfg_sem);
+/* Guards the policy chains in xfrm_policy_list. */
+static rwlock_t xfrm_policy_lock = RW_LOCK_UNLOCKED;
+/* One chain per slot; the upper XFRM_POLICY_MAX slots hold socket policies. */
+struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2];
+/* Registered per-family operations, indexed by family, and their lock. */
+static rwlock_t xfrm_policy_afinfo_lock = RW_LOCK_UNLOCKED;
+static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
+/* Default dst_ops->kmem_cachep for families that register without one. */
+kmem_cache_t *xfrm_dst_cache;
+/* Killed policies are queued on the gc list and the gc work is scheduled. */
+static struct tq_struct xfrm_policy_gc_work;
+static struct list_head xfrm_policy_gc_list =
+	LIST_HEAD_INIT(xfrm_policy_gc_list);
+static spinlock_t xfrm_policy_gc_lock = SPIN_LOCK_UNLOCKED;
+/* Install type in family's type map; -EEXIST if its proto slot is taken. */
+int xfrm_register_type(struct xfrm_type *type, unsigned short family)
+{
+	struct xfrm_policy_afinfo *ops = xfrm_policy_get_afinfo(family);
+	struct xfrm_type_map *tm;
+	int rc = -EEXIST;
+
+	if (unlikely(ops == NULL))
+		return -EAFNOSUPPORT;
+	tm = ops->type_map;
+
+	write_lock(&tm->lock);
+	if (likely(tm->map[type->proto] == NULL)) {
+		tm->map[type->proto] = type;
+		rc = 0;
+	}
+	write_unlock(&tm->lock);
+	xfrm_policy_put_afinfo(ops);
+	return rc;
+}
+/* Remove type from family's type map; -ENOENT unless it is the one installed. */
+int xfrm_unregister_type(struct xfrm_type *type, unsigned short family)
+{
+	struct xfrm_policy_afinfo *ops = xfrm_policy_get_afinfo(family);
+	struct xfrm_type_map *tm;
+	int rc = -ENOENT;
+
+	if (unlikely(ops == NULL))
+		return -EAFNOSUPPORT;
+	tm = ops->type_map;
+
+	write_lock(&tm->lock);
+	if (likely(tm->map[type->proto] == type)) {
+		tm->map[type->proto] = NULL;
+		rc = 0;
+	}
+	write_unlock(&tm->lock);
+	xfrm_policy_put_afinfo(ops);
+	return rc;
+}
+/* Look up the type registered for proto in family, trying a module load once. */
+struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family)
+{
+	struct xfrm_policy_afinfo *afinfo;
+	struct xfrm_type_map *typemap;
+	struct xfrm_type *type;
+	int modload_attempted = 0;	/* request_module() is tried at most once */
+
+retry:
+	afinfo = xfrm_policy_get_afinfo(family);
+	if (unlikely(afinfo == NULL))
+		return NULL;
+	typemap = afinfo->type_map;
+
+	read_lock(&typemap->lock);
+	type = typemap->map[proto];
+	if (type && type->owner)
+		__MOD_INC_USE_COUNT(type->owner);	/* undone by xfrm_put_type() */
+	read_unlock(&typemap->lock);
+	if (!type && !modload_attempted) {
+		char module_name[36];	/* "xfrm-type-<family>-<proto>" */
+
+		xfrm_policy_put_afinfo(afinfo);
+		snprintf(module_name, sizeof(module_name), "xfrm-type-%d-%d",
+			 (int) family, (int) proto);
+		request_module(module_name);
+		modload_attempted = 1;
+		goto retry;
+	}
+
+	xfrm_policy_put_afinfo(afinfo);
+	return type;
+}
+/* Call the family's dst_lookup hook on dst/fl; -EINVAL if it has none. */
+int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl,
+		    unsigned short family)
+{
+	struct xfrm_policy_afinfo *ops = xfrm_policy_get_afinfo(family);
+	int rc = -EINVAL;
+
+	if (unlikely(ops == NULL))
+		return -EAFNOSUPPORT;
+
+	if (likely(ops->dst_lookup != NULL))
+		rc = ops->dst_lookup(dst, fl);
+
+	/* Balances the xfrm_policy_get_afinfo() above. */
+	xfrm_policy_put_afinfo(ops);
+	return rc;
+}
+/* Undo the module use count taken by xfrm_get_type(), if type has an owner. */
+void xfrm_put_type(struct xfrm_type *type)
+{
+	if (type->owner != NULL)
+		__MOD_DEC_USE_COUNT(type->owner);
+}
+/* Seconds to jiffies, saturating at MAX_SCHEDULE_TIMEOUT-1. */
+static inline unsigned long make_jiffies(long secs)
+{
+	if (secs < (MAX_SCHEDULE_TIMEOUT-1)/HZ)
+		return secs*HZ;
+
+	return MAX_SCHEDULE_TIMEOUT-1;
+}
+/* Policy lifetime timer: expire on a hard limit, warn on a soft one, re-arm. */
+static void xfrm_policy_timer(unsigned long data)
+{
+	struct xfrm_policy *xp = (struct xfrm_policy*)data;
+	unsigned long now = (unsigned long)xtime.tv_sec;
+	long next = LONG_MAX;
+	int warn = 0;
+	int dir;
+
+	if (xp->dead)
+		goto out;
+
+	dir = xp->index & 7;	/* low three bits, as OR-ed in by xfrm_gen_index() */
+	/* Hard limits: reaching either one expires the policy. */
+	if (xp->lft.hard_add_expires_seconds) {
+		long tmo = xp->lft.hard_add_expires_seconds +
+			xp->curlft.add_time - now;
+		if (tmo <= 0)
+			goto expired;
+		if (tmo < next)
+			next = tmo;
+	}
+	if (xp->lft.hard_use_expires_seconds) {
+		long tmo = xp->lft.hard_use_expires_seconds +
+			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
+		if (tmo <= 0)
+			goto expired;
+		if (tmo < next)
+			next = tmo;
+	}
+	if (xp->lft.soft_add_expires_seconds) {	/* soft limits only warn */
+		long tmo = xp->lft.soft_add_expires_seconds +
+			xp->curlft.add_time - now;
+		if (tmo <= 0) {
+			warn = 1;
+			tmo = XFRM_KM_TIMEOUT;
+		}
+		if (tmo < next)
+			next = tmo;
+	}
+	if (xp->lft.soft_use_expires_seconds) {
+		long tmo = xp->lft.soft_use_expires_seconds +
+			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
+		if (tmo <= 0) {
+			warn = 1;
+			tmo = XFRM_KM_TIMEOUT;
+		}
+		if (tmo < next)
+			next = tmo;
+	}
+	/* A soft limit passed: notify with last argument 0 (the hard path uses 1). */
+	if (warn)
+		km_policy_expired(xp, dir, 0);
+	if (next != LONG_MAX &&
+	    !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
+		xfrm_pol_hold(xp);
+
+out:
+	xfrm_pol_put(xp);
+	return;
+	/* Hard expiry: notify, then unlink and kill via xfrm_policy_delete(). */
+expired:
+	km_policy_expired(xp, dir, 1);
+	xfrm_policy_delete(xp, dir);
+	xfrm_pol_put(xp);
+}
+
+
+/* Allocate a zeroed xfrm_policy with refcnt 1 and its timer initialised
+ * (not started).  Returns NULL if kmalloc() fails.  Used by clone_policy()
+ * here; it is also supposed to be used by pfkeyv2 SPD calls.
+ */
+struct xfrm_policy *xfrm_policy_alloc(int gfp)
+{
+	struct xfrm_policy *pol = kmalloc(sizeof(*pol), gfp);
+
+	if (pol == NULL)
+		return NULL;
+
+	memset(pol, 0, sizeof(*pol));
+	atomic_set(&pol->refcnt, 1);
+	pol->lock = RW_LOCK_UNLOCKED;
+
+	init_timer(&pol->timer);
+	pol->timer.function = xfrm_policy_timer;
+	pol->timer.data = (unsigned long)pol;
+	return pol;
+}
+
+/* Free a policy.  By now it must be marked dead, own no bundles and have
+ * no pending timer; anything else is a BUG().
+ */
+void __xfrm_policy_destroy(struct xfrm_policy *policy)
+{
+	/* Evaluated left to right, so del_timer() only runs once the
+	 * first two conditions have been found to hold.
+	 */
+	if (!policy->dead ||
+	    policy->bundles ||
+	    del_timer(&policy->timer))
+		BUG();
+
+	kfree(policy);
+}
+/* Final teardown of one killed policy; called from xfrm_policy_gc_task(). */
+static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
+{
+	struct dst_entry *dst;
+	/* Detach and free every bundle chained on the policy. */
+	while ((dst = policy->bundles) != NULL) {
+		policy->bundles = dst->next;
+		dst_free(dst);
+	}
+	/* A timer that was still pending accounts for one reference: drop it. */
+	if (del_timer(&policy->timer))
+		atomic_dec(&policy->refcnt);
+	/* NOTE(review): refcnt > 1 presumably means the flow cache holds it - confirm. */
+	if (atomic_read(&policy->refcnt) > 1)
+		flow_cache_flush();
+
+	xfrm_pol_put(policy);
+}
+/* Deferred work: take over the pending gc list and kill each policy on it. */
+static void xfrm_policy_gc_task(void *data)
+{
+	struct list_head doomed = LIST_HEAD_INIT(doomed);
+	struct list_head *pos, *nxt;
+
+	/* Move the shared list to a private one so the kills run unlocked. */
+	spin_lock_bh(&xfrm_policy_gc_lock);
+	list_splice_init(&xfrm_policy_gc_list, &doomed);
+	spin_unlock_bh(&xfrm_policy_gc_lock);
+
+	list_for_each_safe(pos, nxt, &doomed) {
+		/* _safe variant: the entry may be freed by the kill. */
+		xfrm_policy_gc_kill(list_entry(pos, struct xfrm_policy, list));
+	}
+}
+
+/* Announce the entry dead and queue it on the gc list, whose task releases
+ * its descendant resources.  The rule must be unlinked from the lists by
+ * this moment.  policy->lock is taken here.
+ * Killing an already dead policy does nothing.
+ */
+void xfrm_policy_kill(struct xfrm_policy *policy)
+{
+	write_lock_bh(&policy->lock);
+
+	if (!policy->dead) {
+		policy->dead = 1;
+
+		spin_lock(&xfrm_policy_gc_lock);
+		list_add(&policy->list, &xfrm_policy_gc_list);
+		spin_unlock(&xfrm_policy_gc_lock);
+		schedule_task(&xfrm_policy_gc_work);
+	}
+
+	write_unlock_bh(&policy->lock);
+}
+
+/* Generate new index... KAME seems to generate them ordered, at the cost
+ * of absolutely unpredictable ordering of rules. This will not pass. */
+static u32 xfrm_gen_index(int dir)
+{
+	u32 idx;
+	struct xfrm_policy *p;
+	static u32 idx_generator;
+	/* The counter steps by 8; dir is OR-ed into the low bits of each index. */
+	for (;;) {
+		idx = (idx_generator | dir);
+		idx_generator += 8;
+		if (idx == 0)	/* index 0 is never handed out */
+			idx = 8;
+		for (p = xfrm_policy_list[dir]; p; p = p->next) {	/* already in use? */
+			if (p->index == idx)
+				break;
+		}
+		if (!p)
+			return idx;
+	}
+}
+/* Link policy into list dir by priority; an equal-selector entry is replaced. */
+int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
+{
+	struct xfrm_policy *pol, **p;
+	struct xfrm_policy *delpol = NULL;
+	struct xfrm_policy **newpos = NULL;
+	/* excl != 0: fail with -EEXIST instead of replacing an equal selector. */
+	write_lock_bh(&xfrm_policy_lock);
+	for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) {
+		if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0) {
+			if (excl) {
+				write_unlock_bh(&xfrm_policy_lock);
+				return -EEXIST;
+			}
+			*p = pol->next;
+			delpol = pol;
+			if (policy->priority > pol->priority)
+				continue;
+		} else if (policy->priority >= pol->priority)
+			continue;
+		if (!newpos)
+			newpos = p;
+		if (delpol)
+			break;
+	}
+	if (newpos)
+		p = newpos;
+	xfrm_pol_hold(policy);
+	policy->next = *p;
+	*p = policy;
+	atomic_inc(&flow_cache_genid);
+	policy->index = delpol ? delpol->index : xfrm_gen_index(dir);
+	policy->curlft.add_time = (unsigned long)xtime.tv_sec;
+	policy->curlft.use_time = 0;
+	if (!mod_timer(&policy->timer, jiffies + HZ))	/* first run after HZ jiffies */
+		xfrm_pol_hold(policy);
+	write_unlock_bh(&xfrm_policy_lock);
+	/* The replaced policy is killed only after the list lock is dropped. */
+	if (delpol) {
+		xfrm_policy_kill(delpol);
+	}
+	wake_up(&km_waitq);
+	return 0;
+}
+/* Look up a policy by selector; delete != 0 also unlinks and kills it. */
+struct xfrm_policy *xfrm_policy_bysel(int dir, struct xfrm_selector *sel,
+				      int delete)
+{
+	struct xfrm_policy **pp, *pol;
+
+	write_lock_bh(&xfrm_policy_lock);
+	for (pp = &xfrm_policy_list[dir]; (pol = *pp) != NULL; pp = &pol->next) {
+		if (memcmp(sel, &pol->selector, sizeof(*sel)) != 0)
+			continue;
+		xfrm_pol_hold(pol);
+		if (delete)
+			*pp = pol->next;
+		break;
+	}
+	write_unlock_bh(&xfrm_policy_lock);
+
+	if (pol && delete) {
+		atomic_inc(&flow_cache_genid);
+		xfrm_policy_kill(pol);
+		wake_up(&km_waitq);
+	}
+	return pol;
+}
+/* Find a policy by index; the list is id & 7 (dir is unused); may delete it. */
+struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete)
+{
+	struct xfrm_policy *pol, **p;
+	if ((id & 7) >= 2*XFRM_POLICY_MAX)	/* not a slot of xfrm_policy_list */
+		return NULL;
+	write_lock_bh(&xfrm_policy_lock);
+	for (p = &xfrm_policy_list[id & 7]; (pol=*p)!=NULL; p = &pol->next) {
+		if (pol->index == id) {
+			xfrm_pol_hold(pol);	/* returned with a reference held */
+			if (delete)
+				*p = pol->next;
+			break;
+		}
+	}
+	write_unlock_bh(&xfrm_policy_lock);
+	if (pol && delete) {	/* kill outside the list lock */
+		atomic_inc(&flow_cache_genid);
+		xfrm_policy_kill(pol);
+		wake_up(&km_waitq);
+	}
+	return pol;
+}
+/* Unlink and kill every policy on the first XFRM_POLICY_MAX lists. */
+void xfrm_policy_flush(void)
+{
+	struct xfrm_policy *xp;
+	int dir;
+
+	write_lock_bh(&xfrm_policy_lock);
+	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
+		while ((xp = xfrm_policy_list[dir]) != NULL) {
+			xfrm_policy_list[dir] = xp->next;
+			write_unlock_bh(&xfrm_policy_lock);
+			/* xfrm_policy_kill() is called without the list lock. */
+			xfrm_policy_kill(xp);
+			/* Retake it and continue from the current list head. */
+			write_lock_bh(&xfrm_policy_lock);
+		}
+	}
+	atomic_inc(&flow_cache_genid);
+	write_unlock_bh(&xfrm_policy_lock);
+	wake_up(&km_waitq);
+}
+/* Call func for each policy on every list until it returns non-zero. */
+int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*),
+		     void *data)
+{
+	struct xfrm_policy *pol;
+	int list;
+	int remaining = 0;
+	int rc = -ENOENT;
+
+	read_lock_bh(&xfrm_policy_lock);
+
+	/* First pass: count, so func can be told how many entries follow. */
+	for (list = 0; list < 2*XFRM_POLICY_MAX; list++)
+		for (pol = xfrm_policy_list[list]; pol; pol = pol->next)
+			remaining++;
+
+	if (remaining == 0)
+		goto unlock;
+
+	/* Second pass: the count handed to func reaches 0 on the last entry. */
+	for (list = 0; list < 2*XFRM_POLICY_MAX; list++) {
+		for (pol = xfrm_policy_list[list]; pol; pol = pol->next) {
+			rc = func(pol, list%XFRM_POLICY_MAX, --remaining, data);
+			if (rc)
+				goto unlock;
+		}
+	}
+
+unlock:
+	read_unlock_bh(&xfrm_policy_lock);
+	return rc;
+}
+
+
+/* Find policy to apply to this flow: the first one on list dir, of the
+ * given family, whose selector matches fl.  It is handed back through
+ * objp with a reference held and *obj_refp pointing at its refcnt; when
+ * nothing matches *objp is NULL and *obj_refp is not written.
+ */
+static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
+			       void **objp, atomic_t **obj_refp)
+{
+	struct xfrm_policy *cand;
+
+	read_lock_bh(&xfrm_policy_lock);
+	for (cand = xfrm_policy_list[dir]; cand != NULL; cand = cand->next) {
+		/* Policies of another family are never matched. */
+		if (cand->family == family &&
+		    xfrm_selector_match(&cand->selector, fl, family)) {
+			xfrm_pol_hold(cand);
+			break;
+		}
+	}
+	read_unlock_bh(&xfrm_policy_lock);
+
+	*objp = cand;
+	if (cand != NULL)
+		*obj_refp = &cand->refcnt;
+}
+/* Return sk's policy for dir, reference held, if its selector matches fl. */
+struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
+{
+	struct xfrm_policy *found = NULL;
+	struct xfrm_policy *cand;
+
+	read_lock_bh(&xfrm_policy_lock);
+	cand = sk->policy[dir];
+	if (cand != NULL &&
+	    xfrm_selector_match(&cand->selector, fl, sk->family)) {
+		xfrm_pol_hold(cand);
+		found = cand;
+	}
+	read_unlock_bh(&xfrm_policy_lock);
+
+	/* NULL when the socket has no policy or its selector does not match. */
+	return found;
+}
+/* Push pol on the head of list dir and take a reference on it. */
+static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
+{
+	xfrm_pol_hold(pol);
+	pol->next = xfrm_policy_list[dir];
+	xfrm_policy_list[dir] = pol;
+}
+/* Unlink pol from list dir; returns pol, or NULL if it was not on the list. */
+static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
+						int dir)
+{
+	struct xfrm_policy **link = &xfrm_policy_list[dir];
+
+	while (*link != NULL && *link != pol)
+		link = &(*link)->next;
+
+	if (*link == NULL)
+		return NULL;
+
+	*link = pol->next;
+	return pol;
+}
+/* Unlink pol from list dir and, if it was linked there, kill it. */
+void xfrm_policy_delete(struct xfrm_policy *pol, int dir)
+{
+	write_lock_bh(&xfrm_policy_lock);
+	pol = __xfrm_policy_unlink(pol, dir);	/* NULL if pol was not on the list */
+	write_unlock_bh(&xfrm_policy_lock);
+	if (pol)
+		xfrm_policy_kill(pol);
+}
+/* Replace sk's policy for dir with pol (may be NULL); the old one is killed. */
+int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
+{
+	struct xfrm_policy *old_pol;
+	/* Socket policies are linked on the list at index XFRM_POLICY_MAX+dir. */
+	write_lock_bh(&xfrm_policy_lock);
+	old_pol = sk->policy[dir];
+	sk->policy[dir] = pol;
+	if (pol) {
+		pol->curlft.add_time = (unsigned long)xtime.tv_sec;
+		pol->index = xfrm_gen_index(XFRM_POLICY_MAX+dir);
+		__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
+	}
+	if (old_pol)
+		__xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
+	write_unlock_bh(&xfrm_policy_lock);
+	/* Kill the replaced policy only after the list lock is released. */
+	if (old_pol) {
+		xfrm_policy_kill(old_pol);
+	}
+	wake_up(&km_waitq);
+	return 0;
+}
+/* Duplicate old onto socket list dir; returns NULL if allocation fails. */
+static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
+{
+	struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC);
+	/* Only the fields below are copied; the rest stay zeroed from alloc. */
+	if (newp) {
+		newp->selector = old->selector;
+		newp->lft = old->lft;
+		newp->curlft = old->curlft;
+		newp->action = old->action;
+		newp->flags = old->flags;
+		newp->xfrm_nr = old->xfrm_nr;
+		newp->index = old->index;
+		memcpy(newp->xfrm_vec, old->xfrm_vec,
+		       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
+		write_lock_bh(&xfrm_policy_lock);
+		__xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
+		write_unlock_bh(&xfrm_policy_lock);
+		xfrm_pol_put(newp);	/* drop the alloc reference; the link took its own */
+	}
+	return newp;
+}
+/* Replace the policies in sk->policy[0..1] with clones of themselves. */
+int __xfrm_sk_clone_policy(struct sock *sk)
+{
+	struct xfrm_policy *first = sk->policy[0];
+	struct xfrm_policy *second = sk->policy[1];
+
+	sk->policy[0] = NULL;
+	sk->policy[1] = NULL;
+	if (first != NULL && (sk->policy[0] = clone_policy(first, 0)) == NULL)
+		return -ENOMEM;
+	if (second != NULL && (sk->policy[1] = clone_policy(second, 1)) == NULL)
+		return -ENOMEM;
+	return 0;
+}
+
+/* Resolve list of templates for the flow, given policy.  Fills xfrm[] and
+ * returns the number of states stored there, or a negative error. */
+static int
+xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl,
+		  struct xfrm_state **xfrm,
+		  unsigned short family)
+{
+	int nx;
+	int i, error;
+	xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
+	xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
+
+	for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
+		struct xfrm_state *x;
+		xfrm_address_t *remote = daddr;
+		xfrm_address_t *local  = saddr;
+		struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];
+		/* A template with mode set supplies its own endpoint addresses. */
+		if (tmpl->mode) {
+			remote = &tmpl->id.daddr;
+			local = &tmpl->saddr;
+		}
+
+		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);
+		/* A valid state is kept; later templates start from its addresses. */
+		if (x && x->km.state == XFRM_STATE_VALID) {
+			xfrm[nx++] = x;
+			daddr = remote;
+			saddr = local;
+			continue;
+		}
+		if (x) {
+			error = (x->km.state == XFRM_STATE_ERROR ?
+				 -EINVAL : -EAGAIN);
+			xfrm_state_put(x);
+		}
+		/* An unresolved template is tolerated only if marked optional. */
+		if (!tmpl->optional)
+			goto fail;
+	}
+	return nx;
+	/* Failure: release the states collected so far. */
+fail:
+	for (nx--; nx>=0; nx--)
+		xfrm_state_put(xfrm[nx]);
+	return error;
+}
+
+/* Ask the family's find_bundle hook for a bundle of policy that accepts the
+ * flow; ERR_PTR(-EINVAL) if the family has no afinfo registered.
+ */
+static struct dst_entry *
+xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
+{
+	struct xfrm_policy_afinfo *ops = xfrm_policy_get_afinfo(family);
+	struct dst_entry *bundle;
+
+	if (unlikely(ops == NULL))
+		return ERR_PTR(-EINVAL);
+	bundle = ops->find_bundle(fl, policy);
+	xfrm_policy_put_afinfo(ops);
+	return bundle;
+}
+
+/* Allocate chain of dst_entry's, attach known xfrm's, calculate all the
+ * metrics...  Done by the family's bundle_create hook; -EINVAL without afinfo.
+ */
+static int
+xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
+		   struct flowi *fl, struct dst_entry **dst_p,
+		   unsigned short family)
+{
+	struct xfrm_policy_afinfo *ops = xfrm_policy_get_afinfo(family);
+	int rc;
+
+	if (unlikely(ops == NULL))
+		return -EINVAL;
+	rc = ops->bundle_create(policy, xfrm, nx, fl, dst_p);
+	xfrm_policy_put_afinfo(ops);
+	return rc;
+}
+/* Map an XFRM_POLICY_* direction to its FLOW_DIR_* counterpart. */
+static inline int policy_to_flow_dir(int dir)
+{
+	/* When the two sets of constants coincide no translation is needed. */
+	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
+	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
+	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
+		return dir;
+
+	if (dir == XFRM_POLICY_OUT)
+		return FLOW_DIR_OUT;
+	if (dir == XFRM_POLICY_FWD)
+		return FLOW_DIR_FWD;
+
+	/* XFRM_POLICY_IN and any unknown value. */
+	return FLOW_DIR_IN;
+}
+
+static int stale_bundle(struct dst_entry *dst);
+
+/* Main function: finds/creates a bundle for given flow.  On success returns
+ * 0 and *dst_p is either untouched (flow not transformed) or the bundle.
+ * At the moment we eat a raw IP route. Mostly to speed up lookups
+ * on interfaces with disabled IPsec.
+ */
+int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
+		struct sock *sk, int flags)
+{
+	struct xfrm_policy *policy;
+	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
+	struct rtable *rt = (struct rtable*)*dst_p;
+	struct dst_entry *dst;
+	int nx = 0;
+	int err;
+	u32 genid;
+	u16 family = (*dst_p)->ops->family;
+
+	switch (family) {
+	case AF_INET:
+		if (!fl->fl4_src)
+			fl->fl4_src = rt->rt_src;
+		if (!fl->fl4_dst)
+			fl->fl4_dst = rt->rt_dst;	/* falls through */
+	case AF_INET6:
+		/* Still not clear... */
+	default:
+		/* nothing */;
+	}
+
+restart:
+	genid = atomic_read(&flow_cache_genid);
+	policy = NULL;
+	if (sk && sk->policy[1])
+		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
+
+	if (!policy) {
+		/* To accelerate a bit...  */
+		if ((rt->u.dst.flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT])
+			return 0;
+
+		policy = flow_cache_lookup(fl, family,
+					   policy_to_flow_dir(XFRM_POLICY_OUT),
+					   xfrm_policy_lookup);
+	}
+
+	if (!policy)
+		return 0;
+
+	policy->curlft.use_time = (unsigned long)xtime.tv_sec;
+
+	switch (policy->action) {
+	case XFRM_POLICY_BLOCK:
+		/* Prohibit the flow */
+		xfrm_pol_put(policy);
+		return -EPERM;
+
+	case XFRM_POLICY_ALLOW:
+		if (policy->xfrm_nr == 0) {
+			/* Flow passes not transformed. */
+			xfrm_pol_put(policy);
+			return 0;
+		}
+
+		/* Try to find matching bundle.
+		 *
+		 * LATER: help from flow cache. It is optional, this
+		 * is required only for output policy.
+		 */
+		dst = xfrm_find_bundle(fl, policy, family);
+		if (IS_ERR(dst)) {
+			xfrm_pol_put(policy);
+			return PTR_ERR(dst);
+		}
+
+		if (dst)
+			break;
+
+		nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);
+
+		if (unlikely(nx<0)) {
+			err = nx;
+			if (err == -EAGAIN && flags) {	/* flags set: sleep on km_waitq, then retry */
+				DECLARE_WAITQUEUE(wait, current);
+
+				add_wait_queue(&km_waitq, &wait);
+				set_current_state(TASK_INTERRUPTIBLE);
+				schedule();
+				set_current_state(TASK_RUNNING);
+				remove_wait_queue(&km_waitq, &wait);
+
+				nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);
+
+				if (nx == -EAGAIN && signal_pending(current)) {
+					err = -ERESTART;
+					goto error;
+				}
+				if (nx == -EAGAIN ||
+				    genid != atomic_read(&flow_cache_genid)) {
+					xfrm_pol_put(policy);
+					goto restart;
+				}
+				err = nx;
+			}
+			if (err < 0)
+				goto error;
+		}
+		if (nx == 0) {
+			/* Flow passes not transformed. */
+			xfrm_pol_put(policy);
+			return 0;
+		}
+
+		dst = &rt->u.dst;
+		err = xfrm_bundle_create(policy, xfrm, nx, fl, &dst, family);
+
+		if (unlikely(err)) {
+			int i;
+			for (i=0; i<nx; i++)
+				xfrm_state_put(xfrm[i]);
+			goto error;
+		}
+
+		write_lock_bh(&policy->lock);
+		if (unlikely(policy->dead || stale_bundle(dst))) {
+			/* Wow! While we worked on resolving, this
+			 * policy has gone. Retry. It is not paranoia,
+			 * we just cannot enlist new bundle to dead object.
+			 * We can't enlist stale bundles either.
+			 */
+			write_unlock_bh(&policy->lock);
+
+			xfrm_pol_put(policy);
+			if (dst)
+				dst_free(dst);
+			goto restart;
+		}
+		dst->next = policy->bundles;
+		policy->bundles = dst;
+		dst_hold(dst);
+		write_unlock_bh(&policy->lock);
+	}
+	*dst_p = dst;
+	ip_rt_put(rt);
+	xfrm_pol_put(policy);
+	return 0;
+	/* Error exit: the route and the policy reference are released. */
+error:
+	ip_rt_put(rt);
+	xfrm_pol_put(policy);
+	*dst_p = NULL;
+	return err;
+}
+
+/* When skb is transformed back to its "native" form, we have to
+ * check policy restrictions. At the moment we make this in maximally
+ * stupid way. Shame on me. :-) Of course, connected sockets must
+ * have policy cached at them.
+ */
+/* Does state x satisfy template tmpl?  Returns non-zero if it does. */
+static inline int
+xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x, 
+	      unsigned short family)
+{
+	if (xfrm_state_kern(x))
+		return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, family);
+	return	x->id.proto == tmpl->id.proto &&
+		(x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&	/* zero spi: any */
+		(x->props.reqid == tmpl->reqid || !tmpl->reqid) &&	/* zero reqid: any */
+		x->props.mode == tmpl->mode &&
+		(tmpl->aalgos & (1<<x->props.aalgo)) &&
+		!(x->props.mode && xfrm_state_addr_cmp(tmpl, x, family));	/* addresses only if mode set */
+}
+/* Match tmpl against sp->x[start..]; returns the next start index or -1. */
+static inline int
+xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
+	       unsigned short family)
+{
+	int idx = start;
+	/* Optional templates may go unmatched; with mode 0 they are not even tried. */
+	if (tmpl->optional) {
+		if (!tmpl->mode)
+			return start;
+	} else
+		start = -1;	/* required template: no match means failure */
+	for (; idx < sp->len; idx++) {
+		if (xfrm_state_ok(tmpl, sp->x[idx].xvec, family))
+			return ++idx;
+		if (sp->x[idx].xvec->props.mode)	/* stop at a non-matching state with mode set */
+			break;
+	}
+	return start;
+}
+/* Run the family's decode_session hook on skb and fl; 0 or -EAFNOSUPPORT. */
+static int
+_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
+{
+	struct xfrm_policy_afinfo *ops;
+
+	ops = xfrm_policy_get_afinfo(family);
+	if (unlikely(ops == NULL))
+		return -EAFNOSUPPORT;
+	ops->decode_session(skb, fl);
+	xfrm_policy_put_afinfo(ops);
+	return 0;
+}
+/* Policy check for skb in direction dir; returns 1 to accept, 0 to reject. */
+int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, 
+			unsigned short family)
+{
+	struct xfrm_policy *pol;
+	struct flowi fl;
+
+	if (_decode_session(skb, &fl, family) < 0)
+		return 0;	/* no afinfo for this family: reject */
+
+	/* First, check used SA against their selectors. */
+	if (skb->sp) {
+		int i;
+
+		for (i=skb->sp->len-1; i>=0; i--) {
+		  struct sec_decap_state *xvec = &(skb->sp->x[i]);
+			if (!xfrm_selector_match(&xvec->xvec->sel, &fl, family))
+				return 0;
+
+			/* If there is a post_input processor, try running it */
+			if (xvec->xvec->type->post_input &&
+			    (xvec->xvec->type->post_input)(xvec->xvec,
+							   &(xvec->decap),
+							   skb) != 0)
+				return 0;
+		}
+	}
+
+	pol = NULL;
+	if (sk && sk->policy[dir])
+		pol = xfrm_sk_policy_lookup(sk, dir, &fl);
+
+	if (!pol)
+		pol = flow_cache_lookup(&fl, family,
+					policy_to_flow_dir(dir),
+					xfrm_policy_lookup);
+	/* No policy at all: accept only packets that carry no sec_path. */
+	if (!pol)
+		return !skb->sp;
+
+	pol->curlft.use_time = (unsigned long)xtime.tv_sec;
+	/* Any action other than ALLOW ends up at the reject label. */
+	if (pol->action == XFRM_POLICY_ALLOW) {
+		struct sec_path *sp;
+		static struct sec_path dummy;
+		int i, k;
+		/* No sec_path on the skb: check against the zeroed static one. */
+		if ((sp = skb->sp) == NULL)
+			sp = &dummy;
+
+		/* For each tunnel xfrm, find the first matching tmpl.
+		 * For each tmpl before that, find corresponding xfrm.
+		 * Order is _important_. Later we will implement
+		 * some barriers, but at the moment barriers
+		 * are implied between each two transformations.
+		 */
+		for (i = pol->xfrm_nr-1, k = 0; i >= 0; i--) {
+			k = xfrm_policy_ok(pol->xfrm_vec+i, sp, k, family);
+			if (k < 0)
+				goto reject;
+		}
+		/* Leftover states must not have mode set. */
+		for (; k < sp->len; k++) {
+			if (sp->x[k].xvec->props.mode)
+				goto reject;
+		}
+
+		xfrm_pol_put(pol);
+		return 1;
+	}
+
+reject:
+	xfrm_pol_put(pol);
+	return 0;
+}
+/* Decode skb's flow and run xfrm_lookup() on skb->dst; non-zero on success. */
+int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
+{
+	struct flowi fl;
+
+	if (_decode_session(skb, &fl, family) >= 0)
+		return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
+
+	return 0;
+}
+
+/* dst_ops->check: keep a bundle that is not stale, release a stale one.
+ * Optimize later using cookies and generation ids. */
+static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
+{
+	if (stale_bundle(dst)) {
+		dst_release(dst);
+		return NULL;
+	}
+	return dst;
+}
+/* Non-zero if any entry of the chain is obsolete, down, or has a bad state. */
+static int stale_bundle(struct dst_entry *dst)
+{
+	struct dst_entry *d;
+
+	for (d = dst; d != NULL; d = d->child) {
+		if (d->obsolete > 0)
+			return 1;
+		if (d->dev && !netif_running(d->dev))
+			return 1;
+		if (d->xfrm && d->xfrm->km.state != XFRM_STATE_VALID)
+			return 1;
+	}
+
+	return 0;
+}
+/* dst_ops->destroy: drop the bundle's state reference, if it has a state. */
+static void xfrm_dst_destroy(struct dst_entry *dst)
+{
+	if (dst->xfrm)
+		xfrm_state_put(dst->xfrm);
+	dst->xfrm = NULL;
+}
+static void xfrm_link_failure(struct sk_buff *skb)
+{
+	/* Nothing to do: this should be impossible, since such a dst must
+	 * have been popped before the point of failure is reached. */
+}
+/* dst_ops->negative_advice: release an obsolete dst and return NULL for it. */
+static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
+{
+	/* Nothing to do for a missing or non-obsolete entry. */
+	if (dst == NULL || !dst->obsolete)
+		return dst;
+
+	/* Obsolete entry: release it and report it gone. */
+	dst_release(dst);
+	return NULL;
+}
+/* Unlink every bundle for which func returns non-zero, then free them. */
+static void xfrm_prune_bundles(int (*func)(struct dst_entry *))
+{
+	int i;
+	struct xfrm_policy *pol;
+	struct dst_entry *dst, **dstp, *gc_list = NULL;
+	/* Matches are collected on gc_list under the locks; dst_free() runs later. */
+	read_lock_bh(&xfrm_policy_lock);
+	for (i=0; i<2*XFRM_POLICY_MAX; i++) {
+		for (pol = xfrm_policy_list[i]; pol; pol = pol->next) {
+			write_lock(&pol->lock);
+			dstp = &pol->bundles;
+			while ((dst=*dstp) != NULL) {
+				if (func(dst)) {
+					*dstp = dst->next;
+					dst->next = gc_list;
+					gc_list = dst;
+				} else {
+					dstp = &dst->next;
+				}
+			}
+			write_unlock(&pol->lock);
+		}
+	}
+	read_unlock_bh(&xfrm_policy_lock);
+	/* Locks dropped: free what was collected. */
+	while (gc_list) {
+		dst = gc_list;
+		gc_list = dst->next;
+		dst_free(dst);
+	}
+}
+/* Prune predicate: true for a bundle whose reference count reads zero. */
+static int unused_bundle(struct dst_entry *dst)
+{
+	return atomic_read(&dst->__refcnt) == 0;
+}
+/* Default afinfo->garbage_collect: prune bundles with a zero refcount. */
+static void __xfrm_garbage_collect(void)
+{
+	xfrm_prune_bundles(unused_bundle);
+}
+/* Prune every bundle that stale_bundle() reports stale; always returns 0. */
+int xfrm_flush_bundles(void)
+{
+	xfrm_prune_bundles(stale_bundle);
+	return 0;
+}
+
+/* Well... that's _TASK_. We need to scan through transformation
+ * list and figure out what mss tcp should generate in order for
+ * the final datagram to fit the mtu. Mama mia... :-)
+ *
+ * Apparently, some easy way exists, but we used to choose the most
+ * bizarre ones. :-) So, raising Kalashnikov... tra-ta-ta.
+ *
+ * Consider this function as something like dark humour. :-)
+ */
+static int xfrm_get_mss(struct dst_entry *dst, u32 mtu)
+{
+	int res = mtu - dst->header_len;
+	/* res is the candidate size; it is shrunk until the result fits mtu. */
+	for (;;) {
+		struct dst_entry *d = dst;
+		int m = res;
+		/* m: res after each state down the child chain has grown it. */
+		do {
+			struct xfrm_state *x = d->xfrm;
+			if (x) {
+				spin_lock_bh(&x->lock);
+				if (x->km.state == XFRM_STATE_VALID &&
+				    x->type && x->type->get_max_size)
+					m = x->type->get_max_size(d->xfrm, m);
+				else
+					m += x->props.header_len;
+				spin_unlock_bh(&x->lock);
+			}
+		} while ((d = d->child) != NULL);
+
+		if (m <= mtu)
+			break;
+		res -= (m - mtu);
+		if (res < 88)	/* NOTE(review): 88 is an unexplained floor - confirm its origin */
+			return mtu;
+	}
+
+	return res + dst->header_len;
+}
+
+int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)	/* returns 0 or a negative errno */
+{
+	int err = 0;
+	if (unlikely(afinfo == NULL))
+		return -EINVAL;
+	if (unlikely(afinfo->family >= NPROTO))
+		return -EAFNOSUPPORT;
+	write_lock(&xfrm_policy_afinfo_lock);
+	if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
+		err = -ENOBUFS;	/* the slot for this family is already occupied */
+	else {	/* install the generic xfrm default into every hook the family left NULL */
+		struct dst_ops *dst_ops = afinfo->dst_ops;
+		if (likely(dst_ops->kmem_cachep == NULL))
+			dst_ops->kmem_cachep = xfrm_dst_cache;
+		if (likely(dst_ops->check == NULL))
+			dst_ops->check = xfrm_dst_check;
+		if (likely(dst_ops->destroy == NULL))
+			dst_ops->destroy = xfrm_dst_destroy;
+		if (likely(dst_ops->negative_advice == NULL))
+			dst_ops->negative_advice = xfrm_negative_advice;
+		if (likely(dst_ops->link_failure == NULL))
+			dst_ops->link_failure = xfrm_link_failure;
+		if (likely(dst_ops->get_mss == NULL))
+			dst_ops->get_mss = xfrm_get_mss;
+		if (likely(afinfo->garbage_collect == NULL))
+			afinfo->garbage_collect = __xfrm_garbage_collect;
+		xfrm_policy_afinfo[afinfo->family] = afinfo;	/* publish the entry */
+	}
+	write_unlock(&xfrm_policy_afinfo_lock);
+	return err;
+}
+
+int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)	/* returns 0 or a negative errno */
+{
+	int err = 0;
+	if (unlikely(afinfo == NULL))
+		return -EINVAL;
+	if (unlikely(afinfo->family >= NPROTO))
+		return -EAFNOSUPPORT;
+	write_lock(&xfrm_policy_afinfo_lock);
+	if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {	/* an empty slot is not reported as an error */
+		if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
+			err = -EINVAL;	/* the slot belongs to a different afinfo */
+		else {
+			struct dst_ops *dst_ops = afinfo->dst_ops;
+			xfrm_policy_afinfo[afinfo->family] = NULL;
+			dst_ops->kmem_cachep = NULL;	/* all hooks are cleared, not only those registration filled in */
+			dst_ops->check = NULL;
+			dst_ops->destroy = NULL;
+			dst_ops->negative_advice = NULL;
+			dst_ops->link_failure = NULL;
+			dst_ops->get_mss = NULL;
+			afinfo->garbage_collect = NULL;
+		}
+	}
+	write_unlock(&xfrm_policy_afinfo_lock);
+	return err;
+}
+
+struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
+{
+	struct xfrm_policy_afinfo *afinfo;
+	if (unlikely(family >= NPROTO))
+		return NULL;
+	read_lock(&xfrm_policy_afinfo_lock);
+	afinfo = xfrm_policy_afinfo[family];
+	if (likely(afinfo != NULL))
+		read_lock(&afinfo->lock);	/* stays held on return; xfrm_policy_put_afinfo() drops it */
+	read_unlock(&xfrm_policy_afinfo_lock);
+	return afinfo;	/* NULL when the family is out of range or not registered */
+}
+
+void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
+{
+	/* Counterpart of xfrm_policy_get_afinfo(); a NULL afinfo is ignored. */
+	if (likely(afinfo != NULL))
+		read_unlock(&afinfo->lock);
+}
+
+static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	/* A device going down makes bundles stale (see stale_bundle());
+	 * flush them.  Every other event is ignored. */
+	if (event == NETDEV_DOWN)
+		xfrm_flush_bundles();
+	return NOTIFY_DONE;
+}
+
+struct notifier_block xfrm_dev_notifier = {	/* registered by xfrm_policy_init() */
+	xfrm_dev_event,	/* handler: flushes stale bundles on NETDEV_DOWN */
+	NULL,
+	0
+};
+
+void __init xfrm_policy_init(void)
+{
+	xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",	/* default dst_ops->kmem_cachep for registering families */
+					   sizeof(struct xfrm_dst),
+					   0, SLAB_HWCACHE_ALIGN,
+					   NULL, NULL);
+	if (!xfrm_dst_cache)
+		panic("XFRM: failed to allocate xfrm_dst_cache\n");
+	/* Set up the policy GC task and start listening for netdevice events. */
+	INIT_TQUEUE(&xfrm_policy_gc_work, xfrm_policy_gc_task, NULL);
+	register_netdevice_notifier(&xfrm_dev_notifier);
+}
+
+void __init xfrm_init(void)	/* boot-time setup of the xfrm subsystem, in this order */
+{
+	xfrm_state_init();	/* state hash tables and the state GC task */
+	xfrm_policy_init();	/* dst cache, policy GC task, netdevice notifier */
+	xfrm_input_init();	/* defined elsewhere */
+}
+
Index: net/xfrm/xfrm_state.c
===================================================================
RCS file: net/xfrm/xfrm_state.c
diff -N net/xfrm/xfrm_state.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ b/net/xfrm/xfrm_state.c	16 Apr 2004 13:16:27 -0000	1.8.8.1
@@ -0,0 +1,942 @@
+/*
+ * xfrm_state.c
+ *
+ * Changes:
+ *	Mitsuru KANDA @USAGI
+ * 	Kazunori MIYAZAWA @USAGI
+ * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
+ * 		IPv6 support
+ * 	YOSHIFUJI Hideaki @USAGI
+ * 		Split up af-specific functions
+ *	Derek Atkins <derek@ihtfp.com>
+ *		Add UDP Encapsulation
+ * 	
+ */
+
+#include <net/xfrm.h>
+#include <linux/pfkeyv2.h>
+#include <linux/ipsec.h>
+#include <asm/uaccess.h>
+#include <linux/tqueue.h>
+
+/* Each xfrm_state may be linked to two tables:
+
+   1. Hash table by (spi,daddr,ah/esp) to find an SA by SPI. (input,ctl)
+   2. Hash table by daddr to find which SAs exist for a given
+      destination/tunnel endpoint. (output)
+ */
+
+static spinlock_t xfrm_state_lock = SPIN_LOCK_UNLOCKED;	/* taken around the walks and updates of the two tables below */
+
+/* Hash table to find an appropriate SA towards a given target (endpoint
+ * of a tunnel or destination of transport mode) allowed by the selector.
+ *
+ * Its main use is finding an SA after policy selected tunnel or transport mode.
+ * It can also be used by the ah/esp icmp error handler to find the offending SA.
+ */
+static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE];
+static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE];	/* table 1 above, indexed by xfrm_spi_hash() */
+
+DECLARE_WAIT_QUEUE_HEAD(km_waitq);	/* woken on state insert, expiry, flush and destruction */
+
+static rwlock_t xfrm_state_afinfo_lock = RW_LOCK_UNLOCKED;
+static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];	/* per-family helpers, indexed by family */
+
+static struct tq_struct xfrm_state_gc_work;	/* runs xfrm_state_gc_task() */
+static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list);	/* dead states awaiting destruction */
+static spinlock_t xfrm_state_gc_lock = SPIN_LOCK_UNLOCKED;	/* protects xfrm_state_gc_list */
+
+static void __xfrm_state_delete(struct xfrm_state *x);
+
+/* Final teardown of one dead state; called from xfrm_state_gc_task(). */
+static void xfrm_state_gc_destroy(struct xfrm_state *x)
+{
+	/* An armed timer holds a reference, so one still pending here is a bug. */
+	if (del_timer(&x->timer))
+		BUG();
+	/* kfree() ignores NULL, so attachments that were never set need no guard. */
+	kfree(x->aalg);
+	kfree(x->ealg);
+	kfree(x->calg);
+	kfree(x->encap);
+	/* Let the type release its private data, then drop the type itself. */
+	if (x->type) {
+		x->type->destructor(x);
+		xfrm_put_type(x->type);
+	}
+	kfree(x);
+	wake_up(&km_waitq);
+}
+
+static void xfrm_state_gc_task(void *data)
+{
+	struct xfrm_state *x;
+	struct list_head *entry, *tmp;
+	struct list_head gc_list = LIST_HEAD_INIT(gc_list);
+	/* Detach the whole pending list under the lock, then destroy the states unlocked. */
+	spin_lock_bh(&xfrm_state_gc_lock);
+	list_splice_init(&xfrm_state_gc_list, &gc_list);
+	spin_unlock_bh(&xfrm_state_gc_lock);
+
+	list_for_each_safe(entry, tmp, &gc_list) {
+		x = list_entry(entry, struct xfrm_state, bydst);	/* dead states are queued through ->bydst */
+		xfrm_state_gc_destroy(x);
+	}
+}
+
+static inline unsigned long make_jiffies(long secs)
+{
+	/* Seconds to jiffies, saturating just below MAX_SCHEDULE_TIMEOUT
+	 * so that large values cannot overflow the multiplication by HZ. */
+	return (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ) ?
+		MAX_SCHEDULE_TIMEOUT-1 : secs*HZ;
+}
+
+static void xfrm_timer_handler(unsigned long data)
+{
+	struct xfrm_state *x = (struct xfrm_state*)data;
+	unsigned long now = (unsigned long)xtime.tv_sec;
+	long next = LONG_MAX;	/* seconds until the nearest deadline; LONG_MAX means none */
+	int warn = 0;		/* set once a soft lifetime has run out */
+	/* Per-state lifetime timer; one state reference is dropped on the way out. */
+	spin_lock(&x->lock);
+	if (x->km.state == XFRM_STATE_DEAD)
+		goto out;
+	if (x->km.state == XFRM_STATE_EXPIRED)
+		goto expired;
+	if (x->lft.hard_add_expires_seconds) {	/* hard limit counted from add_time */
+		long tmo = x->lft.hard_add_expires_seconds +
+			x->curlft.add_time - now;
+		if (tmo <= 0)
+			goto expired;
+		if (tmo < next)
+			next = tmo;
+	}
+	if (x->lft.hard_use_expires_seconds) {	/* hard limit counted from use_time, or from now while unused */
+		long tmo = x->lft.hard_use_expires_seconds +
+			(x->curlft.use_time ? : now) - now;
+		if (tmo <= 0)
+			goto expired;
+		if (tmo < next)
+			next = tmo;
+	}
+	if (x->km.dying)
+		goto resched;	/* soft expiry was already reported; only re-arm */
+	if (x->lft.soft_add_expires_seconds) {
+		long tmo = x->lft.soft_add_expires_seconds +
+			x->curlft.add_time - now;
+		if (tmo <= 0)
+			warn = 1;
+		else if (tmo < next)
+			next = tmo;
+	}
+	if (x->lft.soft_use_expires_seconds) {
+		long tmo = x->lft.soft_use_expires_seconds +
+			(x->curlft.use_time ? : now) - now;
+		if (tmo <= 0)
+			warn = 1;
+		else if (tmo < next)
+			next = tmo;
+	}
+
+	if (warn)
+		km_state_expired(x, 0);	/* soft expiry: notify the key managers, state stays usable */
+resched:
+	if (next != LONG_MAX &&
+	    !mod_timer(&x->timer, jiffies + make_jiffies(next)))
+		xfrm_state_hold(x);	/* timer was not pending: the new arming takes its own reference */
+	goto out;
+
+expired:
+	if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
+		x->km.state = XFRM_STATE_EXPIRED;
+		wake_up(&km_waitq);
+		next = 2;	/* look again in 2 seconds; the EXPIRED check above then leads back here */
+		goto resched;
+	}
+	if (x->id.spi != 0)
+		km_state_expired(x, 1);	/* hard expiry notification */
+	__xfrm_state_delete(x);
+
+out:
+	spin_unlock(&x->lock);
+	xfrm_state_put(x);
+}
+
+struct xfrm_state *xfrm_state_alloc(void)
+{
+	struct xfrm_state *x = kmalloc(sizeof(*x), GFP_ATOMIC);
+
+	if (!x)
+		return NULL;
+	/* Start from all zeroes; the caller owns the single initial reference. */
+	memset(x, 0, sizeof(*x));
+	atomic_set(&x->refcnt, 1);
+	atomic_set(&x->tunnel_users, 0);
+	INIT_LIST_HEAD(&x->bydst);
+	INIT_LIST_HEAD(&x->byspi);
+	x->lock = SPIN_LOCK_UNLOCKED;
+	init_timer(&x->timer);
+	x->timer.function = xfrm_timer_handler;
+	x->timer.data	  = (unsigned long)x;
+	x->curlft.add_time = (unsigned long)xtime.tv_sec;
+	/* Byte and packet lifetimes start out unlimited. */
+	x->lft.soft_byte_limit = XFRM_INF;
+	x->lft.soft_packet_limit = XFRM_INF;
+	x->lft.hard_byte_limit = XFRM_INF;
+	x->lft.hard_packet_limit = XFRM_INF;
+	return x;
+}
+
+void __xfrm_state_destroy(struct xfrm_state *x)
+{
+	BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
+	/* Destruction is deferred: queue the state (through ->bydst) for xfrm_state_gc_task(). */
+	spin_lock_bh(&xfrm_state_gc_lock);
+	list_add(&x->bydst, &xfrm_state_gc_list);
+	spin_unlock_bh(&xfrm_state_gc_lock);
+	schedule_task(&xfrm_state_gc_work);
+}
+
+static void __xfrm_state_delete(struct xfrm_state *x)
+{	/* the callers visible in this file hold x->lock */
+	if (x->km.state != XFRM_STATE_DEAD) {	/* deleting an already dead state is a no-op */
+		x->km.state = XFRM_STATE_DEAD;
+		spin_lock(&xfrm_state_lock);
+		list_del(&x->bydst);
+		atomic_dec(&x->refcnt);	/* reference taken for the bydst table */
+		if (x->id.spi) {
+			list_del(&x->byspi);
+			atomic_dec(&x->refcnt);	/* reference taken for the byspi table */
+		}
+		spin_unlock(&xfrm_state_lock);
+		if (del_timer(&x->timer))
+			atomic_dec(&x->refcnt);	/* reference taken for the pending timer */
+
+		/* The number two in this test is the reference
+		 * mentioned in the comment below plus the reference
+		 * our caller holds.  A larger value means that
+		 * there are DSTs attached to this xfrm_state.
+		 */
+		if (atomic_read(&x->refcnt) > 2)
+			xfrm_flush_bundles();
+
+		/* All xfrm_state objects are created by one of two possible
+		 * paths:
+		 * 1) xfrm_state_alloc --> xfrm_state_insert
+		 * 2) xfrm_state_lookup --> xfrm_state_insert
+		 *
+		 * The xfrm_state_lookup or xfrm_state_alloc call gives a
+		 * reference, and that is what we are dropping here.
+		 */
+		atomic_dec(&x->refcnt);
+	}
+}
+
+void xfrm_state_delete(struct xfrm_state *x)
+{
+	xfrm_state_delete_tunnel(x);	/* first detach from the tunnel state x may refer to */
+	spin_lock_bh(&x->lock);
+	__xfrm_state_delete(x);		/* unhash, stop the timer, mark DEAD */
+	spin_unlock_bh(&x->lock);
+}
+
+void xfrm_state_flush(u8 proto)
+{
+	int i;
+	struct xfrm_state *x;
+	/* Delete every state of protocol proto (IPSEC_PROTO_ANY: all) for which xfrm_state_kern() is false. */
+	spin_lock_bh(&xfrm_state_lock);
+	for (i = 0; i < XFRM_DST_HSIZE; i++) {
+restart:
+		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
+			if (!xfrm_state_kern(x) &&
+			    (proto == IPSEC_PROTO_ANY || x->id.proto == proto)) {
+				xfrm_state_hold(x);	/* keep x alive while the table lock is dropped */
+				spin_unlock_bh(&xfrm_state_lock);
+
+				xfrm_state_delete(x);
+				xfrm_state_put(x);
+
+				spin_lock_bh(&xfrm_state_lock);
+				goto restart;	/* the bucket may have changed meanwhile: rescan it from its head */
+			}
+		}
+	}
+	spin_unlock_bh(&xfrm_state_lock);
+	wake_up(&km_waitq);
+}
+
+static int
+xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
+		  struct xfrm_tmpl *tmpl,
+		  xfrm_address_t *daddr, xfrm_address_t *saddr,
+		  unsigned short family)
+{
+	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
+	if (!afinfo)
+		return -1;	/* no helpers available for this family */
+	afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);	/* family-specific setup of x's temporary selector */
+	xfrm_state_put_afinfo(afinfo);
+	return 0;
+}
+
+struct xfrm_state *
+xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
+		struct flowi *fl, struct xfrm_tmpl *tmpl,
+		struct xfrm_policy *pol, int *err,
+		unsigned short family)
+{
+	unsigned h = xfrm_dst_hash(daddr, family);
+	struct xfrm_state *x;
+	int acquire_in_progress = 0;
+	int error = 0;
+	struct xfrm_state *best = NULL;
+	/* Returns a held state (VALID, or a fresh ACQ placeholder) or NULL with *err set. */
+	spin_lock_bh(&xfrm_state_lock);
+	list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
+		if (x->props.family == family &&
+		    x->props.reqid == tmpl->reqid &&
+		    xfrm_state_addr_check(x, daddr, saddr, family) &&
+		    tmpl->mode == x->props.mode &&
+		    tmpl->id.proto == x->id.proto) {
+			/* Resolution logic:
+			   1. There is a valid state with matching selector.
+			      Done.
+			   2. Valid state with inappropriate selector. Skip.
+
+			   Entering area of "sysdeps".
+
+			   3. If state is not valid, selector is temporary,
+			      it selects only session which triggered
+			      previous resolution. Key manager will do
+			      something to install a state with proper
+			      selector.
+			 */
+			if (x->km.state == XFRM_STATE_VALID) {
+				if (!xfrm_selector_match(&x->sel, fl, family))
+					continue;
+				/* Prefer a state that is not dying, then the most recently added one. */
+				if (!best ||
+				    best->km.dying > x->km.dying ||
+				    (best->km.dying == x->km.dying &&
+				     best->curlft.add_time < x->curlft.add_time))
+					best = x;
+			} else if (x->km.state == XFRM_STATE_ACQ) {
+				acquire_in_progress = 1;
+			} else if (x->km.state == XFRM_STATE_ERROR ||
+				   x->km.state == XFRM_STATE_EXPIRED) {
+				if (xfrm_selector_match(&x->sel, fl, family))
+					error = 1;
+			}
+		}
+	}
+
+	if (best) {
+		xfrm_state_hold(best);
+		spin_unlock_bh(&xfrm_state_lock);
+		return best;
+	}
+
+	x = NULL;
+	if (!error && !acquire_in_progress &&
+	    ((x = xfrm_state_alloc()) != NULL)) {
+		/* Initialize temporary selector matching only
+		 * to current session. */
+		xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
+
+		if (km_query(x, tmpl, pol) == 0) {
+			x->km.state = XFRM_STATE_ACQ;
+			list_add_tail(&x->bydst, xfrm_state_bydst+h);
+			xfrm_state_hold(x);
+			if (x->id.spi) {
+				h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
+				list_add(&x->byspi, xfrm_state_byspi+h);
+				xfrm_state_hold(x);
+			}
+			x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
+			xfrm_state_hold(x);	/* reference owned by the timer armed below */
+			mod_timer(&x->timer, jiffies + XFRM_ACQ_EXPIRES*HZ);	/* expiry is absolute: offset it from jiffies */
+		} else {
+			x->km.state = XFRM_STATE_DEAD;
+			xfrm_state_put(x);
+			x = NULL;
+			error = 1;
+		}
+	}
+	spin_unlock_bh(&xfrm_state_lock);
+	if (!x)	/* -EAGAIN: acquire already running; -ESRCH: error/expired match or km_query failed; else allocation failed */
+		*err = acquire_in_progress ? -EAGAIN :
+			(error ? -ESRCH : -ENOMEM);
+	return x;
+}
+
+static void __xfrm_state_insert(struct xfrm_state *x)
+{
+	unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family);
+	/* Link x into both hash tables; each link takes a reference.  Callers here hold xfrm_state_lock. */
+	list_add(&x->bydst, xfrm_state_bydst+h);
+	xfrm_state_hold(x);
+
+	h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
+
+	list_add(&x->byspi, xfrm_state_byspi+h);
+	xfrm_state_hold(x);
+
+	if (!mod_timer(&x->timer, jiffies + HZ))
+		xfrm_state_hold(x);	/* timer was not pending: the arming takes its own reference */
+
+	wake_up(&km_waitq);
+}
+
+void xfrm_state_insert(struct xfrm_state *x)
+{
+	spin_lock_bh(&xfrm_state_lock);	/* locked wrapper around __xfrm_state_insert() */
+	__xfrm_state_insert(x);
+	spin_unlock_bh(&xfrm_state_lock);
+}
+
+int xfrm_state_add(struct xfrm_state *x)
+{
+	struct xfrm_state_afinfo *afinfo;
+	struct xfrm_state *x1;
+	int err;
+	/* Insert x unless a state with an SPI already occupies its place; returns 0 or a negative errno. */
+	afinfo = xfrm_state_get_afinfo(x->props.family);
+	if (unlikely(afinfo == NULL))
+		return -EAFNOSUPPORT;
+
+	spin_lock_bh(&xfrm_state_lock);
+
+	x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
+	if (!x1) {
+		x1 = afinfo->find_acq(	/* nothing under this SPI: look for an acquire entry instead (create == 0) */
+			x->props.mode, x->props.reqid, x->id.proto,
+			&x->id.daddr, &x->props.saddr, 0);
+		if (x1 && x1->id.spi != x->id.spi && x1->id.spi) {
+			xfrm_state_put(x1);	/* that entry carries a different SPI: leave it alone */
+			x1 = NULL;
+		}
+	}
+
+	if (x1 && x1->id.spi) {
+		xfrm_state_put(x1);
+		x1 = NULL;
+		err = -EEXIST;
+		goto out;
+	}
+
+	__xfrm_state_insert(x);
+	err = 0;
+
+out:
+	spin_unlock_bh(&xfrm_state_lock);
+	xfrm_state_put_afinfo(afinfo);
+
+	if (x1) {	/* only an entry without SPI can be left here: x replaces it */
+		xfrm_state_delete(x1);
+		xfrm_state_put(x1);
+	}
+
+	return err;
+}
+
+int xfrm_state_update(struct xfrm_state *x)
+{
+	struct xfrm_state_afinfo *afinfo;
+	struct xfrm_state *x1;
+	int err;
+	/* Update the state that shares x's (daddr, spi, proto); returns 0 or a negative errno. */
+	afinfo = xfrm_state_get_afinfo(x->props.family);
+	if (unlikely(afinfo == NULL))
+		return -EAFNOSUPPORT;
+
+	spin_lock_bh(&xfrm_state_lock);
+	x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
+
+	err = -ESRCH;
+	if (!x1)
+		goto out;
+
+	if (xfrm_state_kern(x1)) {
+		xfrm_state_put(x1);
+		err = -EEXIST;
+		goto out;
+	}
+
+	if (x1->km.state == XFRM_STATE_ACQ) {
+		__xfrm_state_insert(x);	/* x takes the place of the acquire entry */
+		x = NULL;
+	}
+	err = 0;
+
+out:
+	spin_unlock_bh(&xfrm_state_lock);
+	xfrm_state_put_afinfo(afinfo);
+
+	if (err)
+		return err;
+
+	if (!x) {	/* x was inserted above: retire the entry it replaced */
+		xfrm_state_delete(x1);
+		xfrm_state_put(x1);
+		return 0;
+	}
+
+	err = -EINVAL;
+	spin_lock_bh(&x1->lock);
+	if (likely(x1->km.state == XFRM_STATE_VALID)) {
+		if (x->encap && x1->encap)
+			memcpy(x1->encap, x->encap, sizeof(*x1->encap));
+		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
+		x1->km.dying = 0;
+
+		/* Re-arm and re-check under x1->lock, as the timer handler does, and only for a VALID state. */
+		if (!mod_timer(&x1->timer, jiffies + HZ))
+			xfrm_state_hold(x1);
+		if (x1->curlft.use_time)
+			xfrm_state_check_expire(x1);
+		err = 0;
+	}
+	spin_unlock_bh(&x1->lock);
+
+	xfrm_state_put(x1);
+	return err;
+}
+
+int xfrm_state_check_expire(struct xfrm_state *x)	/* 0 while x may be used, -EINVAL otherwise */
+{
+	if (!x->curlft.use_time)
+		x->curlft.use_time = (unsigned long)xtime.tv_sec;	/* first call: record the time of first use */
+
+	if (x->km.state != XFRM_STATE_VALID)
+		return -EINVAL;
+
+	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
+	    x->curlft.packets >= x->lft.hard_packet_limit) {
+		km_state_expired(x, 1);	/* hard limit reached: state becomes EXPIRED */
+		if (!mod_timer(&x->timer, jiffies + XFRM_ACQ_EXPIRES*HZ))
+			xfrm_state_hold(x);	/* timer was not pending: the arming takes its own reference */
+		return -EINVAL;
+	}
+
+	if (!x->km.dying &&
+	    (x->curlft.bytes >= x->lft.soft_byte_limit ||
+	     x->curlft.packets >= x->lft.soft_packet_limit))
+		km_state_expired(x, 0);	/* soft limit: reported once, since this sets km.dying */
+	return 0;
+}
+
+int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
+{
+	/* Headroom that this state's header and the link layer still lack. */
+	int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
+		- skb_headroom(skb);
+
+	/* Enough already.  (The tail is not checked yet.) */
+	if (nhead <= 0)
+		return 0;
+	return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
+}
+
+struct xfrm_state *
+xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
+		  unsigned short family)
+{
+	struct xfrm_state *x;
+	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
+	if (!afinfo)
+		return NULL;	/* no helpers available for this family */
+	/* Locked wrapper around the family's state_lookup(); callers here put a non-NULL result when done. */
+	spin_lock_bh(&xfrm_state_lock);
+	x = afinfo->state_lookup(daddr, spi, proto);
+	spin_unlock_bh(&xfrm_state_lock);
+	xfrm_state_put_afinfo(afinfo);
+	return x;
+}
+
+struct xfrm_state *
+xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
+	      xfrm_address_t *daddr, xfrm_address_t *saddr, 
+	      int create, unsigned short family)
+{
+	struct xfrm_state *x;
+	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
+	if (!afinfo)
+		return NULL;	/* no helpers available for this family */
+	/* Locked wrapper around the family's find_acq(); create is passed through unchanged. */
+	spin_lock_bh(&xfrm_state_lock);
+	x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create);
+	spin_unlock_bh(&xfrm_state_lock);
+	xfrm_state_put_afinfo(afinfo);
+	return x;
+}
+
+/* Silly enough, but I'm too lazy to build a resolution list */
+
+struct xfrm_state * xfrm_find_acq_byseq(u32 seq)
+{
+	int i;
+	struct xfrm_state *x;
+	/* Linear scan of every bydst bucket for the first state whose km.seq equals seq. */
+	spin_lock_bh(&xfrm_state_lock);
+	for (i = 0; i < XFRM_DST_HSIZE; i++) {
+		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
+			if (x->km.seq == seq) {
+				xfrm_state_hold(x);	/* returned with a reference held */
+				spin_unlock_bh(&xfrm_state_lock);
+				return x;
+			}
+		}
+	}
+	spin_unlock_bh(&xfrm_state_lock);
+	return NULL;
+}
+ 
+u32 xfrm_get_acqseq(void)
+{
+	u32 res;
+	static u32 acqseq;
+	static spinlock_t acqseq_lock = SPIN_LOCK_UNLOCKED;
+	/* Hand out the next sequence number from a counter private to this function. */
+	spin_lock_bh(&acqseq_lock);
+	res = (++acqseq ? : ++acqseq);	/* never 0: when the counter wraps to 0 it is bumped once more */
+	spin_unlock_bh(&acqseq_lock);
+	return res;
+}
+
+void
+xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
+{
+	u32 h, spi;
+	struct xfrm_state *x0;
+	/* Give x an unused SPI from [minspi, maxspi] and hash it; x->id.spi stays 0 when none was found. */
+	if (x->id.spi)
+		return;		/* x already has an SPI */
+
+	if (minspi == maxspi) {	/* a single candidate: usable only if no state owns it yet */
+		x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
+		if (x0) {
+			xfrm_state_put(x0);
+			return;
+		}
+		x->id.spi = minspi;
+	} else {		/* probe random values, at most once per value in the range */
+		minspi = ntohl(minspi);
+		maxspi = ntohl(maxspi);
+		for (h=0; h<maxspi-minspi+1; h++) {
+			spi = minspi + net_random()%(maxspi-minspi+1);
+			x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
+			if (x0 == NULL) {	/* free: take it; a colliding value is never assigned */
+				x->id.spi = htonl(spi);
+				break;
+			}
+			xfrm_state_put(x0);
+		}
+	}
+	if (x->id.spi) {
+		spin_lock_bh(&xfrm_state_lock);
+		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
+		list_add(&x->byspi, xfrm_state_byspi+h);
+		xfrm_state_hold(x);	/* reference for the byspi table */
+		spin_unlock_bh(&xfrm_state_lock);
+		wake_up(&km_waitq);
+	}
+}
+
+int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
+		    void *data)
+{
+	int i;
+	struct xfrm_state *x;
+	int count = 0;
+	int err = 0;
+	/* Call func on every state of protocol proto (IPSEC_PROTO_ANY: all), entirely under xfrm_state_lock. */
+	spin_lock_bh(&xfrm_state_lock);
+	for (i = 0; i < XFRM_DST_HSIZE; i++) {	/* pass 1: count the matching states */
+		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
+			if (proto == IPSEC_PROTO_ANY || x->id.proto == proto)
+				count++;
+		}
+	}
+	if (count == 0) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	for (i = 0; i < XFRM_DST_HSIZE; i++) {	/* pass 2: visit them */
+		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
+			if (proto != IPSEC_PROTO_ANY && x->id.proto != proto)
+				continue;
+			err = func(x, --count, data);	/* second argument: states still to come, 0 for the last */
+			if (err)
+				goto out;	/* a non-zero result stops the walk and is returned */
+		}
+	}
+out:
+	spin_unlock_bh(&xfrm_state_lock);
+	return err;
+}
+
+
+int xfrm_replay_check(struct xfrm_state *x, u32 seq)
+{
+	u32 diff;
+	/* Returns 0 when seq is acceptable, -EINVAL when it is 0, too old or already seen. */
+	seq = ntohl(seq);	/* seq is passed in network byte order */
+
+	if (unlikely(seq == 0))
+		return -EINVAL;
+
+	if (likely(seq > x->replay.seq))
+		return 0;	/* newer than anything recorded so far */
+
+	diff = x->replay.seq - seq;	/* distance behind the highest recorded sequence number */
+	if (diff >= x->props.replay_window) {
+		x->stats.replay_window++;	/* outside the window */
+		return -EINVAL;
+	}
+
+	if (x->replay.bitmap & (1U << diff)) {
+		x->stats.replay++;	/* bit already set: this number was seen before */
+		return -EINVAL;
+	}
+	return 0;
+}
+
+void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
+{
+	u32 diff;
+	/* Record seq in the replay state: bit 0 of the bitmap stands for replay.seq itself. */
+	seq = ntohl(seq);	/* seq is passed in network byte order */
+
+	if (seq > x->replay.seq) {
+		diff = seq - x->replay.seq;
+		if (diff < x->props.replay_window)
+			x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;	/* slide the window forward */
+		else
+			x->replay.bitmap = 1;	/* jumped past the whole window: start afresh */
+		x->replay.seq = seq;
+	} else {
+		diff = x->replay.seq - seq;
+		x->replay.bitmap |= (1U << diff);	/* older number: just mark it as seen */
+	}
+}
+
+int xfrm_check_selectors(struct xfrm_state **x, int n, struct flowi *fl)
+{
+	int idx = 0;
+
+	/* 0 only if every one of the n selectors matches fl, else -EINVAL. */
+	while (idx < n) {
+		struct xfrm_state *st = x[idx++];
+		if (!xfrm_selector_match(&st->sel, fl, st->props.family))
+			return -EINVAL;
+	}
+	return 0;
+}
+
+static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);	/* registered key managers (struct xfrm_mgr) */
+static rwlock_t		xfrm_km_lock = RW_LOCK_UNLOCKED;	/* guards xfrm_km_list */
+
+void km_state_expired(struct xfrm_state *x, int hard)
+{
+	struct xfrm_mgr *km;
+	/* Record a hard (state becomes EXPIRED) or soft (dying = 1) expiry, then notify every key manager. */
+	if (hard)
+		x->km.state = XFRM_STATE_EXPIRED;
+	else
+		x->km.dying = 1;
+
+	read_lock(&xfrm_km_lock);
+	list_for_each_entry(km, &xfrm_km_list, list)
+		km->notify(x, hard);
+	read_unlock(&xfrm_km_lock);
+
+	if (hard)
+		wake_up(&km_waitq);
+}
+
+int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
+{
+	int err = -EINVAL;	/* result when no key manager is registered */
+	struct xfrm_mgr *km;
+	/* Offer the acquire to each key manager in turn until one accepts it (returns 0). */
+	read_lock(&xfrm_km_lock);
+	list_for_each_entry(km, &xfrm_km_list, list) {
+		err = km->acquire(x, t, pol, XFRM_POLICY_OUT);
+		if (!err)
+			break;
+	}
+	read_unlock(&xfrm_km_lock);
+	return err;	/* 0, or the error of the last manager tried */
+}
+
+int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
+{
+	int err = -EINVAL;	/* result when no manager implements new_mapping */
+	struct xfrm_mgr *km;
+	/* Pass the new mapping to each key manager until one of them returns 0. */
+	read_lock(&xfrm_km_lock);
+	list_for_each_entry(km, &xfrm_km_list, list) {
+		if (km->new_mapping)	/* the hook is optional; without it err keeps its previous value */
+			err = km->new_mapping(x, ipaddr, sport);
+		if (!err)
+			break;
+	}
+	read_unlock(&xfrm_km_lock);
+	return err;
+}
+
+void km_policy_expired(struct xfrm_policy *pol, int dir, int hard)
+{
+	struct xfrm_mgr *km;
+	/* Tell every key manager that has a notify_policy hook about the policy expiry. */
+	read_lock(&xfrm_km_lock);
+	list_for_each_entry(km, &xfrm_km_list, list)
+		if (km->notify_policy)
+			km->notify_policy(pol, dir, hard);
+	read_unlock(&xfrm_km_lock);
+
+	if (hard)
+		wake_up(&km_waitq);
+}
+
+int xfrm_user_policy(struct sock *sk, int optname, u8 *optval, int optlen)
+{
+	int err;
+	u8 *data;
+	struct xfrm_mgr *km;
+	struct xfrm_policy *pol = NULL;
+	/* Copy a policy blob from user space, let a key manager compile it, and attach the result to sk. */
+	if (optlen <= 0 || optlen > PAGE_SIZE)
+		return -EMSGSIZE;
+
+	data = kmalloc(optlen, GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	err = -EFAULT;
+	if (copy_from_user(data, optval, optlen))
+		goto out;
+
+	err = -EINVAL;	/* result when no key manager is registered */
+	read_lock(&xfrm_km_lock);
+	list_for_each_entry(km, &xfrm_km_list, list) {
+		pol = km->compile_policy(sk->family, optname, data, optlen, &err);
+		if (err >= 0)
+			break;	/* the first manager that reports a non-negative err wins */
+	}
+	read_unlock(&xfrm_km_lock);
+
+	if (err >= 0) {
+		xfrm_sk_policy_insert(sk, err, pol);	/* the non-negative err is passed on as the second argument */
+		xfrm_pol_put(pol);
+		err = 0;
+	}
+
+out:
+	kfree(data);
+	return err;
+}
+
+int xfrm_register_km(struct xfrm_mgr *km)
+{
+	write_lock_bh(&xfrm_km_lock);
+	list_add_tail(&km->list, &xfrm_km_list);	/* appended, so managers are consulted in registration order */
+	write_unlock_bh(&xfrm_km_lock);
+	return 0;	/* always 0 */
+}
+
+int xfrm_unregister_km(struct xfrm_mgr *km)
+{
+	write_lock_bh(&xfrm_km_lock);
+	list_del(&km->list);	/* no check that km is actually on the list */
+	write_unlock_bh(&xfrm_km_lock);
+	return 0;	/* always 0 */
+}
+
+int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)	/* returns 0 or a negative errno */
+{
+	int err = 0;
+	if (unlikely(afinfo == NULL))
+		return -EINVAL;
+	if (unlikely(afinfo->family >= NPROTO))
+		return -EAFNOSUPPORT;	/* family would index past xfrm_state_afinfo[] */
+	write_lock(&xfrm_state_afinfo_lock);
+	if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
+		err = -ENOBUFS;	/* the slot for this family is already occupied */
+	else {
+		afinfo->state_bydst = xfrm_state_bydst;	/* give the family access to the shared hash tables */
+		afinfo->state_byspi = xfrm_state_byspi;
+		xfrm_state_afinfo[afinfo->family] = afinfo;
+	}
+	write_unlock(&xfrm_state_afinfo_lock);
+	return err;
+}
+
+int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)	/* returns 0 or a negative errno */
+{
+	int err = 0;
+	if (unlikely(afinfo == NULL))
+		return -EINVAL;
+	if (unlikely(afinfo->family >= NPROTO))
+		return -EAFNOSUPPORT;
+	write_lock(&xfrm_state_afinfo_lock);
+	if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {	/* an empty slot is not reported as an error */
+		if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
+			err = -EINVAL;	/* the slot belongs to a different afinfo */
+		else {
+			xfrm_state_afinfo[afinfo->family] = NULL;
+			afinfo->state_byspi = NULL;
+			afinfo->state_bydst = NULL;
+		}
+	}
+	write_unlock(&xfrm_state_afinfo_lock);
+	return err;
+}
+
+struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
+{
+	struct xfrm_state_afinfo *afinfo;
+	if (unlikely(family >= NPROTO))
+		return NULL;
+	read_lock(&xfrm_state_afinfo_lock);
+	afinfo = xfrm_state_afinfo[family];
+	if (likely(afinfo != NULL))
+		read_lock(&afinfo->lock);	/* stays held on return; xfrm_state_put_afinfo() drops it */
+	read_unlock(&xfrm_state_afinfo_lock);
+	return afinfo;	/* NULL when the family is out of range or not registered */
+}
+
+void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
+{
+	/* Counterpart of xfrm_state_get_afinfo(); a NULL afinfo is ignored. */
+	if (likely(afinfo != NULL))
+		read_unlock(&afinfo->lock);
+}
+
+/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
+void xfrm_state_delete_tunnel(struct xfrm_state *x)
+{
+	if (x->tunnel) {	/* nothing to do for a state without a tunnel state */
+		struct xfrm_state *t = x->tunnel;
+
+		if (atomic_read(&t->tunnel_users) == 2)	/* NOTE(review): 2 presumably means x is the last user - confirm */
+			xfrm_state_delete(t);
+		atomic_dec(&t->tunnel_users);
+		xfrm_state_put(t);	/* drop the reference held through x->tunnel */
+		x->tunnel = NULL;
+	}
+}
+
+void __init xfrm_state_init(void)
+{
+	int i;
+	/* Empty both hash tables and prepare the deferred-destruction task. */
+	for (i=0; i<XFRM_DST_HSIZE; i++) {
+		INIT_LIST_HEAD(&xfrm_state_bydst[i]);
+		INIT_LIST_HEAD(&xfrm_state_byspi[i]);
+	}
+	INIT_TQUEUE(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
+}
+
Index: net/xfrm/xfrm_user.c
===================================================================
RCS file: net/xfrm/xfrm_user.c
diff -N net/xfrm/xfrm_user.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ b/net/xfrm/xfrm_user.c	16 Apr 2004 13:16:27 -0000	1.10.2.1
@@ -0,0 +1,1196 @@
+/* xfrm_user.c: User interface to configure xfrm engine.
+ *
+ * Copyright (C) 2002 David S. Miller (davem@redhat.com)
+ *
+ * Changes:
+ *	Mitsuru KANDA @USAGI
+ * 	Kazunori MIYAZAWA @USAGI
+ * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
+ * 		IPv6 support
+ * 	
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/socket.h>
+#include <linux/string.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/pfkeyv2.h>
+#include <linux/ipsec.h>
+#include <linux/init.h>
+#include <net/sock.h>
+#include <net/xfrm.h>
+#include <asm/uaccess.h>
+
+static struct sock *xfrm_nl;
+
+/* Validate one optional XFRMA_ALG_* attribute; 0 if absent or sane, -EINVAL otherwise. */
+static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type)
+{
+	struct rtattr *rt = xfrma[type - 1];
+	struct xfrm_algo *algp;
+
+	if (!rt)
+		return 0;
+	if ((rt->rta_len - sizeof(*rt)) < sizeof(*algp))
+		return -EINVAL;
+	algp = RTA_DATA(rt);
+
+	/* alg_key_len counts bits (see struct xfrm_algo); the key bytes it
+	 * announces must really be present in the attribute payload. */
+	if (rt->rta_len - sizeof(*rt) - sizeof(*algp) < (algp->alg_key_len + 7U) / 8)
+		return -EINVAL;
+	switch (type) {
+	case XFRMA_ALG_AUTH:
+		if (!algp->alg_key_len &&
+		    strcmp(algp->alg_name, "digest_null") != 0)
+			return -EINVAL;
+		break;
+	case XFRMA_ALG_CRYPT:
+		if (!algp->alg_key_len &&
+		    strcmp(algp->alg_name, "cipher_null") != 0)
+			return -EINVAL;
+		break;
+	case XFRMA_ALG_COMP:
+		/* Zero length keys are legal.  */
+		break;
+	default:
+		return -EINVAL;
+	}
+	algp->alg_name[CRYPTO_MAX_ALG_NAME - 1] = '\0';	/* force termination of the user-supplied name */
+	return 0;
+}
+
+static int verify_encap_tmpl(struct rtattr **xfrma)
+{
+	struct rtattr *rt = xfrma[XFRMA_ENCAP - 1];
+
+	/* The attribute is optional ... */
+	if (rt == NULL)
+		return 0;
+
+	/* ... but when present its payload must hold a whole template. */
+	if ((rt->rta_len - sizeof(*rt)) < sizeof(struct xfrm_encap_tmpl))
+		return -EINVAL;
+	return 0;
+}
+
+static int verify_newsa_info(struct xfrm_usersa_info *p,
+			     struct rtattr **xfrma)	/* 0 if the request is acceptable, else a negative errno */
+{
+	int err;
+
+	err = -EINVAL;
+	switch (p->family) {	/* step 1: address family */
+	case AF_INET:
+		break;
+
+	case AF_INET6:
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		break;
+#else
+		err = -EAFNOSUPPORT;
+		goto out;
+#endif
+
+	default:
+		goto out;
+	};
+
+	err = -EINVAL;
+	switch (p->id.proto) {	/* step 2: each protocol accepts only its own algorithm attributes */
+	case IPPROTO_AH:	/* auth required; crypt and comp forbidden */
+		if (!xfrma[XFRMA_ALG_AUTH-1]	||
+		    xfrma[XFRMA_ALG_CRYPT-1]	||
+		    xfrma[XFRMA_ALG_COMP-1])
+			goto out;
+		break;
+
+	case IPPROTO_ESP:	/* at least one of auth/crypt; comp forbidden */
+		if ((!xfrma[XFRMA_ALG_AUTH-1] &&
+		     !xfrma[XFRMA_ALG_CRYPT-1])	||
+		    xfrma[XFRMA_ALG_COMP-1])
+			goto out;
+		break;
+
+	case IPPROTO_COMP:	/* comp required; auth and crypt forbidden */
+		if (!xfrma[XFRMA_ALG_COMP-1]	||
+		    xfrma[XFRMA_ALG_AUTH-1]	||
+		    xfrma[XFRMA_ALG_CRYPT-1])
+			goto out;
+		break;
+
+	default:
+		goto out;
+	};
+
+	if ((err = verify_one_alg(xfrma, XFRMA_ALG_AUTH)))	/* step 3: each attribute on its own */
+		goto out;
+	if ((err = verify_one_alg(xfrma, XFRMA_ALG_CRYPT)))
+		goto out;
+	if ((err = verify_one_alg(xfrma, XFRMA_ALG_COMP)))
+		goto out;
+	if ((err = verify_encap_tmpl(xfrma)))
+		goto out;
+
+	err = -EINVAL;
+	switch (p->mode) {	/* step 4: only mode values 0 and 1 are accepted */
+	case 0:
+	case 1:
+		break;
+
+	default:
+		goto out;
+	};
+
+	err = 0;
+
+out:
+	return err;
+}
+
+/* Copy the xfrm_algo carried by u_arg (if any) into a fresh buffer stored in *algpp. */
+static int attach_one_algo(struct xfrm_algo **algpp, struct rtattr *u_arg)
+{
+	struct xfrm_algo *p, *ualg;
+	size_t len;
+
+	if (!u_arg)
+		return 0;	/* attribute absent: *algpp is left untouched */
+	ualg = RTA_DATA(u_arg);
+	len = sizeof(*ualg) + (ualg->alg_key_len + 7U) / 8;	/* alg_key_len is in bits, the key itself in bytes */
+	p = kmalloc(len, GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+	memcpy(p, ualg, len);
+	*algpp = p;
+	return 0;
+}
+
+static int attach_encap_tmpl(struct xfrm_encap_tmpl **encapp, struct rtattr *u_arg)
+{
+	struct xfrm_encap_tmpl *copy;
+
+	/* No attribute supplied: leave *encapp alone. */
+	if (u_arg == NULL)
+		return 0;
+
+	copy = kmalloc(sizeof(*copy), GFP_KERNEL);
+	if (copy == NULL)
+		return -ENOMEM;
+
+	/* Private copy of the template carried in the attribute payload. */
+	memcpy(copy, RTA_DATA(u_arg), sizeof(*copy));
+	*encapp = copy;
+	return 0;
+}
+
+static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p)	/* fill x from the netlink request p */
+{
+	memcpy(&x->id, &p->id, sizeof(x->id));
+	memcpy(&x->sel, &p->sel, sizeof(x->sel));
+	memcpy(&x->lft, &p->lft, sizeof(x->lft));	/* replaces the unlimited defaults set by xfrm_state_alloc() */
+	x->props.mode = p->mode;
+	x->props.replay_window = p->replay_window;
+	x->props.reqid = p->reqid;
+	x->props.family = p->family;
+	x->props.saddr = p->saddr;
+	x->props.flags = p->flags;
+}
+
+static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p,
+					       struct rtattr **xfrma,
+					       int *errp)	/* *errp is written only when NULL is returned */
+{
+	struct xfrm_state *x = xfrm_state_alloc();
+	int err = -ENOMEM;
+
+	if (!x)
+		goto error_no_put;
+
+	copy_from_user_state(x, p);
+
+	if ((err = attach_one_algo(&x->aalg, xfrma[XFRMA_ALG_AUTH-1])))	/* private copies of the optional attributes */
+		goto error;
+	if ((err = attach_one_algo(&x->ealg, xfrma[XFRMA_ALG_CRYPT-1])))
+		goto error;
+	if ((err = attach_one_algo(&x->calg, xfrma[XFRMA_ALG_COMP-1])))
+		goto error;
+	if ((err = attach_encap_tmpl(&x->encap, xfrma[XFRMA_ENCAP-1])))
+		goto error;
+
+	err = -ENOENT;
+	x->type = xfrm_get_type(x->id.proto, x->props.family);	/* handler for this protocol/family pair */
+	if (x->type == NULL)
+		goto error;
+
+	err = x->type->init_state(x, NULL);
+	if (err)
+		goto error;
+
+	x->curlft.add_time = (unsigned long) xtime.tv_sec;
+	x->km.state = XFRM_STATE_VALID;
+	x->km.seq = p->seq;
+
+	return x;	/* a VALID state, not yet inserted into any table */
+
+error:
+	x->km.state = XFRM_STATE_DEAD;	/* __xfrm_state_destroy() expects a DEAD state */
+	xfrm_state_put(x);
+error_no_put:
+	*errp = err;
+	return NULL;
+}
+
+static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+{
+	struct xfrm_usersa_info *p = NLMSG_DATA(nlh);
+	struct xfrm_state *x;
+	int err;
+	/* Validate the request, build a state from it, then add it or update an existing one. */
+	err = verify_newsa_info(p, (struct rtattr **) xfrma);
+	if (err)
+		return err;
+
+	x = xfrm_state_construct(p, (struct rtattr **) xfrma, &err);
+	if (!x)
+		return err;
+
+	if (nlh->nlmsg_type == XFRM_MSG_NEWSA)
+		err = xfrm_state_add(x);
+	else
+		err = xfrm_state_update(x);	/* any other message type routed here is treated as an update */
+
+	if (err < 0) {
+		x->km.state = XFRM_STATE_DEAD;	/* not inserted: mark DEAD so the put below may destroy it */
+		xfrm_state_put(x);
+	}
+	/* NOTE(review): when xfrm_state_update() refreshes the existing state in place, x is neither inserted nor put here - confirm it is released. */
+	return err;
+}
+/* Netlink handler for delete-SA: -ESRCH if not found, -EPERM if xfrm_state_kern() is true for it. */
+static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+{
+	struct xfrm_usersa_id *id = NLMSG_DATA(nlh);
+	struct xfrm_state *state;
+	int rc = 0;
+
+	state = xfrm_state_lookup(&id->daddr, id->spi, id->proto, id->family);
+	if (!state)
+		return -ESRCH;
+
+	if (xfrm_state_kern(state))
+		rc = -EPERM;
+	else
+		xfrm_state_delete(state);
+
+	/* The looked-up state is released on both paths. */
+	xfrm_state_put(state);
+	return rc;
+}
+/* Export the user-visible fields of x into *p; only the fields listed here are written. */
+static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p)
+{
+	p->seq = x->km.seq;
+	p->family = x->props.family;
+	p->mode = x->props.mode;
+	p->reqid = x->props.reqid;
+	p->flags = x->props.flags;
+	p->saddr = x->props.saddr;
+	p->replay_window = x->props.replay_window;
+	memcpy(&p->stats, &x->stats, sizeof(p->stats));
+	memcpy(&p->curlft, &x->curlft, sizeof(p->curlft));
+	memcpy(&p->lft, &x->lft, sizeof(p->lft));
+	memcpy(&p->sel, &x->sel, sizeof(p->sel));
+	memcpy(&p->id, &x->id, sizeof(p->id));
+}
+/* Context handed to the per-entry dump callbacks for states and policies. */
+struct xfrm_dump_info {
+	struct sk_buff *in_skb;		/* request skb; its netlink pid is put in each reply header */
+	struct sk_buff *out_skb;	/* skb the reply messages are appended to */
+	u32 nlmsg_seq;			/* sequence number put in each reply header */
+	int start_idx;			/* entries with a lower index are skipped */
+	int this_idx;			/* index of the entry being visited */
+};
+/* Append one XFRM_MSG_NEWSA message for x to sp->out_skb; 0 on success or skip, -1 on failure. */
+static int dump_one_state(struct xfrm_state *x, int count, void *ptr)
+{
+	struct xfrm_dump_info *sp = ptr;
+	struct sk_buff *in_skb = sp->in_skb;
+	struct sk_buff *skb = sp->out_skb;
+	struct xfrm_usersa_info *p;
+	struct nlmsghdr *nlh;
+	unsigned char *b = skb->tail;	/* start of this message: used for its length and for rollback */
+
+	if (sp->this_idx < sp->start_idx)
+		goto out;	/* before the start index: count the entry, emit nothing */
+	/* NLMSG_PUT and RTA_PUT contain the only jumps to the failure labels below. */
+	nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid,
+			sp->nlmsg_seq,
+			XFRM_MSG_NEWSA, sizeof(*p));
+	nlh->nlmsg_flags = 0;
+
+	p = NLMSG_DATA(nlh);
+	copy_to_user_state(x, p);
+	/* Auth/crypt attributes: descriptor plus (alg_key_len+7)/8 trailing bytes. */
+	if (x->aalg)
+		RTA_PUT(skb, XFRMA_ALG_AUTH,
+			sizeof(*(x->aalg))+(x->aalg->alg_key_len+7)/8, x->aalg);
+	if (x->ealg)
+		RTA_PUT(skb, XFRMA_ALG_CRYPT,
+			sizeof(*(x->ealg))+(x->ealg->alg_key_len+7)/8, x->ealg);
+	if (x->calg)
+		RTA_PUT(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg);
+
+	if (x->encap)
+		RTA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap);
+
+	nlh->nlmsg_len = skb->tail - b;
+out:
+	sp->this_idx++;
+	return 0;
+
+nlmsg_failure:
+rtattr_failure:
+	skb_trim(skb, b - skb->data);	/* discard the partially built message */
+	return -1;
+}
+/* Dump callback for SAs: resume at cb->args[0], append to skb, store the new position back. */
+static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct xfrm_dump_info walk = {
+		.in_skb		= cb->skb,
+		.out_skb	= skb,
+		.nlmsg_seq	= cb->nlh->nlmsg_seq,
+		.start_idx	= cb->args[0],
+		.this_idx	= 0,
+	};
+
+	(void) xfrm_state_walk(IPSEC_PROTO_ANY, dump_one_state, &walk);
+	cb->args[0] = walk.this_idx;
+	return skb->len;
+}
+/* Build a reply skb describing x for the sender of in_skb; returns the skb or an ERR_PTR(), never NULL. */
+static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb,
+					  struct xfrm_state *x, u32 seq)
+{
+	struct xfrm_dump_info info;
+	struct sk_buff *skb;
+	/* GFP_ATOMIC: xfrm_alloc_userspi() calls this while holding x->lock. */
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
+	if (!skb)
+		return ERR_PTR(-ENOMEM);
+
+	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
+	info.in_skb = in_skb;
+	info.out_skb = skb;
+	info.nlmsg_seq = seq;
+	info.this_idx = info.start_idx = 0;
+	/* Callers test the result with IS_ERR() only, so failure must not be NULL. */
+	if (dump_one_state(x, 0, &info)) {
+		kfree_skb(skb);
+		return ERR_PTR(-EMSGSIZE);
+	}
+
+	return skb;
+}
+/* Netlink handler for get-SA: look the state up and unicast its description to the requester. */
+static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+{
+	struct xfrm_usersa_id *id = NLMSG_DATA(nlh);
+	struct sk_buff *reply;
+	struct xfrm_state *state;
+	int rc;
+
+	state = xfrm_state_lookup(&id->daddr, id->spi, id->proto, id->family);
+	if (!state)
+		return -ESRCH;
+
+	reply = xfrm_state_netlink(skb, state, nlh->nlmsg_seq);
+	if (!IS_ERR(reply))
+		rc = netlink_unicast(xfrm_nl, reply,
+				     NETLINK_CB(skb).pid, MSG_DONTWAIT);
+	else
+		rc = PTR_ERR(reply);
+
+	/* Release the looked-up state whether or not a reply was sent. */
+	xfrm_state_put(state);
+
+	return rc;
+}
+/* Validate an SPI-allocation request from userspace; returns 0 or -EINVAL. */
+static int verify_userspi_info(struct xfrm_userspi_info *p)
+{
+	const int proto = p->info.id.proto;
+
+	/*
+	 * Only IPPROTO_AH, IPPROTO_ESP and IPPROTO_COMP are accepted;
+	 * every other protocol is rejected outright.
+	 */
+	if (proto == IPPROTO_COMP) {
+		/* IPCOMP spi is 16-bits. */
+		if (p->max >= 0x10000)
+			return -EINVAL;
+	} else if (proto != IPPROTO_AH && proto != IPPROTO_ESP) {
+		return -EINVAL;
+	}
+
+	/* The requested range must not be inverted. */
+	if (p->min > p->max)
+		return -EINVAL;
+
+	return 0;
+}
+/* Netlink handler for alloc-SPI: get a state via xfrm_find_acq(), give it an SPI, unicast the result. */
+static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+{
+	struct xfrm_state *x;
+	struct xfrm_userspi_info *p;
+	struct sk_buff *resp_skb;
+	int err;
+
+	p = NLMSG_DATA(nlh);
+	err = verify_userspi_info(p);
+	if (err)
+		goto out_noput;
+	x = xfrm_find_acq(p->info.mode, p->info.reqid, p->info.id.proto,
+			  &p->info.id.daddr,
+			  &p->info.saddr, 1,
+			  p->info.family);
+	err = -ENOENT;
+	if (x == NULL)
+		goto out_noput;
+	/* Stays -ENOENT unless an SPI is assigned and a reply skb is built. */
+	resp_skb = ERR_PTR(-ENOENT);
+
+	spin_lock_bh(&x->lock);
+	if (x->km.state != XFRM_STATE_DEAD) {
+		xfrm_alloc_spi(x, htonl(p->min), htonl(p->max));	/* range passed in network byte order */
+		if (x->id.spi)
+			resp_skb = xfrm_state_netlink(skb, x, nlh->nlmsg_seq);
+	}
+	spin_unlock_bh(&x->lock);
+
+	if (IS_ERR(resp_skb)) {
+		err = PTR_ERR(resp_skb);
+		goto out;
+	}
+
+	err = netlink_unicast(xfrm_nl, resp_skb,
+			      NETLINK_CB(skb).pid, MSG_DONTWAIT);
+
+out:
+	xfrm_state_put(x);
+out_noput:
+	return err;
+}
+/* Return 0 if dir is XFRM_POLICY_IN, XFRM_POLICY_OUT or XFRM_POLICY_FWD, -EINVAL otherwise. */
+static int verify_policy_dir(__u8 dir)
+{
+	int rc;
+
+	/* Exactly three directions are recognised. */
+	if (dir == XFRM_POLICY_IN ||
+	    dir == XFRM_POLICY_OUT ||
+	    dir == XFRM_POLICY_FWD)
+		rc = 0;
+	else
+		rc = -EINVAL;
+
+	return rc;
+}
+/* Validate share, action, selector family and direction of a policy request; 0 or a negative errno. */
+static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
+{
+	switch (p->share) {
+	case XFRM_SHARE_ANY:
+	case XFRM_SHARE_SESSION:
+	case XFRM_SHARE_USER:
+	case XFRM_SHARE_UNIQUE:
+		break;
+
+	default:
+		return -EINVAL;
+	};
+
+	switch (p->action) {
+	case XFRM_POLICY_ALLOW:
+	case XFRM_POLICY_BLOCK:
+		break;
+
+	default:
+		return -EINVAL;
+	};
+
+	switch (p->sel.family) {
+	case AF_INET:
+		break;
+
+	case AF_INET6:	/* accepted only when IPv6 is built in or built as a module */
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		break;
+#else
+		return  -EAFNOSUPPORT;
+#endif
+
+	default:
+		return -EINVAL;
+	};
+
+	return verify_policy_dir(p->dir);	/* the direction is checked last */
+}
+/* Copy nr user templates into xp->xfrm_vec[] and set xp->xfrm_nr; nr is not range-checked here. */
+static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut,
+			   int nr)
+{
+	int idx;
+
+	xp->xfrm_nr = nr;
+	for (idx = 0; idx < nr; idx++) {
+		struct xfrm_user_tmpl *src = &ut[idx];
+		struct xfrm_tmpl *dst = &xp->xfrm_vec[idx];
+
+		memcpy(&dst->saddr, &src->saddr, sizeof(xfrm_address_t));
+		memcpy(&dst->id, &src->id, sizeof(struct xfrm_id));
+		dst->calgos = src->calgos;
+		dst->ealgos = src->ealgos;
+		dst->aalgos = src->aalgos;
+		dst->optional = src->optional;
+		dst->share = src->share;
+		dst->mode = src->mode;
+		dst->reqid = src->reqid;
+	}
+}
+/* Load the optional XFRMA_TMPL attribute into pol; -EINVAL if it holds over XFRM_MAX_DEPTH entries. */
+static int copy_from_user_tmpl(struct xfrm_policy *pol, struct rtattr **xfrma)
+{
+	struct rtattr *attr = xfrma[XFRMA_TMPL-1];
+	struct xfrm_user_tmpl *first;
+	int count;
+
+	if (attr == NULL) {
+		pol->xfrm_nr = 0;
+		return 0;
+	}
+
+	count = (attr->rta_len - sizeof(*attr)) / sizeof(*first);
+	if (count > XFRM_MAX_DEPTH)
+		return -EINVAL;
+
+	copy_templates(pol, RTA_DATA(attr), count);
+	return 0;
+}
+/* Load the policy fields of xp from the user request; the share mode is not stored (see XXX). */
+static void copy_from_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p)
+{
+	memcpy(&xp->lft, &p->lft, sizeof(xp->lft));
+	memcpy(&xp->selector, &p->sel, sizeof(xp->selector));
+	xp->family = p->sel.family;
+	xp->flags = p->flags;
+	xp->action = p->action;
+	xp->index = p->index;
+	xp->priority = p->priority;
+	/* XXX xp->share = p->share; */
+}
+/* Export xp into *p for direction dir; share is always reported as XFRM_SHARE_ANY. */
+static void copy_to_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p, int dir)
+{
+	p->dir = dir;
+	p->index = xp->index;
+	p->priority = xp->priority;
+	p->action = xp->action;
+	p->flags = xp->flags;
+	p->share = XFRM_SHARE_ANY; /* XXX xp->share */
+	memcpy(&p->curlft, &xp->curlft, sizeof(p->curlft));
+	memcpy(&p->lft, &xp->lft, sizeof(p->lft));
+	memcpy(&p->sel, &xp->selector, sizeof(p->sel));
+	p->sel.family = xp->family;	/* must follow the selector copy, which it overrides */
+}
+/* Allocate a policy and fill it from the request and its templates; NULL with *errp set on failure. */
+static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, struct rtattr **xfrma, int *errp)
+{
+	struct xfrm_policy *pol;
+	int rc;
+
+	pol = xfrm_policy_alloc(GFP_KERNEL);
+	if (pol == NULL) {
+		*errp = -ENOMEM;
+		return NULL;
+	}
+
+	copy_from_user_policy(pol, p);
+	rc = copy_from_user_tmpl(pol, xfrma);
+	if (rc == 0)
+		return pol;
+	/* Template copy failed: report the error and discard the policy. */
+	*errp = rc;
+	kfree(pol);
+	return NULL;
+}
+/* Netlink handler for new-policy and update-policy requests: validate, build and insert. */
+static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+{
+	struct xfrm_userpolicy_info *info = NLMSG_DATA(nlh);
+	struct xfrm_policy *pol;
+	int exclusive;
+	int rc;
+
+	rc = verify_newpolicy_info(info);
+	if (rc)
+		return rc;
+
+	pol = xfrm_policy_construct(info, (struct rtattr **) xfrma, &rc);
+	if (pol == NULL)
+		return rc;
+
+	/* Only XFRM_MSG_NEWPOLICY sets the excl argument of the insert. */
+	exclusive = (nlh->nlmsg_type == XFRM_MSG_NEWPOLICY);
+	rc = xfrm_policy_insert(info->dir, pol, exclusive);
+	if (rc != 0) {
+		kfree(pol);
+		return rc;
+	}
+
+	xfrm_pol_put(pol);
+	return 0;
+}
+/* Append xp's templates to skb as one XFRMA_TMPL attribute; 0 on success or none, -1 if RTA_PUT fails. */
+static int copy_to_user_tmpl(struct xfrm_policy *xp, struct sk_buff *skb)
+{
+	struct xfrm_user_tmpl vec[XFRM_MAX_DEPTH];
+	int i;
+
+	if (xp->xfrm_nr == 0)
+		return 0;
+	/* vec[] is on the stack: clear each entry so padding and unset fields never reach userspace. */
+	for (i = 0; i < xp->xfrm_nr; i++) {
+		struct xfrm_user_tmpl *up = &vec[i];
+		struct xfrm_tmpl *kp = &xp->xfrm_vec[i];
+		memset(up, 0, sizeof(*up));
+		memcpy(&up->id, &kp->id, sizeof(up->id));
+		up->family = xp->family;
+		memcpy(&up->saddr, &kp->saddr, sizeof(up->saddr));
+		up->reqid = kp->reqid;
+		up->mode = kp->mode;
+		up->share = kp->share;
+		up->optional = kp->optional;
+		up->aalgos = kp->aalgos;
+		up->ealgos = kp->ealgos;
+		up->calgos = kp->calgos;
+	}
+	RTA_PUT(skb, XFRMA_TMPL,
+		(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr),
+		vec);
+
+	return 0;
+
+rtattr_failure:	/* reached only from inside RTA_PUT */
+	return -1;
+}
+/* Append one XFRM_MSG_NEWPOLICY message for xp to sp->out_skb; 0 on success or skip, -1 on failure. */
+static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr)
+{
+	struct xfrm_dump_info *sp = ptr;
+	struct xfrm_userpolicy_info *p;
+	struct sk_buff *in_skb = sp->in_skb;
+	struct sk_buff *skb = sp->out_skb;
+	struct nlmsghdr *nlh;
+	unsigned char *b = skb->tail;	/* start of this message: used for its length and for rollback */
+
+	if (sp->this_idx < sp->start_idx)
+		goto out;	/* before the start index: count the entry, emit nothing */
+	/* NLMSG_PUT can also jump to nlmsg_failure from inside the macro. */
+	nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid,
+			sp->nlmsg_seq,
+			XFRM_MSG_NEWPOLICY, sizeof(*p));
+	p = NLMSG_DATA(nlh);
+	nlh->nlmsg_flags = 0;
+
+	copy_to_user_policy(xp, p, dir);
+	if (copy_to_user_tmpl(xp, skb) < 0)
+		goto nlmsg_failure;
+
+	nlh->nlmsg_len = skb->tail - b;
+out:
+	sp->this_idx++;
+	return 0;
+
+nlmsg_failure:
+	skb_trim(skb, b - skb->data);	/* discard the partially built message */
+	return -1;
+}
+/* Dump callback for policies: resume at cb->args[0], append to skb, store the new position back. */
+static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct xfrm_dump_info walk = {
+		.in_skb		= cb->skb,
+		.out_skb	= skb,
+		.nlmsg_seq	= cb->nlh->nlmsg_seq,
+		.start_idx	= cb->args[0],
+		.this_idx	= 0,
+	};
+
+	(void) xfrm_policy_walk(dump_one_policy, &walk);
+	cb->args[0] = walk.this_idx;
+	return skb->len;
+}
+/* Build a reply skb describing xp for the sender of in_skb; returns the skb or an ERR_PTR(), never NULL. */
+static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb,
+					  struct xfrm_policy *xp,
+					  int dir, u32 seq)
+{
+	struct xfrm_dump_info info;
+	struct sk_buff *skb;
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		return ERR_PTR(-ENOMEM);
+
+	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
+	info.in_skb = in_skb;
+	info.out_skb = skb;
+	info.nlmsg_seq = seq;
+	info.this_idx = info.start_idx = 0;
+	/* The caller tests the result with IS_ERR() only, so failure must not be NULL. */
+	if (dump_one_policy(xp, dir, 0, &info) < 0) {
+		kfree_skb(skb);
+		return ERR_PTR(-EMSGSIZE);
+	}
+
+	return skb;
+}
+/* Netlink handler shared by get-policy and delete-policy: find the policy by index or selector. */
+static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+{
+	struct xfrm_userpolicy_id *id = NLMSG_DATA(nlh);
+	const int delete = (nlh->nlmsg_type == XFRM_MSG_DELPOLICY);
+	struct sk_buff *reply;
+	struct xfrm_policy *pol;
+	int rc;
+
+	rc = verify_policy_dir(id->dir);
+	if (rc)
+		return rc;
+
+	/*
+	 * A non-zero index takes precedence over the selector.  The
+	 * delete flag is handed to the lookup routine itself.
+	 */
+	pol = id->index ? xfrm_policy_byid(id->dir, id->index, delete)
+			: xfrm_policy_bysel(id->dir, &id->sel, delete);
+	if (!pol)
+		return -ENOENT;
+
+	/* A delete sends no reply; rc is still 0 from the direction check. */
+	if (delete)
+		goto put;
+
+	reply = xfrm_policy_netlink(skb, pol, id->dir, nlh->nlmsg_seq);
+	if (IS_ERR(reply))
+		rc = PTR_ERR(reply);
+	else
+		rc = netlink_unicast(xfrm_nl, reply,
+				     NETLINK_CB(skb).pid,
+				     MSG_DONTWAIT);
+
+put:
+	xfrm_pol_put(pol);
+
+	return rc;
+}
+/* Minimum nlmsg_len accepted for each message type, indexed by nlmsg_type - XFRM_MSG_BASE. */
+static const int xfrm_msg_min[(XFRM_MSG_MAX + 1 - XFRM_MSG_BASE)] = {
+	NLMSG_LENGTH(sizeof(struct xfrm_usersa_info)),	/* NEW SA */
+	NLMSG_LENGTH(sizeof(struct xfrm_usersa_id)),	/* DEL SA */
+	NLMSG_LENGTH(sizeof(struct xfrm_usersa_id)),	/* GET SA */
+	NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info)),/* NEW POLICY */
+	NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id)),  /* DEL POLICY */
+	NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id)),  /* GET POLICY */
+	NLMSG_LENGTH(sizeof(struct xfrm_userspi_info)),	/* ALLOC SPI */
+	NLMSG_LENGTH(sizeof(struct xfrm_user_acquire)),	/* ACQUIRE */
+	NLMSG_LENGTH(sizeof(struct xfrm_user_expire)),	/* EXPIRE */
+	NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info)),/* UPD POLICY */
+	NLMSG_LENGTH(sizeof(struct xfrm_usersa_info)),	/* UPD SA */
+};
+
+static struct xfrm_link {
+	int (*doit)(struct sk_buff *, struct nlmsghdr *, void **);
+	int (*dump)(struct sk_buff *, struct netlink_callback *);
+} xfrm_dispatch[] = {
+	{	.doit	=	xfrm_add_sa, 		},
+	{	.doit	=	xfrm_del_sa, 		},
+	{
+		.doit	=	xfrm_get_sa,
+		.dump	=	xfrm_dump_sa,
+	},
+	{	.doit	=	xfrm_add_policy 	},
+	{	.doit	=	xfrm_get_policy 	},
+	{
+		.doit	=	xfrm_get_policy,
+		.dump	=	xfrm_dump_policy,
+	},
+	{	.doit	=	xfrm_alloc_userspi	},
+	{},
+	{},
+	{	.doit	=	xfrm_add_policy 	},
+	{	.doit	=	xfrm_add_sa, 		},
+};
+/* Completion callback handed to netlink_dump_start(); does nothing and returns 0. */
+static int xfrm_done(struct netlink_callback *cb)
+{
+	return 0;
+}
+/* Check and dispatch one request; a negative return with *errp == 0 means a dump was started. */
+static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
+{
+	struct rtattr *xfrma[XFRMA_MAX];
+	struct xfrm_link *link;
+	int type, min_len;
+
+	if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
+		return 0;	/* not a request: ignored, *errp untouched */
+
+	type = nlh->nlmsg_type;
+
+	/* A control message: ignore them */
+	if (type < XFRM_MSG_BASE)
+		return 0;
+
+	/* Unknown message: reply with EINVAL */
+	if (type > XFRM_MSG_MAX)
+		goto err_einval;
+
+	type -= XFRM_MSG_BASE;
+	link = &xfrm_dispatch[type];
+
+	/* All operations require privileges, even GET */
+	if (!cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN)) {
+		*errp = -EPERM;
+		return -1;
+	}
+	/* Indexes 2 and 5 are the two xfrm_dispatch[] slots that have a dump handler. */
+	if ((type == 2 || type == 5) && (nlh->nlmsg_flags & NLM_F_DUMP)) {
+		u32 rlen;
+
+		if (link->dump == NULL)
+			goto err_einval;
+
+		if ((*errp = netlink_dump_start(xfrm_nl, skb, nlh,
+						link->dump,
+						xfrm_done)) != 0) {
+			return -1;
+		}
+		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
+		if (rlen > skb->len)
+			rlen = skb->len;
+		skb_pull(skb, rlen);
+		return -1;	/* *errp is 0 here: the caller stops without acking */
+	}
+
+	memset(xfrma, 0, sizeof(xfrma));
+
+	if (nlh->nlmsg_len < (min_len = xfrm_msg_min[type]))
+		goto err_einval;
+
+	if (nlh->nlmsg_len > min_len) {
+		int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
+		struct rtattr *attr = (void *) nlh + NLMSG_ALIGN(min_len);
+
+		while (RTA_OK(attr, attrlen)) {
+			unsigned short flavor = attr->rta_type;
+			if (flavor) {	/* type 0 attributes are skipped */
+				if (flavor > XFRMA_MAX)
+					goto err_einval;
+				xfrma[flavor - 1] = attr;	/* a repeated type keeps the last one */
+			}
+			attr = RTA_NEXT(attr, attrlen);
+		}
+	}
+
+	if (link->doit == NULL)
+		goto err_einval;
+	*errp = link->doit(skb, nlh, (void **) &xfrma);
+
+	return *errp;
+
+err_einval:
+	*errp = -EINVAL;
+	return -1;
+}
+/* Process each netlink message in skb, sending acks; returns -1 if a handler asked to stop, else 0. */
+static int xfrm_user_rcv_skb(struct sk_buff *skb)
+{
+	int err;
+	struct nlmsghdr *nlh;
+
+	while (skb->len >= NLMSG_SPACE(0)) {
+		u32 rlen;
+
+		nlh = (struct nlmsghdr *) skb->data;
+		if (nlh->nlmsg_len < sizeof(*nlh) ||
+		    skb->len < nlh->nlmsg_len)
+			return 0;	/* malformed or truncated header: stop parsing this skb */
+		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
+		if (rlen > skb->len)
+			rlen = skb->len;
+		if (xfrm_user_rcv_msg(skb, nlh, &err) < 0) {
+			if (err == 0)
+				return -1;	/* negative result with no error: stop without acking */
+			netlink_ack(skb, nlh, err);
+		} else if (nlh->nlmsg_flags & NLM_F_ACK)
+			netlink_ack(skb, nlh, 0);
+		skb_pull(skb, rlen);
+	}
+
+	return 0;
+}
+/* Input callback of the xfrm netlink socket: drain sk->receive_queue while holding xfrm_cfg_sem. */
+static void xfrm_netlink_rcv(struct sock *sk, int len)
+{
+	do {
+		struct sk_buff *skb;
+
+		down(&xfrm_cfg_sem);
+
+		while ((skb = skb_dequeue(&sk->receive_queue)) != NULL) {
+			if (xfrm_user_rcv_skb(skb)) {
+				if (skb->len)	/* unread data left: put the skb back at the head */
+					skb_queue_head(&sk->receive_queue, skb);
+				else
+					kfree_skb(skb);
+				break;
+			}
+			kfree_skb(skb);
+		}
+
+		up(&xfrm_cfg_sem);
+
+	} while (xfrm_nl && xfrm_nl->receive_queue.qlen);	/* loop again while messages are queued */
+}
+/* Write an XFRM_MSG_EXPIRE message for x into skb; returns skb->len, or -1 if NLMSG_PUT fails. */
+static int build_expire(struct sk_buff *skb, struct xfrm_state *x, int hard)
+{
+	struct xfrm_user_expire *ue;
+	struct nlmsghdr *nlh;
+	unsigned char *b = skb->tail;	/* start of this message: used for its length and for rollback */
+
+	nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_EXPIRE,
+			sizeof(*ue));
+	ue = NLMSG_DATA(nlh);
+	nlh->nlmsg_flags = 0;
+
+	copy_to_user_state(x, &ue->state);
+	ue->hard = (hard != 0) ? 1 : 0;	/* normalised to 0 or 1 */
+
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:	/* reached only from inside NLMSG_PUT */
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+/* Key-manager notify hook: broadcast an XFRM_MSG_EXPIRE for x to the XFRMGRP_EXPIRE group. */
+static int xfrm_send_state_notify(struct xfrm_state *x, int hard)
+{
+	struct sk_buff *skb;
+
+	/* NLMSG_SPACE() covers the header and alignment padding; no hard-coded 16. */
+	skb = alloc_skb(NLMSG_SPACE(sizeof(struct xfrm_user_expire)), GFP_ATOMIC);
+	if (skb == NULL)
+		return -ENOMEM;
+	/* The skb was sized for exactly this message, so a build failure is a bug. */
+	if (build_expire(skb, x, hard) < 0)
+		BUG();
+
+	NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE;
+	return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC);
+}
+/* Write an XFRM_MSG_ACQUIRE message for x/xt/xp into skb; returns skb->len, or -1 on failure. */
+static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
+			 struct xfrm_tmpl *xt, struct xfrm_policy *xp,
+			 int dir)
+{
+	struct xfrm_user_acquire *ua;
+	struct nlmsghdr *nlh;
+	unsigned char *b = skb->tail;	/* start of this message: used for its length and for rollback */
+	__u32 seq = xfrm_get_acqseq();
+
+	nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_ACQUIRE,
+			sizeof(*ua));
+	ua = NLMSG_DATA(nlh);
+	nlh->nlmsg_flags = 0;
+
+	memcpy(&ua->id, &x->id, sizeof(ua->id));
+	memcpy(&ua->saddr, &x->props.saddr, sizeof(ua->saddr));
+	memcpy(&ua->sel, &x->sel, sizeof(ua->sel));
+	copy_to_user_policy(xp, &ua->policy, dir);
+	ua->aalgos = xt->aalgos;
+	ua->ealgos = xt->ealgos;
+	ua->calgos = xt->calgos;
+	ua->seq = x->km.seq = seq;	/* the same sequence number is stored in the state */
+
+	if (copy_to_user_tmpl(xp, skb) < 0)
+		goto nlmsg_failure;
+
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:	/* also reached from inside NLMSG_PUT */
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+/* Key-manager acquire hook: broadcast an XFRM_MSG_ACQUIRE for x to the XFRMGRP_ACQUIRE group. */
+static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,
+			     struct xfrm_policy *xp, int dir)
+{
+	/* Acquire header plus one attribute holding all of the policy's templates. */
+	size_t need = NLMSG_SPACE(sizeof(struct xfrm_user_acquire)) +
+		      RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
+	struct sk_buff *msg = alloc_skb(need, GFP_ATOMIC);
+
+	if (!msg)
+		return -ENOMEM;
+
+	/* The skb was sized for exactly this message, so a build failure is a bug. */
+	if (build_acquire(msg, x, xt, xp, dir) < 0)
+		BUG();
+
+	NETLINK_CB(msg).dst_groups = XFRMGRP_ACQUIRE;
+
+	return netlink_broadcast(xfrm_nl, msg, 0, XFRMGRP_ACQUIRE, GFP_ATOMIC);
+}
+
+/* User gives us xfrm_userpolicy_info followed by an array of 0 or more
+ * templates.  On success *dir receives the policy direction; on failure
+ * NULL is returned and *dir holds a negative errno instead. */
+struct xfrm_policy *xfrm_compile_policy(u16 family, int opt,
+                                        u8 *data, int len, int *dir)
+{
+	struct xfrm_userpolicy_info *p = (struct xfrm_userpolicy_info *)data;
+	struct xfrm_user_tmpl *ut = (struct xfrm_user_tmpl *) (p + 1);
+	struct xfrm_policy *xp;
+	int nr;
+
+	switch (family) {
+	case AF_INET:
+		if (opt != IP_XFRM_POLICY) {
+			*dir = -EOPNOTSUPP;
+			return NULL;
+		}
+		break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case AF_INET6:
+		if (opt != IPV6_XFRM_POLICY) {
+			*dir = -EOPNOTSUPP;
+			return NULL;
+		}
+		break;
+#endif
+	default:
+		*dir = -EINVAL;
+		return NULL;
+	}
+
+	*dir = -EINVAL;
+
+	if (len < 0 || (size_t) len < sizeof(*p) ||	/* a negative len must not pass as a huge size */
+	    verify_newpolicy_info(p))
+		return NULL;
+
+	nr = ((len - sizeof(*p)) / sizeof(*ut));	/* whole templates that follow the header */
+	if (nr > XFRM_MAX_DEPTH)
+		return NULL;
+
+	xp = xfrm_policy_alloc(GFP_KERNEL);
+	if (xp == NULL) {
+		*dir = -ENOBUFS;
+		return NULL;
+	}
+
+	copy_from_user_policy(xp, p);
+	copy_templates(xp, ut, nr);
+
+	*dir = p->dir;
+
+	return xp;
+}
+/* Write an XFRM_MSG_POLEXPIRE message for xp into skb; returns skb->len, or -1 on failure. */
+static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp,
+			   int dir, int hard)
+{
+	struct xfrm_user_polexpire *upe;
+	struct nlmsghdr *nlh;
+	unsigned char *b = skb->tail;	/* start of this message: used for its length and for rollback */
+
+	nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe));
+	upe = NLMSG_DATA(nlh);
+	nlh->nlmsg_flags = 0;
+
+	copy_to_user_policy(xp, &upe->pol, dir);
+	if (copy_to_user_tmpl(xp, skb) < 0)
+		goto nlmsg_failure;
+	upe->hard = !!hard;	/* normalised to 0 or 1 */
+
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:	/* also reached from inside NLMSG_PUT */
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+/* Key-manager policy hook: broadcast an XFRM_MSG_POLEXPIRE for xp to the XFRMGRP_EXPIRE group. */
+static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, int hard)
+{
+	/* Expire header plus one attribute holding all of the policy's templates. */
+	size_t need = NLMSG_SPACE(sizeof(struct xfrm_user_polexpire)) +
+		      RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
+	struct sk_buff *msg = alloc_skb(need, GFP_ATOMIC);
+
+	if (!msg)
+		return -ENOMEM;
+
+	/* The skb was sized for exactly this message, so a build failure is a bug. */
+	if (build_polexpire(msg, xp, dir, hard) < 0)
+		BUG();
+
+	NETLINK_CB(msg).dst_groups = XFRMGRP_EXPIRE;
+
+	return netlink_broadcast(xfrm_nl, msg, 0, XFRMGRP_EXPIRE, GFP_ATOMIC);
+}
+/* Key-manager operations of this module; registered in xfrm_user_init(), removed in xfrm_user_exit(). */
+static struct xfrm_mgr netlink_mgr = {
+	.id		= "netlink",
+	.notify		= xfrm_send_state_notify,
+	.acquire	= xfrm_send_acquire,
+	.compile_policy	= xfrm_compile_policy,
+	.notify_policy	= xfrm_send_policy_notify,
+};
+/* Module init: create the NETLINK_XFRM kernel socket (panics on failure) and register netlink_mgr. */
+static int __init xfrm_user_init(void)
+{
+	printk(KERN_INFO "Initializing IPsec netlink socket\n");
+
+	xfrm_nl = netlink_kernel_create(NETLINK_XFRM, xfrm_netlink_rcv);
+	if (xfrm_nl == NULL)
+		panic("xfrm_user_init: cannot initialize xfrm_nl\n");
+
+	/* The socket exists from here on, so the manager callbacks can use xfrm_nl. */
+	xfrm_register_km(&netlink_mgr);
+
+	return 0;
+}
+/* Module exit: unregister netlink_mgr first, then release the kernel netlink socket. */
+static void __exit xfrm_user_exit(void)
+{
+	xfrm_unregister_km(&netlink_mgr);
+	sock_release(xfrm_nl->socket);
+}
+
+module_init(xfrm_user_init);
+module_exit(xfrm_user_exit);
+MODULE_LICENSE("GPL");
Index: scripts/tkgen.c
===================================================================
RCS file: /home/gondolin/herbert/src/CVS/debian/kernel-source-2.4/scripts/tkgen.c,v
retrieving revision 1.1.1.15
retrieving revision 1.1.1.15.2.1
diff -u -r1.1.1.15 -r1.1.1.15.2.1
--- a/scripts/tkgen.c	3 Aug 2002 00:39:46 -0000	1.1.1.15
+++ b/scripts/tkgen.c	16 Apr 2004 13:16:27 -0000	1.1.1.15.2.1
@@ -546,7 +546,7 @@
 	    printf( "set %s [expr $%s&15]",
 		vartable[cfg->nameindex].name, vartable[cfg->nameindex].name );
 	    printf( "} else {");
-	    printf( "set %s [expr $%s|16]}\n",
+	    printf( "set %s [expr $%s]}\n",
 		vartable[cfg->nameindex].name, vartable[cfg->nameindex].name );
 	    break;
 
@@ -612,7 +612,7 @@
 	/*
 	 * Clear the disable bit to enable the correct radiobutton.
 	 */
-	    printf( "set %s [expr $%s|16]}\n",
+	    printf( "set %s [expr $%s]}\n",
 		vartable[cfg->nameindex].name, vartable[cfg->nameindex].name );
 	    break;