struct net_device *dev = dst->dev;
        unsigned int hh_len = LL_RESERVED_SPACE(dev);
        struct neighbour *neigh;
-       u32 nexthop;
+       bool is_v6gw = false;
        int ret = -EINVAL;
 
        nf_reset(skb);
 
        rcu_read_lock_bh();
 
-       nexthop = (__force u32)rt_nexthop(rt, ip_hdr(skb)->daddr);
-       neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
-       if (unlikely(!neigh))
-               neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
+       neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
        if (!IS_ERR(neigh)) {
                sock_confirm_neigh(skb, neigh);
-               ret = neigh_output(neigh, skb, false);
+               /* if crossing protocols, can not use the cached header */
+               ret = neigh_output(neigh, skb, is_v6gw);
                rcu_read_unlock_bh();
                return ret;
        }
 
 #include <net/flow.h>
 #include <net/inet_sock.h>
 #include <net/ip_fib.h>
+#include <net/arp.h>
+#include <net/ndisc.h>
 #include <linux/in_route.h>
 #include <linux/rtnetlink.h>
 #include <linux/rcupdate.h>
        return hoplimit;
 }
 
+/* Resolve the IPv4 neighbour entry for @daddr on @dev.
+ *
+ * The noref lookup is tried first; only when it misses is an entry
+ * created in arp_tbl.  The result of whichever call produced it is handed
+ * back unchanged.  Callers in this patch invoke the helper under
+ * rcu_read_lock_bh() and test the result with IS_ERR().
+ */
+static inline struct neighbour *ip_neigh_gw4(struct net_device *dev,
+                                            __be32 daddr)
+{
+       struct neighbour *n = __ipv4_neigh_lookup_noref(dev, daddr);
+
+       if (likely(n))
+               return n;
+
+       return __neigh_create(&arp_tbl, &daddr, dev, false);
+}
+
+/* Pick the neighbour entry used to transmit @skb over @rt.
+ *
+ * An IPv4 gateway is resolved through ip_neigh_gw4() and an IPv6 gateway
+ * through ip_neigh_gw6(); in the IPv6 case *@is_v6gw is set to true.  For
+ * any other gateway family the IPv4 destination address of @skb is
+ * resolved instead.  *@is_v6gw is never cleared here, so the caller has
+ * to initialise it to false.
+ */
+static inline struct neighbour *ip_neigh_for_gw(struct rtable *rt,
+                                               struct sk_buff *skb,
+                                               bool *is_v6gw)
+{
+       struct net_device *dev = rt->dst.dev;
+       struct neighbour *neigh;
+
+       if (likely(rt->rt_gw_family == AF_INET))
+               return ip_neigh_gw4(dev, rt->rt_gw4);
+
+       if (rt->rt_gw_family != AF_INET6)
+               return ip_neigh_gw4(dev, ip_hdr(skb)->daddr);
+
+       neigh = ip_neigh_gw6(dev, &rt->rt_gw6);
+       *is_v6gw = true;
+       return neigh;
+}
+
 #endif /* _ROUTE_H */
 
        struct net_device *dev = dst->dev;
        unsigned int hh_len = LL_RESERVED_SPACE(dev);
        struct neighbour *neigh;
-       u32 nexthop;
+       bool is_v6gw = false;
 
        if (rt->rt_type == RTN_MULTICAST) {
                IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTMCAST, skb->len);
        }
 
        rcu_read_lock_bh();
-       nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr);
-       neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
-       if (unlikely(!neigh))
-               neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
+       neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
        if (!IS_ERR(neigh)) {
                int res;
 
                sock_confirm_neigh(skb, neigh);
-               res = neigh_output(neigh, skb, false);
-
+               /* if crossing protocols, can not use the cached header */
+               res = neigh_output(neigh, skb, is_v6gw);
                rcu_read_unlock_bh();
                return res;
        }
 
 {
        const struct rtable *rt = container_of(dst, struct rtable, dst);
        struct net_device *dev = dst->dev;
-       const __be32 *pkey = daddr;
        struct neighbour *n;
 
-       if (rt->rt_gw_family == AF_INET)
-               pkey = (const __be32 *) &rt->rt_gw4;
-       else if (skb)
-               pkey = &ip_hdr(skb)->daddr;
-
-       n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
-       if (n)
-               return n;
-       return neigh_create(&arp_tbl, pkey, dev);
+       rcu_read_lock_bh();
+
+       if (likely(rt->rt_gw_family == AF_INET)) {
+               n = ip_neigh_gw4(dev, rt->rt_gw4);
+       } else if (rt->rt_gw_family == AF_INET6) {
+               n = ip_neigh_gw6(dev, &rt->rt_gw6);
+        } else {
+               __be32 pkey;
+
+               pkey = skb ? ip_hdr(skb)->daddr : *((__be32 *) daddr);
+               n = ip_neigh_gw4(dev, pkey);
+       }
+
+       if (n && !refcount_inc_not_zero(&n->refcnt))
+               n = NULL;
+
+       rcu_read_unlock_bh();
+
+       return n;
 }
 
 static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr)