selftests/bpf: add ipip6 and ip6ip decap to test_tc_tunnel
author Ziyang Xuan <william.xuanziyang@huawei.com>
Fri, 13 Jan 2023 09:25:10 +0000 (17:25 +0800)
committer Martin KaFai Lau <martin.lau@kernel.org>
Sun, 15 Jan 2023 20:56:17 +0000 (12:56 -0800)
Add ipip6 and ip6ip decap testcases. Verify that bpf_skb_adjust_room()
correctly decapsulates ipip6 and ip6ip tunnel packets.

Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Link: https://lore.kernel.org/r/dfd2d8cfdf9111bd129170d4345296f53bee6a67.1673574419.git.william.xuanziyang@huawei.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
tools/testing/selftests/bpf/progs/test_tc_tunnel.c
tools/testing/selftests/bpf/test_tc_tunnel.sh

index a0e7762b1e5ae48610050ef99cf4109797622b3e..e6e678aa98746c1d26de78343ed8a94485d6bb63 100644 (file)
@@ -38,6 +38,10 @@ static const int cfg_udp_src = 20000;
 #define        VXLAN_FLAGS     0x8
 #define        VXLAN_VNI       1
 
+#ifndef NEXTHDR_DEST
+#define NEXTHDR_DEST   60
+#endif
+
 /* MPLS label 1000 with S bit (last label) set and ttl of 255. */
 static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
                                                     MPLS_LS_S_MASK | 0xff);
@@ -363,6 +367,61 @@ static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
        return TC_ACT_OK;
 }
 
+static int encap_ipv6_ipip6(struct __sk_buff *skb)
+{
+       struct iphdr iph_inner;
+       struct v6hdr h_outer;
+       struct tcphdr tcph;
+       struct ethhdr eth;
+       __u64 flags;
+       int olen;
+
+       if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
+                              sizeof(iph_inner)) < 0)
+               return TC_ACT_OK;
+
+       /* filter only packets we want: dest port (read as TCP) must be cfg_port */
+       if (bpf_skb_load_bytes(skb, ETH_HLEN + (iph_inner.ihl << 2),
+                              &tcph, sizeof(tcph)) < 0)
+               return TC_ACT_OK;
+
+       if (tcph.dest != __bpf_constant_htons(cfg_port))
+               return TC_ACT_OK;
+
+       olen = sizeof(h_outer.ip);
+
+       flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;
+
+       /* add olen bytes of room between mac and network header */
+       if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
+               return TC_ACT_SHOT;
+
+       /* prepare new outer network header: IPv6 carrying the IPv4 packet */
+       memset(&h_outer.ip, 0, sizeof(h_outer.ip));
+       h_outer.ip.version = 6;
+       h_outer.ip.hop_limit = iph_inner.ttl;
+       h_outer.ip.saddr.s6_addr[1] = 0xfd;
+       h_outer.ip.saddr.s6_addr[15] = 1;
+       h_outer.ip.daddr.s6_addr[1] = 0xfd;
+       h_outer.ip.daddr.s6_addr[15] = 2;
+       h_outer.ip.payload_len = iph_inner.tot_len;
+       h_outer.ip.nexthdr = IPPROTO_IPIP;
+
+       /* store new outer network header right after the ethernet header */
+       if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
+                               BPF_F_INVALIDATE_HASH) < 0)
+               return TC_ACT_SHOT;
+
+       /* update eth->h_proto: the frame now carries IPv6 */
+       if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)) < 0)
+               return TC_ACT_SHOT;
+       eth.h_proto = bpf_htons(ETH_P_IPV6);
+       if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0) < 0)
+               return TC_ACT_SHOT;
+
+       return TC_ACT_OK;
+}
+
 static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
                                      __u16 l2_proto)
 {
@@ -461,6 +520,15 @@ int __encap_ip6tnl_none(struct __sk_buff *skb)
                return TC_ACT_OK;
 }
 
+SEC("encap_ipip6_none")
+int __encap_ipip6_none(struct __sk_buff *skb)
+{
+       if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
+               return encap_ipv6_ipip6(skb);
+       else
+               return TC_ACT_OK; /* non-IPv4: nothing to encapsulate */
+}
+
 SEC("encap_ip6gre_none")
 int __encap_ip6gre_none(struct __sk_buff *skb)
 {
@@ -528,13 +596,33 @@ int __encap_ip6vxlan_eth(struct __sk_buff *skb)
 
 static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
 {
+       __u64 flags = BPF_F_ADJ_ROOM_FIXED_GSO;
+       struct ipv6_opt_hdr ip6_opt_hdr;
        struct gre_hdr greh;
        struct udphdr udph;
        int olen = len;
 
        switch (proto) {
        case IPPROTO_IPIP:
+               flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
+               break;
        case IPPROTO_IPV6:
+               flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
+               break;
+       case NEXTHDR_DEST:
+               if (bpf_skb_load_bytes(skb, off + len, &ip6_opt_hdr,
+                                      sizeof(ip6_opt_hdr)) < 0)
+                       return TC_ACT_OK;
+               switch (ip6_opt_hdr.nexthdr) {
+               case IPPROTO_IPIP:
+                       flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
+                       break;
+               case IPPROTO_IPV6:
+                       flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
+                       break;
+               default:
+                       return TC_ACT_OK;
+               }
                break;
        case IPPROTO_GRE:
                olen += sizeof(struct gre_hdr);
@@ -569,8 +657,7 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
                return TC_ACT_OK;
        }
 
-       if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC,
-                               BPF_F_ADJ_ROOM_FIXED_GSO))
+       if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC, flags))
                return TC_ACT_SHOT;
 
        return TC_ACT_OK;
index 334bdfeab9403886d0ccb55ca6e6faeb89900fc5..910044f08908a77febbbb225b6271bde8b6814dd 100755 (executable)
@@ -100,6 +100,9 @@ if [[ "$#" -eq "0" ]]; then
        echo "ipip"
        $0 ipv4 ipip none 100
 
+       echo "ipip6"
+       $0 ipv4 ipip6 none 100
+
        echo "ip6ip6"
        $0 ipv6 ip6tnl none 100
 
@@ -224,6 +227,9 @@ elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then
 elif [[ "$tuntype" =~ "vxlan" && "$mac" == "eth" ]]; then
        ttype="vxlan"
        targs="id 1 dstport 8472 udp6zerocsumrx"
+elif [[ "$tuntype" == "ipip6" ]]; then
+       ttype="ip6tnl"
+       targs=""
 else
        ttype=$tuntype
        targs=""
@@ -233,6 +239,9 @@ fi
 if [[ "${tuntype}" == "sit" ]]; then
        link_addr1="${ns1_v4}"
        link_addr2="${ns2_v4}"
+elif [[ "${tuntype}" == "ipip6" ]]; then
+       link_addr1="${ns1_v6}"
+       link_addr2="${ns2_v6}"
 else
        link_addr1="${addr1}"
        link_addr2="${addr2}"
@@ -287,12 +296,6 @@ else
        server_listen
 fi
 
-# bpf_skb_net_shrink does not take tunnel flags yet, cannot update L3.
-if [[ "${tuntype}" == "sit" ]]; then
-       echo OK
-       exit 0
-fi
-
 # serverside, use BPF for decap
 ip netns exec "${ns2}" ip link del dev testtun0
 ip netns exec "${ns2}" tc qdisc add dev veth2 clsact