commit 
11052589cf5c0bab3b4884d423d5f60c38fcf25d upstream.
Commit 
e21145a9871a ("ipv4: namespacify ip_early_demux sysctl knob") made
it possible to enable/disable early_demux on a per-netns basis.  Then, we
introduced two knobs, tcp_early_demux and udp_early_demux, to switch it for
TCP/UDP in commit 
dddb64bcb346 ("net: Add sysctl to toggle early demux for
tcp and udp").  However, the .proc_handler() was wrong and actually
disabled us from changing the behaviour in each netns.
We can execute early_demux if net.ipv4.ip_early_demux is on and each proto
.early_demux() handler is not NULL.  When we toggle (tcp|udp)_early_demux,
the change itself is saved in each netns variable, but the .early_demux()
handler is a global variable, so the handler is switched based on the
init_net's sysctl variable.  Thus, netns (tcp|udp)_early_demux knobs have
nothing to do with the logic.  Whether we CAN execute proto .early_demux()
is always decided by init_net's sysctl knob, and whether we DO it or not is
by each netns ip_early_demux knob.
This patch namespacifies (tcp|udp)_early_demux again.  For now, the users
of the .early_demux() handler are TCP and UDP only, and they are called
directly to avoid retpoline.  So, we can remove the .early_demux() handler
from inet6?_protos and need not dereference them in ip6?_rcv_finish_core().
If another proto needs .early_demux(), we can restore it at that time.
Fixes: dddb64bcb346 ("net: Add sysctl to toggle early demux for tcp and udp")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://lore.kernel.org/r/20220713175207.7727-1-kuniyu@amazon.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 
 /* This is used to register protocols. */
 struct net_protocol {
-       int                     (*early_demux)(struct sk_buff *skb);
-       int                     (*early_demux_handler)(struct sk_buff *skb);
        int                     (*handler)(struct sk_buff *skb);
 
        /* This returns an error if we weren't able to handle the error. */
 
 #if IS_ENABLED(CONFIG_IPV6)
 struct inet6_protocol {
-       void    (*early_demux)(struct sk_buff *skb);
-       void    (*early_demux_handler)(struct sk_buff *skb);
        int     (*handler)(struct sk_buff *skb);
 
        /* This returns an error if we weren't able to handle the error. */
 
 
 INDIRECT_CALLABLE_DECLARE(void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb));
 INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *skb));
-INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *skb));
+void tcp_v6_early_demux(struct sk_buff *skb);
 
 #endif
 
 
 INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *,
                                                           struct sk_buff *));
 INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int));
-INDIRECT_CALLABLE_DECLARE(void udp_v6_early_demux(struct sk_buff *));
+void udp_v6_early_demux(struct sk_buff *skb);
 INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *));
 
 struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
 
 };
 #endif
 
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct net_protocol tcp_protocol = {
-       .early_demux    =       tcp_v4_early_demux,
-       .early_demux_handler =  tcp_v4_early_demux,
+static const struct net_protocol tcp_protocol = {
        .handler        =       tcp_v4_rcv,
        .err_handler    =       tcp_v4_err,
        .no_policy      =       1,
        .icmp_strict_tag_validation = 1,
 };
 
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct net_protocol udp_protocol = {
-       .early_demux =  udp_v4_early_demux,
-       .early_demux_handler =  udp_v4_early_demux,
+static const struct net_protocol udp_protocol = {
        .handler =      udp_rcv,
        .err_handler =  udp_err,
        .no_policy =    1,
 
               ip_hdr(hint)->tos == iph->tos;
 }
 
-INDIRECT_CALLABLE_DECLARE(int udp_v4_early_demux(struct sk_buff *));
-INDIRECT_CALLABLE_DECLARE(int tcp_v4_early_demux(struct sk_buff *));
+int tcp_v4_early_demux(struct sk_buff *skb);
+int udp_v4_early_demux(struct sk_buff *skb);
 static int ip_rcv_finish_core(struct net *net, struct sock *sk,
                              struct sk_buff *skb, struct net_device *dev,
                              const struct sk_buff *hint)
 {
        const struct iphdr *iph = ip_hdr(skb);
-       int (*edemux)(struct sk_buff *skb);
        int err, drop_reason;
        struct rtable *rt;
 
                        goto drop_error;
        }
 
-       if (net->ipv4.sysctl_ip_early_demux &&
+       if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) &&
            !skb_dst(skb) &&
            !skb->sk &&
            !ip_is_fragment(iph)) {
-               const struct net_protocol *ipprot;
-               int protocol = iph->protocol;
-
-               ipprot = rcu_dereference(inet_protos[protocol]);
-               if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
-                       err = INDIRECT_CALL_2(edemux, tcp_v4_early_demux,
-                                             udp_v4_early_demux, skb);
-                       if (unlikely(err))
-                               goto drop_error;
-                       /* must reload iph, skb->head might have changed */
-                       iph = ip_hdr(skb);
+               switch (iph->protocol) {
+               case IPPROTO_TCP:
+                       if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux)) {
+                               tcp_v4_early_demux(skb);
+
+                               /* must reload iph, skb->head might have changed */
+                               iph = ip_hdr(skb);
+                       }
+                       break;
+               case IPPROTO_UDP:
+                       if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) {
+                               err = udp_v4_early_demux(skb);
+                               if (unlikely(err))
+                                       goto drop_error;
+
+                               /* must reload iph, skb->head might have changed */
+                               iph = ip_hdr(skb);
+                       }
+                       break;
                }
        }
 
 
        return ret;
 }
 
-static void proc_configure_early_demux(int enabled, int protocol)
-{
-       struct net_protocol *ipprot;
-#if IS_ENABLED(CONFIG_IPV6)
-       struct inet6_protocol *ip6prot;
-#endif
-
-       rcu_read_lock();
-
-       ipprot = rcu_dereference(inet_protos[protocol]);
-       if (ipprot)
-               ipprot->early_demux = enabled ? ipprot->early_demux_handler :
-                                               NULL;
-
-#if IS_ENABLED(CONFIG_IPV6)
-       ip6prot = rcu_dereference(inet6_protos[protocol]);
-       if (ip6prot)
-               ip6prot->early_demux = enabled ? ip6prot->early_demux_handler :
-                                                NULL;
-#endif
-       rcu_read_unlock();
-}
-
-static int proc_tcp_early_demux(struct ctl_table *table, int write,
-                               void *buffer, size_t *lenp, loff_t *ppos)
-{
-       int ret = 0;
-
-       ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos);
-
-       if (write && !ret) {
-               int enabled = init_net.ipv4.sysctl_tcp_early_demux;
-
-               proc_configure_early_demux(enabled, IPPROTO_TCP);
-       }
-
-       return ret;
-}
-
-static int proc_udp_early_demux(struct ctl_table *table, int write,
-                               void *buffer, size_t *lenp, loff_t *ppos)
-{
-       int ret = 0;
-
-       ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos);
-
-       if (write && !ret) {
-               int enabled = init_net.ipv4.sysctl_udp_early_demux;
-
-               proc_configure_early_demux(enabled, IPPROTO_UDP);
-       }
-
-       return ret;
-}
-
 static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
                                             int write, void *buffer,
                                             size_t *lenp, loff_t *ppos)
                .data           = &init_net.ipv4.sysctl_udp_early_demux,
                .maxlen         = sizeof(u8),
                .mode           = 0644,
-               .proc_handler   = proc_udp_early_demux
+               .proc_handler   = proc_dou8vec_minmax,
        },
        {
                .procname       = "tcp_early_demux",
                .data           = &init_net.ipv4.sysctl_tcp_early_demux,
                .maxlen         = sizeof(u8),
                .mode           = 0644,
-               .proc_handler   = proc_tcp_early_demux
+               .proc_handler   = proc_dou8vec_minmax,
        },
        {
                .procname       = "nexthop_compat_mode",
 
 #include <net/inet_ecn.h>
 #include <net/dst_metadata.h>
 
-INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *));
 static void ip6_rcv_finish_core(struct net *net, struct sock *sk,
                                struct sk_buff *skb)
 {
-       void (*edemux)(struct sk_buff *skb);
-
-       if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
-               const struct inet6_protocol *ipprot;
-
-               ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
-               if (ipprot && (edemux = READ_ONCE(ipprot->early_demux)))
-                       INDIRECT_CALL_2(edemux, tcp_v6_early_demux,
-                                       udp_v6_early_demux, skb);
+       if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) &&
+           !skb_dst(skb) && !skb->sk) {
+               switch (ipv6_hdr(skb)->nexthdr) {
+               case IPPROTO_TCP:
+                       if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux))
+                               tcp_v6_early_demux(skb);
+                       break;
+               case IPPROTO_UDP:
+                       if (READ_ONCE(net->ipv4.sysctl_udp_early_demux))
+                               udp_v6_early_demux(skb);
+                       break;
+               }
        }
+
        if (!skb_valid_dst(skb))
                ip6_route_input(skb);
 }
 
        goto discard_it;
 }
 
-INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
+void tcp_v6_early_demux(struct sk_buff *skb)
 {
        const struct ipv6hdr *hdr;
        const struct tcphdr *th;
 };
 EXPORT_SYMBOL_GPL(tcpv6_prot);
 
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct inet6_protocol tcpv6_protocol = {
-       .early_demux    =       tcp_v6_early_demux,
-       .early_demux_handler =  tcp_v6_early_demux,
+static const struct inet6_protocol tcpv6_protocol = {
        .handler        =       tcp_v6_rcv,
        .err_handler    =       tcp_v6_err,
        .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
 
        return NULL;
 }
 
-INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb)
+void udp_v6_early_demux(struct sk_buff *skb)
 {
        struct net *net = dev_net(skb->dev);
        const struct udphdr *uh;
        return ipv6_getsockopt(sk, level, optname, optval, optlen);
 }
 
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct inet6_protocol udpv6_protocol = {
-       .early_demux    =       udp_v6_early_demux,
-       .early_demux_handler =  udp_v6_early_demux,
+static const struct inet6_protocol udpv6_protocol = {
        .handler        =       udpv6_rcv,
        .err_handler    =       udpv6_err,
        .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,