mm/slub: optimize free fast path code layout

author		Vlastimil Babka <vbabka@suse.cz>
		Fri, 27 Oct 2023 10:34:18 +0000 (12:34 +0200)
committer	Vlastimil Babka <vbabka@suse.cz>
		Wed, 6 Dec 2023 10:57:22 +0000 (11:57 +0100)

Inspection of kmem_cache_free() disassembly showed we could make the
fast path smaller by giving the compiler a few more branch hints and by
splitting memcg_slab_free_hook() into an inline part that only checks
whether there is any work to do, and an out-of-line part that does the
actual uncharge.
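
To illustrate the idea, here is a minimal, self-contained sketch of the
pattern rather than the kernel code itself; all names below are made up
for the example. The wrapper stays small enough to inline and performs
only the cheap checks, while the loop doing the real work is kept out
of line:

#include <stdio.h>

#define likely(x)	__builtin_expect(!!(x), 1)

/* out-of-line slow path: only reached when there is real work to do */
static __attribute__((noinline))
void uncharge_objects_slow(void **objcgs, void **p, int objects)
{
	for (int i = 0; i < objects; i++)
		printf("uncharging object %p\n", p[i]);
	(void)objcgs;
}

/* inline fast path: just a test or two and a rarely taken call */
static inline __attribute__((always_inline))
void free_hook(void **objcgs, void **p, int objects)
{
	if (likely(!objcgs))	/* common case: nothing to uncharge */
		return;

	uncharge_objects_slow(objcgs, p, objects);
}

int main(void)
{
	void *obj = &obj;

	free_hook(NULL, &obj, 1);	/* fast path: no out-of-line call taken */
	return 0;
}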

bloat-o-meter results:
add/remove: 2/0 grow/shrink: 0/3 up/down: 286/-554 (-268)
Function                                     old     new   delta
__memcg_slab_free_hook                         -     270    +270
__pfx___memcg_slab_free_hook                   -      16     +16
kfree                                        828     665    -163
kmem_cache_free                             1116     948    -168
kmem_cache_free_bulk.part                   1701    1478    -223

Checking kmem_cache_free() disassembly now shows the non-fastpath
cases are handled out of line, which should reduce instruction cache
usage.
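
For reference, the hints used here are the standard kernel helpers.
Simplified (the real definitions in include/linux/compiler.h also cover
the branch-profiling configs), they boil down to __builtin_expect(),
and __fastpath_inline is a mm/slub.c macro that forces inlining unless
CONFIG_SLUB_TINY asks to optimize for size:

/* include/linux/compiler.h, simplified */
#define likely(x)	__builtin_expect(!!(x), 1)
#define unlikely(x)	__builtin_expect(!!(x), 0)

/* mm/slub.c */
#ifndef CONFIG_SLUB_TINY
#define __fastpath_inline __always_inline
#else
#define __fastpath_inline
#endif

__builtin_expect() tells GCC/Clang which way a branch is expected to
go, so the cold side can be laid out away from the hot straight-line
code.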

Acked-by: David Rientjes <rientjes@google.com>
Tested-by: David Rientjes <rientjes@google.com>
Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Tested-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
mm/slub.c

diff --git a/mm/slub.c b/mm/slub.c
index 77d259f3d592f20d400c2d023128109cb695e817..3f8b957571068c83445af31fe29ce316a7a50d3b 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1959,20 +1959,11 @@ void memcg_slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg,
        return __memcg_slab_post_alloc_hook(s, objcg, flags, size, p);
 }
 
-static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
-                                       void **p, int objects)
+static void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
+                                  void **p, int objects,
+                                  struct obj_cgroup **objcgs)
 {
-       struct obj_cgroup **objcgs;
-       int i;
-
-       if (!memcg_kmem_online())
-               return;
-
-       objcgs = slab_objcgs(slab);
-       if (!objcgs)
-               return;
-
-       for (i = 0; i < objects; i++) {
+       for (int i = 0; i < objects; i++) {
                struct obj_cgroup *objcg;
                unsigned int off;
 
@@ -1988,6 +1979,22 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
                obj_cgroup_put(objcg);
        }
 }
+
+static __fastpath_inline
+void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
+                         int objects)
+{
+       struct obj_cgroup **objcgs;
+
+       if (!memcg_kmem_online())
+               return;
+
+       objcgs = slab_objcgs(slab);
+       if (likely(!objcgs))
+               return;
+
+       __memcg_slab_free_hook(s, slab, p, objects, objcgs);
+}
 #else /* CONFIG_MEMCG_KMEM */
 static inline struct mem_cgroup *memcg_from_slab_obj(void *ptr)
 {
@@ -2047,7 +2054,7 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s,
         * The initialization memset's clear the object and the metadata,
         * but don't touch the SLAB redzone.
         */
-       if (init) {
+       if (unlikely(init)) {
                int rsize;
 
                if (!kasan_has_integrated_init())
@@ -2083,7 +2090,8 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
                next = get_freepointer(s, object);
 
                /* If object's reuse doesn't have to be delayed */
-               if (!slab_free_hook(s, object, slab_want_init_on_free(s))) {
+               if (likely(!slab_free_hook(s, object,
+                                          slab_want_init_on_free(s)))) {
                        /* Move object to the new freelist */
                        set_freepointer(s, object, *head);
                        *head = object;
@@ -4282,7 +4290,7 @@ static __fastpath_inline void slab_free(struct kmem_cache *s, struct slab *slab,
         * With KASAN enabled slab_free_freelist_hook modifies the freelist
         * to remove objects, whose reuse must be delayed.
         */
-       if (slab_free_freelist_hook(s, &head, &tail, &cnt))
+       if (likely(slab_free_freelist_hook(s, &head, &tail, &cnt)))
                do_slab_free(s, slab, head, tail, cnt, addr);
 }