+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2011 STRATO.  All rights reserved.
- */
-
-#include <linux/sched.h>
-#include <linux/pagemap.h>
-#include <linux/writeback.h>
-#include <linux/blkdev.h>
-#include <linux/slab.h>
-#include <linux/workqueue.h>
-#include "ctree.h"
-#include "volumes.h"
-#include "disk-io.h"
-#include "transaction.h"
-#include "dev-replace.h"
-#include "block-group.h"
-
-#undef DEBUG
-
-/*
- * This is the implementation for the generic read ahead framework.
- *
- * To trigger a readahead, btrfs_reada_add must be called. It will start
- * a read ahead for the given range [start, end) on tree root. The returned
- * handle can either be used to wait on the readahead to finish
- * (btrfs_reada_wait), or to send it to the background (btrfs_reada_detach).
- *
- * The read ahead works as follows:
- * On btrfs_reada_add, the root of the tree is inserted into a radix_tree.
- * reada_start_machine will then search for extents to prefetch and trigger
- * some reads. When a read finishes for a node, all contained node/leaf
- * pointers that lie in the given range will also be enqueued. The reads will
- * be triggered in sequential order, thus giving a big win over a naive
- * enumeration. It will also make use of multi-device layouts. Each disk
- * will have its own read pointer and all disks will be utilized in parallel.
- * Also, no two disks will read both sides of a mirror simultaneously, as this
- * would waste seeking capacity. Instead both disks will read different parts
- * of the filesystem.
- * Any number of readaheads can be started in parallel. The read order will be
- * determined globally, i.e. 2 parallel readaheads will normally finish faster
- * than the 2 started one after another.
- */
-
-/*
- * A device is only handed more readahead work while its reada_in_flight
- * counter is below this value (see reada_start_for_fsdevs()).
- */
-#define MAX_IN_FLIGHT 6
-
-/*
- * Links one readahead request (reada_control) to one extent. A record is
- * allocated by reada_add_block() and queued on reada_extent::extctl;
- * __readahead_hook() detaches and frees it.
- */
-struct reada_extctl {
-       struct list_head        list;           /* entry in reada_extent::extctl */
-       struct reada_control    *rc;            /* request waiting for the extent */
-       u64                     generation;     /* compared against the header
-                                                * generation of the block read */
-};
-
-/*
- * One tree block that is queued for, or currently undergoing, readahead.
- * Extents are indexed by (logical >> sectorsize_bits) in fs_info->reada_tree
- * and in the reada_extents tree of the devices selected for reading.
- */
-struct reada_extent {
-       u64                     logical;        /* logical address of the block */
-       u64                     owner_root;     /* passed to btrfs_find_create_tree_block() */
-       struct btrfs_key        top;            /* used as the end key of the last
-                                                * child pointer in __readahead_hook() */
-       struct list_head        extctl;         /* pending reada_extctl records,
-                                                * protected by lock */
-       int                     refcnt;         /* plain counter, changed under
-                                                * fs_info->reada_lock */
-       spinlock_t              lock;           /* protects extctl and scheduled */
-       struct reada_zone       *zones[BTRFS_MAX_MIRRORS];
-       int                     nzones;         /* valid entries in zones[] */
-       int                     scheduled;      /* set when a read is issued,
-                                                * cleared in __readahead_hook() */
-       int                     level;          /* passed to btrfs_find_create_tree_block() */
-};
-
-/*
- * Per-device readahead zone. A zone spans the range of the block group that
- * contains the requested block (see reada_find_zone()) and is indexed in the
- * device's reada_zones tree by (end >> sectorsize_bits).
- */
-struct reada_zone {
-       u64                     start;          /* first byte of the range */
-       u64                     end;            /* last byte of the range (inclusive) */
-       u64                     elems;          /* extents accounted to this zone,
-                                                * updated under lock */
-       struct list_head        list;
-       spinlock_t              lock;
-       int                     locked;         /* set on peer zones while another
-                                                * device works on the same range;
-                                                * such zones are picked last */
-       struct btrfs_device     *device;        /* device this zone belongs to */
-       struct btrfs_device     *devs[BTRFS_MAX_MIRRORS]; /* full list, incl
-                                                          * self */
-       int                     ndevs;          /* valid entries in devs[] */
-       struct kref             refcnt;         /* released by reada_zone_release() */
-};
-
-/*
- * Work item queued by reada_start_machine() and consumed (and freed) by
- * reada_start_machine_worker().
- */
-struct reada_machine_work {
-       struct btrfs_work       work;           /* embedded work, used with container_of() */
-       struct btrfs_fs_info    *fs_info;       /* filesystem to run the machine for */
-};
-
-/* Forward declarations for helpers that are used before their definition. */
-static void reada_extent_put(struct btrfs_fs_info *, struct reada_extent *);
-static void reada_control_release(struct kref *kref);
-static void reada_zone_release(struct kref *kref);
-static void reada_start_machine(struct btrfs_fs_info *fs_info);
-static void __reada_start_machine(struct btrfs_fs_info *fs_info);
-
-static int reada_add_block(struct reada_control *rc, u64 logical,
-                          struct btrfs_key *top, u64 owner_root,
-                          u64 generation, int level);
-
-/*
- * Handle the completion of the read of one extent, successful or not.
- *
- * All pending reada_extctl records are detached from @re and re->scheduled
- * is cleared. If the read succeeded and @eb is an inner node, every child
- * pointer is offered to each waiting reada_control: a child is enqueued via
- * reada_add_block() when the record's generation matches the node's header
- * generation and the child's key range overlaps [key_start, key_end) of the
- * control. This is how the readahead descends the tree. Finally each detached
- * record is freed, dropping one rc->elems count and one extent reference.
- *
- * @fs_info: filesystem the extent belongs to
- * @re:      extent whose read finished; the caller holds a reference
- * @eb:      the block that was read; in case of err, eb might be NULL
- * @err:     non-zero if the read failed
- */
-static void __readahead_hook(struct btrfs_fs_info *fs_info,
-                            struct reada_extent *re, struct extent_buffer *eb,
-                            int err)
-{
-       int nritems;
-       int i;
-       u64 bytenr;
-       u64 generation;
-       struct list_head list;
-
-       spin_lock(&re->lock);
-       /*
-        * just take the full list from the extent. afterwards we
-        * don't need the lock anymore
-        */
-       list_replace_init(&re->extctl, &list);
-       re->scheduled = 0;
-       spin_unlock(&re->lock);
-
-       /*
-        * this is the error case, the extent buffer has not been
-        * read correctly. We won't access anything from it and
-        * just cleanup our data structures. Effectively this will
-        * cut the branch below this node from read ahead.
-        */
-       if (err)
-               goto cleanup;
-
-       /*
-        * FIXME: currently we just set nritems to 0 if this is a leaf,
-        * effectively ignoring the content. In a next step we could
-        * trigger more readahead depending from the content, e.g.
-        * fetch the checksums for the extents in the leaf.
-        */
-       if (!btrfs_header_level(eb))
-               goto cleanup;
-
-       nritems = btrfs_header_nritems(eb);
-       generation = btrfs_header_generation(eb);
-       for (i = 0; i < nritems; i++) {
-               struct reada_extctl *rec;
-               u64 n_gen;
-               struct btrfs_key key;
-               struct btrfs_key next_key;
-
-               /*
-                * The child at slot i covers keys [key, next_key). For the
-                * last slot the upper bound is the one recorded in the extent.
-                */
-               btrfs_node_key_to_cpu(eb, &key, i);
-               if (i + 1 < nritems)
-                       btrfs_node_key_to_cpu(eb, &next_key, i + 1);
-               else
-                       next_key = re->top;
-               bytenr = btrfs_node_blockptr(eb, i);
-               n_gen = btrfs_node_ptr_generation(eb, i);
-
-               list_for_each_entry(rec, &list, list) {
-                       struct reada_control *rc = rec->rc;
-
-                       /*
-                        * if the generation doesn't match, just ignore this
-                        * extctl. This will probably cut off a branch from
-                        * prefetch. Alternatively one could start a new (sub-)
-                        * prefetch for this branch, starting again from root.
-                        * FIXME: move the generation check out of this loop
-                        */
-#ifdef DEBUG
-                       if (rec->generation != generation) {
-                               btrfs_debug(fs_info,
-                                           "generation mismatch for (%llu,%d,%llu) %llu != %llu",
-                                           key.objectid, key.type, key.offset,
-                                           rec->generation, generation);
-                       }
-#endif
-                       /* the return value of reada_add_block() is ignored */
-                       if (rec->generation == generation &&
-                           btrfs_comp_cpu_keys(&key, &rc->key_end) < 0 &&
-                           btrfs_comp_cpu_keys(&next_key, &rc->key_start) > 0)
-                               reada_add_block(rc, bytenr, &next_key,
-                                               btrfs_header_owner(eb), n_gen,
-                                               btrfs_header_level(eb) - 1);
-               }
-       }
-
-cleanup:
-       /*
-        * free extctl records
-        */
-       while (!list_empty(&list)) {
-               struct reada_control *rc;
-               struct reada_extctl *rec;
-
-               rec = list_first_entry(&list, struct reada_extctl, list);
-               list_del(&rec->list);
-               rc = rec->rc;
-               kfree(rec);
-
-               /*
-                * The temporary reference keeps rc valid across wake_up():
-                * when elems drops to zero one reference is put before the
-                * waiters are woken.
-                */
-               kref_get(&rc->refcnt);
-               if (atomic_dec_and_test(&rc->elems)) {
-                       kref_put(&rc->refcnt, reada_control_release);
-                       wake_up(&rc->wait);
-               }
-               kref_put(&rc->refcnt, reada_control_release);
-
-               reada_extent_put(fs_info, re);  /* one ref for each entry */
-       }
-
-       return;
-}
-
-/*
- * Entry point called with a tree block whose read has finished.
- *
- * If the block is tracked as a readahead extent, its completion is processed
- * through __readahead_hook(). The readahead machine is kicked in either case.
- *
- * Returns 0 if the block was a readahead extent and -1 otherwise.
- */
-int btree_readahead_hook(struct extent_buffer *eb, int err)
-{
-       struct btrfs_fs_info *fs_info = eb->fs_info;
-       struct reada_extent *extent;
-       int ret = -1;
-
-       /* look the extent up and pin it while the lock is held */
-       spin_lock(&fs_info->reada_lock);
-       extent = radix_tree_lookup(&fs_info->reada_tree,
-                                  eb->start >> fs_info->sectorsize_bits);
-       if (extent)
-               extent->refcnt++;
-       spin_unlock(&fs_info->reada_lock);
-
-       if (extent) {
-               __readahead_hook(fs_info, extent, eb, err);
-               reada_extent_put(fs_info, extent);      /* our ref */
-               ret = 0;
-       }
-
-       reada_start_machine(fs_info);
-       return ret;
-}
-
-/*
- * Look up the zone of @dev that contains @logical and take a reference on
- * it. Called with fs_info->reada_lock held. Returns NULL if the first zone
- * found at or after @logical does not cover it.
- */
-static struct reada_zone *reada_zone_lookup_get(struct btrfs_device *dev,
-                                               u64 logical)
-{
-       struct reada_zone *found = NULL;
-       unsigned long first = logical >> dev->fs_info->sectorsize_bits;
-
-       if (radix_tree_gang_lookup(&dev->reada_zones, (void **)&found,
-                                  first, 1) != 1)
-               return NULL;
-       if (logical < found->start || logical > found->end)
-               return NULL;
-
-       kref_get(&found->refcnt);
-       return found;
-}
-
-/*
- * Return the readahead zone of @dev that covers @logical, creating it if
- * needed. A new zone spans the block group containing @logical and records
- * the devices of all stripes in @bioc.
- *
- * The returned zone carries a reference for the caller. NULL is returned if
- * no block group is found, on allocation failure, or if a concurrent insert
- * collided and the zone found afterwards does not cover @logical.
- */
-static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical,
-                                         struct btrfs_io_context *bioc)
-{
-       struct btrfs_fs_info *fs_info = dev->fs_info;
-       struct btrfs_block_group *bg;
-       struct reada_zone *zone;
-       u64 bg_start;
-       u64 bg_end;
-       int stripe;
-       int ret;
-
-       /* fast path: the zone is already registered */
-       spin_lock(&fs_info->reada_lock);
-       zone = reada_zone_lookup_get(dev, logical);
-       spin_unlock(&fs_info->reada_lock);
-       if (zone)
-               return zone;
-
-       bg = btrfs_lookup_block_group(fs_info, logical);
-       if (!bg)
-               return NULL;
-
-       bg_start = bg->start;
-       bg_end = bg_start + bg->length - 1;
-       btrfs_put_block_group(bg);
-
-       zone = kzalloc(sizeof(*zone), GFP_KERNEL);
-       if (!zone)
-               return NULL;
-
-       if (radix_tree_preload(GFP_KERNEL)) {
-               kfree(zone);
-               return NULL;
-       }
-
-       zone->start = bg_start;
-       zone->end = bg_end;
-       INIT_LIST_HEAD(&zone->list);
-       spin_lock_init(&zone->lock);
-       zone->locked = 0;
-       kref_init(&zone->refcnt);
-       zone->elems = 0;
-       zone->device = dev;
-       zone->ndevs = bioc->num_stripes;
-       /* bounds have already been checked */
-       for (stripe = 0; stripe < bioc->num_stripes; stripe++)
-               zone->devs[stripe] = bioc->stripes[stripe].dev;
-
-       spin_lock(&fs_info->reada_lock);
-       ret = radix_tree_insert(&dev->reada_zones,
-                       (unsigned long)(zone->end >> fs_info->sectorsize_bits),
-                       zone);
-       if (ret == -EEXIST) {
-               /* somebody else was faster, use their zone instead */
-               kfree(zone);
-               zone = reada_zone_lookup_get(dev, logical);
-       }
-       spin_unlock(&fs_info->reada_lock);
-       radix_tree_preload_end();
-
-       return zone;
-}
-
-/*
- * Find the reada_extent for the tree block at @logical, or create and
- * register a new one.
- *
- * A new extent is mapped to its stripes, attached to one zone per usable
- * device, and then inserted into fs_info->reada_tree and into the
- * reada_extents tree of each eligible device ("all or nothing").
- *
- * Returns the extent with one reference for the caller. Returns NULL if the
- * extent could not be set up (allocation or mapping failure, no zone found,
- * no eligible device). If an extent for the same index was inserted
- * concurrently, that existing extent is returned with a reference taken and
- * the newly built one is discarded.
- */
-static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
-                                             u64 logical,
-                                             struct btrfs_key *top,
-                                             u64 owner_root, int level)
-{
-       int ret;
-       struct reada_extent *re = NULL;
-       struct reada_extent *re_exist = NULL;
-       struct btrfs_io_context *bioc = NULL;
-       struct btrfs_device *dev;
-       struct btrfs_device *prev_dev;
-       u64 length;
-       int real_stripes;
-       int nzones = 0;
-       unsigned long index = logical >> fs_info->sectorsize_bits;
-       int dev_replace_is_ongoing;
-       int have_zone = 0;
-
-       /* fast path: the extent is already registered */
-       spin_lock(&fs_info->reada_lock);
-       re = radix_tree_lookup(&fs_info->reada_tree, index);
-       if (re)
-               re->refcnt++;
-       spin_unlock(&fs_info->reada_lock);
-
-       if (re)
-               return re;
-
-       re = kzalloc(sizeof(*re), GFP_KERNEL);
-       if (!re)
-               return NULL;
-
-       re->logical = logical;
-       re->top = *top;
-       INIT_LIST_HEAD(&re->extctl);
-       spin_lock_init(&re->lock);
-       re->refcnt = 1;
-       re->owner_root = owner_root;
-       re->level = level;
-
-       /*
-        * map block
-        */
-       length = fs_info->nodesize;
-       ret = btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
-                             &length, &bioc, 0);
-       if (ret || !bioc || length < fs_info->nodesize)
-               goto error;
-
-       if (bioc->num_stripes > BTRFS_MAX_MIRRORS) {
-               btrfs_err(fs_info,
-                          "readahead: more than %d copies not supported",
-                          BTRFS_MAX_MIRRORS);
-               goto error;
-       }
-
-       /* the last bioc->num_tgtdevs stripes are not considered */
-       real_stripes = bioc->num_stripes - bioc->num_tgtdevs;
-       for (nzones = 0; nzones < real_stripes; ++nzones) {
-               struct reada_zone *zone;
-
-               dev = bioc->stripes[nzones].dev;
-
-               /* cannot read ahead on missing device. */
-               if (!dev->bdev)
-                       continue;
-
-               zone = reada_find_zone(dev, logical, bioc);
-               if (!zone)
-                       continue;
-
-               /*
-                * The first element pins the zone with an extra reference;
-                * the reference returned by reada_find_zone() is dropped
-                * again below.
-                */
-               re->zones[re->nzones++] = zone;
-               spin_lock(&zone->lock);
-               if (!zone->elems)
-                       kref_get(&zone->refcnt);
-               ++zone->elems;
-               spin_unlock(&zone->lock);
-               spin_lock(&fs_info->reada_lock);
-               kref_put(&zone->refcnt, reada_zone_release);
-               spin_unlock(&fs_info->reada_lock);
-       }
-       if (re->nzones == 0) {
-               /* not a single zone found, error and out */
-               goto error;
-       }
-
-       /* Insert extent in reada tree + all per-device trees, all or nothing */
-       down_read(&fs_info->dev_replace.rwsem);
-       ret = radix_tree_preload(GFP_KERNEL);
-       if (ret) {
-               up_read(&fs_info->dev_replace.rwsem);
-               goto error;
-       }
-
-       spin_lock(&fs_info->reada_lock);
-       ret = radix_tree_insert(&fs_info->reada_tree, index, re);
-       if (ret == -EEXIST) {
-               /* lost a race: hand out the extent that is already there */
-               re_exist = radix_tree_lookup(&fs_info->reada_tree, index);
-               re_exist->refcnt++;
-               spin_unlock(&fs_info->reada_lock);
-               radix_tree_preload_end();
-               up_read(&fs_info->dev_replace.rwsem);
-               goto error;
-       }
-       if (ret) {
-               spin_unlock(&fs_info->reada_lock);
-               radix_tree_preload_end();
-               up_read(&fs_info->dev_replace.rwsem);
-               goto error;
-       }
-       radix_tree_preload_end();
-       prev_dev = NULL;
-       dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(
-                       &fs_info->dev_replace);
-       for (nzones = 0; nzones < re->nzones; ++nzones) {
-               dev = re->zones[nzones]->device;
-
-               if (dev == prev_dev) {
-                       /*
-                        * in case of DUP, just add the first zone. As both
-                        * are on the same device, there's nothing to gain
-                        * from adding both.
-                        * Also, it wouldn't work, as the tree is per device
-                        * and adding would fail with EEXIST
-                        */
-                       continue;
-               }
-               if (!dev->bdev)
-                       continue;
-
-               if (test_bit(BTRFS_DEV_STATE_NO_READA, &dev->dev_state))
-                       continue;
-
-               if (dev_replace_is_ongoing &&
-                   dev == fs_info->dev_replace.tgtdev) {
-                       /*
-                        * as this device is selected for reading only as
-                        * a last resort, skip it for read ahead.
-                        */
-                       continue;
-               }
-               prev_dev = dev;
-               ret = radix_tree_insert(&dev->reada_extents, index, re);
-               if (ret) {
-                       /* roll back every insertion done so far */
-                       while (--nzones >= 0) {
-                               dev = re->zones[nzones]->device;
-                               BUG_ON(dev == NULL);
-                               /* ignore whether the entry was inserted */
-                               radix_tree_delete(&dev->reada_extents, index);
-                       }
-                       radix_tree_delete(&fs_info->reada_tree, index);
-                       spin_unlock(&fs_info->reada_lock);
-                       up_read(&fs_info->dev_replace.rwsem);
-                       goto error;
-               }
-               have_zone = 1;
-       }
-       /* no device accepted the extent: take it out of the global tree again */
-       if (!have_zone)
-               radix_tree_delete(&fs_info->reada_tree, index);
-       spin_unlock(&fs_info->reada_lock);
-       up_read(&fs_info->dev_replace.rwsem);
-
-       if (!have_zone)
-               goto error;
-
-       btrfs_put_bioc(bioc);
-       return re;
-
-error:
-       /* undo the per-zone accounting done above, then discard the extent */
-       for (nzones = 0; nzones < re->nzones; ++nzones) {
-               struct reada_zone *zone;
-
-               zone = re->zones[nzones];
-               kref_get(&zone->refcnt);
-               spin_lock(&zone->lock);
-               --zone->elems;
-               if (zone->elems == 0) {
-                       /*
-                        * no fs_info->reada_lock needed, as this can't be
-                        * the last ref
-                        */
-                       kref_put(&zone->refcnt, reada_zone_release);
-               }
-               spin_unlock(&zone->lock);
-
-               spin_lock(&fs_info->reada_lock);
-               kref_put(&zone->refcnt, reada_zone_release);
-               spin_unlock(&fs_info->reada_lock);
-       }
-       btrfs_put_bioc(bioc);
-       kfree(re);
-       /* NULL unless a concurrently inserted extent was found above */
-       return re_exist;
-}
-
-/*
- * Drop one reference on @re. When the last reference goes away the extent is
- * removed from fs_info->reada_tree and from the reada_extents tree of every
- * zone's device, the element count of each zone is decremented, and the
- * extent is freed.
- */
-static void reada_extent_put(struct btrfs_fs_info *fs_info,
-                            struct reada_extent *re)
-{
-       unsigned long index = re->logical >> fs_info->sectorsize_bits;
-       int remaining;
-       int n;
-
-       spin_lock(&fs_info->reada_lock);
-       remaining = --re->refcnt;
-       if (remaining == 0) {
-               /* last reference: unpublish the extent from all trees */
-               radix_tree_delete(&fs_info->reada_tree, index);
-               for (n = 0; n < re->nzones; n++)
-                       radix_tree_delete(&re->zones[n]->device->reada_extents,
-                                         index);
-       }
-       spin_unlock(&fs_info->reada_lock);
-
-       if (remaining != 0)
-               return;
-
-       for (n = 0; n < re->nzones; n++) {
-               struct reada_zone *z = re->zones[n];
-
-               /* temporary reference, dropped under reada_lock below */
-               kref_get(&z->refcnt);
-
-               spin_lock(&z->lock);
-               if (--z->elems == 0) {
-                       /*
-                        * no fs_info->reada_lock needed, as this can't be
-                        * the last ref
-                        */
-                       kref_put(&z->refcnt, reada_zone_release);
-               }
-               spin_unlock(&z->lock);
-
-               spin_lock(&fs_info->reada_lock);
-               kref_put(&z->refcnt, reada_zone_release);
-               spin_unlock(&fs_info->reada_lock);
-       }
-
-       kfree(re);
-}
-
-/*
- * kref release callback for a reada_zone: removes the zone from its device's
- * reada_zones tree and frees it. fs_info->reada_lock must be held.
- */
-static void reada_zone_release(struct kref *kref)
-{
-       struct reada_zone *zone = container_of(kref, struct reada_zone, refcnt);
-       struct btrfs_device *dev = zone->device;
-       struct btrfs_fs_info *fs_info = dev->fs_info;
-       unsigned long index = zone->end >> fs_info->sectorsize_bits;
-
-       lockdep_assert_held(&fs_info->reada_lock);
-
-       radix_tree_delete(&dev->reada_zones, index);
-       kfree(zone);
-}
-
-/* kref release callback: frees the reada_control that embeds @kref. */
-static void reada_control_release(struct kref *kref)
-{
-       kfree(container_of(kref, struct reada_control, refcnt));
-}
-
-/*
- * Queue the tree block at @logical for readahead on behalf of @rc.
- *
- * Finds or creates the extent for the block, attaches a new reada_extctl
- * record carrying @rc and @generation to it and bumps rc->elems. The extent
- * reference obtained from reada_find_extent() stays with the record.
- *
- * Returns 0 on success, -1 if no extent could be obtained and -ENOMEM if the
- * record could not be allocated.
- */
-static int reada_add_block(struct reada_control *rc, u64 logical,
-                          struct btrfs_key *top, u64 owner_root,
-                          u64 generation, int level)
-{
-       struct btrfs_fs_info *fs_info = rc->fs_info;
-       struct reada_extent *extent;
-       struct reada_extctl *ctl;
-
-       /* takes one ref */
-       extent = reada_find_extent(fs_info, logical, top, owner_root, level);
-       if (extent == NULL)
-               return -1;
-
-       ctl = kzalloc(sizeof(*ctl), GFP_KERNEL);
-       if (ctl == NULL) {
-               reada_extent_put(fs_info, extent);
-               return -ENOMEM;
-       }
-       ctl->generation = generation;
-       ctl->rc = rc;
-
-       atomic_inc(&rc->elems);
-
-       spin_lock(&extent->lock);
-       list_add_tail(&ctl->list, &extent->extctl);
-       spin_unlock(&extent->lock);
-
-       /* the extent ref is intentionally kept */
-       return 0;
-}
-
-/*
- * Set the 'locked' flag to @lock on the zones that other devices in
- * zone->devs[] hold at the same index as @zone. The zone of @zone's own
- * device is left untouched.
- *
- * called with fs_info->reada_lock held
- */
-static void reada_peer_zones_set_lock(struct reada_zone *zone, int lock)
-{
-       struct btrfs_device *self = zone->device;
-       unsigned long index = zone->end >> self->fs_info->sectorsize_bits;
-       int n;
-
-       for (n = 0; n < zone->ndevs; n++) {
-               struct reada_zone *peer;
-
-               peer = radix_tree_lookup(&zone->devs[n]->reada_zones, index);
-               if (peer == NULL || peer->device == self)
-                       continue;
-               peer->locked = lock;
-       }
-}
-
-/*
- * Choose the zone @dev should work on next.
- *
- * The current zone (if any) is released and its peers are unlocked. Then all
- * zones of the device are scanned: the unlocked zone with the most elements
- * wins; if there is none, the locked zone with the most elements is taken.
- * Zones without elements are never chosen. The chosen zone is referenced,
- * dev->reada_next is reset to its start and its peers are locked.
- *
- * Returns 1 if a zone was selected and 0 otherwise.
- *
- * called with fs_info->reada_lock held
- */
-static int reada_pick_zone(struct btrfs_device *dev)
-{
-       struct reada_zone *best_free = NULL;
-       struct reada_zone *best_locked = NULL;
-       struct reada_zone *curr = dev->reada_curr_zone;
-       struct reada_zone *zone;
-       u64 free_elems = 0;
-       u64 locked_elems = 0;
-       unsigned long next = 0;
-
-       if (curr) {
-               reada_peer_zones_set_lock(curr, 0);
-               kref_put(&curr->refcnt, reada_zone_release);
-               dev->reada_curr_zone = NULL;
-       }
-
-       /* pick the zone with the most elements */
-       while (radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
-                                     next, 1) != 0) {
-               next = (zone->end >> dev->fs_info->sectorsize_bits) + 1;
-
-               if (zone->locked) {
-                       if (zone->elems > locked_elems) {
-                               locked_elems = zone->elems;
-                               best_locked = zone;
-                       }
-                       continue;
-               }
-               if (zone->elems > free_elems) {
-                       free_elems = zone->elems;
-                       best_free = zone;
-               }
-       }
-
-       curr = best_free ? best_free : best_locked;
-       if (!curr)
-               return 0;
-
-       dev->reada_curr_zone = curr;
-       dev->reada_next = curr->start;
-       kref_get(&curr->refcnt);
-       reada_peer_zones_set_lock(curr, 1);
-
-       return 1;
-}
-
-/*
- * Start a readahead read of one tree block.
- *
- * The extent buffer is looked up or created, flagged with
- * EXTENT_BUFFER_READAHEAD and read with WAIT_PAGE_LOCK from @mirror_num.
- *
- * @fs_info:    filesystem
- * @bytenr:     logical address of the block
- * @owner_root: forwarded to btrfs_find_create_tree_block()
- * @level:      forwarded to btrfs_find_create_tree_block()
- * @mirror_num: mirror to read from
- * @eb:         set to the buffer (reference held for the caller) only if the
- *              buffer is already uptodate on return; untouched otherwise
- *
- * Returns 0 if the read was started or the buffer is uptodate, and a
- * negative errno if the buffer could not be obtained, the read could not be
- * started, or the buffer is marked corrupt.
- *
- * A failure to obtain the buffer is reported as an error instead of 0. The
- * caller has already marked the extent as scheduled and only runs its
- * cleanup hook itself when this function returns an error or an uptodate
- * buffer; since no read is started without a buffer, returning 0 here would
- * leave the extent scheduled with nothing in this file left to complete it.
- */
-static int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
-                                   u64 owner_root, int level, int mirror_num,
-                                   struct extent_buffer **eb)
-{
-       struct extent_buffer *buf;
-       int ret;
-
-       buf = btrfs_find_create_tree_block(fs_info, bytenr, owner_root, level);
-       if (IS_ERR(buf))
-               return PTR_ERR(buf);
-
-       set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags);
-
-       ret = read_extent_buffer_pages(buf, WAIT_PAGE_LOCK, mirror_num);
-       if (ret) {
-               free_extent_buffer_stale(buf);
-               return ret;
-       }
-
-       if (test_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags)) {
-               free_extent_buffer_stale(buf);
-               return -EIO;
-       } else if (extent_buffer_uptodate(buf)) {
-               /* already in cache: hand the buffer to the caller */
-               *eb = buf;
-       } else {
-               /* not uptodate yet and no error: just drop our reference */
-               free_extent_buffer(buf);
-       }
-       return 0;
-}
-
-/*
- * Issue readahead for the next pending extent of @dev.
- *
- * A zone is picked if the device has none, or if no extent at or after
- * dev->reada_next lies inside the current zone. The extent found is then
- * marked as scheduled and read from the mirror that corresponds to @dev,
- * unless it is already scheduled or has no waiters.
- *
- * Returns 1 if a read was attempted and 0 if there was nothing to do.
- */
-static int reada_start_machine_dev(struct btrfs_device *dev)
-{
-       struct btrfs_fs_info *fs_info = dev->fs_info;
-       struct reada_extent *re = NULL;
-       int mirror_num = 0;
-       struct extent_buffer *eb = NULL;
-       u64 logical;
-       int ret;
-       int i;
-
-       spin_lock(&fs_info->reada_lock);
-       if (dev->reada_curr_zone == NULL) {
-               ret = reada_pick_zone(dev);
-               if (!ret) {
-                       spin_unlock(&fs_info->reada_lock);
-                       return 0;
-               }
-       }
-       /*
-        * FIXME currently we issue the reads one extent at a time. If we have
-        * a contiguous block of extents, we could also coagulate them or use
-        * plugging to speed things up
-        */
-       ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re,
-                               dev->reada_next >> fs_info->sectorsize_bits, 1);
-       if (ret == 0 || re->logical > dev->reada_curr_zone->end) {
-               /* current zone is exhausted: switch zones and look again */
-               ret = reada_pick_zone(dev);
-               if (!ret) {
-                       spin_unlock(&fs_info->reada_lock);
-                       return 0;
-               }
-               re = NULL;
-               ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re,
-                               dev->reada_next >> fs_info->sectorsize_bits, 1);
-       }
-       if (ret == 0) {
-               spin_unlock(&fs_info->reada_lock);
-               return 0;
-       }
-       /* advance the read pointer and pin the extent before unlocking */
-       dev->reada_next = re->logical + fs_info->nodesize;
-       re->refcnt++;
-
-       spin_unlock(&fs_info->reada_lock);
-
-       spin_lock(&re->lock);
-       if (re->scheduled || list_empty(&re->extctl)) {
-               /* a read is already scheduled, or nobody waits for it */
-               spin_unlock(&re->lock);
-               reada_extent_put(fs_info, re);
-               return 0;
-       }
-       re->scheduled = 1;
-       spin_unlock(&re->lock);
-
-       /*
-        * find mirror num
-        */
-       for (i = 0; i < re->nzones; ++i) {
-               if (re->zones[i]->device == dev) {
-                       mirror_num = i + 1;
-                       break;
-               }
-       }
-       logical = re->logical;
-
-       atomic_inc(&dev->reada_in_flight);
-       ret = reada_tree_block_flagged(fs_info, logical, re->owner_root,
-                                      re->level, mirror_num, &eb);
-       /*
-        * Run the completion hook right away on error, or when the buffer
-        * was returned uptodate. Otherwise no hook is called from here.
-        */
-       if (ret)
-               __readahead_hook(fs_info, re, NULL, ret);
-       else if (eb)
-               __readahead_hook(fs_info, re, eb, ret);
-
-       if (eb)
-               free_extent_buffer(eb);
-
-       atomic_dec(&dev->reada_in_flight);
-       reada_extent_put(fs_info, re);
-
-       return 1;
-
-}
-
-/*
- * Work function for a reada_machine_work item: runs the readahead machine
- * with the task I/O priority set to BTRFS_IOPRIO_READA, then restores a
- * priority derived from the task's nice value, decrements
- * fs_info->reada_works_cnt and frees the work item.
- */
-static void reada_start_machine_worker(struct btrfs_work *work)
-{
-       struct reada_machine_work *rmw =
-               container_of(work, struct reada_machine_work, work);
-       struct btrfs_fs_info *fs_info = rmw->fs_info;
-       int saved_ioprio = IOPRIO_PRIO_VALUE(task_nice_ioclass(current),
-                                            task_nice_ioprio(current));
-
-       set_task_ioprio(current, BTRFS_IOPRIO_READA);
-       __reada_start_machine(fs_info);
-       set_task_ioprio(current, saved_ioprio);
-
-       atomic_dec(&fs_info->reada_works_cnt);
-       kfree(rmw);
-}
-
-/*
- * Try to start up to 10k READA requests for a group of devices.
- *
- * Devices are visited round-robin; a device is skipped while it has
- * MAX_IN_FLIGHT or more reads in flight. The loop ends when a full round
- * starts nothing or the total reaches 10000. Returns the number started.
- */
-static int reada_start_for_fsdevs(struct btrfs_fs_devices *fs_devices)
-{
-       struct btrfs_device *dev;
-       u64 total = 0;
-
-       for (;;) {
-               u64 round = 0;
-
-               list_for_each_entry(dev, &fs_devices->devices, dev_list) {
-                       if (atomic_read(&dev->reada_in_flight) >= MAX_IN_FLIGHT)
-                               continue;
-                       round += reada_start_machine_dev(dev);
-               }
-               total += round;
-
-               if (round == 0 || total >= 10000)
-                       break;
-       }
-
-       return total;
-}
-
-/*
- * Run the readahead machine once over the filesystem's devices and the
- * devices of every seed filesystem, under device_list_mutex. If anything was
- * started, up to two more work items are queued to continue in parallel.
- */
-static void __reada_start_machine(struct btrfs_fs_info *fs_info)
-{
-       struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
-       struct btrfs_fs_devices *seed;
-       u64 started;
-       int round;
-
-       mutex_lock(&fs_devices->device_list_mutex);
-       started = reada_start_for_fsdevs(fs_devices);
-       list_for_each_entry(seed, &fs_devices->seed_list, seed_list)
-               started += reada_start_for_fsdevs(seed);
-       mutex_unlock(&fs_devices->device_list_mutex);
-
-       if (!started)
-               return;
-
-       /*
-        * If everything is already in the cache, this is effectively single
-        * threaded. To a) not hold the caller for too long and b) to utilize
-        * more cores, we broke the loop above after 10000 iterations and now
-        * enqueue to workers to finish it. This will distribute the load to
-        * the cores.
-        */
-       for (round = 0; round < 2; round++) {
-               reada_start_machine(fs_info);
-               if (atomic_read(&fs_info->reada_works_cnt) >
-                   BTRFS_MAX_MIRRORS * 2)
-                       return;
-       }
-}
-
-/*
- * Queue one work item that runs the readahead machine for @fs_info on the
- * readahead workers.
- *
- * fs_info->reada_works_cnt is incremented before the work is queued. The
- * worker decrements the counter when it finishes, so counting only after
- * btrfs_queue_work() would allow the decrement to happen first and the
- * counter to under-report the outstanding work for a moment.
- */
-static void reada_start_machine(struct btrfs_fs_info *fs_info)
-{
-       struct reada_machine_work *rmw;
-
-       rmw = kzalloc(sizeof(*rmw), GFP_KERNEL);
-       if (!rmw) {
-               /* FIXME we cannot handle this properly right now */
-               BUG();
-       }
-       btrfs_init_work(&rmw->work, reada_start_machine_worker, NULL, NULL);
-       rmw->fs_info = fs_info;
-
-       /* account for the work before the worker gets a chance to run */
-       atomic_inc(&fs_info->reada_works_cnt);
-       btrfs_queue_work(fs_info->readahead_workers, &rmw->work);
-}
-
-#ifdef DEBUG
-/*
- * Debug helper (only built with DEBUG defined): print the readahead state
- * while holding fs_info->reada_lock.
- *
- * For every device the in-flight count and all zones are printed. If @all is
- * non-zero, up to 16 extents per device are printed as well, followed by
- * every extent in fs_info->reada_tree that is currently scheduled.
- */
-static void dump_devs(struct btrfs_fs_info *fs_info, int all)
-{
-       struct btrfs_device *device;
-       struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
-       unsigned long index;
-       int ret;
-       int i;
-       int j;
-       int cnt;
-
-       spin_lock(&fs_info->reada_lock);
-       list_for_each_entry(device, &fs_devices->devices, dev_list) {
-               btrfs_debug(fs_info, "dev %lld has %d in flight", device->devid,
-                       atomic_read(&device->reada_in_flight));
-               /* walk all zones of this device in index order */
-               index = 0;
-               while (1) {
-                       struct reada_zone *zone;
-                       ret = radix_tree_gang_lookup(&device->reada_zones,
-                                                    (void **)&zone, index, 1);
-                       if (ret == 0)
-                               break;
-                       pr_debug("  zone %llu-%llu elems %llu locked %d devs",
-                                   zone->start, zone->end, zone->elems,
-                                   zone->locked);
-                       for (j = 0; j < zone->ndevs; ++j) {
-                               pr_cont(" %lld",
-                                       zone->devs[j]->devid);
-                       }
-                       if (device->reada_curr_zone == zone)
-                               pr_cont(" curr off %llu",
-                                       device->reada_next - zone->start);
-                       pr_cont("\n");
-                       index = (zone->end >> fs_info->sectorsize_bits) + 1;
-               }
-               /* optionally list the first extents queued on this device */
-               cnt = 0;
-               index = 0;
-               while (all) {
-                       struct reada_extent *re = NULL;
-
-                       ret = radix_tree_gang_lookup(&device->reada_extents,
-                                                    (void **)&re, index, 1);
-                       if (ret == 0)
-                               break;
-                       pr_debug("  re: logical %llu size %u empty %d scheduled %d",
-                               re->logical, fs_info->nodesize,
-                               list_empty(&re->extctl), re->scheduled);
-
-                       for (i = 0; i < re->nzones; ++i) {
-                               pr_cont(" zone %llu-%llu devs",
-                                       re->zones[i]->start,
-                                       re->zones[i]->end);
-                               for (j = 0; j < re->zones[i]->ndevs; ++j) {
-                                       pr_cont(" %lld",
-                                               re->zones[i]->devs[j]->devid);
-                               }
-                       }
-                       pr_cont("\n");
-                       index = (re->logical >> fs_info->sectorsize_bits) + 1;
-                       /* limit the output per device */
-                       if (++cnt > 15)
-                               break;
-               }
-       }
-
-       /* optionally list every extent that is currently scheduled */
-       index = 0;
-       cnt = 0;
-       while (all) {
-               struct reada_extent *re = NULL;
-
-               ret = radix_tree_gang_lookup(&fs_info->reada_tree, (void **)&re,
-                                            index, 1);
-               if (ret == 0)
-                       break;
-               if (!re->scheduled) {
-                       index = (re->logical >> fs_info->sectorsize_bits) + 1;
-                       continue;
-               }
-               pr_debug("re: logical %llu size %u list empty %d scheduled %d",
-                       re->logical, fs_info->nodesize,
-                       list_empty(&re->extctl), re->scheduled);
-               for (i = 0; i < re->nzones; ++i) {
-                       pr_cont(" zone %llu-%llu devs",
-                               re->zones[i]->start,
-                               re->zones[i]->end);
-                       for (j = 0; j < re->zones[i]->ndevs; ++j) {
-                               pr_cont(" %lld",
-                                      re->zones[i]->devs[j]->devid);
-                       }
-               }
-               pr_cont("\n");
-               index = (re->logical >> fs_info->sectorsize_bits) + 1;
-       }
-       spin_unlock(&fs_info->reada_lock);
-}
-#endif
-
-/*
- * interface
- */
-/*
- * Start a readahead for the key range [key_start, key_end) on tree @root.
- *
- * A reada_control is allocated to track the request, and the node returned
- * by btrfs_root_node() is queued through reada_add_block() as the first
- * block. The readahead machinery is then started.
- *
- * Returns the control handle, which the caller hands to btrfs_reada_wait()
- * or btrfs_reada_detach(). Returns ERR_PTR(-ENOMEM) if the control cannot be
- * allocated, or ERR_PTR() of the reada_add_block() result if queueing fails.
- */
-struct reada_control *btrfs_reada_add(struct btrfs_root *root,
-                       struct btrfs_key *key_start, struct btrfs_key *key_end)
-{
-       static struct btrfs_key max_key = {
-               .objectid = (u64)-1,
-               .type = (u8)-1,
-               .offset = (u64)-1
-       };
-       struct btrfs_fs_info *fs_info = root->fs_info;
-       struct reada_control *rc;
-       struct extent_buffer *eb;
-       u64 logical;
-       u64 gen;
-       int root_level;
-       int err;
-
-       rc = kzalloc(sizeof(*rc), GFP_KERNEL);
-       if (rc == NULL)
-               return ERR_PTR(-ENOMEM);
-
-       /* Two references: the initial one, plus one for having elements. */
-       kref_init(&rc->refcnt);
-       kref_get(&rc->refcnt);
-       init_waitqueue_head(&rc->wait);
-       atomic_set(&rc->elems, 0);
-       rc->fs_info = fs_info;
-       rc->key_start = *key_start;
-       rc->key_end = *key_end;
-
-       /*
-        * Only the start, generation and level of the root node are needed,
-        * so the extent buffer is released again before queueing the block.
-        */
-       eb = btrfs_root_node(root);
-       logical = eb->start;
-       gen = btrfs_header_generation(eb);
-       root_level = btrfs_header_level(eb);
-       free_extent_buffer(eb);
-
-       err = reada_add_block(rc, logical, &max_key, root->root_key.objectid,
-                             gen, root_level);
-       if (err) {
-               kfree(rc);
-               return ERR_PTR(err);
-       }
-
-       reada_start_machine(fs_info);
-       return rc;
-}
-
-#ifdef DEBUG
-/*
- * DEBUG variant of btrfs_reada_wait().
- *
- * Blocks until the element count of the readahead control @handle reaches
- * zero, starting the readahead machine again whenever no readahead work is
- * accounted in reada_works_cnt. The device state is dumped after every wait
- * (up to 5 seconds each) and once more on completion; the detailed dump is
- * requested only while fewer than 10 elements remain.
- *
- * Drops one reference on the control before returning. Always returns 0.
- */
-int btrfs_reada_wait(void *handle)
-{
-       struct reada_control *rc = handle;
-       struct btrfs_fs_info *fs_info = rc->fs_info;
-
-       for (;;) {
-               if (atomic_read(&rc->elems) == 0)
-                       break;
-
-               if (atomic_read(&fs_info->reada_works_cnt) == 0)
-                       reada_start_machine(fs_info);
-
-               wait_event_timeout(rc->wait, !atomic_read(&rc->elems),
-                                  5 * HZ);
-               dump_devs(fs_info, atomic_read(&rc->elems) < 10);
-       }
-
-       dump_devs(fs_info, atomic_read(&rc->elems) < 10);
-
-       kref_put(&rc->refcnt, reada_control_release);
-       return 0;
-}
-#else
-/*
- * Block until the element count of the readahead control @handle reaches
- * zero.
- *
- * The wait is re-armed roughly every tenth of a second so that the readahead
- * machine can be started again whenever no readahead work is accounted in
- * reada_works_cnt.
- *
- * Drops one reference on the control before returning. Always returns 0.
- */
-int btrfs_reada_wait(void *handle)
-{
-       struct reada_control *rc = handle;
-       struct btrfs_fs_info *fs_info = rc->fs_info;
-       /* HZ / 10, rounded up. */
-       const long poll_jiffies = (HZ + 9) / 10;
-
-       while (atomic_read(&rc->elems) != 0) {
-               if (atomic_read(&fs_info->reada_works_cnt) == 0)
-                       reada_start_machine(fs_info);
-
-               wait_event_timeout(rc->wait, !atomic_read(&rc->elems),
-                                  poll_jiffies);
-       }
-
-       kref_put(&rc->refcnt, reada_control_release);
-       return 0;
-}
-#endif
-
-/*
- * Give up the caller's reference on the readahead control @handle without
- * waiting for the readahead to finish. The control is released through
- * reada_control_release() once its last reference is dropped.
- */
-void btrfs_reada_detach(void *handle)
-{
-       struct reada_control *ctl = handle;
-       struct kref *ref = &ctl->refcnt;
-
-       kref_put(ref, reada_control_release);
-}
-
-/*
- * Before removing a device (device replace or device remove ioctls), call this
- * function to wait for all existing readahead requests on the device and to
- * make sure no one queues more readahead requests for the device.
- *
- * Must be called without holding either the device list mutex or the device
- * replace semaphore, otherwise it will deadlock.
- */
-void btrfs_reada_remove_dev(struct btrfs_device *dev)
-{
-       struct btrfs_fs_info *info = dev->fs_info;
-
-       /*
-        * Flag the device under reada_lock so that this serializes with the
-        * creation of readahead extents in reada_find_extent().
-        */
-       spin_lock(&info->reada_lock);
-       set_bit(BTRFS_DEV_STATE_NO_READA, &dev->dev_state);
-       spin_unlock(&info->reada_lock);
-
-       /*
-        * Requests may already sit in the radix trees without having been
-        * handed to the readahead work queue yet. Kick them off and wait for
-        * the work queue to drain: dropping the last reference on readahead
-        * extents and their zones accesses the device structure, so leaving
-        * them pending could result in a use-after-free.
-        */
-       reada_start_machine(info);
-       btrfs_flush_workqueue(info->readahead_workers);
-}
-
-/*
- * If an error happens while removing a device (device replace or device remove
- * ioctls) after btrfs_reada_remove_dev() has been called, call this to undo
- * what that function did. This is safe to call even if
- * btrfs_reada_remove_dev() was not called before.
- */
-void btrfs_reada_undo_remove_dev(struct btrfs_device *dev)
-{
-       struct btrfs_fs_info *fs_info = dev->fs_info;
-
-       /* Clear the no-readahead flag under the same lock that guards setting it. */
-       spin_lock(&fs_info->reada_lock);
-       clear_bit(BTRFS_DEV_STATE_NO_READA, &dev->dev_state);
-       spin_unlock(&fs_info->reada_lock);
-}