From be500ed721a6ec8d49bf0814c277ce7162acee0e Mon Sep 17 00:00:00 2001
From: Joe Thornber <ejt@redhat.com>
Date: Tue, 13 Apr 2021 11:03:45 +0100
Subject: [PATCH] dm space maps: improve performance with inc/dec on ranges of
 blocks

When we break sharing on btree nodes we typically need to increment
the reference counts to every value held in the node.  This can
cause a lot of repeated calls to the space maps.  Fix this by changing
the interface to the space map inc/dec methods to take ranges of
adjacent blocks to be operated on.

For installations that are using a lot of snapshots this will reduce
cpu overhead of fundamental operations such as provisioning a new block,
or deleting a snapshot, by as much as 10 times.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-era-target.c                    |  24 +-
 drivers/md/dm-thin-metadata.c                 |  91 ++--
 drivers/md/persistent-data/dm-array.c         |  52 +-
 .../md/persistent-data/dm-btree-internal.h    |  13 +
 drivers/md/persistent-data/dm-btree-remove.c  |   4 +-
 drivers/md/persistent-data/dm-btree-spine.c   |  16 +-
 drivers/md/persistent-data/dm-btree.c         |  91 +++-
 drivers/md/persistent-data/dm-btree.h         |  10 +-
 .../md/persistent-data/dm-space-map-common.c  | 448 +++++++++++++++++-
 .../md/persistent-data/dm-space-map-common.h  |  18 +-
 .../md/persistent-data/dm-space-map-disk.c    |  74 +--
 .../persistent-data/dm-space-map-metadata.c   |  96 ++--
 drivers/md/persistent-data/dm-space-map.h     |  18 +-
 .../persistent-data/dm-transaction-manager.c  |  52 ++
 .../persistent-data/dm-transaction-manager.h  |  12 +-
 15 files changed, 774 insertions(+), 245 deletions(-)

diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c
index d9ac7372108c9..3b748393fca5d 100644
--- a/drivers/md/dm-era-target.c
+++ b/drivers/md/dm-era-target.c
@@ -363,28 +363,32 @@ static void ws_unpack(const struct writeset_disk *disk, struct writeset_metadata
 	core->root = le64_to_cpu(disk->root);
 }
 
-static void ws_inc(void *context, const void *value)
+static void ws_inc(void *context, const void *value, unsigned count)
 {
 	struct era_metadata *md = context;
 	struct writeset_disk ws_d;
 	dm_block_t b;
+	unsigned i;
 
-	memcpy(&ws_d, value, sizeof(ws_d));
-	b = le64_to_cpu(ws_d.root);
-
-	dm_tm_inc(md->tm, b);
+	for (i = 0; i < count; i++) {
+		memcpy(&ws_d, value + (i * sizeof(ws_d)), sizeof(ws_d));
+		b = le64_to_cpu(ws_d.root);
+		dm_tm_inc(md->tm, b);
+	}
 }
 
-static void ws_dec(void *context, const void *value)
+static void ws_dec(void *context, const void *value, unsigned count)
 {
 	struct era_metadata *md = context;
 	struct writeset_disk ws_d;
 	dm_block_t b;
+	unsigned i;
 
-	memcpy(&ws_d, value, sizeof(ws_d));
-	b = le64_to_cpu(ws_d.root);
-
-	dm_bitset_del(&md->bitset_info, b);
+	for (i = 0; i < count; i++) {
+		memcpy(&ws_d, value + (i * sizeof(ws_d)), sizeof(ws_d));
+		b = le64_to_cpu(ws_d.root);
+		dm_bitset_del(&md->bitset_info, b);
+	}
 }
 
 static int ws_eq(void *context, const void *value1, const void *value2)
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index e75b20480e460..c88ed14d49e65 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -311,28 +311,53 @@ static void unpack_block_time(uint64_t v, dm_block_t *b, uint32_t *t)
 	*t = v & ((1 << 24) - 1);
 }
 
-static void data_block_inc(void *context, const void *value_le)
+/*
+ * It's more efficient to call dm_sm_{inc,dec}_blocks as few times as
+ * possible.  'with_runs' reads contiguous runs of blocks, and calls the
+ * given sm function.
+ */
+typedef int (*run_fn)(struct dm_space_map *, dm_block_t, dm_block_t);
+
+static void with_runs(struct dm_space_map *sm, const __le64 *value_le, unsigned count, run_fn fn)
 {
-	struct dm_space_map *sm = context;
-	__le64 v_le;
-	uint64_t b;
+	uint64_t b, begin, end;
 	uint32_t t;
+	bool in_run = false;
+	unsigned i;
 
-	memcpy(&v_le, value_le, sizeof(v_le));
-	unpack_block_time(le64_to_cpu(v_le), &b, &t);
-	dm_sm_inc_block(sm, b);
+	for (i = 0; i < count; i++, value_le++) {
+		/* We know value_le is 8 byte aligned */
+		unpack_block_time(le64_to_cpu(*value_le), &b, &t);
+
+		if (in_run) {
+			if (b == end) {
+				end++;
+			} else {
+				fn(sm, begin, end);
+				begin = b;
+				end = b + 1;
+			}
+		} else {
+			in_run = true;
+			begin = b;
+			end = b + 1;
+		}
+	}
+
+	if (in_run)
+		fn(sm, begin, end);
 }
 
-static void data_block_dec(void *context, const void *value_le)
+static void data_block_inc(void *context, const void *value_le, unsigned count)
 {
-	struct dm_space_map *sm = context;
-	__le64 v_le;
-	uint64_t b;
-	uint32_t t;
+	with_runs((struct dm_space_map *) context,
+		  (const __le64 *) value_le, count, dm_sm_inc_blocks);
+}
 
-	memcpy(&v_le, value_le, sizeof(v_le));
-	unpack_block_time(le64_to_cpu(v_le), &b, &t);
-	dm_sm_dec_block(sm, b);
+static void data_block_dec(void *context, const void *value_le, unsigned count)
+{
+	with_runs((struct dm_space_map *) context,
+		  (const __le64 *) value_le, count, dm_sm_dec_blocks);
 }
 
 static int data_block_equal(void *context, const void *value1_le, const void *value2_le)
@@ -349,27 +374,25 @@ static int data_block_equal(void *context, const void *value1_le, const void *va
 	return b1 == b2;
 }
 
-static void subtree_inc(void *context, const void *value)
+static void subtree_inc(void *context, const void *value, unsigned count)
 {
 	struct dm_btree_info *info = context;
-	__le64 root_le;
-	uint64_t root;
+	const __le64 *root_le = value;
+	unsigned i;
 
-	memcpy(&root_le, value, sizeof(root_le));
-	root = le64_to_cpu(root_le);
-	dm_tm_inc(info->tm, root);
+	for (i = 0; i < count; i++, root_le++)
+		dm_tm_inc(info->tm, le64_to_cpu(*root_le));
 }
 
-static void subtree_dec(void *context, const void *value)
+static void subtree_dec(void *context, const void *value, unsigned count)
 {
 	struct dm_btree_info *info = context;
-	__le64 root_le;
-	uint64_t root;
+	const __le64 *root_le = value;
+	unsigned i;
 
-	memcpy(&root_le, value, sizeof(root_le));
-	root = le64_to_cpu(root_le);
-	if (dm_btree_del(info, root))
-		DMERR("btree delete failed");
+	for (i = 0; i < count; i++, root_le++)
+		if (dm_btree_del(info, le64_to_cpu(*root_le)))
+			DMERR("btree delete failed");
 }
 
 static int subtree_equal(void *context, const void *value1_le, const void *value2_le)
@@ -1761,11 +1784,7 @@ int dm_pool_inc_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_
 	int r = 0;
 
 	pmd_write_lock(pmd);
-	for (; b != e; b++) {
-		r = dm_sm_inc_block(pmd->data_sm, b);
-		if (r)
-			break;
-	}
+	r = dm_sm_inc_blocks(pmd->data_sm, b, e);
 	pmd_write_unlock(pmd);
 
 	return r;
@@ -1776,11 +1795,7 @@ int dm_pool_dec_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_
 	int r = 0;
 
 	pmd_write_lock(pmd);
-	for (; b != e; b++) {
-		r = dm_sm_dec_block(pmd->data_sm, b);
-		if (r)
-			break;
-	}
+	r = dm_sm_dec_blocks(pmd->data_sm, b, e);
 	pmd_write_unlock(pmd);
 
 	return r;
diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c
index 185dc60360b55..3a963d783a865 100644
--- a/drivers/md/persistent-data/dm-array.c
+++ b/drivers/md/persistent-data/dm-array.c
@@ -108,12 +108,10 @@ static void *element_at(struct dm_array_info *info, struct array_block *ab,
  * in an array block.
  */
 static void on_entries(struct dm_array_info *info, struct array_block *ab,
-		       void (*fn)(void *, const void *))
+		       void (*fn)(void *, const void *, unsigned))
 {
-	unsigned i, nr_entries = le32_to_cpu(ab->nr_entries);
-
-	for (i = 0; i < nr_entries; i++)
-		fn(info->value_type.context, element_at(info, ab, i));
+	unsigned nr_entries = le32_to_cpu(ab->nr_entries);
+	fn(info->value_type.context, element_at(info, ab, 0), nr_entries);
 }
 
 /*
@@ -175,19 +173,18 @@ static int alloc_ablock(struct dm_array_info *info, size_t size_of_block,
 static void fill_ablock(struct dm_array_info *info, struct array_block *ab,
 			const void *value, unsigned new_nr)
 {
-	unsigned i;
-	uint32_t nr_entries;
+	uint32_t nr_entries, delta, i;	/* delta: number of slots being filled */
 	struct dm_btree_value_type *vt = &info->value_type;
 
 	BUG_ON(new_nr > le32_to_cpu(ab->max_entries));
 	BUG_ON(new_nr < le32_to_cpu(ab->nr_entries));
 
 	nr_entries = le32_to_cpu(ab->nr_entries);
-	for (i = nr_entries; i < new_nr; i++) {
-		if (vt->inc)
-			vt->inc(vt->context, value);
+	delta = new_nr - nr_entries;
+	for (i = nr_entries; i < new_nr; i++)
 		memcpy(element_at(info, ab, i), value, vt->size);
-	}
+	if (vt->inc)	/* 'value' is a single element: inc the delta copies just stored */
+		vt->inc(vt->context, element_at(info, ab, nr_entries), delta);
 	ab->nr_entries = cpu_to_le32(new_nr);
 }
 
@@ -199,17 +196,16 @@ static void fill_ablock(struct dm_array_info *info, struct array_block *ab,
 static void trim_ablock(struct dm_array_info *info, struct array_block *ab,
 			unsigned new_nr)
 {
-	unsigned i;
-	uint32_t nr_entries;
+	uint32_t nr_entries, delta;
 	struct dm_btree_value_type *vt = &info->value_type;
 
 	BUG_ON(new_nr > le32_to_cpu(ab->max_entries));
 	BUG_ON(new_nr > le32_to_cpu(ab->nr_entries));
 
 	nr_entries = le32_to_cpu(ab->nr_entries);
-	for (i = nr_entries; i > new_nr; i--)
-		if (vt->dec)
-			vt->dec(vt->context, element_at(info, ab, i - 1));
+	delta = nr_entries - new_nr;	/* entries [new_nr, nr_entries) are dropped */
+	if (vt->dec)
+		vt->dec(vt->context, element_at(info, ab, new_nr), delta);
 	ab->nr_entries = cpu_to_le32(new_nr);
 }
 
@@ -573,16 +569,17 @@ static int grow(struct resize *resize)
  * These are the value_type functions for the btree elements, which point
  * to array blocks.
  */
-static void block_inc(void *context, const void *value)
+static void block_inc(void *context, const void *value, unsigned count)
 {
-	__le64 block_le;
+	const __le64 *block_le = value;
 	struct dm_array_info *info = context;
+	unsigned i;
 
-	memcpy(&block_le, value, sizeof(block_le));
-	dm_tm_inc(info->btree_info.tm, le64_to_cpu(block_le));
+	for (i = 0; i < count; i++, block_le++)
+		dm_tm_inc(info->btree_info.tm, le64_to_cpu(*block_le));
 }
 
-static void block_dec(void *context, const void *value)
+static void __block_dec(void *context, const void *value)
 {
 	int r;
 	uint64_t b;
@@ -621,6 +618,13 @@ static void block_dec(void *context, const void *value)
 	dm_tm_dec(info->btree_info.tm, b);
 }
 
+static void block_dec(void *context, const void *value, unsigned count)
+{
+	unsigned i;
+	for (i = 0; i < count; i++, value += sizeof(__le64))
+		__block_dec(context, value);
+}
+
 static int block_equal(void *context, const void *value1, const void *value2)
 {
 	return !memcmp(value1, value2, sizeof(__le64));
@@ -711,7 +715,7 @@ static int populate_ablock_with_values(struct dm_array_info *info, struct array_
 			return r;
 
 		if (vt->inc)
-			vt->inc(vt->context, element_at(info, ab, i));
+			vt->inc(vt->context, element_at(info, ab, i), 1);
 	}
 
 	ab->nr_entries = cpu_to_le32(new_nr);
@@ -822,9 +826,9 @@ static int array_set_value(struct dm_array_info *info, dm_block_t root,
 	old_value = element_at(info, ab, entry);
 	if (vt->dec &&
 	    (!vt->equal || !vt->equal(vt->context, old_value, value))) {
-		vt->dec(vt->context, old_value);
+		vt->dec(vt->context, old_value, 1);
 		if (vt->inc)
-			vt->inc(vt->context, value);
+			vt->inc(vt->context, value, 1);
 	}
 
 	memcpy(old_value, value, info->value_type.size);
diff --git a/drivers/md/persistent-data/dm-btree-internal.h b/drivers/md/persistent-data/dm-btree-internal.h
index b1788853a3552..893edb426dba1 100644
--- a/drivers/md/persistent-data/dm-btree-internal.h
+++ b/drivers/md/persistent-data/dm-btree-internal.h
@@ -144,4 +144,17 @@ extern struct dm_block_validator btree_node_validator;
 extern void init_le64_type(struct dm_transaction_manager *tm,
 			   struct dm_btree_value_type *vt);
 
+/*
+ * This returns a shadowed btree leaf that you may modify.  In practice
+ * this means overwrites only, since an insert could cause a node to
+ * be split.  Useful if you need access to the old value to calculate the
+ * new one.
+ *
+ * This only works with single level btrees.  The given key must be present in
+ * the tree, otherwise -EINVAL will be returned.
+ */
+int btree_get_overwrite_leaf(struct dm_btree_info *info, dm_block_t root,
+			     uint64_t key, int *index,
+			     dm_block_t *new_root, struct dm_block **leaf);
+
 #endif	/* DM_BTREE_INTERNAL_H */
diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c
index eff04fa23dfad..b34af195bf2a2 100644
--- a/drivers/md/persistent-data/dm-btree-remove.c
+++ b/drivers/md/persistent-data/dm-btree-remove.c
@@ -544,7 +544,7 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
 
 		if (info->value_type.dec)
 			info->value_type.dec(info->value_type.context,
-					     value_ptr(n, index));
+					     value_ptr(n, index), 1);
 
 		delete_at(n, index);
 	}
@@ -653,7 +653,7 @@ static int remove_one(struct dm_btree_info *info, dm_block_t root,
 	if (k >= keys[last_level] && k < end_key) {
 		if (info->value_type.dec)
 			info->value_type.dec(info->value_type.context,
-					     value_ptr(n, index));
+					     value_ptr(n, index), 1);
 
 		delete_at(n, index);
 		keys[last_level] = k + 1ull;
diff --git a/drivers/md/persistent-data/dm-btree-spine.c b/drivers/md/persistent-data/dm-btree-spine.c
index 2061ab8655677..f5bd76ed8fe6d 100644
--- a/drivers/md/persistent-data/dm-btree-spine.c
+++ b/drivers/md/persistent-data/dm-btree-spine.c
@@ -236,22 +236,14 @@ dm_block_t shadow_root(struct shadow_spine *s)
 	return s->root;
 }
 
-static void le64_inc(void *context, const void *value_le)
+static void le64_inc(void *context, const void *value_le, unsigned count)
 {
-	struct dm_transaction_manager *tm = context;
-	__le64 v_le;
-
-	memcpy(&v_le, value_le, sizeof(v_le));
-	dm_tm_inc(tm, le64_to_cpu(v_le));
+	dm_tm_with_runs(context, value_le, count, dm_tm_inc_range);
 }
 
-static void le64_dec(void *context, const void *value_le)
+static void le64_dec(void *context, const void *value_le, unsigned count)
 {
-	struct dm_transaction_manager *tm = context;
-	__le64 v_le;
-
-	memcpy(&v_le, value_le, sizeof(v_le));
-	dm_tm_dec(tm, le64_to_cpu(v_le));
+	dm_tm_with_runs(context, value_le, count, dm_tm_dec_range);
 }
 
 static int le64_equal(void *context, const void *value1_le, const void *value2_le)
diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c
index 18282932bedce..0703ca7a7d9a4 100644
--- a/drivers/md/persistent-data/dm-btree.c
+++ b/drivers/md/persistent-data/dm-btree.c
@@ -71,15 +71,13 @@ static int upper_bound(struct btree_node *n, uint64_t key)
 void inc_children(struct dm_transaction_manager *tm, struct btree_node *n,
 		  struct dm_btree_value_type *vt)
 {
-	unsigned i;
 	uint32_t nr_entries = le32_to_cpu(n->header.nr_entries);
 
 	if (le32_to_cpu(n->header.flags) & INTERNAL_NODE)
-		for (i = 0; i < nr_entries; i++)
-			dm_tm_inc(tm, value64(n, i));
+		dm_tm_with_runs(tm, value_ptr(n, 0), nr_entries, dm_tm_inc_range);
+
 	else if (vt->inc)
-		for (i = 0; i < nr_entries; i++)
-			vt->inc(vt->context, value_ptr(n, i));
+		vt->inc(vt->context, value_ptr(n, 0), nr_entries);
 }
 
 static int insert_at(size_t value_size, struct btree_node *node, unsigned index,
@@ -318,13 +316,9 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
 				goto out;
 
 		} else {
-			if (info->value_type.dec) {
-				unsigned i;
-
-				for (i = 0; i < f->nr_children; i++)
-					info->value_type.dec(info->value_type.context,
-							     value_ptr(f->n, i));
-			}
+			if (info->value_type.dec)
+				info->value_type.dec(info->value_type.context,
+						     value_ptr(f->n, 0), f->nr_children);
 			pop_frame(s);
 		}
 	}
@@ -1146,6 +1140,77 @@ static int btree_insert_raw(struct shadow_spine *s, dm_block_t root,
 	return 0;
 }
 
+static int __btree_get_overwrite_leaf(struct shadow_spine *s, dm_block_t root,
+				      uint64_t key, int *index)
+{
+	int r, i = -1;
+	struct btree_node *node;
+
+	*index = 0;
+	for (;;) {
+		r = shadow_step(s, root, &s->info->value_type);
+		if (r < 0)
+			return r;
+
+		node = dm_block_data(shadow_current(s));
+
+		/*
+		 * We have to patch up the parent node, ugly, but I don't
+		 * see a way to do this automatically as part of the spine
+		 * op.
+		 */
+		if (shadow_has_parent(s) && i >= 0) {
+			__le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));
+
+			__dm_bless_for_disk(&location);
+			memcpy_disk(value_ptr(dm_block_data(shadow_parent(s)), i),
+				    &location, sizeof(__le64));
+		}
+
+		node = dm_block_data(shadow_current(s));
+		i = lower_bound(node, key);
+
+		BUG_ON(i < 0);
+		BUG_ON(i >= le32_to_cpu(node->header.nr_entries));
+
+		if (le32_to_cpu(node->header.flags) & LEAF_NODE) {
+			if (key != le64_to_cpu(node->keys[i]))
+				return -EINVAL;
+			break;
+		}
+
+		root = value64(node, i);
+	}
+
+	*index = i;
+	return 0;
+}
+
+int btree_get_overwrite_leaf(struct dm_btree_info *info, dm_block_t root,
+			     uint64_t key, int *index,
+			     dm_block_t *new_root, struct dm_block **leaf)
+{
+	int r;
+	struct shadow_spine spine;
+
+	BUG_ON(info->levels > 1);
+	init_shadow_spine(&spine, info);
+	r = __btree_get_overwrite_leaf(&spine, root, key, index);
+	if (!r) {
+		*new_root = shadow_root(&spine);
+		*leaf = shadow_current(&spine);
+
+		/*
+		 * Decrement the count so exit_shadow_spine() doesn't
+		 * unlock the leaf.
+		 */
+		spine.count--;
+	}
+	exit_shadow_spine(&spine);
+
+	return r;
+}
+
 static bool need_insert(struct btree_node *node, uint64_t *keys,
 			unsigned level, unsigned index)
 {
@@ -1222,7 +1287,7 @@ static int insert(struct dm_btree_info *info, dm_block_t root,
 			     value_ptr(n, index),
 			     value))) {
 			info->value_type.dec(info->value_type.context,
-					     value_ptr(n, index));
+					     value_ptr(n, index), 1);
 		}
 		memcpy_disk(value_ptr(n, index),
 			    value, info->value_type.size);
diff --git a/drivers/md/persistent-data/dm-btree.h b/drivers/md/persistent-data/dm-btree.h
index 3dc5bb1a4748b..d2ae5aa4d00b6 100644
--- a/drivers/md/persistent-data/dm-btree.h
+++ b/drivers/md/persistent-data/dm-btree.h
@@ -51,21 +51,21 @@ struct dm_btree_value_type {
 	 */
 
 	/*
-	 * The btree is making a duplicate of the value, for instance
+	 * The btree is making a duplicate of a run of values, for instance
 	 * because previously-shared btree nodes have now diverged.
 	 * @value argument is the new copy that the copy function may modify.
 	 * (Probably it just wants to increment a reference count
 	 * somewhere.) This method is _not_ called for insertion of a new
 	 * value: It is assumed the ref count is already 1.
 	 */
-	void (*inc)(void *context, const void *value);
+	void (*inc)(void *context, const void *value, unsigned count);
 
 	/*
-	 * This value is being deleted.  The btree takes care of freeing
+	 * These values are being deleted.  The btree takes care of freeing
 	 * the memory pointed to by @value.  Often the del function just
-	 * needs to decrement a reference count somewhere.
+	 * needs to decrement reference counts somewhere.
 	 */
-	void (*dec)(void *context, const void *value);
+	void (*dec)(void *context, const void *value, unsigned count);
 
 	/*
 	 * A test for equality between two values.  When a value is
diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c
index a213bf11738fb..5552941912afd 100644
--- a/drivers/md/persistent-data/dm-space-map-common.c
+++ b/drivers/md/persistent-data/dm-space-map-common.c
@@ -6,6 +6,7 @@
 
 #include "dm-space-map-common.h"
 #include "dm-transaction-manager.h"
+#include "dm-btree-internal.h"
 
 #include <linux/bitops.h>
 #include <linux/device-mapper.h>
@@ -409,12 +410,13 @@ int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll,
 	return r;
 }
 
-static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b,
-			int (*mutator)(void *context, uint32_t old, uint32_t *new),
-			void *context, enum allocation_event *ev)
+/*----------------------------------------------------------------*/
+
+int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
+		 uint32_t ref_count, int32_t *nr_allocations)
 {
 	int r;
-	uint32_t bit, old, ref_count;
+	uint32_t bit, old;
 	struct dm_block *nb;
 	dm_block_t index = b;
 	struct disk_index_entry ie_disk;
@@ -433,10 +435,9 @@ static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b,
 		return r;
 	}
 	ie_disk.blocknr = cpu_to_le64(dm_block_location(nb));
-
 	bm_le = dm_bitmap_data(nb);
-	old = sm_lookup_bitmap(bm_le, bit);
 
+	old = sm_lookup_bitmap(bm_le, bit);
 	if (old > 2) {
 		r = sm_ll_lookup_big_ref_count(ll, b, &old);
 		if (r < 0) {
@@ -445,7 +446,6 @@ static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b,
 		}
 	}
 
-	r = mutator(context, old, &ref_count);
 	if (r) {
 		dm_tm_unlock(ll->tm, nb);
 		return r;
@@ -453,7 +453,6 @@ static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b,
 
 	if (ref_count <= 2) {
 		sm_set_bitmap(bm_le, bit, ref_count);
-
 		dm_tm_unlock(ll->tm, nb);
 
 		if (old > 2) {
@@ -480,62 +479,459 @@ static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b,
 	}
 
 	if (ref_count && !old) {
-		*ev = SM_ALLOC;
+		*nr_allocations = 1;
 		ll->nr_allocated++;
 		le32_add_cpu(&ie_disk.nr_free, -1);
 		if (le32_to_cpu(ie_disk.none_free_before) == bit)
 			ie_disk.none_free_before = cpu_to_le32(bit + 1);
 
 	} else if (old && !ref_count) {
-		*ev = SM_FREE;
+		*nr_allocations = -1;
 		ll->nr_allocated--;
 		le32_add_cpu(&ie_disk.nr_free, 1);
 		ie_disk.none_free_before = cpu_to_le32(min(le32_to_cpu(ie_disk.none_free_before), bit));
 	} else
-		*ev = SM_NONE;
+		*nr_allocations = 0;
 
 	return ll->save_ie(ll, index, &ie_disk);
 }
 
-static int set_ref_count(void *context, uint32_t old, uint32_t *new)
+/*----------------------------------------------------------------*/
+
+/*
+ * Holds useful intermediate results for the range based inc and dec
+ * operations.
+ */
+struct inc_context {
+	struct disk_index_entry ie_disk;
+	struct dm_block *bitmap_block;
+	void *bitmap;
+
+	struct dm_block *overflow_leaf;
+};
+
+static inline void init_inc_context(struct inc_context *ic)
+{
+	ic->bitmap_block = NULL;
+	ic->bitmap = NULL;
+	ic->overflow_leaf = NULL;
+}
+
+static inline void exit_inc_context(struct ll_disk *ll, struct inc_context *ic)
+{
+	if (ic->bitmap_block)
+		dm_tm_unlock(ll->tm, ic->bitmap_block);
+	if (ic->overflow_leaf)
+		dm_tm_unlock(ll->tm, ic->overflow_leaf);
+}
+
+static inline void reset_inc_context(struct ll_disk *ll, struct inc_context *ic)
+{
+	exit_inc_context(ll, ic);
+	init_inc_context(ic);
+}
+
+/*
+ * Confirms a btree node contains a particular key at an index.
+ */
+static bool contains_key(struct btree_node *n, uint64_t key, int index)
+{
+	return index >= 0 &&
+		index < le32_to_cpu(n->header.nr_entries) &&
+		le64_to_cpu(n->keys[index]) == key;
+}
+
+static int __sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic)
 {
-	*new = *((uint32_t *) context);
+	int r;
+	int index;
+	struct btree_node *n;
+	__le32 *v_ptr;
+	uint32_t rc;
+
+	/*
+	 * bitmap_block needs to be unlocked because getting the
+	 * overflow_leaf may need to allocate, and thus use the space map.
+	 */
+	reset_inc_context(ll, ic);
+
+	r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root,
+				     b, &index, &ll->ref_count_root, &ic->overflow_leaf);
+	if (r < 0)
+		return r;
+
+	n = dm_block_data(ic->overflow_leaf);
+
+	if (!contains_key(n, b, index)) {
+		DMERR("overflow btree is missing an entry");
+		return -EINVAL;
+	}
+
+	v_ptr = value_ptr(n, index);
+	rc = le32_to_cpu(*v_ptr) + 1;
+	*v_ptr = cpu_to_le32(rc);
+
 	return 0;
 }
 
-int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
-		 uint32_t ref_count, enum allocation_event *ev)
+static int sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic)
+{
+	int index;
+	struct btree_node *n;
+	__le32 *v_ptr;
+	uint32_t rc;
+
+	/*
+	 * Do we already have the correct overflow leaf?
+	 */
+	if (ic->overflow_leaf) {
+		n = dm_block_data(ic->overflow_leaf);
+		index = lower_bound(n, b);
+		if (contains_key(n, b, index)) {
+			v_ptr = value_ptr(n, index);
+			rc = le32_to_cpu(*v_ptr) + 1;
+			*v_ptr = cpu_to_le32(rc);
+
+			return 0;
+		}
+	}
+
+	return __sm_ll_inc_overflow(ll, b, ic);
+}
+
+static inline int shadow_bitmap(struct ll_disk *ll, struct inc_context *ic)
+{
+	int r, inc;
+	r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ic->ie_disk.blocknr),
+			       &dm_sm_bitmap_validator, &ic->bitmap_block, &inc);
+	if (r < 0) {
+		DMERR("dm_tm_shadow_block() failed");
+		return r;
+	}
+	ic->ie_disk.blocknr = cpu_to_le64(dm_block_location(ic->bitmap_block));
+	ic->bitmap = dm_bitmap_data(ic->bitmap_block);
+	return 0;
+}
+
+/*
+ * Once shadow_bitmap has been called, which always happens at the start of inc/dec,
+ * we can reopen the bitmap with a simple write lock, rather than re-calling
+ * dm_tm_shadow_block().
+ */
+static inline int ensure_bitmap(struct ll_disk *ll, struct inc_context *ic)
+{
+	if (!ic->bitmap_block) {
+		int r = dm_bm_write_lock(dm_tm_get_bm(ll->tm), le64_to_cpu(ic->ie_disk.blocknr),
+					 &dm_sm_bitmap_validator, &ic->bitmap_block);
+		if (r) {
+			DMERR("unable to re-get write lock for bitmap");
+			return r;
+		}
+		ic->bitmap = dm_bitmap_data(ic->bitmap_block);
+	}
+
+	return 0;
+}
+
+/*
+ * Loops round incrementing entries in a single bitmap.
+ */
+static inline int sm_ll_inc_bitmap(struct ll_disk *ll, dm_block_t b,
+				   uint32_t bit, uint32_t bit_end,
+				   int32_t *nr_allocations, dm_block_t *new_b,
+				   struct inc_context *ic)
+{
+	int r;
+	__le32 le_rc;
+	uint32_t old;
+
+	for (; bit != bit_end; bit++, b++) {
+		/*
+		 * We only need to drop the bitmap if we need to find a new btree
+		 * leaf for the overflow.  So if it was dropped last iteration,
+		 * we now re-get it.
+		 */
+		r = ensure_bitmap(ll, ic);
+		if (r)
+			return r;
+
+		old = sm_lookup_bitmap(ic->bitmap, bit);
+		switch (old) {
+		case 0:
+			/* inc bitmap, adjust nr_allocated */
+			sm_set_bitmap(ic->bitmap, bit, 1);
+			(*nr_allocations)++;
+			ll->nr_allocated++;
+			le32_add_cpu(&ic->ie_disk.nr_free, -1);
+			if (le32_to_cpu(ic->ie_disk.none_free_before) == bit)
+				ic->ie_disk.none_free_before = cpu_to_le32(bit + 1);
+			break;
+
+		case 1:
+			/* inc bitmap */
+			sm_set_bitmap(ic->bitmap, bit, 2);
+			break;
+
+		case 2:
+			/* inc bitmap and insert into overflow */
+			sm_set_bitmap(ic->bitmap, bit, 3);
+			reset_inc_context(ll, ic);
+
+			le_rc = cpu_to_le32(3);
+			__dm_bless_for_disk(&le_rc);
+			r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root,
+					    &b, &le_rc, &ll->ref_count_root);
+			if (r < 0) {
+				DMERR("ref count insert failed");
+				return r;
+			}
+			break;
+
+		default:
+			/*
+			 * inc within the overflow tree only.
+			 */
+			r = sm_ll_inc_overflow(ll, b, ic);
+			if (r < 0)
+				return r;
+		}
+	}
+
+	*new_b = b;
+	return 0;
+}
+
+/*
+ * Finds a bitmap that contains entries in the block range, and increments
+ * them.
+ */
+static int __sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e,
+		       int32_t *nr_allocations, dm_block_t *new_b)
 {
-	return sm_ll_mutate(ll, b, set_ref_count, &ref_count, ev);
+	int r;
+	struct inc_context ic;
+	uint32_t bit, bit_end;
+	dm_block_t index = b;
+
+	init_inc_context(&ic);
+
+	bit = do_div(index, ll->entries_per_block);
+	r = ll->load_ie(ll, index, &ic.ie_disk);
+	if (r < 0)
+		return r;
+
+	r = shadow_bitmap(ll, &ic);
+	if (r)
+		return r;
+
+	bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block);
+	r = sm_ll_inc_bitmap(ll, b, bit, bit_end, nr_allocations, new_b, &ic);
+
+	exit_inc_context(ll, &ic);
+
+	if (r)
+		return r;
+
+	return ll->save_ie(ll, index, &ic.ie_disk);
 }
 
-static int inc_ref_count(void *context, uint32_t old, uint32_t *new)
+int sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e,
+	      int32_t *nr_allocations)
 {
-	*new = old + 1;
+	*nr_allocations = 0;
+	while (b != e) {
+		int r = __sm_ll_inc(ll, b, e, nr_allocations, &b);
+		if (r)
+			return r;
+	}
+
 	return 0;
 }
 
-int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev)
+/*----------------------------------------------------------------*/
+
+static int __sm_ll_del_overflow(struct ll_disk *ll, dm_block_t b,
+				struct inc_context *ic)
 {
-	return sm_ll_mutate(ll, b, inc_ref_count, NULL, ev);
+	reset_inc_context(ll, ic);
+	return dm_btree_remove(&ll->ref_count_info, ll->ref_count_root,
+			       &b, &ll->ref_count_root);
 }
 
-static int dec_ref_count(void *context, uint32_t old, uint32_t *new)
+static int __sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b,
+				struct inc_context *ic, uint32_t *old_rc)
 {
-	if (!old) {
-		DMERR_LIMIT("unable to decrement a reference count below 0");
+	int r;
+	int index = -1;
+	struct btree_node *n;
+	__le32 *v_ptr;
+	uint32_t rc;
+
+	reset_inc_context(ll, ic);
+	r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root,
+				     b, &index, &ll->ref_count_root, &ic->overflow_leaf);
+	if (r < 0)
+		return r;
+
+	n = dm_block_data(ic->overflow_leaf);
+
+	if (!contains_key(n, b, index)) {
+		DMERR("overflow btree is missing an entry");
 		return -EINVAL;
 	}
 
-	*new = old - 1;
+	v_ptr = value_ptr(n, index);
+	rc = le32_to_cpu(*v_ptr);
+	*old_rc = rc;
+
+	if (rc == 3) {
+		return __sm_ll_del_overflow(ll, b, ic);
+	} else {
+		rc--;
+		*v_ptr = cpu_to_le32(rc);
+		return 0;
+	}
+}
+
+static int sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b,
+			      struct inc_context *ic, uint32_t *old_rc)
+{
+	/*
+	 * Do we already have the correct overflow leaf?
+	 */
+	if (ic->overflow_leaf) {
+		int index;
+		struct btree_node *n;
+		__le32 *v_ptr;
+		uint32_t rc;
+
+		n = dm_block_data(ic->overflow_leaf);
+		index = lower_bound(n, b);
+		if (contains_key(n, b, index)) {
+			v_ptr = value_ptr(n, index);
+			rc = le32_to_cpu(*v_ptr);
+			*old_rc = rc;
+
+			if (rc > 3) {
+				rc--;
+				*v_ptr = cpu_to_le32(rc);
+				return 0;
+			} else {
+				return __sm_ll_del_overflow(ll, b, ic);
+			}
+
+		}
+	}
+
+	return __sm_ll_dec_overflow(ll, b, ic, old_rc);
+}
+
+/*
+ * Loops round decrementing entries in a single bitmap.
+ */
+static inline int sm_ll_dec_bitmap(struct ll_disk *ll, dm_block_t b,
+				   uint32_t bit, uint32_t bit_end,
+				   struct inc_context *ic,
+				   int32_t *nr_allocations, dm_block_t *new_b)
+{
+	int r;
+	uint32_t old;
+
+	for (; bit != bit_end; bit++, b++) {
+		/*
+		 * We only need to drop the bitmap if we need to find a new btree
+		 * leaf for the overflow.  So if it was dropped last iteration,
+		 * we now re-get it.
+		 */
+		r = ensure_bitmap(ll, ic);
+		if (r)
+			return r;
+
+		old = sm_lookup_bitmap(ic->bitmap, bit);
+		switch (old) {
+		case 0:
+			DMERR("unable to decrement block");
+			return -EINVAL;
+
+		case 1:
+			/* dec bitmap to 0 (block becomes free), adjust nr_allocated */
+			sm_set_bitmap(ic->bitmap, bit, 0);
+			(*nr_allocations)--;
+			ll->nr_allocated--;
+			le32_add_cpu(&ic->ie_disk.nr_free, 1);
+			ic->ie_disk.none_free_before =
+				cpu_to_le32(min(le32_to_cpu(ic->ie_disk.none_free_before), bit));
+			break;
+
+		case 2:
+			/* dec bitmap only, the overflow tree is not involved */
+			sm_set_bitmap(ic->bitmap, bit, 1);
+			break;
+
+		case 3:
+			r = sm_ll_dec_overflow(ll, b, ic, &old);
+			if (r < 0)
+				return r;
+
+			if (old == 3) {
+				r = ensure_bitmap(ll, ic);
+				if (r)
+					return r;
+
+				sm_set_bitmap(ic->bitmap, bit, 2);
+			}
+			break;
+		}
+	}
+
+	*new_b = b;
+	return 0;
+}
 
-int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev)
+static int __sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e,
+		       int32_t *nr_allocations, dm_block_t *new_b)
+{
+	int r;
+	uint32_t bit, bit_end;
+	struct inc_context ic;
+	dm_block_t index = b;
+
+	init_inc_context(&ic);
+
+	bit = do_div(index, ll->entries_per_block);
+	r = ll->load_ie(ll, index, &ic.ie_disk);
+	if (r < 0)
+		return r;
+
+	r = shadow_bitmap(ll, &ic);
+	if (r)
+		return r;
+
+	bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block);
+	r = sm_ll_dec_bitmap(ll, b, bit, bit_end, &ic, nr_allocations, new_b);
+	exit_inc_context(ll, &ic);
+
+	if (r)
+		return r;
+
+	return ll->save_ie(ll, index, &ic.ie_disk);
+}
+
+int sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e,
+	      int32_t *nr_allocations)
 {
-	return sm_ll_mutate(ll, b, dec_ref_count, NULL, ev);
+	*nr_allocations = 0;
+	while (b != e) {
+		int r = __sm_ll_dec(ll, b, e, nr_allocations, &b);
+		if (r)
+			return r;
+	}
+
+	return 0;
 }
 
+/*----------------------------------------------------------------*/
+
 int sm_ll_commit(struct ll_disk *ll)
 {
 	int r = 0;
diff --git a/drivers/md/persistent-data/dm-space-map-common.h b/drivers/md/persistent-data/dm-space-map-common.h
index 87e17909ef521..4a22183e78b73 100644
--- a/drivers/md/persistent-data/dm-space-map-common.h
+++ b/drivers/md/persistent-data/dm-space-map-common.h
@@ -96,12 +96,6 @@ struct disk_bitmap_header {
 	__le64 blocknr;
 } __attribute__ ((packed, aligned(8)));
 
-enum allocation_event {
-	SM_NONE,
-	SM_ALLOC,
-	SM_FREE,
-};
-
 /*----------------------------------------------------------------*/
 
 int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks);
@@ -111,9 +105,15 @@ int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin,
 			  dm_block_t end, dm_block_t *result);
 int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll,
 	                         dm_block_t begin, dm_block_t end, dm_block_t *result);
-int sm_ll_insert(struct ll_disk *ll, dm_block_t b, uint32_t ref_count, enum allocation_event *ev);
-int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev);
-int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev);
+
+/*
+ * The next three functions return (via nr_allocations) the net number of
+ * allocations that were made.  This number may be negative if there were
+ * more frees than allocs.
+ */
+int sm_ll_insert(struct ll_disk *ll, dm_block_t b, uint32_t ref_count, int32_t *nr_allocations);
+int sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e, int32_t *nr_allocations);
+int sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e, int32_t *nr_allocations);
 int sm_ll_commit(struct ll_disk *ll);
 
 int sm_ll_new_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm);
diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c
index 4f8069bb04816..d0a8d5e73c280 100644
--- a/drivers/md/persistent-data/dm-space-map-disk.c
+++ b/drivers/md/persistent-data/dm-space-map-disk.c
@@ -87,76 +87,39 @@ static int sm_disk_set_count(struct dm_space_map *sm, dm_block_t b,
 			     uint32_t count)
 {
 	int r;
-	uint32_t old_count;
-	enum allocation_event ev;
+	int32_t nr_allocations;
 	struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
 
-	r = sm_ll_insert(&smd->ll, b, count, &ev);
+	r = sm_ll_insert(&smd->ll, b, count, &nr_allocations);
 	if (!r) {
-		switch (ev) {
-		case SM_NONE:
-			break;
-
-		case SM_ALLOC:
-			/*
-			 * This _must_ be free in the prior transaction
-			 * otherwise we've lost atomicity.
-			 */
-			smd->nr_allocated_this_transaction++;
-			break;
-
-		case SM_FREE:
-			/*
-			 * It's only free if it's also free in the last
-			 * transaction.
-			 */
-			r = sm_ll_lookup(&smd->old_ll, b, &old_count);
-			if (r)
-				return r;
-
-			if (!old_count)
-				smd->nr_allocated_this_transaction--;
-			break;
-		}
+		smd->nr_allocated_this_transaction += nr_allocations;
 	}
 
 	return r;
 }
 
-static int sm_disk_inc_block(struct dm_space_map *sm, dm_block_t b)
+static int sm_disk_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
 {
 	int r;
-	enum allocation_event ev;
+	int32_t nr_allocations;
 	struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
 
-	r = sm_ll_inc(&smd->ll, b, &ev);
-	if (!r && (ev == SM_ALLOC))
-		/*
-		 * This _must_ be free in the prior transaction
-		 * otherwise we've lost atomicity.
-		 */
-		smd->nr_allocated_this_transaction++;
+	r = sm_ll_inc(&smd->ll, b, e, &nr_allocations);
+	if (!r)
+		smd->nr_allocated_this_transaction += nr_allocations;
 
 	return r;
 }
 
-static int sm_disk_dec_block(struct dm_space_map *sm, dm_block_t b)
+static int sm_disk_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
 {
 	int r;
-	uint32_t old_count;
-	enum allocation_event ev;
+	int32_t nr_allocations;
 	struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
 
-	r = sm_ll_dec(&smd->ll, b, &ev);
-	if (!r && (ev == SM_FREE)) {
-		/*
-		 * It's only free if it's also free in the last
-		 * transaction.
-		 */
-		r = sm_ll_lookup(&smd->old_ll, b, &old_count);
-		if (!r && !old_count)
-			smd->nr_allocated_this_transaction--;
-	}
+	r = sm_ll_dec(&smd->ll, b, e, &nr_allocations);
+	if (!r)
+		smd->nr_allocated_this_transaction += nr_allocations;
 
 	return r;
 }
@@ -164,7 +127,7 @@ static int sm_disk_dec_block(struct dm_space_map *sm, dm_block_t b)
 static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b)
 {
 	int r;
-	enum allocation_event ev;
+	int32_t nr_allocations;
 	struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
 
 	/*
@@ -183,10 +146,9 @@ static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b)
 		return r;
 
 	smd->begin = *b + 1;
-	r = sm_ll_inc(&smd->ll, *b, &ev);
+	r = sm_ll_inc(&smd->ll, *b, *b + 1, &nr_allocations);
 	if (!r) {
-		BUG_ON(ev != SM_ALLOC);
-		smd->nr_allocated_this_transaction++;
+		smd->nr_allocated_this_transaction += nr_allocations;
 	}
 
 	return r;
@@ -242,8 +204,8 @@ static struct dm_space_map ops = {
 	.get_count = sm_disk_get_count,
 	.count_is_more_than_one = sm_disk_count_is_more_than_one,
 	.set_count = sm_disk_set_count,
-	.inc_block = sm_disk_inc_block,
-	.dec_block = sm_disk_dec_block,
+	.inc_blocks = sm_disk_inc_blocks,
+	.dec_blocks = sm_disk_dec_blocks,
 	.new_block = sm_disk_new_block,
 	.commit = sm_disk_commit,
 	.root_size = sm_disk_root_size,
diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c
index da439ac857963..392ae26134a4e 100644
--- a/drivers/md/persistent-data/dm-space-map-metadata.c
+++ b/drivers/md/persistent-data/dm-space-map-metadata.c
@@ -89,7 +89,8 @@ enum block_op_type {
 
 struct block_op {
 	enum block_op_type type;
-	dm_block_t block;
+	dm_block_t b;
+	dm_block_t e;
 };
 
 struct bop_ring_buffer {
@@ -116,7 +117,7 @@ static unsigned brb_next(struct bop_ring_buffer *brb, unsigned old)
 }
 
 static int brb_push(struct bop_ring_buffer *brb,
-		    enum block_op_type type, dm_block_t b)
+		    enum block_op_type type, dm_block_t b, dm_block_t e)
 {
 	struct block_op *bop;
 	unsigned next = brb_next(brb, brb->end);
@@ -130,7 +131,8 @@ static int brb_push(struct bop_ring_buffer *brb,
 
 	bop = brb->bops + brb->end;
 	bop->type = type;
-	bop->block = b;
+	bop->b = b;
+	bop->e = e;
 
 	brb->end = next;
 
@@ -145,9 +147,7 @@ static int brb_peek(struct bop_ring_buffer *brb, struct block_op *result)
 		return -ENODATA;
 
 	bop = brb->bops + brb->begin;
-	result->type = bop->type;
-	result->block = bop->block;
-
+	memcpy(result, bop, sizeof(*result));
 	return 0;
 }
 
@@ -178,10 +178,9 @@ struct sm_metadata {
 	struct threshold threshold;
 };
 
-static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b)
+static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b, dm_block_t e)
 {
-	int r = brb_push(&smm->uncommitted, type, b);
-
+	int r = brb_push(&smm->uncommitted, type, b, e);
 	if (r) {
 		DMERR("too many recursive allocations");
 		return -ENOMEM;
@@ -193,15 +192,15 @@ static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t
 static int commit_bop(struct sm_metadata *smm, struct block_op *op)
 {
 	int r = 0;
-	enum allocation_event ev;
+	int32_t nr_allocations;
 
 	switch (op->type) {
 	case BOP_INC:
-		r = sm_ll_inc(&smm->ll, op->block, &ev);
+		r = sm_ll_inc(&smm->ll, op->b, op->e, &nr_allocations);
 		break;
 
 	case BOP_DEC:
-		r = sm_ll_dec(&smm->ll, op->block, &ev);
+		r = sm_ll_dec(&smm->ll, op->b, op->e, &nr_allocations);
 		break;
 	}
 
@@ -314,7 +313,7 @@ static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b,
 	     i = brb_next(&smm->uncommitted, i)) {
 		struct block_op *op = smm->uncommitted.bops + i;
 
-		if (op->block != b)
+		if (b < op->b || b >= op->e)
 			continue;
 
 		switch (op->type) {
@@ -355,7 +354,7 @@ static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm,
 
 		struct block_op *op = smm->uncommitted.bops + i;
 
-		if (op->block != b)
+		if (b < op->b || b >= op->e)
 			continue;
 
 		switch (op->type) {
@@ -393,7 +392,7 @@ static int sm_metadata_set_count(struct dm_space_map *sm, dm_block_t b,
 				 uint32_t count)
 {
 	int r, r2;
-	enum allocation_event ev;
+	int32_t nr_allocations;
 	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 
 	if (smm->recursion_count) {
@@ -402,40 +401,42 @@ static int sm_metadata_set_count(struct dm_space_map *sm, dm_block_t b,
 	}
 
 	in(smm);
-	r = sm_ll_insert(&smm->ll, b, count, &ev);
+	r = sm_ll_insert(&smm->ll, b, count, &nr_allocations);
 	r2 = out(smm);
 
 	return combine_errors(r, r2);
 }
 
-static int sm_metadata_inc_block(struct dm_space_map *sm, dm_block_t b)
+static int sm_metadata_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
 {
 	int r, r2 = 0;
-	enum allocation_event ev;
+	int32_t nr_allocations;
 	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 
-	if (recursing(smm))
-		r = add_bop(smm, BOP_INC, b);
-	else {
+	if (recursing(smm)) {
+		r = add_bop(smm, BOP_INC, b, e);
+		if (r)
+			return r;
+	} else {
 		in(smm);
-		r = sm_ll_inc(&smm->ll, b, &ev);
+		r = sm_ll_inc(&smm->ll, b, e, &nr_allocations);
 		r2 = out(smm);
 	}
 
 	return combine_errors(r, r2);
 }
 
-static int sm_metadata_dec_block(struct dm_space_map *sm, dm_block_t b)
+static int sm_metadata_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
 {
 	int r, r2 = 0;
-	enum allocation_event ev;
+	int32_t nr_allocations;
 	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 
 	if (recursing(smm))
-		r = add_bop(smm, BOP_DEC, b);
+		r = add_bop(smm, BOP_DEC, b, e);
 	else {
 		in(smm);
-		r = sm_ll_dec(&smm->ll, b, &ev);
+		r = sm_ll_dec(&smm->ll, b, e, &nr_allocations);
 		r2 = out(smm);
 	}
 
@@ -445,7 +446,7 @@ static int sm_metadata_dec_block(struct dm_space_map *sm, dm_block_t b)
 static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b)
 {
 	int r, r2 = 0;
-	enum allocation_event ev;
+	int32_t nr_allocations;
 	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 
 	/*
@@ -466,10 +467,10 @@ static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b)
 	smm->begin = *b + 1;
 
 	if (recursing(smm))
-		r = add_bop(smm, BOP_INC, *b);
+		r = add_bop(smm, BOP_INC, *b, *b + 1);
 	else {
 		in(smm);
-		r = sm_ll_inc(&smm->ll, *b, &ev);
+		r = sm_ll_inc(&smm->ll, *b, *b + 1, &nr_allocations);
 		r2 = out(smm);
 	}
 
@@ -563,8 +564,8 @@ static const struct dm_space_map ops = {
 	.get_count = sm_metadata_get_count,
 	.count_is_more_than_one = sm_metadata_count_is_more_than_one,
 	.set_count = sm_metadata_set_count,
-	.inc_block = sm_metadata_inc_block,
-	.dec_block = sm_metadata_dec_block,
+	.inc_blocks = sm_metadata_inc_blocks,
+	.dec_blocks = sm_metadata_dec_blocks,
 	.new_block = sm_metadata_new_block,
 	.commit = sm_metadata_commit,
 	.root_size = sm_metadata_root_size,
@@ -648,18 +649,28 @@ static int sm_bootstrap_new_block(struct dm_space_map *sm, dm_block_t *b)
 	return 0;
 }
 
-static int sm_bootstrap_inc_block(struct dm_space_map *sm, dm_block_t b)
+static int sm_bootstrap_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
 {
+	int r;
 	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 
-	return add_bop(smm, BOP_INC, b);
+	r = add_bop(smm, BOP_INC, b, e);
+	if (r)
+		return r;
+
+	return 0;
 }
 
-static int sm_bootstrap_dec_block(struct dm_space_map *sm, dm_block_t b)
+static int sm_bootstrap_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
 {
+	int r;
 	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 
-	return add_bop(smm, BOP_DEC, b);
+	r = add_bop(smm, BOP_DEC, b, e);
+	if (r)
+		return r;
+
+	return 0;
 }
 
 static int sm_bootstrap_commit(struct dm_space_map *sm)
@@ -690,8 +701,8 @@ static const struct dm_space_map bootstrap_ops = {
 	.get_count = sm_bootstrap_get_count,
 	.count_is_more_than_one = sm_bootstrap_count_is_more_than_one,
 	.set_count = sm_bootstrap_set_count,
-	.inc_block = sm_bootstrap_inc_block,
-	.dec_block = sm_bootstrap_dec_block,
+	.inc_blocks = sm_bootstrap_inc_blocks,
+	.dec_blocks = sm_bootstrap_dec_blocks,
 	.new_block = sm_bootstrap_new_block,
 	.commit = sm_bootstrap_commit,
 	.root_size = sm_bootstrap_root_size,
@@ -703,7 +714,7 @@ static const struct dm_space_map bootstrap_ops = {
 
 static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
 {
-	int r, i;
+	int r;
 	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 	dm_block_t old_len = smm->ll.nr_blocks;
 
@@ -725,9 +736,7 @@ static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
 	 * allocate any new blocks.
 	 */
 	do {
-		for (i = old_len; !r && i < smm->begin; i++)
-			r = add_bop(smm, BOP_INC, i);
-
+		r = add_bop(smm, BOP_INC, old_len, smm->begin);
 		if (r)
 			goto out;
 
@@ -774,7 +783,6 @@ int dm_sm_metadata_create(struct dm_space_map *sm,
 			  dm_block_t superblock)
 {
 	int r;
-	dm_block_t i;
 	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 
 	smm->begin = superblock + 1;
@@ -799,9 +807,7 @@ int dm_sm_metadata_create(struct dm_space_map *sm,
 	 * Now we need to update the newly created data structures with the
 	 * allocated blocks that they were built from.
 	 */
-	for (i = superblock; !r && i < smm->begin; i++)
-		r = add_bop(smm, BOP_INC, i);
-
+	r = add_bop(smm, BOP_INC, superblock, smm->begin);
 	if (r)
 		return r;
 
diff --git a/drivers/md/persistent-data/dm-space-map.h b/drivers/md/persistent-data/dm-space-map.h
index 3e6d1153b7c4b..a015cd11f6e97 100644
--- a/drivers/md/persistent-data/dm-space-map.h
+++ b/drivers/md/persistent-data/dm-space-map.h
@@ -46,8 +46,8 @@ struct dm_space_map {
 
 	int (*commit)(struct dm_space_map *sm);
 
-	int (*inc_block)(struct dm_space_map *sm, dm_block_t b);
-	int (*dec_block)(struct dm_space_map *sm, dm_block_t b);
+	int (*inc_blocks)(struct dm_space_map *sm, dm_block_t b, dm_block_t e);
+	int (*dec_blocks)(struct dm_space_map *sm, dm_block_t b, dm_block_t e);
 
 	/*
 	 * new_block will increment the returned block.
@@ -117,14 +117,24 @@ static inline int dm_sm_commit(struct dm_space_map *sm)
 	return sm->commit(sm);
 }
 
+static inline int dm_sm_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
+{
+	return sm->inc_blocks(sm, b, e);
+}
+
 static inline int dm_sm_inc_block(struct dm_space_map *sm, dm_block_t b)
 {
-	return sm->inc_block(sm, b);
+	return dm_sm_inc_blocks(sm, b, b + 1);
+}
+
+static inline int dm_sm_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
+{
+	return sm->dec_blocks(sm, b, e);
 }
 
 static inline int dm_sm_dec_block(struct dm_space_map *sm, dm_block_t b)
 {
-	return sm->dec_block(sm, b);
+	return dm_sm_dec_blocks(sm, b, b + 1);
 }
 
 static inline int dm_sm_new_block(struct dm_space_map *sm, dm_block_t *b)
diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c
index 4353e1146d738..16643fc974e84 100644
--- a/drivers/md/persistent-data/dm-transaction-manager.c
+++ b/drivers/md/persistent-data/dm-transaction-manager.c
@@ -359,6 +359,17 @@ void dm_tm_inc(struct dm_transaction_manager *tm, dm_block_t b)
 }
 EXPORT_SYMBOL_GPL(dm_tm_inc);
 
+void dm_tm_inc_range(struct dm_transaction_manager *tm, dm_block_t b, dm_block_t e)
+{
+	/*
+	 * The non-blocking clone doesn't support this.
+	 */
+	BUG_ON(tm->is_clone);
+
+	dm_sm_inc_blocks(tm->sm, b, e);
+}
+EXPORT_SYMBOL_GPL(dm_tm_inc_range);
+
 void dm_tm_dec(struct dm_transaction_manager *tm, dm_block_t b)
 {
 	/*
@@ -370,6 +381,47 @@ void dm_tm_dec(struct dm_transaction_manager *tm, dm_block_t b)
 }
 EXPORT_SYMBOL_GPL(dm_tm_dec);
 
+void dm_tm_dec_range(struct dm_transaction_manager *tm, dm_block_t b, dm_block_t e)
+{
+	/*
+	 * The non-blocking clone doesn't support this.
+	 */
+	BUG_ON(tm->is_clone);
+
+	dm_sm_dec_blocks(tm->sm, b, e);
+}
+EXPORT_SYMBOL_GPL(dm_tm_dec_range);
+
+void dm_tm_with_runs(struct dm_transaction_manager *tm,
+		     const __le64 *value_le, unsigned count, dm_tm_run_fn fn)
+{
+	uint64_t b, begin, end;
+	bool in_run = false;
+	unsigned i;
+
+	for (i = 0; i < count; i++, value_le++) {
+		b = le64_to_cpu(*value_le);
+
+		if (in_run) {
+			if (b == end)
+				end++;
+			else {
+				fn(tm, begin, end);
+				begin = b;
+				end = b + 1;
+			}
+		} else {
+			in_run = true;
+			begin = b;
+			end = b + 1;
+		}
+	}
+
+	if (in_run)
+		fn(tm, begin, end);
+}
+EXPORT_SYMBOL_GPL(dm_tm_with_runs);
+
 int dm_tm_ref(struct dm_transaction_manager *tm, dm_block_t b,
 	      uint32_t *result)
 {
diff --git a/drivers/md/persistent-data/dm-transaction-manager.h b/drivers/md/persistent-data/dm-transaction-manager.h
index 3d75cc59bbb82..906c02ed0365b 100644
--- a/drivers/md/persistent-data/dm-transaction-manager.h
+++ b/drivers/md/persistent-data/dm-transaction-manager.h
@@ -100,8 +100,18 @@ void dm_tm_unlock(struct dm_transaction_manager *tm, struct dm_block *b);
  * Functions for altering the reference count of a block directly.
  */
 void dm_tm_inc(struct dm_transaction_manager *tm, dm_block_t b);
-
+void dm_tm_inc_range(struct dm_transaction_manager *tm, dm_block_t b, dm_block_t e);
 void dm_tm_dec(struct dm_transaction_manager *tm, dm_block_t b);
+void dm_tm_dec_range(struct dm_transaction_manager *tm, dm_block_t b, dm_block_t e);
+
+/*
+ * Builds up runs of adjacent blocks, and then calls fn on each run
+ * (typically dm_tm_inc_range/dm_tm_dec_range).  Very useful when you
+ * have to perform the same tm operation on all values in a btree leaf.
+ */
+typedef void (*dm_tm_run_fn)(struct dm_transaction_manager *, dm_block_t, dm_block_t);
+void dm_tm_with_runs(struct dm_transaction_manager *tm,
+		     const __le64 *value_le, unsigned count, dm_tm_run_fn fn);
 
 int dm_tm_ref(struct dm_transaction_manager *tm, dm_block_t b, uint32_t *result);
 
-- 
GitLab