diff --git a/Documentation/admin-guide/bcache.rst b/Documentation/admin-guide/bcache.rst
index 8d3a2d045c0ae7..20636aca454078 100644
--- a/Documentation/admin-guide/bcache.rst
+++ b/Documentation/admin-guide/bcache.rst
@@ -147,6 +147,86 @@ the backing devices to passthrough mode.
    writeback mode). It currently doesn't do anything intelligent if it fails to
    read some of the dirty data, though.
 
+SSD LONGEVITY: PER-PROCESS CACHE HINTING WITH IO PRIORITY
+---------------------------------------------------------
+
+Processes can be assigned an IO priority using `ionice` and bcache will
+either try to writeback or bypass the cache based on the IO priority
+level assigned to the process and the configuration of the sysfs ioprio
+hints.  If configured properly for your workload, this can both increase
+performance and reduce SSD wear (erase/write cycles).
+
+Having idle IOs bypass the cache can increase performance elsewhere
+since you probably don't care about their performance.  In addition,
+this prevents idle IOs from promoting into (polluting) your cache and
+evicting blocks that are more important elsewhere.
+
+Default sysfs values:
+	2,7: ioprio_bypass is hinted for process IOs at-or-below best-effort-7.
+	0,0: ioprio_writeback hinting is disabled by default.
+
+Cache hinting is configured by writing 'class,level' pairs to sysfs.
+In this example, we write the following:
+
+    echo 2,7 > /sys/block/bcache0/bcache/ioprio_bypass
+    echo 2,0 > /sys/block/bcache0/bcache/ioprio_writeback
+
+Thus, processes with the following IO class (ionice -c) and level (-n)
+will behave as shown in this table:
+
+	(-c) IO Class    (-n) Class level       Action
+	-----------------------------------------------------
+	(1) Realtime      0-7                   Writeback
+	(2) Best-effort     0                   Writeback
+	(2) Best-effort   1-6                   Normal, as if hinting were disabled
+	(2) Best-effort     7                   Bypass cache
+	(3) Idle          n/a                   Bypass cache
+
+For processes at-or-below best-effort-7 (ionice -c2 -n7), the
+ioprio_bypass behavior is as follows:
+
+* Reads will come from the backing device and will not promote into
+  (pollute) your cache.  If the block being read was already in the cache,
+  then it will be read from the cache (and remain cached).
+
+* If you are using writeback mode, then low-priority bypass-hinted writes
+  will go directly to the backing device.  If the write was dirty in
+  cache, it will cache-invalidate and write directly to the backing
+  device.  If a high-priority task later writes the same block then it
+  will writeback so no performance is lost for write-after-write.
+
+  For read-after-bypassed-write, the block will be read from the backing
+  device (not cached) so there may be a miss penalty when a low-priority
+  process write bypasses the cache followed by a high-priority read that
+  would otherwise have hit.  In practice, this is not an issue; to date,
+  none have wanted low-priority writes and high-priority reads of the
+  same block.
+
+For processes in our example at-or-above best-effort-0 (ionice -c2 -n0),
+the ioprio_writeback behavior is as follows:
+
+* The writeback hint has no effect unless your 'cache_mode' is writeback.
+  Assuming writeback mode, all writes at this priority will writeback.
+  Of course this will increase SSD wear, so only use writeback hinting
+  if you need it.
+
+* Reads are unaffected by ioprio_writeback, except that read-after-write
+  will of course read from the cache.
+
+Linux assigns processes the best-effort class with a level of 4 if
+no IO priority is explicitly assigned.  Thus, without `ionice` your processes
+will follow normal bcache should_writeback/should_bypass semantics as if the
+ioprio_writeback/ioprio_bypass sysfs flags were disabled.
+
+Also note that in order to be hinted by ioprio_writeback/ioprio_bypass,
+the process must have a valid ioprio setting as returned by
+get_task_io_context()->ioprio. Thus, a process without an IO context
+will be ignored by the ioprio_writeback/ioprio_bypass hints even if your
+sysfs hints specify that best-effort-4 should be flagged for bypass
+or writeback.  If in doubt, explicitly set the process IO priority with
+`ionice`.
+
+See `man ionice` for more detail about per-process IO priority in Linux.
 
 Howto/cookbook
 --------------
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 8c371d5eef8eb9..097577ae3c4717 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -482,8 +482,7 @@ void bch_bucket_free(struct cache_set *c, struct bkey *k)
 	unsigned int i;
 
 	for (i = 0; i < KEY_PTRS(k); i++)
-		__bch_bucket_free(PTR_CACHE(c, k, i),
-				  PTR_BUCKET(c, k, i));
+		__bch_bucket_free(c->cache, PTR_BUCKET(c, k, i));
 }
 
 int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
@@ -674,7 +673,7 @@ bool bch_alloc_sectors(struct cache_set *c,
 		SET_PTR_OFFSET(&b->key, i, PTR_OFFSET(&b->key, i) + sectors);
 
 		atomic_long_add(sectors,
-				&PTR_CACHE(c, &b->key, i)->sectors_written);
+				&c->cache->sectors_written);
 	}
 
 	if (b->sectors_free < c->cache->sb.block_size)
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index e8bf4f752e8bee..eab16a8a69f670 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -373,6 +373,7 @@ struct cached_dev {
 	unsigned int		partial_stripes_expensive:1;
 	unsigned int		writeback_metadata:1;
 	unsigned int		writeback_running:1;
+	unsigned int		writeback_consider_fragment:1;
 	unsigned char		writeback_percent;
 	unsigned int		writeback_delay;
 
@@ -385,6 +386,9 @@ struct cached_dev {
 	unsigned int		writeback_rate_update_seconds;
 	unsigned int		writeback_rate_i_term_inverse;
 	unsigned int		writeback_rate_p_term_inverse;
+	unsigned int		writeback_rate_fp_term_low;
+	unsigned int		writeback_rate_fp_term_mid;
+	unsigned int		writeback_rate_fp_term_high;
 	unsigned int		writeback_rate_minimum;
 
 	enum stop_on_failure	stop_when_cache_set_failed;
@@ -393,6 +397,9 @@ struct cached_dev {
 	unsigned int		error_limit;
 	unsigned int		offline_seconds;
 
+	unsigned short		ioprio_writeback;
+	unsigned short		ioprio_bypass;
+
 	char			backing_dev_name[BDEVNAME_SIZE];
 };
 
@@ -800,13 +807,6 @@ static inline sector_t bucket_remainder(struct cache_set *c, sector_t s)
 	return s & (c->cache->sb.bucket_size - 1);
 }
 
-static inline struct cache *PTR_CACHE(struct cache_set *c,
-				      const struct bkey *k,
-				      unsigned int ptr)
-{
-	return c->cache;
-}
-
 static inline size_t PTR_BUCKET_NR(struct cache_set *c,
 				   const struct bkey *k,
 				   unsigned int ptr)
@@ -818,7 +818,7 @@ static inline struct bucket *PTR_BUCKET(struct cache_set *c,
 					const struct bkey *k,
 					unsigned int ptr)
 {
-	return PTR_CACHE(c, k, ptr)->buckets + PTR_BUCKET_NR(c, k, ptr);
+	return c->cache->buckets + PTR_BUCKET_NR(c, k, ptr);
 }
 
 static inline uint8_t gen_after(uint8_t a, uint8_t b)
@@ -837,7 +837,7 @@ static inline uint8_t ptr_stale(struct cache_set *c, const struct bkey *k,
 static inline bool ptr_available(struct cache_set *c, const struct bkey *k,
 				 unsigned int i)
 {
-	return (PTR_DEV(k, i) < MAX_CACHES_PER_SET) && PTR_CACHE(c, k, i);
+	return (PTR_DEV(k, i) < MAX_CACHES_PER_SET) && c->cache;
 }
 
 /* Btree key macros */
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index fe6dce125aba22..183a58c893774d 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -426,7 +426,7 @@ void __bch_btree_node_write(struct btree *b, struct closure *parent)
 	do_btree_node_write(b);
 
 	atomic_long_add(set_blocks(i, block_bytes(b->c->cache)) * b->c->cache->sb.block_size,
-			&PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written);
+			&b->c->cache->btree_sectors_written);
 
 	b->written += set_blocks(i, block_bytes(b->c->cache));
 }
@@ -1161,7 +1161,7 @@ static void make_btree_freeing_key(struct btree *b, struct bkey *k)
 
 	for (i = 0; i < KEY_PTRS(k); i++)
 		SET_PTR_GEN(k, i,
-			    bch_inc_gen(PTR_CACHE(b->c, &b->key, i),
+			    bch_inc_gen(b->c->cache,
 					PTR_BUCKET(b->c, &b->key, i)));
 
 	mutex_unlock(&b->c->bucket_lock);
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index b00fd08d696b5f..b2eb59b9cd7107 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -50,7 +50,7 @@ void bch_btree_verify(struct btree *b)
 	v->keys.ops = b->keys.ops;
 
 	bio = bch_bbio_alloc(b->c);
-	bio_set_dev(bio, PTR_CACHE(b->c, &b->key, 0)->bdev);
+	bio_set_dev(bio, b->c->cache->bdev);
 	bio->bi_iter.bi_sector	= PTR_OFFSET(&b->key, 0);
 	bio->bi_iter.bi_size	= KEY_SIZE(&v->key) << 9;
 	bio->bi_opf		= REQ_OP_READ | REQ_META;
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index f4658a1f37b862..d626ffcbecb99c 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -50,7 +50,7 @@ static bool __ptr_invalid(struct cache_set *c, const struct bkey *k)
 
 	for (i = 0; i < KEY_PTRS(k); i++)
 		if (ptr_available(c, k, i)) {
-			struct cache *ca = PTR_CACHE(c, k, i);
+			struct cache *ca = c->cache;
 			size_t bucket = PTR_BUCKET_NR(c, k, i);
 			size_t r = bucket_remainder(c, PTR_OFFSET(k, i));
 
@@ -71,7 +71,7 @@ static const char *bch_ptr_status(struct cache_set *c, const struct bkey *k)
 
 	for (i = 0; i < KEY_PTRS(k); i++)
 		if (ptr_available(c, k, i)) {
-			struct cache *ca = PTR_CACHE(c, k, i);
+			struct cache *ca = c->cache;
 			size_t bucket = PTR_BUCKET_NR(c, k, i);
 			size_t r = bucket_remainder(c, PTR_OFFSET(k, i));
 
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index dad71a6b78891c..e4388fe3ab7ef9 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -36,7 +36,7 @@ void __bch_submit_bbio(struct bio *bio, struct cache_set *c)
 	struct bbio *b = container_of(bio, struct bbio, bio);
 
 	bio->bi_iter.bi_sector	= PTR_OFFSET(&b->key, 0);
-	bio_set_dev(bio, PTR_CACHE(c, &b->key, 0)->bdev);
+	bio_set_dev(bio, c->cache->bdev);
 
 	b->submit_time_us = local_clock_us();
 	closure_bio_submit(c, bio, bio->bi_private);
@@ -137,7 +137,7 @@ void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
 			      blk_status_t error, const char *m)
 {
 	struct bbio *b = container_of(bio, struct bbio, bio);
-	struct cache *ca = PTR_CACHE(c, &b->key, 0);
+	struct cache *ca = c->cache;
 	int is_read = (bio_data_dir(bio) == READ ? 1 : 0);
 
 	unsigned int threshold = op_is_write(bio_op(bio))
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index c6613e81733376..de2c0d7699cf54 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -768,7 +768,7 @@ static void journal_write_unlocked(struct closure *cl)
 	w->data->csum		= csum_set(w->data);
 
 	for (i = 0; i < KEY_PTRS(k); i++) {
-		ca = PTR_CACHE(c, k, i);
+		ca = c->cache;
 		bio = &ca->journal.bio;
 
 		atomic_long_add(sectors, &ca->meta_sectors_written);
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 21432638314562..28d922bbbf1c0f 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -367,6 +367,8 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
 	unsigned int sectors, congested;
 	struct task_struct *task = current;
 	struct io *i;
+	struct io_context *ioc;
+	unsigned short ioprio;
 
 	if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
 	    c->gc_stats.in_use > CUTOFF_CACHE_ADD ||
@@ -394,6 +396,29 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
 			goto skip;
 	}
 
+	/* If the ioprio already exists on the bio, use that.  We assume that
+	 * the upper layer properly assigned the calling process's ioprio to
+	 * the bio being passed to bcache. Otherwise, use current's ioc. */
+	ioprio = bio_prio(bio);
+	if (!ioprio_valid(ioprio)) {
+		ioc = get_task_io_context(current, GFP_NOIO, NUMA_NO_NODE);
+		if (ioc) {
+			if (ioprio_valid(ioc->ioprio))
+				ioprio = ioc->ioprio;
+			put_io_context(ioc);
+			ioc = NULL;
+		}
+	}
+
+	/* If the bypass hint is enabled and process ioprio is lower-or-equal
+	 * to dc->ioprio_bypass, bypass unless the request is REQ_META|REQ_PRIO.
+	 * A lower-priority IO class+value has a greater numeric value. */
+	if (ioprio_valid(ioprio) && ioprio_valid(dc->ioprio_bypass)
+		&& ioprio >= dc->ioprio_bypass) {
+		if (!(bio->bi_opf & (REQ_META|REQ_PRIO)))
+			goto skip;
+	}
+
 	if (bio->bi_iter.bi_sector & (c->cache->sb.block_size - 1) ||
 	    bio_sectors(bio) & (c->cache->sb.block_size - 1)) {
 		pr_debug("skipping unaligned io\n");
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 248bda63f08527..29d5705bda4fdb 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1052,6 +1052,7 @@ static int cached_dev_status_update(void *arg)
 
 int bch_cached_dev_run(struct cached_dev *dc)
 {
+	int ret = 0;
 	struct bcache_device *d = &dc->disk;
 	char *buf = kmemdup_nul(dc->sb.label, SB_LABEL_SIZE, GFP_KERNEL);
 	char *env[] = {
@@ -1064,19 +1065,15 @@ int bch_cached_dev_run(struct cached_dev *dc)
 	if (dc->io_disable) {
 		pr_err("I/O disabled on cached dev %s\n",
 		       dc->backing_dev_name);
-		kfree(env[1]);
-		kfree(env[2]);
-		kfree(buf);
-		return -EIO;
+		ret = -EIO;
+		goto out;
 	}
 
 	if (atomic_xchg(&dc->running, 1)) {
-		kfree(env[1]);
-		kfree(env[2]);
-		kfree(buf);
 		pr_info("cached dev %s is running already\n",
 		       dc->backing_dev_name);
-		return -EBUSY;
+		ret = -EBUSY;
+		goto out;
 	}
 
 	if (!d->c &&
@@ -1097,15 +1094,13 @@ int bch_cached_dev_run(struct cached_dev *dc)
 	 * only class / kset properties are persistent
 	 */
 	kobject_uevent_env(&disk_to_dev(d->disk)->kobj, KOBJ_CHANGE, env);
-	kfree(env[1]);
-	kfree(env[2]);
-	kfree(buf);
 
 	if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") ||
 	    sysfs_create_link(&disk_to_dev(d->disk)->kobj,
 			      &d->kobj, "bcache")) {
 		pr_err("Couldn't create bcache dev <-> disk sysfs symlinks\n");
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto out;
 	}
 
 	dc->status_update_thread = kthread_run(cached_dev_status_update,
@@ -1114,7 +1109,11 @@ int bch_cached_dev_run(struct cached_dev *dc)
 		pr_warn("failed to create bcache_status_update kthread, continue to run without monitoring backing device status\n");
 	}
 
-	return 0;
+out:
+	kfree(env[1]);
+	kfree(env[2]);
+	kfree(buf);
+	return ret;
 }
 
 /*
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 554e3afc9b688b..6a01e331b1e5bc 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -117,10 +117,14 @@ rw_attribute(writeback_running);
 rw_attribute(writeback_percent);
 rw_attribute(writeback_delay);
 rw_attribute(writeback_rate);
+rw_attribute(writeback_consider_fragment);
 
 rw_attribute(writeback_rate_update_seconds);
 rw_attribute(writeback_rate_i_term_inverse);
 rw_attribute(writeback_rate_p_term_inverse);
+rw_attribute(writeback_rate_fp_term_low);
+rw_attribute(writeback_rate_fp_term_mid);
+rw_attribute(writeback_rate_fp_term_high);
 rw_attribute(writeback_rate_minimum);
 read_attribute(writeback_rate_debug);
 
@@ -149,6 +153,9 @@ rw_attribute(idle_max_writeback_rate);
 rw_attribute(gc_after_writeback);
 rw_attribute(size);
 
+rw_attribute(ioprio_writeback);
+rw_attribute(ioprio_bypass);
+
 static ssize_t bch_snprint_string_list(char *buf,
 				       size_t size,
 				       const char * const list[],
@@ -195,6 +202,7 @@ SHOW(__bch_cached_dev)
 	var_printf(bypass_torture_test,	"%i");
 	var_printf(writeback_metadata,	"%i");
 	var_printf(writeback_running,	"%i");
+	var_printf(writeback_consider_fragment,	"%i");
 	var_print(writeback_delay);
 	var_print(writeback_percent);
 	sysfs_hprint(writeback_rate,
@@ -205,6 +213,9 @@ SHOW(__bch_cached_dev)
 	var_print(writeback_rate_update_seconds);
 	var_print(writeback_rate_i_term_inverse);
 	var_print(writeback_rate_p_term_inverse);
+	var_print(writeback_rate_fp_term_low);
+	var_print(writeback_rate_fp_term_mid);
+	var_print(writeback_rate_fp_term_high);
 	var_print(writeback_rate_minimum);
 
 	if (attr == &sysfs_writeback_rate_debug) {
@@ -277,6 +288,16 @@ SHOW(__bch_cached_dev)
 		return strlen(buf);
 	}
 
+	if (attr == &sysfs_ioprio_bypass)
+		return snprintf(buf, PAGE_SIZE-1, "%d,%ld\n",
+			IOPRIO_PRIO_CLASS(dc->ioprio_bypass),
+			IOPRIO_PRIO_DATA(dc->ioprio_bypass));
+
+	if (attr == &sysfs_ioprio_writeback)
+		return snprintf(buf, PAGE_SIZE-1, "%d,%ld\n",
+			IOPRIO_PRIO_CLASS(dc->ioprio_writeback),
+			IOPRIO_PRIO_DATA(dc->ioprio_writeback));
+
 #undef var
 	return 0;
 }
@@ -289,6 +310,10 @@ STORE(__cached_dev)
 	ssize_t v;
 	struct cache_set *c;
 	struct kobj_uevent_env *env;
+	unsigned ioprio_class = 0; /* invalid initial ioprio values */
+	unsigned ioprio_level = IOPRIO_BE_NR;
+	unsigned short *ioprio_hint = NULL;
+	char *ioprio_type = NULL;
 
 	/* no user space access if system is rebooting */
 	if (bcache_is_reboot)
@@ -303,6 +328,7 @@ STORE(__cached_dev)
 	sysfs_strtoul_bool(bypass_torture_test, dc->bypass_torture_test);
 	sysfs_strtoul_bool(writeback_metadata, dc->writeback_metadata);
 	sysfs_strtoul_bool(writeback_running, dc->writeback_running);
+	sysfs_strtoul_bool(writeback_consider_fragment, dc->writeback_consider_fragment);
 	sysfs_strtoul_clamp(writeback_delay, dc->writeback_delay, 0, UINT_MAX);
 
 	sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent,
@@ -331,6 +357,16 @@ STORE(__cached_dev)
 	sysfs_strtoul_clamp(writeback_rate_p_term_inverse,
 			    dc->writeback_rate_p_term_inverse,
 			    1, UINT_MAX);
+	sysfs_strtoul_clamp(writeback_rate_fp_term_low,
+			    dc->writeback_rate_fp_term_low,
+			    1, dc->writeback_rate_fp_term_mid - 1);
+	sysfs_strtoul_clamp(writeback_rate_fp_term_mid,
+			    dc->writeback_rate_fp_term_mid,
+			    dc->writeback_rate_fp_term_low + 1,
+			    dc->writeback_rate_fp_term_high - 1);
+	sysfs_strtoul_clamp(writeback_rate_fp_term_high,
+			    dc->writeback_rate_fp_term_high,
+			    dc->writeback_rate_fp_term_mid + 1, UINT_MAX);
 	sysfs_strtoul_clamp(writeback_rate_minimum,
 			    dc->writeback_rate_minimum,
 			    1, UINT_MAX);
@@ -435,6 +471,57 @@ STORE(__cached_dev)
 	if (attr == &sysfs_stop)
 		bcache_device_stop(&dc->disk);
 
+	/* ioprio hinting: we use ioprio_hint to reduce duplicate printk verbiage */
+	if (attr == &sysfs_ioprio_writeback) {
+		ioprio_hint = &dc->ioprio_writeback;
+		ioprio_type = "writeback";
+	}
+
+	if (attr == &sysfs_ioprio_bypass) {
+		ioprio_hint = &dc->ioprio_bypass;
+		ioprio_type = "bypass";
+	}
+
+	if (ioprio_hint != NULL)
+	{
+		if (sscanf(buf, "%u,%u", &ioprio_class, &ioprio_level) != 2
+			|| ioprio_class > IOPRIO_CLASS_IDLE
+			|| ioprio_level >= IOPRIO_BE_NR) {
+			pr_err("ioprio_%s invalid, expecting: (class,level) but parsed (%u,%u); ignored.\n",
+				ioprio_type,
+				ioprio_class, ioprio_level);
+			return size;
+		}
+
+		/* Use the maximum(/minimum) value in the class shift space to make integer
+		  comparison correct for ioprio_writeback(/ioprio_bypass) for IOPRIO_CLASS_IDLE.
+		  This is necessary because there are no ioprio levels for the idle class. */
+		if (ioprio_class == IOPRIO_CLASS_IDLE) {
+			if (ioprio_hint == &dc->ioprio_writeback)
+				ioprio_level = IOPRIO_PRIO_MASK;
+			else
+				/* Same, but 0 for bypass (inverted vs. writeback) */
+				ioprio_level = 0;
+		}
+
+		*ioprio_hint = IOPRIO_PRIO_VALUE(ioprio_class, ioprio_level);
+
+		if (!ioprio_valid(*ioprio_hint))
+			pr_info("disabled ioprio_%s hints.\n", ioprio_type);
+		else
+			pr_info("set hint for cache %s with priority %s: (class,level) = (%u,%u)\n",
+				ioprio_type,
+				( ioprio_hint == &dc->ioprio_writeback ? "at-or-above" : "at-or-below" ),
+				ioprio_class, ioprio_level);
+
+		if (ioprio_valid(dc->ioprio_writeback)
+			&& ioprio_valid(dc->ioprio_bypass)
+			&& dc->ioprio_writeback >= dc->ioprio_bypass)
+			pr_warn(
+				"warning: ioprio_writeback hint is neither disabled nor higher priority than the bypass hint; "
+				"will always writeback!\n");
+	}
+
 	return size;
 }
 
@@ -499,9 +586,13 @@ static struct attribute *bch_cached_dev_files[] = {
 	&sysfs_writeback_delay,
 	&sysfs_writeback_percent,
 	&sysfs_writeback_rate,
+	&sysfs_writeback_consider_fragment,
 	&sysfs_writeback_rate_update_seconds,
 	&sysfs_writeback_rate_i_term_inverse,
 	&sysfs_writeback_rate_p_term_inverse,
+	&sysfs_writeback_rate_fp_term_low,
+	&sysfs_writeback_rate_fp_term_mid,
+	&sysfs_writeback_rate_fp_term_high,
 	&sysfs_writeback_rate_minimum,
 	&sysfs_writeback_rate_debug,
 	&sysfs_io_errors,
@@ -522,6 +613,8 @@ static struct attribute *bch_cached_dev_files[] = {
 #endif
 	&sysfs_backing_dev_name,
 	&sysfs_backing_dev_uuid,
+	&sysfs_ioprio_bypass,
+	&sysfs_ioprio_writeback,
 	NULL
 };
 KTYPE(bch_cached_dev);
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 3c74996978dade..d1a5ddabb22af3 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -88,6 +88,44 @@ static void __update_writeback_rate(struct cached_dev *dc)
 	int64_t integral_scaled;
 	uint32_t new_rate;
 
+	/*
+	 * We need to consider the number of dirty buckets as well
+	 * when calculating proportional_scaled. Otherwise we might
+	 * have an unreasonably small writeback rate in a highly fragmented
+	 * situation where very few dirty sectors consume a lot of dirty
+	 * buckets. The worst case is when dirty buckets reach
+	 * cutoff_writeback_sync while dirty data has not even reached
+	 * writeback percent, so the rate stays at the minimum value, which
+	 * leaves writes stuck in non-writeback mode.
+	 */
+	struct cache_set *c = dc->disk.c;
+
+	int64_t dirty_buckets = c->nbuckets - c->avail_nbuckets;
+
+	if (dc->writeback_consider_fragment &&
+		c->gc_stats.in_use > BCH_WRITEBACK_FRAGMENT_THRESHOLD_LOW && dirty > 0) {
+		int64_t fragment =
+			div_s64((dirty_buckets *  c->cache->sb.bucket_size), dirty);
+		int64_t fp_term;
+		int64_t fps;
+
+		if (c->gc_stats.in_use <= BCH_WRITEBACK_FRAGMENT_THRESHOLD_MID) {
+			fp_term = (int64_t)dc->writeback_rate_fp_term_low *
+			(c->gc_stats.in_use - BCH_WRITEBACK_FRAGMENT_THRESHOLD_LOW);
+		} else if (c->gc_stats.in_use <= BCH_WRITEBACK_FRAGMENT_THRESHOLD_HIGH) {
+			fp_term = (int64_t)dc->writeback_rate_fp_term_mid *
+			(c->gc_stats.in_use - BCH_WRITEBACK_FRAGMENT_THRESHOLD_MID);
+		} else {
+			fp_term = (int64_t)dc->writeback_rate_fp_term_high *
+			(c->gc_stats.in_use - BCH_WRITEBACK_FRAGMENT_THRESHOLD_HIGH);
+		}
+		fps = div_s64(dirty, dirty_buckets) * fp_term;
+		if (fragment > 3 && fps > proportional_scaled) {
+			/* Only overwrite the p when fragment > 3 */
+			proportional_scaled = fps;
+		}
+	}
+
 	if ((error < 0 && dc->writeback_rate_integral > 0) ||
 	    (error > 0 && time_before64(local_clock(),
 			 dc->writeback_rate.next + NSEC_PER_MSEC))) {
@@ -378,7 +416,7 @@ static void read_dirty_endio(struct bio *bio)
 	struct dirty_io *io = w->private;
 
 	/* is_read = 1 */
-	bch_count_io_errors(PTR_CACHE(io->dc->disk.c, &w->key, 0),
+	bch_count_io_errors(io->dc->disk.c->cache,
 			    bio->bi_status, 1,
 			    "reading dirty data from cache");
 
@@ -472,8 +510,7 @@ static void read_dirty(struct cached_dev *dc)
 			dirty_init(w);
 			bio_set_op_attrs(&io->bio, REQ_OP_READ, 0);
 			io->bio.bi_iter.bi_sector = PTR_OFFSET(&w->key, 0);
-			bio_set_dev(&io->bio,
-				    PTR_CACHE(dc->disk.c, &w->key, 0)->bdev);
+			bio_set_dev(&io->bio, dc->disk.c->cache->bdev);
 			io->bio.bi_end_io	= read_dirty_endio;
 
 			if (bch_bio_alloc_pages(&io->bio, GFP_KERNEL))
@@ -968,6 +1005,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
 
 	dc->writeback_metadata		= true;
 	dc->writeback_running		= false;
+	dc->writeback_consider_fragment = true;
 	dc->writeback_percent		= 10;
 	dc->writeback_delay		= 30;
 	atomic_long_set(&dc->writeback_rate.rate, 1024);
@@ -975,8 +1013,21 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
 
 	dc->writeback_rate_update_seconds = WRITEBACK_RATE_UPDATE_SECS_DEFAULT;
 	dc->writeback_rate_p_term_inverse = 40;
+	dc->writeback_rate_fp_term_low = 1;
+	dc->writeback_rate_fp_term_mid = 10;
+	dc->writeback_rate_fp_term_high = 1000;
 	dc->writeback_rate_i_term_inverse = 10000;
 
+	/*
+	 * These defaults provide the best SSD life by enabling bypass
+	 * for priorities at-or-below BE-7. This also provides better
+	 * performance (cache hits) by preventing (near-)idle processes from
+	 * polluting the cache working set.  Only set ioprio_writeback if
+	 * you really need it: it will wear out your SSD sooner.
+	 */
+	dc->ioprio_writeback = IOPRIO_PRIO_VALUE(0, 0);
+	dc->ioprio_bypass    = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, (IOPRIO_BE_NR-1));
+
 	WARN_ON(test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
 	INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
 }
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index 3f1230e22de013..ffa41b1c27a74a 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -16,6 +16,10 @@
 
 #define BCH_AUTO_GC_DIRTY_THRESHOLD	50
 
+#define BCH_WRITEBACK_FRAGMENT_THRESHOLD_LOW 50
+#define BCH_WRITEBACK_FRAGMENT_THRESHOLD_MID 57
+#define BCH_WRITEBACK_FRAGMENT_THRESHOLD_HIGH 64
+
 #define BCH_DIRTY_INIT_THRD_MAX	64
 /*
  * 14 (16384ths) is chosen here as something that each backing device
@@ -99,6 +103,8 @@ static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
 				    unsigned int cache_mode, bool would_skip)
 {
 	unsigned int in_use = dc->disk.c->gc_stats.in_use;
+	struct io_context *ioc;
+	unsigned short ioprio;
 
 	if (cache_mode != CACHE_MODE_WRITEBACK ||
 	    test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
@@ -116,6 +122,28 @@ static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
 	if (would_skip)
 		return false;
 
+	/* If the ioprio already exists on the bio, use that.  We assume that
+	 * the upper layer properly assigned the calling process's ioprio to
+	 * the bio being passed to bcache. Otherwise, use current's ioc. */
+	ioprio = bio_prio(bio);
+	if (!ioprio_valid(ioprio)) {
+		ioc = get_task_io_context(current, GFP_NOIO, NUMA_NO_NODE);
+		if (ioc) {
+			if (ioprio_valid(ioc->ioprio))
+				ioprio = ioc->ioprio;
+			put_io_context(ioc);
+			ioc = NULL;
+		}
+	}
+
+	/* If process ioprio is higher-or-equal to dc->ioprio_writeback, then
+	 * hint for writeback. Note that a higher-priority IO class+value
+	 * has a lesser numeric value. */
+	if (ioprio_valid(ioprio) && ioprio_valid(dc->ioprio_writeback)
+		&& ioprio <= dc->ioprio_writeback) {
+		return true;
+	}
+
 	return (op_is_sync(bio->bi_opf) ||
 		bio->bi_opf & (REQ_META|REQ_PRIO) ||
 		in_use <= bch_cutoff_writeback);