diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 7c75ed7e89799..7c2ea350ea563 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -2359,6 +2359,27 @@ static long wb_check_start_all(struct bdi_writeback *wb) return nr_pages; } +static long wb_check_start_dontcache(struct bdi_writeback *wb) +{ + long nr_pages; + + if (!test_and_clear_bit(WB_start_dontcache, &wb->state)) + return 0; + + nr_pages = wb_stat_sum(wb, WB_DONTCACHE_DIRTY); + if (nr_pages) { + struct wb_writeback_work work = { + .nr_pages = nr_pages, + .sync_mode = WB_SYNC_NONE, + .range_cyclic = 1, + .reason = WB_REASON_DONTCACHE, + }; + + nr_pages = wb_writeback(wb, &work); + } + + return nr_pages; +} /* * Retrieve work items and do the writeback they describe @@ -2380,6 +2401,11 @@ static long wb_do_writeback(struct bdi_writeback *wb) */ wrote += wb_check_start_all(wb); + /* + * Check for dontcache writeback request + */ + wrote += wb_check_start_dontcache(wb); + /* * Check for periodic writeback, kupdated() style */ @@ -2454,6 +2480,39 @@ void wakeup_flusher_threads_bdi(struct backing_dev_info *bdi, rcu_read_unlock(); } +/** + * filemap_dontcache_kick_writeback - kick flusher for IOCB_DONTCACHE writes + * @mapping: address_space that was just written to + * + * Kick the writeback flusher thread to expedite writeback of dontcache dirty + * pages. Queue writeback for the inode's wb for as many pages as there are + * dontcache pages, but don't restrict writeback to dontcache pages only. + * + * This significantly improves performance over either writing all wb's pages + * or writing only dontcache pages. Although it doesn't guarantee quick + * writeback and reclaim of dontcache pages, it keeps the amount of dirty pages + * in check. Over longer term dontcache pages get written and reclaimed by + * background writeback even with this rough heuristic. + */ +void filemap_dontcache_kick_writeback(struct address_space *mapping) +{ + struct inode *inode = mapping->host; + struct bdi_writeback *wb; + struct wb_lock_cookie cookie = {}; + bool need_wakeup = false; + + wb = unlocked_inode_to_wb_begin(inode, &cookie); + if (wb_has_dirty_io(wb) && + !test_bit(WB_start_dontcache, &wb->state) && + !test_and_set_bit(WB_start_dontcache, &wb->state)) + need_wakeup = true; + unlocked_inode_to_wb_end(inode, &cookie); + + if (need_wakeup) + wb_wakeup(wb); +} +EXPORT_SYMBOL_GPL(filemap_dontcache_kick_writeback); + /* * Wakeup the flusher threads to start writeback of all currently dirty pages */ diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index c88fd4d37d1fb..e32581ae91993 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -26,6 +26,7 @@ enum wb_state { WB_writeback_running, /* Writeback is in progress */ WB_has_dirty_io, /* Dirty inodes on ->b_{dirty|io|more_io} */ WB_start_all, /* nr_pages == 0 (all) work pending */ + WB_start_dontcache, /* dontcache writeback pending */ }; enum wb_stat_item { @@ -33,6 +34,7 @@ enum wb_stat_item { WB_WRITEBACK, WB_DIRTIED, WB_WRITTEN, + WB_DONTCACHE_DIRTY, NR_WB_STAT_ITEMS }; @@ -55,6 +57,7 @@ enum wb_reason { */ WB_REASON_FORKER_THREAD, WB_REASON_FOREIGN_FLUSH, + WB_REASON_DONTCACHE, WB_REASON_MAX, }; diff --git a/include/linux/fs.h b/include/linux/fs.h index 8b3dd145b25ec..2fd36608ac733 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2610,6 +2610,7 @@ extern int __must_check file_write_and_wait_range(struct file *file, loff_t start, loff_t end); int filemap_flush_range(struct address_space *mapping, loff_t start, loff_t end); +void filemap_dontcache_kick_writeback(struct address_space *mapping); static inline int file_write_and_wait(struct file *file) { @@ -2643,10 +2644,7 @@ static inline ssize_t generic_write_sync(struct kiocb *iocb, ssize_t count) if (ret) return ret; } else if (iocb->ki_flags & IOCB_DONTCACHE) { - struct address_space *mapping = iocb->ki_filp->f_mapping; - - filemap_flush_range(mapping, iocb->ki_pos - count, - iocb->ki_pos - 1); + filemap_dontcache_kick_writeback(iocb->ki_filp->f_mapping); } return count; diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 4d3d8c8f3a1bc..9727af5426993 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -44,7 +44,8 @@ EM( WB_REASON_PERIODIC, "periodic") \ EM( WB_REASON_FS_FREE_SPACE, "fs_free_space") \ EM( WB_REASON_FORKER_THREAD, "forker_thread") \ - EMe(WB_REASON_FOREIGN_FLUSH, "foreign_flush") + EM( WB_REASON_FOREIGN_FLUSH, "foreign_flush") \ + EMe(WB_REASON_DONTCACHE, "dontcache") WB_WORK_REASON diff --git a/mm/filemap.c b/mm/filemap.c index 406cef06b684a..42a33c468621e 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2051,8 +2051,19 @@ struct folio *__filemap_get_folio_mpol(struct address_space *mapping, if (!folio) return ERR_PTR(-ENOENT); /* not an uncached lookup, clear uncached if set */ - if (folio_test_dropbehind(folio) && !(fgp_flags & FGP_DONTCACHE)) - folio_clear_dropbehind(folio); + if (!(fgp_flags & FGP_DONTCACHE) && folio_test_clear_dropbehind(folio)) { + if (folio_test_dirty(folio) && + mapping_can_writeback(mapping)) { + struct inode *inode = mapping->host; + struct bdi_writeback *wb; + struct wb_lock_cookie cookie = {}; + + wb = unlocked_inode_to_wb_begin(inode, &cookie); + wb_stat_mod(wb, WB_DONTCACHE_DIRTY, + -folio_nr_pages(folio)); + unlocked_inode_to_wb_end(inode, &cookie); + } + } return folio; } EXPORT_SYMBOL(__filemap_get_folio_mpol); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 601a5e048d128..f56287d8eb3b9 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2609,6 +2609,8 @@ static void folio_account_dirtied(struct folio *folio, wb = inode_to_wb(inode); lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, nr); + if (folio_test_dropbehind(folio)) + wb_stat_mod(wb, WB_DONTCACHE_DIRTY, nr); __zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, nr); __node_stat_mod_folio(folio, NR_DIRTIED, nr); wb_stat_mod(wb, WB_RECLAIMABLE, nr); @@ -2630,6 +2632,8 @@ void folio_account_cleaned(struct folio *folio, struct bdi_writeback *wb) long nr = folio_nr_pages(folio); lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, -nr); + if (folio_test_dropbehind(folio)) + wb_stat_mod(wb, WB_DONTCACHE_DIRTY, -nr); zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr); wb_stat_mod(wb, WB_RECLAIMABLE, -nr); task_io_account_cancelled_write(nr * PAGE_SIZE); @@ -2899,6 +2903,8 @@ bool folio_clear_dirty_for_io(struct folio *folio) if (folio_test_clear_dirty(folio)) { long nr = folio_nr_pages(folio); lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, -nr); + if (folio_test_dropbehind(folio)) + wb_stat_mod(wb, WB_DONTCACHE_DIRTY, -nr); zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr); wb_stat_mod(wb, WB_RECLAIMABLE, -nr); ret = true;