Erases for block devices were always just emulated by writing 0xff.
Some time back the write was removed and only the page cache was
changed to 0xff.  Superficially a good idea, but with two problems:
1. Touching the page cache isn't necessary either.
2. However, writing out 0xff _is_ necessary for the journal.  As the
   journal is scanned linearly, an old non-overwritten commit entry
   can be used on next mount and cause havoc.

This should fix both aspects.
This commit is contained in:
Joern Engel 2010-03-04 21:30:58 +01:00
Родитель 5c564c2a04
Коммит 9421502b4f
6 изменённых файлов: 97 добавлений и 20 удалений

Просмотреть файл

@ -167,27 +167,91 @@ static void bdev_writeseg(struct super_block *sb, u64 ofs, size_t len)
generic_unplug_device(bdev_get_queue(logfs_super(sb)->s_bdev));
}
static int bdev_erase(struct super_block *sb, loff_t to, size_t len)
/*
 * Completion callback installed on erase bios (bi_end_io).
 *
 * bio->bi_private holds the super_block. Any I/O failure is currently
 * fatal. Once the bio is released, the pending-write counter is dropped
 * and waiters on wq are woken when it reaches zero.
 */
static void erase_end_io(struct bio *bio, int err)
{
	struct super_block *sb = bio->bi_private;
	struct logfs_super *logfs = logfs_super(sb);

	/* FIXME: Retry io or write elsewhere */
	BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags));
	BUG_ON(err);
	BUG_ON(bio->bi_vcnt == 0);

	bio_put(bio);

	if (!atomic_dec_and_test(&logfs->s_pending_writes))
		return;
	wake_up(&wq);
}
/*
 * Finish setting up an erase bio whose first nr_vecs io_vec slots are
 * already filled, account for it in s_pending_writes and submit it as a
 * write starting at device byte offset ofs.
 */
static void submit_erase_bio(struct super_block *sb, struct bio *bio,
		u64 ofs, unsigned int nr_vecs)
{
	struct logfs_super *super = logfs_super(sb);

	bio->bi_vcnt = nr_vecs;
	bio->bi_idx = 0;
	bio->bi_size = nr_vecs * PAGE_SIZE;
	bio->bi_bdev = super->s_bdev;
	bio->bi_sector = ofs >> 9;	/* byte offset -> 512-byte sector */
	bio->bi_private = sb;
	bio->bi_end_io = erase_end_io;
	atomic_inc(&super->s_pending_writes);
	submit_bio(WRITE, bio);
}

/*
 * Overwrite nr_pages pages of the device, starting at byte offset ofs,
 * with the contents of super->s_erase_page.
 *
 * The writes are only submitted here; completion is signalled through
 * erase_end_io() and the s_pending_writes counter.
 *
 * @index is not used by this function; it is kept so the signature stays
 * compatible with existing callers.
 *
 * Always returns 0. Allocation failure is still fatal (BUG_ON).
 */
static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index,
		size_t nr_pages)
{
	struct logfs_super *super = logfs_super(sb);
	/* Take the limit from the device the bios are actually sent to. */
	struct request_queue *q = bdev_get_queue(super->s_bdev);
	unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9);
	struct bio *bio;
	unsigned int i;

	/* Nothing to write; an empty bio would trip erase_end_io(). */
	if (!nr_pages)
		return 0;

	/* A bio needs room for at least one page. */
	if (!max_pages)
		max_pages = 1;
	/* Do not ask for more io_vec slots than this erase can use. */
	if (max_pages > nr_pages)
		max_pages = nr_pages;

	bio = bio_alloc(GFP_NOFS, max_pages);
	BUG_ON(!bio); /* FIXME: handle this */

	for (i = 0; i < nr_pages; i++) {
		if (i >= max_pages) {
			/* Block layer cannot split bios :( */
			submit_erase_bio(sb, bio, ofs, i);

			ofs += i * PAGE_SIZE;
			nr_pages -= i;
			i = 0;

			bio = bio_alloc(GFP_NOFS, max_pages);
			BUG_ON(!bio); /* FIXME: handle this */
		}
		/* Every slot points at the same shared erase page. */
		bio->bi_io_vec[i].bv_page = super->s_erase_page;
		bio->bi_io_vec[i].bv_len = PAGE_SIZE;
		bio->bi_io_vec[i].bv_offset = 0;
	}

	/* Submit whatever is left in the last, possibly partial, bio. */
	submit_erase_bio(sb, bio, ofs, nr_pages);
	return 0;
}
/*
 * bdev_erase - erase callback for block devices.
 *
 * Both @to and @len must be page-aligned; anything else trips a BUG_ON.
 * Returns -EROFS when LOGFS_SB_FLAG_RO is set and 0 otherwise. When
 * @ensure_write is non-zero the range is passed to do_erase(); its return
 * value is not checked.
 *
 * NOTE(review): this listing looks like a patch view with removed and added
 * lines shown together (duplicate read-only checks, the page-cache memset
 * loop alongside the ensure_write branch) - confirm against the real file.
 */
static int bdev_erase(struct super_block *sb, loff_t to, size_t len,
int ensure_write)
{
struct logfs_super *super = logfs_super(sb);
struct address_space *mapping = super->s_mapping_inode->i_mapping;
struct page *page;
pgoff_t index = to >> PAGE_SHIFT;
int i, nr_pages = len >> PAGE_SHIFT;
BUG_ON(to & (PAGE_SIZE - 1));
BUG_ON(len & (PAGE_SIZE - 1));
if (logfs_super(sb)->s_flags & LOGFS_SB_FLAG_RO)
if (super->s_flags & LOGFS_SB_FLAG_RO)
return -EROFS;
for (i = 0; i < nr_pages; i++) {
page = find_get_page(mapping, index + i);
if (page) {
memset(page_address(page), 0xFF, PAGE_SIZE);
page_cache_release(page);
}
if (ensure_write) {
/*
 * Object store doesn't care whether erases happen or not.
 * But for the journal they are required. Otherwise a scan
 * can find an old commit entry and assume it is the current
 * one, travelling back in time.
 */
do_erase(sb, to, to >> PAGE_SHIFT, len >> PAGE_SHIFT);
}
return 0;
}

Просмотреть файл

@ -83,7 +83,8 @@ static int mtd_erase_mapping(struct super_block *sb, loff_t ofs, size_t len)
return 0;
}
static int mtd_erase(struct super_block *sb, loff_t ofs, size_t len)
static int mtd_erase(struct super_block *sb, loff_t ofs, size_t len,
int ensure_write)
{
struct mtd_info *mtd = logfs_super(sb)->s_mtd;
struct erase_info ei;

Просмотреть файл

@ -392,7 +392,7 @@ static int journal_erase_segment(struct logfs_area *area)
u64 ofs;
int err;
err = logfs_erase_segment(sb, area->a_segno);
err = logfs_erase_segment(sb, area->a_segno, 1);
if (err)
return err;

Просмотреть файл

@ -151,7 +151,8 @@ struct logfs_device_ops {
int (*write_sb)(struct super_block *sb, struct page *page);
int (*readpage)(void *_sb, struct page *page);
void (*writeseg)(struct super_block *sb, u64 ofs, size_t len);
int (*erase)(struct super_block *sb, loff_t ofs, size_t len);
int (*erase)(struct super_block *sb, loff_t ofs, size_t len,
int ensure_write);
void (*sync)(struct super_block *sb);
void (*put_device)(struct super_block *sb);
};
@ -327,6 +328,7 @@ struct logfs_super {
u64 s_feature_compat;
u64 s_feature_flags;
u64 s_sb_ofs[2];
struct page *s_erase_page; /* for dev_bdev.c */
/* alias.c fields */
struct btree_head32 s_segment_alias; /* remapped segments */
int s_no_object_aliases;
@ -572,7 +574,7 @@ int get_page_reserve(struct inode *inode, struct page *page);
extern struct logfs_block_ops indirect_block_ops;
/* segment.c */
int logfs_erase_segment(struct super_block *sb, u32 ofs);
int logfs_erase_segment(struct super_block *sb, u32 ofs, int ensure_erase);
int wbuf_read(struct super_block *sb, u64 ofs, size_t len, void *buf);
int logfs_segment_read(struct inode *inode, struct page *page, u64 ofs, u64 bix,
level_t level);

Просмотреть файл

@ -25,14 +25,14 @@ static int logfs_mark_segment_bad(struct super_block *sb, u32 segno)
return 0;
}
/*
 * logfs_erase_segment - erase one segment through the device ops.
 *
 * Increments super->s_gec, then calls the device's erase callback with the
 * byte offset (segno << s_segshift) and length s_segsize, and returns that
 * callback's result. @ensure_erase is forwarded to the callback unchanged.
 *
 * NOTE(review): this listing looks like a patch view showing both the old
 * and the new signature/argument lines - confirm against the real file.
 */
int logfs_erase_segment(struct super_block *sb, u32 segno)
int logfs_erase_segment(struct super_block *sb, u32 segno, int ensure_erase)
{
struct logfs_super *super = logfs_super(sb);
super->s_gec++;
return super->s_devops->erase(sb, (u64)segno << super->s_segshift,
super->s_segsize);
super->s_segsize, ensure_erase);
}
static s64 logfs_get_free_bytes(struct logfs_area *area, size_t bytes)
@ -798,7 +798,7 @@ static int ostore_erase_segment(struct logfs_area *area)
u64 ofs;
int err;
err = logfs_erase_segment(sb, area->a_segno);
err = logfs_erase_segment(sb, area->a_segno, 0);
if (err)
return err;

Просмотреть файл

@ -317,6 +317,7 @@ static int logfs_make_writeable(struct super_block *sb)
static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt)
{
struct logfs_super *super = logfs_super(sb);
struct inode *rootdir;
int err;
@ -329,15 +330,22 @@ static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt)
if (!sb->s_root)
goto fail;
super->s_erase_page = alloc_pages(GFP_KERNEL, 0);
if (!super->s_erase_page)
goto fail2;
memset(page_address(super->s_erase_page), 0xFF, PAGE_SIZE);
/* FIXME: check for read-only mounts */
err = logfs_make_writeable(sb);
if (err)
goto fail2;
goto fail3;
log_super("LogFS: Finished mounting\n");
simple_set_mnt(mnt, sb);
return 0;
fail3:
__free_page(super->s_erase_page);
fail2:
iput(rootdir);
fail:
@ -498,6 +506,8 @@ static void logfs_kill_sb(struct super_block *sb)
logfs_cleanup_journal(sb);
logfs_cleanup_areas(sb);
logfs_cleanup_rw(sb);
if (super->s_erase_page)
__free_page(super->s_erase_page);
super->s_devops->put_device(sb);
mempool_destroy(super->s_btree_pool);
mempool_destroy(super->s_alias_pool);