summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMaksym Planeta <mcsim.planeta@gmail.com>2012-10-07 15:46:05 +0300
committerMaksym Planeta <mcsim.planeta@gmail.com>2012-10-07 21:30:47 +0300
commitb01792edffe744d1f3fb9efef8376ba904b95346 (patch)
tree30c7c9cd8d538bf3beadcfb6bd5e10af63d88dcb
parentaa984dcaa231fa865281b869009b6269fc74746b (diff)
Make ext2fs work with partitions bigger than 2Gb.
This commit is a port of the large store patch from the Debian repository. Additionally, the patch was changed to use the new libpager interface. The main idea behind this patch is to use a special disk cache of up to 2Gb in size instead of mapping the whole partition. This cache is used to map only those pages of the partition that are in use at the moment or were used recently. This allows mapping at most 2Gb of memory, even though the partition may be much bigger. This commit does not bring support for large files, only for large partitions! * ext2fs/balloc.c (ext2_free_blocks): Function changed to access disk blocks in a new way. (ext2_new_block): Likewise. (ext2_count_free_blocks): Likewise. (ext2_check_blocks_bitmap): Likewise. * ext2fs/ext2fs.c (main): Since the whole image is no longer mapped and a disk cache is used instead, some function and variable names were changed accordingly. (diskfs_reload_global_state): Likewise. * ext2fs/ext2fs.h (DISK_CACHE_BLOCKS): New macro. Sets the size of the disk cache. (DC_INCORE, DC_UNTOUCHED, DC_FIXED, DC_DONT_REUSE, DC_NO_BLOCK): New macros, used for keeping the status of a block in the disk cache. (DISK_CACHE_LAST_READ_XOR) [!NDEBUG]: New macro for debugging purposes. (struct disk_cache_info): Structure that keeps information about a block in the disk cache. (bptr_index): New macro for converting an address in memory to the index of a block in the disk cache. (boffs_ptr): Macro has been converted to a function to take into account the presence of the disk cache. (bptr_offs): Likewise. (dino -> dino_ref): Function has been renamed and changed to take into account the presence of the disk cache. (dino_deref): New function that releases a dinode. (record_global_poke): Function changed to access disk blocks in a new way. (sync_global_ptr): Likewise. (record_indir_poke): Likewise. (sync_global): Add debugging macro. * ext2fs/getblk.c (ext2_alloc_block): Function changed to access disk blocks in a new way. (block_getblk): Likewise. 
* ext2fs/hyper.c (get_hypermetadata): Function has been changed to read sblock on its own. (map_hypermetadata): New function that sets sblock and group_desc_image as pointers into the disk cache. (diskfs_set_hypermetadata): Increase the reference count in the disk cache for sblock when it is poked. (diskfs_readonly_changed): Update function to use the disk cache. * ext2fs/ialloc.c (diskfs_free_node): Function changed to access disk blocks in a new way. (ext2_alloc_inode): Likewise. (ext2_count_free_inodes): Likewise. (ext2_check_inodes_bitmap): Likewise. * ext2fs/inode.c (diskfs_cached_lookup): Use the disk cache instead of the disk image. (read_node): Function changed to access disk blocks using the new interface. A debugging macro has also been added. (write_node): Likewise. (diskfs_set_translator): Function changed to access disk blocks using the new interface. (diskfs_get_translator): Likewise. * ext2fs/pager.c (disk_image): Obsolete variable removed. (STAT_ADD): New macro. (FREE_PAGE_BUFS): Remove obsolete macro. (get_page_buf -> get_buf): Function renamed and changed to return a buffer of the requested size. (free_page_buf -> free_buf): Function renamed and changed. (file_pager_read_page -> file_pager_read): Function renamed and changed to process several pages at once. (file_pager_write_page -> file_pager_write): Likewise. (disk_pager_read_page -> disk_pager_read): Likewise. (disk_pager_write_page -> disk_pager_write): Likewise. (pending_blocks_write): Function has been changed. (ext2_pager_notify_evict): New function. (pager_read_page -> ext2_read_pages): Function has been renamed and changed to support the new libpager interface. (pager_write_page -> ext2_write_pages): Likewise. (pager_unlock_page -> ext2_unlock_pages): Likewise. (pager_report_extent -> ext2_report_extent): Likewise. (pager_clear_user_data -> ext2_clear_user_data): Likewise. (pager_dropweak): Remove obsolete function. (ext2_ops): New variable. (disk_cache): Likewise. (disk_cache_size): Likewise. (disk_cache_blocks): Likewise. 
(disk_cache_bptr): Likewise. (disk_cache_info): Likewise. (disk_cache_hint): Likewise. (disk_cache_lock): Likewise. (disk_cache_reassociation): Likewise. (disk_cache_init): New function. (disk_cache_return_unused): Likewise. (disk_cache_block_ref_no_block): Likewise. (disk_cache_block_is_cached): Likewise. (disk_cache_block_ref): Likewise. (disk_cache_block_ref_hint_no_block): Likewise. (disk_cache_block_ref_ptr): Likewise. (disk_cache_block_deref): Likewise. (disk_cache_block_is_ref): Likewise. (create_disk_pager): Function has been changed to support both the new libpager interface and the disk cache. (diskfs_get_filemap): Function has been changed to support the new libpager interface. * ext2fs/pokel.c (pokel_add): Function changed to take into account the new way of working with disk blocks. (__pokel_exec): Likewise. * ext2fs/truncate.c (trunc_indirect): Function changed to take into account both the new way of working with disk blocks and the changes in libpager. (force_delayed_copies): Function changed to support the new libpager interface.
-rw-r--r--ext2fs/balloc.c20
-rw-r--r--ext2fs/ext2fs.c5
-rw-r--r--ext2fs/ext2fs.h141
-rw-r--r--ext2fs/getblk.c19
-rw-r--r--ext2fs/hyper.c32
-rw-r--r--ext2fs/ialloc.c22
-rw-r--r--ext2fs/inode.c49
-rw-r--r--ext2fs/pager.c1100
-rw-r--r--ext2fs/pokel.c40
-rw-r--r--ext2fs/truncate.c17
10 files changed, 1149 insertions, 296 deletions
diff --git a/ext2fs/balloc.c b/ext2fs/balloc.c
index b0fc5f2b..f81dfc04 100644
--- a/ext2fs/balloc.c
+++ b/ext2fs/balloc.c
@@ -92,7 +92,7 @@ ext2_free_blocks (block_t block, unsigned long count)
block, count);
}
gdp = group_desc (block_group);
- bh = bptr (gdp->bg_block_bitmap);
+ bh = disk_cache_block_ref (gdp->bg_block_bitmap);
if (in_range (gdp->bg_block_bitmap, block, gcount) ||
in_range (gdp->bg_inode_bitmap, block, gcount) ||
@@ -114,6 +114,7 @@ ext2_free_blocks (block_t block, unsigned long count)
}
record_global_poke (bh);
+ disk_cache_block_ref_ptr (gdp);
record_global_poke (gdp);
block += gcount;
@@ -180,7 +181,7 @@ repeat:
if (j)
goal_attempts++;
#endif
- bh = bptr (gdp->bg_block_bitmap);
+ bh = disk_cache_block_ref (gdp->bg_block_bitmap);
ext2_debug ("goal is at %d:%d", i, j);
@@ -246,6 +247,8 @@ repeat:
j = k;
goto got_block;
}
+
+ disk_cache_block_deref (bh);
bh = 0;
}
@@ -269,7 +272,8 @@ repeat:
spin_unlock (&global_lock);
return 0;
}
- bh = bptr (gdp->bg_block_bitmap);
+
+ bh = disk_cache_block_ref (gdp->bg_block_bitmap);
r = memscan (bh, 0, sblock->s_blocks_per_group >> 3);
j = (r - bh) << 3;
if (j < sblock->s_blocks_per_group)
@@ -279,6 +283,7 @@ repeat:
sblock->s_blocks_per_group);
if (j >= sblock->s_blocks_per_group)
{
+ disk_cache_block_deref (bh);
bh = 0;
ext2_error ("free blocks count corrupted for block group %d", i);
spin_unlock (&global_lock);
@@ -309,6 +314,7 @@ got_block:
if (set_bit (j, bh))
{
ext2_warning ("bit already set for block %d", j);
+ disk_cache_block_deref (bh);
bh = 0;
goto repeat;
}
@@ -399,9 +405,12 @@ ext2_count_free_blocks ()
gdp = NULL;
for (i = 0; i < groups_count; i++)
{
+ void *bh;
gdp = group_desc (i);
desc_count += gdp->bg_free_blocks_count;
- x = count_free (bptr (gdp->bg_block_bitmap), block_size);
+ bh = disk_cache_block_ref (gdp->bg_block_bitmap);
+ x = count_free (bh, block_size);
+ disk_cache_block_deref (bh);
printf ("group %d: stored = %d, counted = %lu",
i, gdp->bg_free_blocks_count, x);
bitmap_count += x;
@@ -462,7 +471,7 @@ ext2_check_blocks_bitmap ()
gdp = group_desc (i);
desc_count += gdp->bg_free_blocks_count;
- bh = bptr (gdp->bg_block_bitmap);
+ bh = disk_cache_block_ref (gdp->bg_block_bitmap);
if (!EXT2_HAS_RO_COMPAT_FEATURE (sblock,
EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)
@@ -488,6 +497,7 @@ ext2_check_blocks_bitmap ()
ext2_error ("block #%d of the inode table in group %d is marked free", j, i);
x = count_free (bh, block_size);
+ disk_cache_block_deref (bh);
if (gdp->bg_free_blocks_count != x)
ext2_error ("wrong free blocks count for group %d,"
" stored = %d, counted = %lu",
diff --git a/ext2fs/ext2fs.c b/ext2fs/ext2fs.c
index afee2fbb..9ec44455 100644
--- a/ext2fs/ext2fs.c
+++ b/ext2fs/ext2fs.c
@@ -181,9 +181,9 @@ main (int argc, char **argv)
/* Map the entire disk. */
create_disk_pager ();
- pokel_init (&global_pokel, diskfs_disk_pager, disk_image);
+ pokel_init (&global_pokel, diskfs_disk_pager, disk_cache);
- get_hypermetadata();
+ map_hypermetadata ();
inode_init ();
@@ -213,5 +213,6 @@ diskfs_reload_global_state ()
pager_flush (diskfs_disk_pager, 1);
sblock = 0;
get_hypermetadata ();
+ map_hypermetadata ();
return 0;
}
diff --git a/ext2fs/ext2fs.h b/ext2fs/ext2fs.h
index 579ef746..39f80688 100644
--- a/ext2fs/ext2fs.h
+++ b/ext2fs/ext2fs.h
@@ -23,7 +23,9 @@
#include <hurd/pager.h>
#include <hurd/fshelp.h>
#include <hurd/iohelp.h>
+#include <hurd/store.h>
#include <hurd/diskfs.h>
+#include <hurd/ihash.h>
#include <assert.h>
#include <rwlock.h>
#include <sys/mman.h>
@@ -195,6 +197,8 @@ struct user_pager_info
/* ---------------------------------------------------------------- */
/* pager.c */
+#define DISK_CACHE_BLOCKS 65536
+
#include <hurd/diskfs-pager.h>
/* Set up the disk pager. */
@@ -218,10 +222,57 @@ extern struct store *store;
/* What the user specified. */
extern struct store_parsed *store_parsed;
-/* Mapped image of the disk. */
-extern void *disk_image;
+/* Mapped image of cached blocks of the disk. */
+extern void *disk_cache;
+extern store_offset_t disk_cache_size;
+extern int disk_cache_blocks;
+
+#define DC_INCORE 0x01 /* Not in core. NOTE(review): the flag name suggests "in core"; confirm the intended meaning. */
+#define DC_UNTOUCHED 0x02 /* Not touched by disk_pager_read
+ or disk_cache_block_ref. */
+#define DC_FIXED 0x04 /* Must not be re-associated. */
+
+/* Flags that forbid re-association of page. DC_UNTOUCHED is included
+ because this flag is used only when page is already to be
+ re-associated, so it's not good candidate for another
+ remapping. */
+#define DC_DONT_REUSE (DC_INCORE | DC_UNTOUCHED | DC_FIXED)
+
+#define DC_NO_BLOCK ((block_t) -1L)
+
+#ifndef NDEBUG
+#define DISK_CACHE_LAST_READ_XOR 0xDEADBEEF
+#endif
-/* Our in-core copy of the super-block (pointer into the disk_image). */
+/* Disk cache blocks' meta info. */
+struct disk_cache_info
+{
+ block_t block;
+ uint16_t flags;
+ uint16_t ref_count;
+#ifndef NDEBUG
+ block_t last_read, last_read_xor;
+#endif
+};
+
+/* block num --> pointer to in-memory block */
+extern hurd_ihash_t disk_cache_bptr;
+/* Metadata about cached block. */
+extern struct disk_cache_info *disk_cache_info;
+/* Lock for these mappings */
+extern struct mutex disk_cache_lock;
+/* Fired when a re-association is done. */
+extern struct condition disk_cache_reassociation;
+
+void *disk_cache_block_ref (block_t block);
+void *disk_cache_block_ref_no_block (block_t block);
+void *disk_cache_block_ref_hint_no_block (block_t block, int hint);
+void disk_cache_block_ref_ptr (void *ptr);
+int disk_cache_block_is_cached (block_t block);
+void disk_cache_block_deref (void *ptr);
+int disk_cache_block_is_ref (block_t block);
+
+/* Our in-core copy of the super-block (pointer into the disk_cache). */
struct ext2_super_block *sblock;
/* True if sblock has been modified. */
int sblock_dirty;
@@ -251,6 +302,9 @@ vm_address_t zeroblock;
/* Get the superblock from the disk, & setup various global info from it. */
void get_hypermetadata ();
+
+/* Map `sblock' and `group_desc_image' pointers to disk cache. */
+void map_hypermetadata ();
/* ---------------------------------------------------------------- */
/* Random stuff calculated from the super block. */
@@ -274,7 +328,7 @@ spin_lock_t generation_lock;
unsigned long next_generation;
/* ---------------------------------------------------------------- */
-/* Functions for looking inside disk_image */
+/* Functions for looking inside disk_cache */
#define trunc_block(offs) \
((off_t) ((offs) >> log2_block_size) << log2_block_size)
@@ -286,10 +340,39 @@ unsigned long next_generation;
/* byte offset on disk --> block num */
#define boffs_block(offs) ((offs) >> log2_block_size)
+/* pointer to in-memory block -> index in disk_cache_info */
+#define bptr_index(ptr) (((char *)ptr - (char *)disk_cache) >> log2_block_size)
+
/* byte offset on disk --> pointer to in-memory block */
-#define boffs_ptr(offs) (((char *)disk_image) + (offs))
+EXT2FS_EI char *
+boffs_ptr (off_t offset)
+{
+ block_t block = boffs_block (offset);
+ mutex_lock (&disk_cache_lock);
+ char *ptr = hurd_ihash_find (disk_cache_bptr, block);
+ mutex_unlock (&disk_cache_lock);
+ assert (ptr);
+ ptr += offset % block_size;
+ ext2_debug ("(%Ld) = %p", offset, ptr);
+ return ptr;
+}
+
/* pointer to in-memory block --> byte offset on disk */
-#define bptr_offs(ptr) ((char *)(ptr) - ((char *)disk_image))
+EXT2FS_EI off_t
+bptr_offs (void *ptr)
+{
+ vm_offset_t mem_offset = (char *)ptr - (char *)disk_cache;
+ off_t offset;
+ assert (mem_offset < disk_cache_size);
+ mutex_lock (&disk_cache_lock);
+ offset = (off_t) disk_cache_info[boffs_block (mem_offset)].block
+ << log2_block_size;
+ assert (offset || mem_offset < block_size);
+ offset += mem_offset % block_size;
+ mutex_unlock (&disk_cache_lock);
+ ext2_debug ("(%p) = %Ld", ptr, offset);
+ return offset;
+}
/* block num --> pointer to in-memory block */
#define bptr(block) boffs_ptr(boffs(block))
@@ -309,14 +392,24 @@ extern struct ext2_inode *dino (ino_t inum);
#if defined(__USE_EXTERN_INLINES) || defined(EXT2FS_DEFINE_EI)
/* Convert an inode number to the dinode on disk. */
EXT2FS_EI struct ext2_inode *
-dino (ino_t inum)
+dino_ref (ino_t inum)
{
unsigned long inodes_per_group = sblock->s_inodes_per_group;
unsigned long bg_num = (inum - 1) / inodes_per_group;
unsigned long group_inum = (inum - 1) % inodes_per_group;
- struct ext2_group_desc *bg = group_desc(bg_num);
+ struct ext2_group_desc *bg = group_desc (bg_num);
block_t block = bg->bg_inode_table + (group_inum / inodes_per_block);
- return ((struct ext2_inode *)bptr(block)) + group_inum % inodes_per_block;
+ struct ext2_inode *inode = disk_cache_block_ref (block);
+ inode += group_inum % inodes_per_block;
+ ext2_debug ("(%qd) = %p", inum, inode);
+ return inode;
+}
+
+EXT2FS_EI void
+dino_deref (struct ext2_inode *inode)
+{
+ ext2_debug ("(%p)", inode);
+ disk_cache_block_deref (inode);
}
#endif /* Use extern inlines. */
@@ -378,27 +471,38 @@ global_block_modified (block_t block)
EXT2FS_EI void
record_global_poke (void *ptr)
{
- int boffs = trunc_block (bptr_offs (ptr));
- global_block_modified (boffs_block (boffs));
- pokel_add (&global_pokel, boffs_ptr(boffs), block_size);
+ block_t block = boffs_block (bptr_offs (ptr));
+ void *block_ptr = bptr (block);
+ ext2_debug ("(%p = %p)", ptr, block_ptr);
+ assert (disk_cache_block_is_ref (block));
+ global_block_modified (block);
+ pokel_add (&global_pokel, block_ptr, block_size);
}
/* This syncs a modification to a non-file block. */
EXT2FS_EI void
sync_global_ptr (void *bptr, int wait)
{
- vm_offset_t boffs = trunc_block (bptr_offs (bptr));
- global_block_modified (boffs_block (boffs));
- pager_sync_some (diskfs_disk_pager, trunc_page (boffs), vm_page_size, wait);
+ block_t block = bptr_block (bptr);
+ void *block_ptr = bptr (block);
+ size_t pages = block_size < vm_page_size ? 1 : block_size / vm_page_size;
+ ext2_debug ("(%p -> %u)", bptr, (block_t)block);
+ global_block_modified (block);
+ disk_cache_block_deref (block_ptr);
+ pager_sync_some (diskfs_disk_pager,
+ (block_ptr - disk_cache) / vm_page_size, pages, wait);
}
/* This records a modification to one of a file's indirect blocks. */
EXT2FS_EI void
record_indir_poke (struct node *node, void *ptr)
{
- int boffs = trunc_block (bptr_offs (ptr));
- global_block_modified (boffs_block (boffs));
- pokel_add (&node->dn->indir_pokel, boffs_ptr(boffs), block_size);
+ block_t block = boffs_block (bptr_offs (ptr));
+ void *block_ptr = bptr (block);
+ ext2_debug ("(%d, %p)", (int)node->cache_id, ptr);
+ assert (disk_cache_block_is_ref (block));
+ global_block_modified (block);
+ pokel_add (&node->dn->indir_pokel, block_ptr, block_size);
}
/* ---------------------------------------------------------------- */
@@ -406,6 +510,7 @@ record_indir_poke (struct node *node, void *ptr)
EXT2FS_EI void
sync_global (int wait)
{
+ ext2_debug ("%d", wait);
pokel_sync (&global_pokel, wait);
}
diff --git a/ext2fs/getblk.c b/ext2fs/getblk.c
index 86e94aa9..8dac694c 100644
--- a/ext2fs/getblk.c
+++ b/ext2fs/getblk.c
@@ -104,7 +104,7 @@ ext2_alloc_block (struct node *node, block_t goal, int zero)
if (result && zero)
{
- char *bh = bptr (result);
+ char *bh = disk_cache_block_ref (result);
memset (bh, 0, block_size);
record_indir_poke (node, bh);
}
@@ -180,14 +180,20 @@ block_getblk (struct node *node, block_t block, int nr, int create, int zero,
{
int i;
block_t goal = 0;
- block_t *bh = (block_t *)bptr (block);
+ block_t *bh = (block_t *)disk_cache_block_ref (block);
*result = bh[nr];
if (*result)
- return 0;
+ {
+ disk_cache_block_deref (bh);
+ return 0;
+ }
if (!create)
- return EINVAL;
+ {
+ disk_cache_block_deref (bh);
+ return EINVAL;
+ }
if (node->dn->info.i_next_alloc_block == new_block)
goal = node->dn->info.i_next_alloc_goal;
@@ -207,7 +213,10 @@ block_getblk (struct node *node, block_t block, int nr, int create, int zero,
*result = ext2_alloc_block (node, goal, zero);
if (!*result)
- return ENOSPC;
+ {
+ disk_cache_block_deref (bh);
+ return ENOSPC;
+ }
bh[nr] = *result;
diff --git a/ext2fs/hyper.c b/ext2fs/hyper.c
index bee4175f..3af1947c 100644
--- a/ext2fs/hyper.c
+++ b/ext2fs/hyper.c
@@ -58,11 +58,14 @@ static int ext2fs_clean; /* fs clean before we started writing? */
void
get_hypermetadata (void)
{
- error_t err = diskfs_catch_exception ();
- if (err)
- ext2_panic ("can't read superblock: %s", strerror (err));
+ error_t err;
+ size_t read = 0;
- sblock = (struct ext2_super_block *) boffs_ptr (SBLOCK_OFFS);
+ assert (! sblock);
+ err = store_read (store, SBLOCK_OFFS >> store->log2_block_size,
+ SBLOCK_SIZE, (void **)&sblock, &read);
+ if (err || read != SBLOCK_SIZE)
+ ext2_panic ("Cannot read hypermetadata");
if (sblock->s_magic != EXT2_SUPER_MAGIC
#ifdef EXT2FS_PRE_02B_COMPAT
@@ -152,15 +155,22 @@ get_hypermetadata (void)
allocate_mod_map ();
- diskfs_end_catch_exception ();
+ /* A handy source of page-aligned zeros. */
+ if (zeroblock == 0)
+ zeroblock = (vm_address_t) mmap (0, block_size, PROT_READ, MAP_ANON, 0, 0);
+
+ munmap (sblock, SBLOCK_SIZE);
+ sblock = NULL;
+}
+
+void
+map_hypermetadata (void)
+{
+ sblock = (struct ext2_super_block *) boffs_ptr (SBLOCK_OFFS);
/* Cache a convenient pointer to the block group descriptors for allocation.
These are stored in the filesystem blocks following the superblock. */
group_desc_image = (struct ext2_group_desc *) bptr (bptr_block (sblock) + 1);
-
- /* A handy source of page-aligned zeros. */
- if (zeroblock == 0)
- zeroblock = (vm_address_t) mmap (0, block_size, PROT_READ, MAP_ANON, 0, 0);
}
error_t
@@ -183,6 +193,7 @@ diskfs_set_hypermetadata (int wait, int clean)
if (sblock_dirty)
{
sblock_dirty = 0;
+ disk_cache_block_ref_ptr (sblock);
record_global_poke (sblock);
}
@@ -199,7 +210,8 @@ diskfs_readonly_changed (int readonly)
(*(readonly ? store_set_flags : store_clear_flags)) (store, STORE_READONLY);
- mprotect (disk_image, store->size, PROT_READ | (readonly ? 0 : PROT_WRITE));
+ mprotect (disk_cache, disk_cache_size,
+ PROT_READ | (readonly ? 0 : PROT_WRITE));
if (!readonly && !(sblock->s_state & EXT2_VALID_FS))
ext2_warning ("UNCLEANED FILESYSTEM NOW WRITABLE");
diff --git a/ext2fs/ialloc.c b/ext2fs/ialloc.c
index 2c61c723..eee28b82 100644
--- a/ext2fs/ialloc.c
+++ b/ext2fs/ialloc.c
@@ -75,22 +75,25 @@ diskfs_free_node (struct node *np, mode_t old_mode)
bit = (inum - 1) % sblock->s_inodes_per_group;
gdp = group_desc (block_group);
- bh = bptr (gdp->bg_inode_bitmap);
+ bh = disk_cache_block_ref (gdp->bg_inode_bitmap);
if (!clear_bit (bit, bh))
ext2_warning ("bit already cleared for inode %Ld", inum);
else
{
+ disk_cache_block_ref_ptr (bh);
record_global_poke (bh);
gdp->bg_free_inodes_count++;
if (S_ISDIR (old_mode))
gdp->bg_used_dirs_count--;
+ disk_cache_block_ref_ptr (gdp);
record_global_poke (gdp);
sblock->s_free_inodes_count++;
}
+ disk_cache_block_deref (bh);
sblock_dirty = 1;
spin_unlock (&global_lock);
alloc_sync(0);
@@ -214,7 +217,7 @@ repeat:
return 0;
}
- bh = bptr (gdp->bg_inode_bitmap);
+ bh = disk_cache_block_ref (gdp->bg_inode_bitmap);
if ((inum =
find_first_zero_bit ((unsigned long *) bh, sblock->s_inodes_per_group))
< sblock->s_inodes_per_group)
@@ -222,6 +225,7 @@ repeat:
if (set_bit (inum, bh))
{
ext2_warning ("bit already set for inode %d", inum);
+ disk_cache_block_deref (bh);
bh = 0;
goto repeat;
}
@@ -230,6 +234,7 @@ repeat:
}
else
{
+ disk_cache_block_deref (bh);
bh = 0;
if (gdp->bg_free_inodes_count != 0)
{
@@ -252,6 +257,7 @@ repeat:
gdp->bg_free_inodes_count--;
if (S_ISDIR (mode))
gdp->bg_used_dirs_count++;
+ disk_cache_block_ref_ptr (gdp);
record_global_poke (gdp);
sblock->s_free_inodes_count--;
@@ -358,10 +364,12 @@ ext2_count_free_inodes ()
gdp = NULL;
for (i = 0; i < groups_count; i++)
{
+ void *bh;
gdp = group_desc (i);
desc_count += gdp->bg_free_inodes_count;
- x = count_free (bptr (gdp->bg_inode_bitmap),
- sblock->s_inodes_per_group / 8);
+ bh = disk_cache_block_ref (gdp->bg_inode_bitmap);
+ x = count_free (bh, sblock->s_inodes_per_group / 8);
+ disk_cache_block_deref (bh);
ext2_debug ("group %d: stored = %d, counted = %lu",
i, gdp->bg_free_inodes_count, x);
bitmap_count += x;
@@ -391,10 +399,12 @@ ext2_check_inodes_bitmap ()
gdp = NULL;
for (i = 0; i < groups_count; i++)
{
+ void *bh;
gdp = group_desc (i);
desc_count += gdp->bg_free_inodes_count;
- x = count_free (bptr (gdp->bg_inode_bitmap),
- sblock->s_inodes_per_group / 8);
+ bh = disk_cache_block_ref (gdp->bg_inode_bitmap);
+ x = count_free (bh, sblock->s_inodes_per_group / 8);
+ disk_cache_block_deref (bh);
if (gdp->bg_free_inodes_count != x)
ext2_error ("wrong free inodes count in group %d, "
"stored = %d, counted = %lu",
diff --git a/ext2fs/inode.c b/ext2fs/inode.c
index bda964fc..56311e29 100644
--- a/ext2fs/inode.c
+++ b/ext2fs/inode.c
@@ -92,7 +92,7 @@ diskfs_cached_lookup (ino_t inum, struct node **npp)
dn->dir_idx = 0;
dn->pager = 0;
rwlock_init (&dn->alloc_lock);
- pokel_init (&dn->indir_pokel, diskfs_disk_pager, disk_image);
+ pokel_init (&dn->indir_pokel, diskfs_disk_pager, disk_cache);
/* Create the new node. */
np = diskfs_make_node (dn);
@@ -201,13 +201,17 @@ read_node (struct node *np)
error_t err;
struct stat *st = &np->dn_stat;
struct disknode *dn = np->dn;
- struct ext2_inode *di = dino (np->cache_id);
+ struct ext2_inode *di;
struct ext2_inode_info *info = &dn->info;
+ ext2_debug ("(%d)", np->cache_id);
+
err = diskfs_catch_exception ();
if (err)
return err;
+ di = dino_ref (np->cache_id);
+
st->st_fstype = FSTYPE_EXT2FS;
st->st_fsid = getpid (); /* This call is very cheap. */
st->st_ino = np->cache_id;
@@ -285,6 +289,7 @@ read_node (struct node *np)
info->i_high_size = di->i_size_high;
if (info->i_high_size) /* XXX */
{
+ dino_deref (di);
ext2_warning ("cannot handle large file inode %Ld", np->cache_id);
return EFBIG;
}
@@ -307,20 +312,12 @@ read_node (struct node *np)
}
dn->info_i_translator = di->i_translator;
+ dino_deref (di);
diskfs_end_catch_exception ();
if (S_ISREG (st->st_mode) || S_ISDIR (st->st_mode)
|| (S_ISLNK (st->st_mode) && st->st_blocks))
- {
- unsigned offset;
-
- np->allocsize = np->dn_stat.st_size;
-
- /* Round up to a block multiple. */
- offset = np->allocsize & ((1 << log2_block_size) - 1);
- if (offset > 0)
- np->allocsize += block_size - offset;
- }
+ np->allocsize = round_block (np->dn_stat.st_size);
else
/* Allocsize should be zero for anything except directories, files, and
long symlinks. These are the only things allowed to have any blocks
@@ -408,7 +405,9 @@ write_node (struct node *np)
{
error_t err;
struct stat *st = &np->dn_stat;
- struct ext2_inode *di = dino (np->cache_id);
+ struct ext2_inode *di;
+
+ ext2_debug ("(%d)", np->cache_id);
if (np->dn->info.i_prealloc_count)
ext2_discard_prealloc (np);
@@ -425,6 +424,8 @@ write_node (struct node *np)
if (err)
return NULL;
+ di = dino_ref (np->cache_id);
+
di->i_generation = st->st_gen;
/* We happen to know that the stat mode bits are the same
@@ -505,6 +506,7 @@ write_node (struct node *np)
diskfs_end_catch_exception ();
np->dn_stat_dirty = 0;
+ /* Leave invoking dino_deref (di) to the caller. */
return di;
}
else
@@ -664,7 +666,7 @@ diskfs_set_translator (struct node *np, const char *name, unsigned namelen,
if (err)
return err;
- di = dino (np->cache_id);
+ di = dino_ref (np->cache_id);
blkno = di->i_translator;
if (namelen && !blkno)
@@ -677,6 +679,7 @@ diskfs_set_translator (struct node *np, const char *name, unsigned namelen,
0, 0, 0);
if (blkno == 0)
{
+ dino_deref (di);
diskfs_end_catch_exception ();
return ENOSPC;
}
@@ -700,15 +703,20 @@ diskfs_set_translator (struct node *np, const char *name, unsigned namelen,
np->dn_stat.st_mode &= ~S_IPTRANS;
np->dn_set_ctime = 1;
}
+ else
+ dino_deref (di);
if (namelen)
{
+ void *blkptr;
+
buf[0] = namelen & 0xFF;
buf[1] = (namelen >> 8) & 0xFF;
memcpy (buf + 2, name, namelen);
- bcopy (buf, bptr (blkno), block_size);
- record_global_poke (bptr (blkno));
+ blkptr = disk_cache_block_ref (blkno);
+ memcpy (blkptr, buf, block_size);
+ record_global_poke (blkptr);
np->dn_stat.st_mode |= S_IPTRANS;
np->dn_set_ctime = 1;
@@ -726,7 +734,7 @@ diskfs_get_translator (struct node *np, char **namep, unsigned *namelen)
error_t err = 0;
daddr_t blkno;
unsigned datalen;
- const void *transloc;
+ void *transloc;
assert (sblock->s_creator_os == EXT2_OS_HURD);
@@ -734,9 +742,11 @@ diskfs_get_translator (struct node *np, char **namep, unsigned *namelen)
if (err)
return err;
- blkno = (dino (np->cache_id))->i_translator;
+ struct ext2_inode *di = dino_ref (np->cache_id);
+ blkno = di->i_translator;
+ dino_deref (di);
assert (blkno);
- transloc = bptr (blkno);
+ transloc = disk_cache_block_ref (blkno);
datalen =
((unsigned char *)transloc)[0] + (((unsigned char *)transloc)[1] << 8);
@@ -751,6 +761,7 @@ diskfs_get_translator (struct node *np, char **namep, unsigned *namelen)
memcpy (*namep, transloc + 2, datalen);
}
+ disk_cache_block_deref (transloc);
diskfs_end_catch_exception ();
*namelen = datalen;
diff --git a/ext2fs/pager.c b/ext2fs/pager.c
index 6540be04..469c886c 100644
--- a/ext2fs/pager.c
+++ b/ext2fs/pager.c
@@ -18,17 +18,18 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <hurd/store.h>
#include "ext2fs.h"
+/* XXX */
+#include "../libpager/priv.h"
+
/* A ports bucket to hold pager ports. */
struct port_bucket *pager_bucket;
-/* Mapped image of the disk. */
-void *disk_image;
-
spin_lock_t node_to_page_lock = SPIN_LOCK_INITIALIZER;
#ifdef DONT_CACHE_MEMORY_OBJECTS
@@ -65,56 +66,29 @@ do { spin_lock (&ext2s_pager_stats.lock); \
ext2s_pager_stats.field++; \
spin_unlock (&ext2s_pager_stats.lock); } while (0)
+#define STAT_ADD(field, value) \
+ do { spin_lock (&ext2s_pager_stats.lock); \
+ ext2s_pager_stats.field += value; \
+ spin_unlock (&ext2s_pager_stats.lock); } while (0)
+
#else /* !STATS */
#define STAT_INC(field) /* nop */0
#endif /* STATS */
-#define FREE_PAGE_BUFS 24
-/* Returns a single page page-aligned buffer. */
-static void *
-get_page_buf ()
+/* Returns a page-aligned buffer. */
+static void * get_buf (size_t size)
{
- static struct mutex free_page_bufs_lock = MUTEX_INITIALIZER;
- static void *free_page_bufs;
- static int num_free_page_bufs;
void *buf;
-
- mutex_lock (&free_page_bufs_lock);
- if (num_free_page_bufs > 0)
- {
- buf = free_page_bufs;
- num_free_page_bufs --;
- if (num_free_page_bufs > 0)
- free_page_bufs += vm_page_size;
-#ifndef NDEBUG
- else
- free_page_bufs = 0;
-#endif /* ! NDEBUG */
- }
- else
- {
- assert (free_page_bufs == 0);
- buf = mmap (0, vm_page_size * FREE_PAGE_BUFS,
- PROT_READ|PROT_WRITE, MAP_ANON, 0, 0);
- if (buf == MAP_FAILED)
- buf = 0;
- else
- {
- free_page_bufs = buf + vm_page_size;
- num_free_page_bufs = FREE_PAGE_BUFS - 1;
- }
- }
-
- mutex_unlock (&free_page_bufs_lock);
+ posix_memalign (&buf, vm_page_size, round_page (size));
return buf;
}
-/* Frees a block returned by get_page_buf. */
+/* Frees a block returned by get_buf. */
static inline void
-free_page_buf (void *buf)
+free_buf (void *buf)
{
- munmap (buf, vm_page_size);
+ free (buf);
}
/* Find the location on disk of page OFFSET in NODE. Return the disk block
@@ -148,20 +122,31 @@ find_block (struct node *node, vm_offset_t offset,
return err;
}
-/* Read one page for the pager backing NODE at offset PAGE, into BUF. This
- may need to read several filesystem blocks to satisfy one page, and tries
- to consolidate the i/o if possible. */
-static error_t
-file_pager_read_page (struct node *node, vm_offset_t page,
- void **buf, int *writelock)
+/* Read LENGTH bytes for the pager backing NODE at offset PAGE, into BUF.
+ This may need to read several filesystem blocks to satisfy range, and
+ tries to consolidate the i/o if possible. */
+static void
+file_pager_read (struct pager *pager, struct node *node,
+ off_t start, off_t npages)
{
+ void *buf;
error_t err;
int offs = 0;
int partial = 0; /* A page truncated by the EOF. */
+ size_t length = npages * vm_page_size;
+ vm_offset_t page;
+ int left = length;
struct rwlock *lock = NULL;
- int left = vm_page_size;
block_t pending_blocks = 0;
int num_pending_blocks = 0;
+ int writelock = 0, precious = 0, deallocate = 1;
+
+ assert (left > 0);
+
+ page = start * vm_page_size;
+
+ ext2_debug ("reading inode %Ld page %jd[%jd]",
+ node->cache_id, start, npages);
/* Read the NUM_PENDING_BLOCKS blocks in PENDING_BLOCKS, into the buffer
pointed to by BUF (allocating it if necessary) at offset OFFS. OFFS in
@@ -171,14 +156,15 @@ file_pager_read_page (struct node *node, vm_offset_t page,
{
if (num_pending_blocks > 0)
{
- block_t dev_block = pending_blocks << log2_dev_blocks_per_fs_block;
+ store_offset_t dev_block = (store_offset_t) pending_blocks
+ << log2_dev_blocks_per_fs_block;
size_t amount = num_pending_blocks << log2_block_size;
/* The buffer we try to read into; on the first read, we pass in a
size of zero, so that the read is guaranteed to allocate a new
buffer, otherwise, we try to read directly into the tail of the
buffer we've already got. */
- void *new_buf = *buf + offs;
- size_t new_len = offs == 0 ? 0 : vm_page_size - offs;
+ void *new_buf = buf + offs;
+ size_t new_len = length - offs;
STAT_INC (file_pagein_reads);
@@ -188,20 +174,13 @@ file_pager_read_page (struct node *node, vm_offset_t page,
else if (amount != new_len)
return EIO;
- if (new_buf != *buf + offs)
+ if (new_buf != buf + offs)
{
/* The read went into a different buffer than the one we
- passed. */
- if (offs == 0)
- /* First read, make the returned page be our buffer. */
- *buf = new_buf;
- else
- /* We've already got some buffer, so copy into it. */
- {
- bcopy (new_buf, *buf + offs, new_len);
- free_page_buf (new_buf); /* Return NEW_BUF to our pool. */
- STAT_INC (file_pagein_freed_bufs);
- }
+ passed. */
+ memcpy (buf + offs, new_buf, new_len);
+ munmap (new_buf, new_len);
+ STAT_ADD (file_pagein_freed_bufs, npages);
}
offs += new_len;
@@ -213,7 +192,10 @@ file_pager_read_page (struct node *node, vm_offset_t page,
STAT_INC (file_pageins);
- *writelock = 0;
+#define page_aligned(addr) (((size_t) addr & (vm_page_size - 1)) == 0)
+ assert (page_aligned (page) && page_aligned (left) &&
+ page_aligned (node->allocsize));
+#undef page_aligned
if (page >= node->allocsize)
{
@@ -222,10 +204,24 @@ file_pager_read_page (struct node *node, vm_offset_t page,
}
else if (page + left > node->allocsize)
{
+ size_t last_page = node->allocsize >> log2_block_size;
+ size_t tail = start + npages - last_page;
+
+ pager_data_read_error (pager, last_page, tail, KERN_NO_DATA);
+
left = node->allocsize - page;
+ length = left;
partial = 1;
}
+ buf = mmap (0, length, PROT_READ | PROT_WRITE, MAP_ANON, 0, 0);
+ if (!buf)
+ {
+ err = ENOMEM;
+ goto end;
+ }
+ STAT_ADD (file_pagein_alloced_bufs, npages);
+
while (left > 0)
{
block_t block;
@@ -245,16 +241,8 @@ file_pager_read_page (struct node *node, vm_offset_t page,
if (block == 0)
/* Reading unallocated block, just make a zero-filled one. */
{
- *writelock = 1;
- if (offs == 0)
- /* No page allocated to read into yet. */
- {
- *buf = get_page_buf ();
- if (! *buf)
- break;
- STAT_INC (file_pagein_alloced_bufs);
- }
- bzero (*buf + offs, block_size);
+ writelock = 1;
+ memset (buf + offs, 0, block_size);
offs += block_size;
}
else
@@ -264,16 +252,23 @@ file_pager_read_page (struct node *node, vm_offset_t page,
left -= block_size;
}
+ end:
if (!err && num_pending_blocks > 0)
err = do_pending_reads();
- if (!err && partial && !*writelock)
+ if (!err && partial && !writelock)
node->dn->last_page_partially_writable = 1;
if (lock)
rwlock_reader_unlock (lock);
- return err;
+ /* Note that amount of returned data could change, so update NPAGES */
+ npages = length >> log2_block_size;
+ if (err)
+ pager_data_read_error (pager, start, npages, err);
+ else
+ pager_data_supply (pager, precious, writelock, start, npages,
+ buf, deallocate);
}
struct pending_blocks
@@ -295,7 +290,8 @@ pending_blocks_write (struct pending_blocks *pb)
if (pb->num > 0)
{
error_t err;
- block_t dev_block = pb->block << log2_dev_blocks_per_fs_block;
+ store_offset_t dev_block = (store_offset_t) pb->block
+ << log2_dev_blocks_per_fs_block;
size_t length = pb->num << log2_block_size, amount;
ext2_debug ("writing block %u[%Ld]", pb->block, pb->num);
@@ -303,10 +299,10 @@ pending_blocks_write (struct pending_blocks *pb)
if (pb->offs > 0)
/* Put what we're going to write into a page-aligned buffer. */
{
- void *page_buf = get_page_buf ();
- bcopy (pb->buf + pb->offs, (void *)page_buf, length);
+ void *page_buf = get_buf (length);
+ memcpy ((void *)page_buf, pb->buf + pb->offs, length);
err = store_write (store, dev_block, page_buf, length, &amount);
- free_page_buf (page_buf);
+ free_buf (page_buf);
}
else
err = store_write (store, dev_block, pb->buf, length, &amount);
@@ -357,17 +353,21 @@ pending_blocks_add (struct pending_blocks *pb, block_t block)
return 0;
}
-/* Write one page for the pager backing NODE, at offset PAGE, into BUF. This
- may need to write several filesystem blocks to satisfy one page, and tries
- to consolidate the i/o if possible. */
-static error_t
-file_pager_write_page (struct node *node, vm_offset_t offset, void *buf)
+/* Write LENGTH bytes for the pager backing NODE, at offset PAGE, into BUF.
+ This may need to write several filesystem blocks to satisfy range, and
+ tries to consolidate the i/o if possible. */
+static void
+file_pager_write (struct pager * pager, struct node *node,
+ vm_offset_t start, size_t npages, void *buf)
{
error_t err = 0;
struct pending_blocks pb;
struct rwlock *lock = &node->dn->alloc_lock;
block_t block;
- int left = vm_page_size;
+ int left = npages * vm_page_size;
+ vm_offset_t offset = start * vm_page_size;
+
+ assert (left > 0);
pending_blocks_init (&pb, buf);
@@ -401,55 +401,140 @@ file_pager_write_page (struct node *node, vm_offset_t offset, void *buf)
rwlock_reader_unlock (&node->dn->alloc_lock);
- return err;
+ if (err)
+ pager_data_write_error (pager, start, npages, err);
}
-static error_t
-disk_pager_read_page (vm_offset_t page, void **buf, int *writelock)
+/* Satisfy a read request on the disk pager PAGER for the NPAGES pages
+   starting at page START_PAGE.  Page numbers index DISK_CACHE_INFO; each
+   page is backed by the disk block recorded in its cache slot.  Pages
+   whose slots hold consecutive disk blocks are read from the store and
+   supplied in one go.  Nothing is returned: data and errors are delivered
+   through pager_data_supply and pager_data_read_error.  */
+static void
+disk_pager_read (struct pager *pager, off_t start_page, size_t npages)
{
- error_t err;
- size_t length = vm_page_size, read = 0;
- vm_size_t dev_end = store->size;
+  size_t left = npages;
+  store_offset_t offset = start_page, dev_end = store->size;
+
+  assert (block_size == vm_page_size);
+
+  /* Read the NBLOCKS consecutive disk blocks backing the cache pages
+     [START, START + NBLOCKS) and hand them to the pager; report EIO for
+     the whole run if the read fails or comes up short.  */
+  void supply_data (off_t start, size_t nblocks)
+  {
+    void *buf;
+    error_t err;
+    size_t read = 0, length = nblocks << log2_block_size;
+    const int writelock = 0, precious = 0, deallocate = 1;
+
+    /* An empty run has nothing to read or supply.  */
+    if (nblocks == 0)
+      return;
+
+    off_t get_addr = (((store_offset_t) disk_cache_info[start].block << log2_block_size)
+                      >> store->log2_block_size);
+    err = store_read (store, get_addr, length, &buf, &read);
+
+    if (!err && (read == length))
+      pager_data_supply (pager, precious, writelock, start, nblocks,
+                         buf, deallocate);
+    else
+      pager_data_read_error (pager, start, nblocks, EIO);
+  }
+
+  if (offset + left > dev_end >> log2_block_size)
+    left = (dev_end >> log2_block_size) - offset;
+  if (!left)
+    return;
+
+  int i;
+  /* Invariant at the top of every iteration: BASE + RANGE == I, i.e.
+     [BASE, BASE + RANGE) is the run of pages gathered but not yet
+     supplied.  */
+  store_offset_t range = 0;
+  off_t base = start_page;
+  for (i = start_page; i < start_page + left; i++)
+    {
+      mutex_lock (&disk_cache_lock);
+      if (disk_cache_info[i].block == DC_NO_BLOCK)
+        {
+          /* The slot has no block yet; treat it as the continuation of
+             the previous slot.  Slot 0 has no predecessor to derive a
+             block number from.  */
+          assert (i > 0);
+          block_t block = disk_cache_info[i - 1].block + 1;
+          if (disk_cache_block_is_cached (block))
+            {
+              mutex_unlock (&disk_cache_lock);
+              supply_data (base, range);
- if (page + vm_page_size > dev_end)
- length = dev_end - page;
+              /* Block already in cache. Do not return it again.*/
+              pager_data_read_error (pager, i, 1, KERN_NO_DATA);
- err = store_read (store, page >> store->log2_block_size, length, buf, &read);
- if (read != length)
- return EIO;
- if (!err && length != vm_page_size)
- bzero ((void *)(*buf + length), vm_page_size - length);
+              /* Page I has been answered with an error, so the next run
+                 must start after it, not on it.  */
+              base = i + 1;
+              range = 0;
+              continue;
+            }
+          else
+            {
+              void *addr;
+              addr = disk_cache_block_ref_hint_no_block (block, i);
+              assert (bptr_index (addr) == i);
+            }
+        }
- *writelock = 0;
+      assert (disk_cache_info[i].block != DC_NO_BLOCK);
- return err;
+      ext2_debug ("block %x log2 %x", disk_cache_info[i].block, log2_block_size);
+      /* The page is about to be handed to the kernel, which also ends any
+         pending re-association of this slot.  */
+      disk_cache_info[i].flags |= DC_INCORE;
+      disk_cache_info[i].flags &=~ DC_UNTOUCHED;
+#ifndef NDEBUG
+      disk_cache_info[i].last_read = disk_cache_info[i].block;
+      disk_cache_info[i].last_read_xor
+        = disk_cache_info[i].block ^ DISK_CACHE_LAST_READ_XOR;
+#endif
+      ext2_debug ("(%Ld)[%Ld]", base, range);
+      mutex_unlock (&disk_cache_lock);
+
+      /* Page I does not continue the gathered run on disk: flush the run
+         and start a new one at I.  */
+      if (disk_cache_info[base].block + range !=
+          disk_cache_info[base + range].block)
+        {
+          supply_data (base, range);
+          base += range;
+          range = 0;
+        }
+
+      range++;
+    }
+
+  if (range)
+    supply_data (base, range);
}
-static error_t
-disk_pager_write_page (vm_offset_t page, void *buf)
+static void
+disk_pager_write (struct pager * pager, vm_offset_t start,
+ size_t npages, void *buf)
{
- error_t err = 0;
- size_t length = vm_page_size, amount;
- vm_size_t dev_end = store->size;
+ size_t dev_end = store->size;
+ size_t left = npages;
+
+ assert (block_size == vm_page_size);
- if (page + vm_page_size > dev_end)
- length = dev_end - page;
+  /* Write the NBLOCKS blocks in BUF to the store at device address WRITE.
+     NOTICE is the first page of the run being written; a failure is
+     reported to the pager for exactly that run.  */
+  void write_data (off_t write, off_t notice, size_t nblocks, void *buf)
+  {
+    error_t err;
+    size_t amount = 0, length = nblocks << log2_block_size;
- ext2_debug ("writing disk page %ld[%zd]", page, length);
+    err = store_write (store, write, buf, length, &amount);
+
+    /* Report only the pages of this run, and only once: AMOUNT is looked
+       at only when the write itself reported success.  */
+    if (err)
+      pager_data_write_error (pager, notice, nblocks, err);
+    else if (length != amount)
+      pager_data_write_error (pager, notice, nblocks, EIO);
+  }
+
+ if (start + left > dev_end >> log2_block_size)
+ left = (dev_end >> log2_block_size) - start;
+
+ ext2_debug ("writing disk page %ld[%zd]", start, npages);
STAT_INC (disk_pageouts);
if (modified_global_blocks)
/* Be picky about which blocks in a page that we write. */
{
- vm_offset_t offs = page;
+ assert ("This code should be dead since only page sized blocks"
+ "are supported." && 0);
+#if 0
struct pending_blocks pb;
pending_blocks_init (&pb, buf);
while (length > 0 && !err)
{
- block_t block = boffs_block (offs);
+ block_t block = boffs_block (offset);
/* We don't clear the block modified bit here because this paging
write request may not be the same one that actually set the bit,
@@ -467,116 +552,182 @@ disk_pager_write_page (vm_offset_t page, void *buf)
/* Otherwise just skip it. */
err = pending_blocks_skip (&pb);
- offs += block_size;
+ offset += block_size;
length -= block_size;
}
if (!err)
err = pending_blocks_write (&pb);
+#endif
}
else
{
- err = store_write (store, page >> store->log2_block_size,
- buf, length, &amount);
- if (!err && length != amount)
- err = EIO;
- }
+ int i;
+ size_t range = 0;
+ store_offset_t offset = start * block_size;
+ store_offset_t start_send = start, start_info = start;
- return err;
+ for (i = start; i < start + left; i++)
+ {
+ store_offset_t new_offset;
+
+ mutex_lock (&disk_cache_lock);
+ assert (disk_cache_info[i].block != DC_NO_BLOCK);
+ new_offset = ((store_offset_t) disk_cache_info[i].block
+ << log2_block_size);
+#ifndef NDEBUG /* Not strictly needed. */
+ assert ((disk_cache_info[i].last_read ^ DISK_CACHE_LAST_READ_XOR)
+ == disk_cache_info[i].last_read_xor);
+ assert (disk_cache_info[i].last_read
+ == disk_cache_info[i].block);
+#endif
+ mutex_unlock (&disk_cache_lock);
+
+ if (range == 0)
+ start_send = new_offset >> store->log2_block_size;
+ else if (new_offset != offset + block_size)
+ {
+ write_data (start_send, start_info, range, buf);
+ buf += range * block_size;
+ start_send = new_offset >> store->log2_block_size;
+ start_info += range;
+ range = 0;
+ }
+
+ offset = new_offset;
+ range++;
+ }
+ write_data (start_send, start_info, range, buf);
+ }
}
+
/* Satisfy a pager read request for either the disk pager or file pager
- PAGER, to the page at offset PAGE into BUF. WRITELOCK should be set if
- the pager should make the page writeable. */
-error_t
-pager_read_page (struct user_pager_info *pager, vm_offset_t page,
- vm_address_t *buf, int *writelock)
+ PAGER, for the NPAGES pages starting at page START. UPI selects the
+ kind of pager: DISK requests go to disk_pager_read, everything else to
+ file_pager_read on UPI->node. Nothing is returned; the helpers are
+ handed PAGER so that they can deliver the data or an error themselves. */
+void
+ext2_read_pages (struct pager *pager, struct user_pager_info *upi,
+ off_t start, off_t npages)
{
- if (pager->type == DISK)
- return disk_pager_read_page (page, (void **)buf, writelock);
+ if (upi->type == DISK)
+ disk_pager_read (pager, start, npages);
else
- return file_pager_read_page (pager->node, page, (void **)buf, writelock);
+ file_pager_read (pager, upi->node, start, npages);
}
/* Satisfy a pager write request for either the disk pager or file pager
- PAGER, from the page at offset PAGE from BUF. */
-error_t
-pager_write_page (struct user_pager_info *pager, vm_offset_t page,
- vm_address_t buf)
+ PAGER, from the NPAGES pages pointed to by BUF starting at page START.
+ UPI selects the kind of pager (DISK or file). If DEALLOC is nonzero,
+ BUF (NPAGES * vm_page_size bytes) is released with vm_deallocate after
+ the write helper has returned. */
+void
+ext2_write_pages (struct pager *pager, struct user_pager_info *upi,
+ off_t start, off_t npages, void *buf, int dealloc)
{
- if (pager->type == DISK)
- return disk_pager_write_page (page, (void *)buf);
+ if (upi->type == DISK)
+ disk_pager_write (pager, start, npages, buf);
else
- return file_pager_write_page (pager->node, page, (void *)buf);
+ file_pager_write (pager, upi->node, start, npages, buf);
+
+ if (dealloc)
+ vm_deallocate (mach_task_self (), (vm_address_t) buf,
+ npages * vm_page_size);
}
-
-/* Make page PAGE writable, at least up to ALLOCSIZE. This function and
- diskfs_grow are the only places that blocks are actually added to the
- file. */
-error_t
-pager_unlock_page (struct user_pager_info *pager, vm_offset_t page)
+
+/* Eviction callback (installed as ext2_ops.notify_evict). For the DISK
+ pager, clear DC_INCORE on the disk cache slots covered by the NPAGES
+ pages starting at PAGE, under disk_cache_lock; other pager types are
+ ignored.
+ NOTE(review): PAGE is shifted right by log2_block_size to get a slot
+ index, i.e. it is treated as a byte offset, while disk_pager_read uses
+ its page argument directly as a slot index -- confirm which unit the
+ pager library passes here. */
+void
+ext2_notify_evict (struct user_pager_info *pager, off_t page,
+ off_t npages)
{
- if (pager->type == DISK)
- return 0;
- else
- {
- error_t err;
- volatile int partial_page;
- struct node *node = pager->node;
- struct disknode *dn = node->dn;
+ unsigned index = page >> log2_block_size;
+ unsigned last_page = index + npages;
- rwlock_writer_lock (&dn->alloc_lock);
+ if (pager->type != DISK)
+ return;
- partial_page = (page + vm_page_size > node->allocsize);
+ ext2_debug ("(block %u %u)", index, last_page);
- err = diskfs_catch_exception ();
- if (!err)
- {
- block_t block = page >> log2_block_size;
- int left = (partial_page ? node->allocsize - page : vm_page_size);
+ mutex_lock (&disk_cache_lock);
+ for (; index < last_page; index ++)
+ disk_cache_info[index].flags &= ~DC_INCORE;
+ mutex_unlock (&disk_cache_lock);
+}
- while (left > 0)
- {
- block_t disk_block;
- err = ext2_getblk (node, block++, 1, &disk_block);
- if (err)
- break;
- left -= block_size;
- }
- }
- diskfs_end_catch_exception ();
-
- if (partial_page)
- /* If an error occurred, this page still isn't writable; otherwise,
- since it's at the end of the file, it's now partially writable. */
- dn->last_page_partially_writable = !err;
- else if (page + vm_page_size == node->allocsize)
- /* This makes the last page writable, which ends exactly at the end
- of the file. If any error occurred, the page still isn't
- writable, and if not, then the whole thing is writable. */
- dn->last_page_partially_writable = 0;
+
+ /* Make the NPAGES pages starting at page START writable, at least
+ up to ALLOCSIZE. This function and diskfs_grow are the only places
+ that blocks are actually added to the file. Requests on the DISK
+ pager return immediately. Results are reported to PAGER through
+ pager_data_unlock / pager_data_unlock_error rather than returned. */
+void
+ext2_unlock_pages (struct pager *pager,
+ struct user_pager_info *upi,
+ off_t start, off_t npages)
+{
+ if (upi->type == DISK)
+ return;
+ error_t err;
+ volatile int partial_page;
+ struct node *node = upi->node;
+ struct disknode *dn = node->dn;
+ /* First file block of the request, and its end as a byte offset. */
+ block_t block = (start * vm_page_size) >> log2_block_size;
+ off_t end = (start + npages) * vm_page_size;
+
+ /* Allocate (ext2_getblk with create = 1) the next file block. The
+ BLOCK_OFFSET argument is not used; the block number comes from the
+ enclosing BLOCK counter, which is advanced on every call. */
+ inline error_t process_code (off_t block_offset)
+ {
+ block_t disk_block;
+ return ext2_getblk (node, block++, 1, &disk_block);
+ }
+
+ /* Tell the pager that the range was made writable. */
+ inline void no_error_code (off_t range_start, off_t range_len)
+ {
+ pager_data_unlock (pager, range_start, range_len / vm_page_size);
#ifdef EXT2FS_DEBUG
- if (dn->last_page_partially_writable)
- ext2_debug ("made page %u[%lu] in inode %d partially writable",
- page, node->allocsize - page, node->cache_id);
- else
- ext2_debug ("made page %u[%u] in inode %d writable",
- page, vm_page_size, node->cache_id);
+ ext2_debug ("made pages %jd[%jd] in inode %llu writable",
+ range_start / vm_page_size, range_len / vm_page_size,
+ node->cache_id);
#endif
+ }
- STAT_INC (file_page_unlocks);
+ /* Report a failed range to the pager, warn, and advance BLOCK by the
+ blocks between RANGE_LEN and the next page boundary. The ERR
+ parameter shadows the ERR of the enclosing function. */
+ inline void error_code (error_t err, off_t range_start, off_t range_len)
+ {
+ off_t err_pages = round_page (range_len) / vm_page_size;
+ pager_data_unlock_error (pager, range_start, err_pages, err);
- rwlock_writer_unlock (&dn->alloc_lock);
+ if (err == ENOSPC)
+ ext2_warning ("This filesystem is out of space, and will now"
+ " crash. Bye!");
+ else if (err)
+ ext2_warning ("inode=%Ld, pages=0x%llx[%lld]: %s",
+ node->cache_id, range_start, range_len, strerror (err));
- if (err == ENOSPC)
- ext2_warning ("This filesystem is out of space, and will now crash. Bye!");
- else if (err)
- ext2_warning ("inode=%Ld, page=0x%zx: %s",
- node->cache_id, page, strerror (err));
+ block += (round_page (range_len) - range_len) / block_size;
+ }
- return err;
- }
+ rwlock_writer_lock (&dn->alloc_lock);
+
+ partial_page = (end > node->allocsize);
+
+ /* NOTE(review): ERR below only holds the result of
+ diskfs_catch_exception; failures handed to error_code are not
+ copied into it, so the last_page_partially_writable update that
+ follows does not see them -- confirm this is intended. */
+ err = diskfs_catch_exception ();
+ if (!err)
+ pager_process_pages (start, npages, block_size,
+ process_code, no_error_code, error_code);
+ diskfs_end_catch_exception ();
+
+ if (partial_page)
+ /* If an error occurred, this page still isn't writable; otherwise,
+ since it's at the end of the file, it's now partially writable. */
+ dn->last_page_partially_writable = !err;
+ else if (end == node->allocsize)
+ /* This makes the last page writable, which ends exactly at the end
+ of the file. If any error occurred, the page still isn't
+ writable, and if not, then the whole thing is writable. */
+ dn->last_page_partially_writable = 0;
+
+#ifdef EXT2FS_DEBUG
+ if (dn->last_page_partially_writable)
+ ext2_debug ("made page %jd[%jd] in inode %llu partially writable",
+ end / vm_page_size, end % vm_page_size, node->cache_id);
+#endif
+
+ STAT_INC (file_page_unlocks);
+
+ rwlock_writer_unlock (&dn->alloc_lock);
}
/* Grow the disk allocated to locked node NODE to be at least SIZE bytes, and
@@ -720,26 +871,24 @@ flush_node_pager (struct node *node)
-/* Return in *OFFSET and *SIZE the minimum valid address the pager will
- accept and the size of the object. */
+/* Return in *START the first page the pager UPI will accept (always 0)
+ and in *END the size of the object in pages: the store size for the
+ DISK pager, the node's allocsize for a FILE_DATA pager, each rounded
+ up to a whole page.
+ NOTE(review): the disk pager is started with disk_cache_size (see
+ create_disk_pager), yet the extent reported for it is derived from
+ store->size -- confirm that this is intended. */
-inline error_t
-pager_report_extent (struct user_pager_info *pager,
- vm_address_t *offset, vm_size_t *size)
+void
+ext2_report_extent (struct user_pager_info *upi,
+ off_t *start, off_t *end)
{
- assert (pager->type == DISK || pager->type == FILE_DATA);
+ assert (upi->type == DISK || upi->type == FILE_DATA);
- *offset = 0;
+ *start = 0;
- if (pager->type == DISK)
- *size = store->size;
+ if (upi->type == DISK)
+ *end = round_page (store->size) / vm_page_size;
else
- *size = pager->node->allocsize;
-
- return 0;
+ *end = round_page (upi->node->allocsize) / vm_page_size;
}
/* This is called when a pager is being deallocated after all extant send
rights have been destroyed. */
void
-pager_clear_user_data (struct user_pager_info *upi)
+ext2_clear_user_data (struct user_pager_info *upi)
{
if (upi->type == FILE_DATA)
{
@@ -753,29 +902,540 @@ pager_clear_user_data (struct user_pager_info *upi)
diskfs_nrele_light (upi->node);
}
+}
- free (upi);
+/* Callback table passed to pager_create and diskfs_start_disk_pager, so
+ it serves both the disk pager and the file pagers; each callback tells
+ them apart by the type field of its user_pager_info. No dropweak
+ callback is provided. */
+struct pager_ops ext2_ops =
+ {
+ .read = &ext2_read_pages,
+ .write = &ext2_write_pages,
+ .unlock = &ext2_unlock_pages,
+ .report_extent = &ext2_report_extent,
+ .clear_user_data = &ext2_clear_user_data,
+ .notify_evict = &ext2_notify_evict,
+ .dropweak = NULL
+ };
+
+
+/* Cached blocks from disk. */
+void *disk_cache;
+
+/* DISK_CACHE size in bytes and blocks. */
+store_offset_t disk_cache_size;
+int disk_cache_blocks;
+
+/* block num --> pointer to in-memory block */
+hurd_ihash_t disk_cache_bptr;
+/* Cached blocks' info. */
+struct disk_cache_info *disk_cache_info;
+/* Hint index for which cache block to reuse next. */
+int disk_cache_hint;
+/* Lock for these structures. */
+struct mutex disk_cache_lock;
+/* Fired when a re-association is done. */
+struct condition disk_cache_reassociation;
+
+/* Finish setting up the disk cache: check the block size, initialize the
+   lock and condition, create the block -> pointer hash, mark every cache
+   slot as unassociated, and pin the superblock and group descriptor
+   blocks into the first slots.  */
+static void
+disk_cache_init (void)
+{
+  struct disk_cache_info *slot;
+  block_t fixed_first, fixed_last;
+
+  if (block_size != vm_page_size)
+    ext2_panic ("Block size %d != vm_page_size %d",
+                block_size, vm_page_size);
+
+  mutex_init (&disk_cache_lock);
+  condition_init (&disk_cache_reassociation);
+
+  /* Hash table translating block numbers to in-memory pointers.  */
+  if (hurd_ihash_create (&disk_cache_bptr, HURD_IHASH_NO_LOCP) != 0)
+    ext2_panic ("Can't allocate memory for disk_pager_bptr");
+
+  /* One bookkeeping record per cache slot.  */
+  disk_cache_info = malloc (disk_cache_blocks * (sizeof *disk_cache_info));
+  if (disk_cache_info == NULL)
+    ext2_panic ("Cannot allocate space for disk cache info");
+
+  /* Every slot starts out unassociated and unreferenced.  */
+  for (slot = disk_cache_info;
+       slot < disk_cache_info + disk_cache_blocks;
+       slot++)
+    {
+      slot->block = DC_NO_BLOCK;
+      slot->flags = 0;
+      slot->ref_count = 0;
+#ifndef NDEBUG
+      slot->last_read = DC_NO_BLOCK;
+      slot->last_read_xor = DC_NO_BLOCK ^ DISK_CACHE_LAST_READ_XOR;
+#endif
+    }
+  disk_cache_hint = 0;
+
+  /* Range of blocks holding the superblock and the group descriptors.  */
+  fixed_first = boffs_block (SBLOCK_OFFS);
+  fixed_last = fixed_first
+    + (round_block ((sizeof *group_desc_image) * groups_count)
+       >> log2_block_size);
+  ext2_debug ("%d-%d\n", fixed_first, fixed_last);
+  assert (fixed_last - fixed_first + 1 <= (block_t)disk_cache_blocks + 3);
+
+  /* Reference each of them once and mark its slot as fixed; they are
+     expected to land in consecutive slots starting at slot 0.  */
+  for (block_t i = fixed_first; i <= fixed_last; i++)
+    {
+      disk_cache_block_ref (i);
+      assert (disk_cache_info[i-fixed_first].block == i);
+      slot = &disk_cache_info[i-fixed_first];
+      slot->flags |= DC_FIXED;
+    }
+}
+
+/* Ask for the pages of unused cache slots to be returned, so that the
+ slots can be re-associated with other blocks. A slot counts as unused
+ when it has no references and none of the DC_DONT_REUSE flags other
+ than DC_INCORE. Adjacent unused slots are coalesced into a single
+ pager_return_some call. Must be called with disk_cache_lock unlocked:
+ the lock is taken here, and dropped around every pager_return_some.
+ If no slot qualifies, a diagnostic is printed and the caller sleeps for
+ one second. */
+static void
+disk_cache_return_unused (void)
+{
+ int index;
+
+ /* XXX: Touch all pages. It seems that sometimes GNU Mach "forgets"
+ to notify us about evicted pages. Disk cache must be
+ unlocked. */
+ for (vm_offset_t i = 0; i < disk_cache_size; i += vm_page_size)
+ *(volatile char *)(disk_cache + i);
+
+ /* Release some references to cached blocks. */
+ pokel_sync (&global_pokel, 1);
+
+ /* Return unused pages that are in core. [PENDING_BEGIN, PENDING_END)
+ is the region of slots gathered so far; -1 means no region yet. */
+ int pending_begin = -1, pending_end = -1;
+ mutex_lock (&disk_cache_lock);
+ for (index = 0; index < disk_cache_blocks; index++)
+ if (! (disk_cache_info[index].flags & (DC_DONT_REUSE & ~DC_INCORE))
+ && ! disk_cache_info[index].ref_count)
+ {
+ ext2_debug ("return %u -> %d",
+ disk_cache_info[index].block, index);
+ /* This slot does not directly follow the gathered region. */
+ if (index != pending_end)
+ {
+ /* Return previous region, if there is such, ... */
+ if (pending_end >= 0)
+ {
+ mutex_unlock (&disk_cache_lock);
+ pager_return_some (diskfs_disk_pager,
+ pending_begin * vm_page_size,
+ (pending_end - pending_begin)
+ * vm_page_size,
+ 1);
+ mutex_lock (&disk_cache_lock);
+ }
+ /* ... and start new region. */
+ pending_begin = index;
+ }
+ pending_end = index + 1;
+ }
+
+ mutex_unlock (&disk_cache_lock);
+
+ /* Return last region, if there is such. */
+ if (pending_end >= 0)
+ pager_return_some (diskfs_disk_pager,
+ pending_begin * vm_page_size,
+ (pending_end - pending_begin) * vm_page_size,
+ 1);
+ else
+ {
+ printf ("ext2fs: disk cache is starving\n");
+
+ /* Give it some time. This should happen rarely. */
+ sleep (1);
+ }
+}
+
+/* Map BLOCK into the disk cache and return a pointer to it, taking one
+   reference on its slot.  This is the variant of disk_cache_block_ref for
+   callers that already hold disk_cache_lock: it never takes the lock on
+   entry, waits on disk_cache_reassociation with it, and returns with it
+   held.  The lock is dropped temporarily only when the cache is full and
+   unused pages have to be returned.  A newly associated slot is left
+   flagged DC_UNTOUCHED; the page itself is not read here.  */
+void *
+disk_cache_block_ref_no_block (block_t block)
+{
+  int index;
+  void *bptr;
+
+  assert (0 <= block && block < store->size >> log2_block_size);
+
+  ext2_debug ("(%u)", block);
+
+  bptr = hurd_ihash_find (disk_cache_bptr, block);
+  ext2_debug ("yaya (%u) %p", block, bptr);
+  if (bptr)
+    /* Already mapped.  */
+    {
+      index = bptr_index (bptr);
+
+      /* In process of re-associating?  */
+      if (disk_cache_info[index].flags & DC_UNTOUCHED)
+        {
+          /* Wait for re-association to finish.  */
+          condition_wait (&disk_cache_reassociation, &disk_cache_lock);
+
+#if 0
+          printf ("Re-association -- wait finished.\n");
+#endif
+
+          /* Try again.  */
+          return disk_cache_block_ref_no_block (block); /* tail recursion */
+        }
+
+      /* Just increment reference and return.  */
+      assert (disk_cache_info[index].ref_count + 1
+              > disk_cache_info[index].ref_count);
+      disk_cache_info[index].ref_count++;
+
+      ext2_debug ("cached %u -> %d (ref_count = %d, flags = 0x%x, ptr = %p)",
+                  disk_cache_info[index].block, index,
+                  disk_cache_info[index].ref_count,
+                  disk_cache_info[index].flags, bptr);
+
+      return bptr;
+    }
+
+  /* Search for a block that is not in core and is not referenced.  */
+  index = disk_cache_hint;
+  while ((disk_cache_info[index].flags & DC_DONT_REUSE)
+         || (disk_cache_info[index].ref_count))
+    {
+      ext2_debug ("reject %u -> %d (ref_count = %d, flags = 0x%x)",
+                  disk_cache_info[index].block, index,
+                  disk_cache_info[index].ref_count,
+                  disk_cache_info[index].flags);
+
+      /* Just move to next block.  */
+      index++;
+      if (index >= disk_cache_blocks)
+        index -= disk_cache_blocks;
+
+      /* If we return to where we started, then there is no suitable
+         block.  */
+      if (index == disk_cache_hint)
+        break;
+    }
+
+  /* The next place in the disk cache becomes the current hint.  */
+  disk_cache_hint = index + 1;
+  if (disk_cache_hint >= disk_cache_blocks)
+    disk_cache_hint -= disk_cache_blocks;
+
+  /* Is suitable place found?  */
+  if ((disk_cache_info[index].flags & DC_DONT_REUSE)
+      || disk_cache_info[index].ref_count)
+    /* No place is found.  Try to release some blocks and try
+       again.  */
+    {
+      ext2_debug ("flush %u -> %d", disk_cache_info[index].block, index);
+
+      /* disk_cache_return_unused must run with the disk cache unlocked
+         (it takes disk_cache_lock itself), so release the lock our caller
+         holds around the call, as disk_cache_block_ref does.  The retry
+         below looks BLOCK up again, so an association made in the
+         meantime is picked up.  */
+      mutex_unlock (&disk_cache_lock);
+      disk_cache_return_unused ();
+      mutex_lock (&disk_cache_lock);
+
+      return disk_cache_block_ref_no_block (block); /* tail recursion */
+    }
+
+  /* Suitable place is found.  */
+
+  /* Calculate pointer to data.  */
+  bptr = (char *)disk_cache + (index << log2_block_size);
+  ext2_debug ("map %u -> %d (%p)", block, index, bptr);
+
+  /* DC_UNTOUCHED marks the slot as being re-associated; it is cleared by
+     disk_pager_read when the page is actually read.  */
+  disk_cache_info[index].flags |= DC_UNTOUCHED;
+
+  /* Re-associate.  */
+  if (disk_cache_info[index].block != DC_NO_BLOCK)
+    /* Remove old association.  */
+    hurd_ihash_remove (disk_cache_bptr, disk_cache_info[index].block);
+  /* New association.  */
+  if (hurd_ihash_add (disk_cache_bptr, block, bptr))
+    ext2_panic ("Couldn't hurd_ihash_add new disk block");
+  assert (! (disk_cache_info[index].flags & DC_DONT_REUSE & ~DC_UNTOUCHED));
+  disk_cache_info[index].block = block;
+  assert (! disk_cache_info[index].ref_count);
+  disk_cache_info[index].ref_count = 1;
+
+  condition_broadcast (&disk_cache_reassociation);
+
+  /* Note that in contrast to blocking version of this function, this
+     function should *NEVER* try to read page, as this function is called
+     when previous page in cache is asked.  Thus reading of this page could
+     lead to situation, when kernel will ask next page and ultimately to
+     recursion.  */
+
+  ext2_debug ("(%u) = %p", block, bptr);
+  return bptr;
+}
+
+/* Return nonzero if BLOCK currently has an association in the disk cache
+   hash, zero otherwise.  The hash is consulted without taking
+   disk_cache_lock here.  */
+int disk_cache_block_is_cached (block_t block)
+{
+  void *a = hurd_ihash_find (disk_cache_bptr, block);
+  ext2_debug ("%p", a);
+  /* Test the pointer instead of casting it: (int) a drops the upper bits
+     wherever pointers are wider than int.  */
+  return a != NULL;
+}
+
+/* Map block and return pointer to it. Takes one reference on the
+ block's cache slot. This is the locking variant: disk_cache_lock is
+ taken and released here, so the caller must not hold it. If BLOCK is
+ not cached yet, an unreferenced slot without DC_DONT_REUSE flags is
+ re-associated with it; if there is none, unused pages are returned
+ (disk_cache_return_unused) and the whole operation is retried. */
+void *
+disk_cache_block_ref (block_t block)
+{
+ int index;
+ void *bptr;
+
+ assert (0 <= block && block < store->size >> log2_block_size);
+
+ ext2_debug ("(%u)", block);
+
+ mutex_lock (&disk_cache_lock);
+
+ bptr = hurd_ihash_find (disk_cache_bptr, block);
+ if (bptr)
+ /* Already mapped. */
+ {
+ index = bptr_index (bptr);
+
+ /* In process of re-associating? */
+ if (disk_cache_info[index].flags & DC_UNTOUCHED)
+ {
+ /* Wait for re-association to finish. */
+ condition_wait (&disk_cache_reassociation, &disk_cache_lock);
+ mutex_unlock (&disk_cache_lock);
+
+#if 0
+ printf ("Re-association -- wait finished.\n");
+#endif
+
+ /* Try again. */
+ return disk_cache_block_ref (block); /* tail recursion */
+ }
+
+ /* Just increment reference and return. */
+ assert (disk_cache_info[index].ref_count + 1
+ > disk_cache_info[index].ref_count);
+ disk_cache_info[index].ref_count++;
+
+ ext2_debug ("cached %u -> %d (ref_count = %d, flags = 0x%x, ptr = %p)",
+ disk_cache_info[index].block, index,
+ disk_cache_info[index].ref_count,
+ disk_cache_info[index].flags, bptr);
+
+ mutex_unlock (&disk_cache_lock);
+
+ return bptr;
+ }
+
+ /* Search for a block that is not in core and is not referenced. */
+ index = disk_cache_hint;
+ while ((disk_cache_info[index].flags & DC_DONT_REUSE)
+ || (disk_cache_info[index].ref_count))
+ {
+ ext2_debug ("reject %u -> %d (ref_count = %d, flags = 0x%x)",
+ disk_cache_info[index].block, index,
+ disk_cache_info[index].ref_count,
+ disk_cache_info[index].flags);
+
+ /* Just move to next block. */
+ index++;
+ if (index >= disk_cache_blocks)
+ index -= disk_cache_blocks;
+
+ /* If we return to where we started, then there is no suitable
+ block. */
+ if (index == disk_cache_hint)
+ break;
+ }
+
+ /* The next place in the disk cache becomes the current hint. */
+ disk_cache_hint = index + 1;
+ if (disk_cache_hint >= disk_cache_blocks)
+ disk_cache_hint -= disk_cache_blocks;
+
+ /* Is suitable place found? */
+ if ((disk_cache_info[index].flags & DC_DONT_REUSE)
+ || disk_cache_info[index].ref_count)
+ /* No place is found. Try to release some blocks and try
+ again. */
+ {
+ ext2_debug ("flush %u -> %d", disk_cache_info[index].block, index);
+
+ /* disk_cache_return_unused takes the lock itself. */
+ mutex_unlock (&disk_cache_lock);
+
+ disk_cache_return_unused ();
+
+ return disk_cache_block_ref (block); /* tail recursion */
+ }
+
+ /* Suitable place is found. */
+
+ /* Calculate pointer to data. */
+ bptr = (char *)disk_cache + (index << log2_block_size);
+ ext2_debug ("map %u -> %d (%p)", block, index, bptr);
+
+ /* This pager_return_some is used only to set PM_FORCEREAD for the
+ page. DC_UNTOUCHED is set so that we catch if someone has
+ referenced the block while we didn't hold disk_cache_lock.
+ (The pager_return_some call itself is in the disabled section
+ below.) */
+ disk_cache_info[index].flags |= DC_UNTOUCHED;
+
+#if 0 /* XXX: Let's see if this is needed at all. */
+
+ mutex_unlock (&disk_cache_lock);
+ pager_return_some (diskfs_disk_pager, bptr - disk_cache, vm_page_size, 1);
+ mutex_lock (&disk_cache_lock);
+
+ /* Has someone used our bptr? Has someone mapped requested block
+ while we have unlocked disk_cache_lock? If so, environment has
+ changed and we have to restart operation. */
+ if ((! (disk_cache_info[index].flags & DC_UNTOUCHED))
+ || hurd_ihash_find (disk_cache_bptr, block))
+ {
+ mutex_unlock (&disk_cache_lock);
+ return disk_cache_block_ref (block); /* tail recursion */
+ }
+
+#elif 0
+
+ /* XXX: Use libpager internals. */
+
+ mutex_lock (&diskfs_disk_pager->interlock);
+ int page = (bptr - disk_cache) / vm_page_size;
+ assert (page >= 0);
+ int is_incore = (page < diskfs_disk_pager->pagemapsize
+ && (diskfs_disk_pager->pagemap[page] & PM_INCORE));
+ mutex_unlock (&diskfs_disk_pager->interlock);
+ if (is_incore)
+ {
+ mutex_unlock (&disk_cache_lock);
+ printf ("INCORE\n");
+ return disk_cache_block_ref (block); /* tail recursion */
+ }
+
+#endif
+
+ /* Re-associate. */
+ if (disk_cache_info[index].block != DC_NO_BLOCK)
+ /* Remove old association. */
+ hurd_ihash_remove (disk_cache_bptr, disk_cache_info[index].block);
+ /* New association. */
+ if (hurd_ihash_add (disk_cache_bptr, block, bptr))
+ ext2_panic ("Couldn't hurd_ihash_add new disk block");
+ assert (! (disk_cache_info[index].flags & DC_DONT_REUSE & ~DC_UNTOUCHED));
+ disk_cache_info[index].block = block;
+ assert (! disk_cache_info[index].ref_count);
+ disk_cache_info[index].ref_count = 1;
+
+ /* All data structures are set up. */
+ mutex_unlock (&disk_cache_lock);
+
+#if 0
+ /* Try to read page. */
+ *(volatile char *) bptr;
+
+ /* Check if it's actually read. */
+ mutex_lock (&disk_cache_lock);
+ if (disk_cache_info[index].flags & DC_UNTOUCHED)
+ /* It's not read. */
+ {
+ /* Remove newly created association. */
+ hurd_ihash_remove (disk_cache_bptr, block);
+ disk_cache_info[index].block = DC_NO_BLOCK;
+ disk_cache_info[index].flags &=~ DC_UNTOUCHED;
+ disk_cache_info[index].ref_count = 0;
+ mutex_unlock (&disk_cache_lock);
+
+ /* Prepare next time association of this page to succeed. */
+ pager_flush_some (diskfs_disk_pager, bptr - disk_cache,
+ vm_page_size, 0);
+
+#if 0
+ printf ("Re-association failed.\n");
+#endif
+
+ /* Try again. */
+ return disk_cache_block_ref (block); /* tail recursion */
+ }
+ mutex_unlock (&disk_cache_lock);
+#endif
+
+ /* Re-association was successful. Wake up anyone waiting for it. */
+ condition_broadcast (&disk_cache_reassociation);
+
+ ext2_debug ("(%u) = %p", block, bptr);
+ return bptr;
+}
+
+/* Like disk_cache_block_ref_no_block, but start the search for a free
+   slot at index HINT instead of at disk_cache_hint.  The global hint is
+   put back afterwards, stepping past HINT when the two coincided.  It is
+   called with disk_cache_lock held (see disk_pager_read).  */
+void *
+disk_cache_block_ref_hint_no_block (block_t block, int hint)
+{
+  void *result;
+  int old_hint = disk_cache_hint;
+
+  disk_cache_hint = hint;
+  result = disk_cache_block_ref_no_block (block);
+
+  if (hint == old_hint)
+    {
+      /* Step past the slot just used, wrapping at the end of the cache
+         so that the hint stays a valid index into disk_cache_info.  */
+      disk_cache_hint = old_hint + 1;
+      if (disk_cache_hint >= disk_cache_blocks)
+        disk_cache_hint -= disk_cache_blocks;
+    }
+  else
+    disk_cache_hint = old_hint;
+
+  return result;
}
-/* This will be called when the ports library wants to drop weak references.
- The pager library creates no weak references itself. If the user doesn't
- either, then it's OK for this function to do nothing. */
+/* Take an additional reference on the cache slot that PTR points into.
+ The slot must already be referenced at least once and must not be in
+ the middle of a re-association (both are asserted). Takes and
+ releases disk_cache_lock. */
void
-pager_dropweak (struct user_pager_info *p __attribute__ ((unused)))
+disk_cache_block_ref_ptr (void *ptr)
{
+ int index;
+
+ mutex_lock (&disk_cache_lock);
+ index = bptr_index (ptr);
+ assert (disk_cache_info[index].ref_count >= 1);
+ /* Guard against the reference count wrapping around. */
+ assert (disk_cache_info[index].ref_count + 1
+ > disk_cache_info[index].ref_count);
+ disk_cache_info[index].ref_count++;
+ assert (! (disk_cache_info[index].flags & DC_UNTOUCHED));
+ ext2_debug ("(%p) (ref_count = %d, flags = 0x%x)",
+ ptr,
+ disk_cache_info[index].ref_count,
+ disk_cache_info[index].flags);
+ mutex_unlock (&disk_cache_lock);
+}
+
+/* Drop one reference on the cache slot that PTR points into. PTR must
+ lie within the disk cache, and the slot must be referenced and not in
+ the middle of a re-association (all asserted). Takes and releases
+ disk_cache_lock. */
+void
+disk_cache_block_deref (void *ptr)
+{
+ int index;
+
+ /* NOTE(review): the upper bound is inclusive, so a pointer one past
+ the end of the cache passes this check although it would index one
+ slot past disk_cache_info -- confirm whether `<' was intended. */
+ assert (disk_cache <= ptr && ptr <= disk_cache + disk_cache_size);
+
+ mutex_lock (&disk_cache_lock);
+ index = bptr_index (ptr);
+ /* The debug output shows the count as it will be after the drop. */
+ ext2_debug ("(%p) (ref_count = %d, flags = 0x%x)",
+ ptr,
+ disk_cache_info[index].ref_count - 1,
+ disk_cache_info[index].flags);
+ assert (! (disk_cache_info[index].flags & DC_UNTOUCHED));
+ assert (disk_cache_info[index].ref_count >= 1);
+ disk_cache_info[index].ref_count--;
+ mutex_unlock (&disk_cache_lock);
+}
+
+/* Not used.  Return the reference count of the cache slot associated with
+   BLOCK, or 0 when BLOCK has no association.  Takes and releases
+   disk_cache_lock.  */
+int
+disk_cache_block_is_ref (block_t block)
+{
+  int refs = 0;
+
+  mutex_lock (&disk_cache_lock);
+  void *bptr = hurd_ihash_find (disk_cache_bptr, block);
+  if (bptr)
+    /* XXX: Should check for DC_UNTOUCHED too.  */
+    refs = disk_cache_info[bptr_index (bptr)].ref_count;
+  mutex_unlock (&disk_cache_lock);
+
+  return refs;
}
/* Create the DISK pager. */
void
create_disk_pager (void)
{
- struct user_pager_info *upi = malloc (sizeof (struct user_pager_info));
- if (!upi)
- ext2_panic ("can't create disk pager: %s", strerror (errno));
- upi->type = DISK;
pager_bucket = ports_create_bucket ();
- diskfs_start_disk_pager (upi, pager_bucket, MAY_CACHE, store->size,
- &disk_image);
+  get_hypermetadata ();
+  disk_cache_blocks = DISK_CACHE_BLOCKS;
+  /* Widen before shifting: the byte size of the cache may not fit in an
+     int even though the block count does.  */
+  disk_cache_size = (store_offset_t) disk_cache_blocks << log2_block_size;
+  /* The pager maps the disk cache, not the whole store; the user pager
+     info is allocated by the pager library and filled in afterwards.  */
+  diskfs_start_disk_pager (&ext2_ops, sizeof (struct user_pager_info),
+                           pager_bucket, MAY_CACHE, disk_cache_size,
+                           &disk_cache);
+  pager_get_upi (diskfs_disk_pager)->type = DISK;
+  disk_cache_init ();
}
/* Call this to create a FILE_DATA pager and return a send right.
@@ -807,23 +1467,23 @@ diskfs_get_filemap (struct node *node, vm_prot_t prot)
}
else
{
- struct user_pager_info *upi =
- malloc (sizeof (struct user_pager_info));
- upi->type = FILE_DATA;
- upi->node = node;
- upi->max_prot = prot;
+ struct user_pager_info *upi;
diskfs_nref_light (node);
- node->dn->pager =
- pager_create (upi, pager_bucket, MAY_CACHE,
- MEMORY_OBJECT_COPY_DELAY);
+ node->dn->pager = pager_create (&ext2_ops, sizeof (*upi),
+ pager_bucket, MAY_CACHE,
+ MEMORY_OBJECT_COPY_DELAY);
if (node->dn->pager == 0)
{
diskfs_nrele_light (node);
- free (upi);
spin_unlock (&node_to_page_lock);
return MACH_PORT_NULL;
}
+ upi = pager_get_upi (node->dn->pager);
+ upi->type = FILE_DATA;
+ upi->node = node;
+ upi->max_prot = prot;
+
right = pager_get_port (node->dn->pager);
ports_port_deref (node->dn->pager);
}
diff --git a/ext2fs/pokel.c b/ext2fs/pokel.c
index 85b4d2d1..acc7c17b 100644
--- a/ext2fs/pokel.c
+++ b/ext2fs/pokel.c
@@ -67,12 +67,27 @@ pokel_add (struct pokel *pokel, void *loc, vm_size_t length)
vm_offset_t p_offs = pl->offset;
vm_size_t p_end = p_offs + pl->length;
- if (p_offs == offset && p_end == end)
- break;
+ if (p_offs <= offset && end <= p_end)
+ {
+ if (pokel->image == disk_cache)
+ for (vm_offset_t i = offset; i < end; i += block_size)
+ disk_cache_block_deref (disk_cache + i);
+
+ break;
+ }
else if (p_end >= offset && end >= p_offs)
{
pl->offset = offset < p_offs ? offset : p_offs;
pl->length = (end > p_end ? end : p_end) - pl->offset;
+
+ if (pokel->image == disk_cache)
+ {
+ vm_offset_t i_begin = p_offs > offset ? p_offs : offset;
+ vm_offset_t i_end = p_end < end ? p_end : end;
+ for (vm_offset_t i = i_begin; i < i_end; i += block_size)
+ disk_cache_block_deref (disk_cache + i);
+ }
+
ext2_debug ("extended 0x%x[%ul] to 0x%x[%ul]",
p_offs, p_end - p_offs, pl->offset, pl->length);
break;
@@ -113,11 +128,22 @@ _pokel_exec (struct pokel *pokel, int sync, int wait)
spin_unlock (&pokel->lock);
for (pl = pokes; pl; last = pl, pl = pl->next)
- if (sync)
- {
- ext2_debug ("syncing 0x%x[%ul]", pl->offset, pl->length);
- pager_sync_some (pokel->pager, pl->offset, pl->length, wait);
- }
+ {
+ if (sync)
+ {
+ ext2_debug ("syncing 0x%x[%ul]", pl->offset, pl->length);
+ pager_sync_some (pokel->pager, pl->offset / vm_page_size,
+ pl->length / vm_page_size, wait);
+ }
+
+ if (pokel->image == disk_cache)
+ {
+ vm_offset_t begin = trunc_block (pl->offset);
+ vm_offset_t end = round_block (pl->offset + pl->length);
+ for (vm_offset_t i = begin; i != end; i += block_size)
+ disk_cache_block_deref (pokel->image + i);
+ }
+ }
if (last)
{
diff --git a/ext2fs/truncate.c b/ext2fs/truncate.c
index 077225b0..ab9c78fa 100644
--- a/ext2fs/truncate.c
+++ b/ext2fs/truncate.c
@@ -124,7 +124,7 @@ trunc_indirect (struct node *node, block_t end,
{
unsigned index;
int modified = 0, all_freed = 1;
- block_t *ind_bh = (block_t *)bptr (*p);
+ block_t *ind_bh = (block_t *)disk_cache_block_ref (*p);
unsigned first = end < offset ? 0 : end - offset;
for (index = first; index < addr_per_block; index++)
@@ -139,11 +139,18 @@ trunc_indirect (struct node *node, block_t end,
if (first == 0 && all_freed)
{
- pager_flush_some (diskfs_disk_pager, boffs (*p), block_size, 1);
+ if (block_size >= vm_page_size)
+ pager_flush_some (diskfs_disk_pager,
+ ((bptr_index (ind_bh) << log2_block_size)
+ / vm_page_size),
+ block_size / vm_page_size, 1);
free_block_run_free_ptr (fbr, p);
+ disk_cache_block_deref (ind_bh);
}
else if (modified)
record_indir_poke (node, ind_bh);
+ else
+ disk_cache_block_deref (ind_bh);
}
}
@@ -241,8 +248,10 @@ force_delayed_copies (struct node *node, off_t length)
/* XXX should cope with errors from diskfs_get_filemap */
poke_pages (obj, round_page (length), round_page (node->allocsize));
mach_port_deallocate (mach_task_self (), obj);
- pager_flush_some (pager, round_page(length),
- node->allocsize - length, 1);
+ pager_flush_some (pager, round_page(length) / vm_page_size,
+ round_page (node->allocsize -
+ round_page (length)) / vm_page_size,
+ 1);
}
ports_port_deref (pager);