[PATCH] OCFS2: The Second Oracle Cluster Filesystem
The OCFS2 file system module. Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com> Signed-off-by: Kurt Hackel <kurt.hackel@oracle.com>
This commit is contained in:
Родитель
8df08c89c6
Коммит
ccd979bdbc
|
@ -36,6 +36,8 @@ ntfs.txt
|
||||||
- info and mount options for the NTFS filesystem (Windows NT).
|
- info and mount options for the NTFS filesystem (Windows NT).
|
||||||
proc.txt
|
proc.txt
|
||||||
- info on Linux's /proc filesystem.
|
- info on Linux's /proc filesystem.
|
||||||
|
ocfs2.txt
|
||||||
|
- info and mount options for the OCFS2 clustered filesystem.
|
||||||
romfs.txt
|
romfs.txt
|
||||||
- Description of the ROMFS filesystem.
|
- Description of the ROMFS filesystem.
|
||||||
smbfs.txt
|
smbfs.txt
|
||||||
|
|
|
@ -0,0 +1,55 @@
|
||||||
|
OCFS2 filesystem
|
||||||
|
==================
|
||||||
|
OCFS2 is a general purpose extent based shared disk cluster file
|
||||||
|
system with many similarities to ext3. It supports 64 bit inode
|
||||||
|
numbers, and has automatically extending metadata groups which may
|
||||||
|
also make it attractive for non-clustered use.
|
||||||
|
|
||||||
|
You'll want to install the ocfs2-tools package in order to at least
|
||||||
|
get "mount.ocfs2" and "ocfs2_hb_ctl".
|
||||||
|
|
||||||
|
Project web page: http://oss.oracle.com/projects/ocfs2
|
||||||
|
Tools web page: http://oss.oracle.com/projects/ocfs2-tools
|
||||||
|
OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/
|
||||||
|
|
||||||
|
All code copyright 2005 Oracle except when otherwise noted.
|
||||||
|
|
||||||
|
CREDITS:
|
||||||
|
Lots of code taken from ext3 and other projects.
|
||||||
|
|
||||||
|
Authors in alphabetical order:
|
||||||
|
Joel Becker <joel.becker@oracle.com>
|
||||||
|
Zach Brown <zach.brown@oracle.com>
|
||||||
|
Mark Fasheh <mark.fasheh@oracle.com>
|
||||||
|
Kurt Hackel <kurt.hackel@oracle.com>
|
||||||
|
Sunil Mushran <sunil.mushran@oracle.com>
|
||||||
|
Manish Singh <manish.singh@oracle.com>
|
||||||
|
|
||||||
|
Caveats
|
||||||
|
=======
|
||||||
|
Features which OCFS2 does not support yet:
|
||||||
|
- sparse files
|
||||||
|
- extended attributes
|
||||||
|
- shared writeable mmap
|
||||||
|
- loopback is supported, but data written will not
|
||||||
|
be cluster coherent.
|
||||||
|
- quotas
|
||||||
|
- cluster aware flock
|
||||||
|
- Directory change notification (F_NOTIFY)
|
||||||
|
- Distributed Caching (F_SETLEASE/F_GETLEASE/break_lease)
|
||||||
|
- POSIX ACLs
|
||||||
|
- readpages / writepages (not user visible)
|
||||||
|
|
||||||
|
Mount options
|
||||||
|
=============
|
||||||
|
|
||||||
|
OCFS2 supports the following mount options:
|
||||||
|
(*) == default
|
||||||
|
|
||||||
|
barrier=1 This enables/disables barriers. barrier=0 disables it,
|
||||||
|
barrier=1 enables it.
|
||||||
|
errors=remount-ro(*) Remount the filesystem read-only on an error.
|
||||||
|
errors=panic Panic and halt the machine if an error occurs.
|
||||||
|
intr (*) Allow signals to interrupt cluster operations.
|
||||||
|
nointr Do not allow signals to interrupt cluster
|
||||||
|
operations.
|
|
@ -1905,6 +1905,15 @@ M: ajoshi@shell.unixbox.com
|
||||||
L: linux-nvidia@lists.surfsouth.com
|
L: linux-nvidia@lists.surfsouth.com
|
||||||
S: Maintained
|
S: Maintained
|
||||||
|
|
||||||
|
ORACLE CLUSTER FILESYSTEM 2 (OCFS2)
|
||||||
|
P: Mark Fasheh
|
||||||
|
M: mark.fasheh@oracle.com
|
||||||
|
P: Kurt Hackel
|
||||||
|
M: kurt.hackel@oracle.com
|
||||||
|
L: ocfs2-devel@oss.oracle.com
|
||||||
|
W: http://oss.oracle.com/projects/ocfs2/
|
||||||
|
S: Supported
|
||||||
|
|
||||||
OLYMPIC NETWORK DRIVER
|
OLYMPIC NETWORK DRIVER
|
||||||
P: Peter De Shrijver
|
P: Peter De Shrijver
|
||||||
M: p2@ace.ulyssis.student.kuleuven.ac.be
|
M: p2@ace.ulyssis.student.kuleuven.ac.be
|
||||||
|
|
|
@ -0,0 +1,33 @@
|
||||||
|
EXTRA_CFLAGS += -Ifs/ocfs2
|
||||||
|
|
||||||
|
EXTRA_CFLAGS += -DCATCH_BH_JBD_RACES
|
||||||
|
|
||||||
|
obj-$(CONFIG_OCFS2_FS) += ocfs2.o
|
||||||
|
|
||||||
|
ocfs2-objs := \
|
||||||
|
alloc.o \
|
||||||
|
aops.o \
|
||||||
|
buffer_head_io.o \
|
||||||
|
dcache.o \
|
||||||
|
dir.o \
|
||||||
|
dlmglue.o \
|
||||||
|
export.o \
|
||||||
|
extent_map.o \
|
||||||
|
file.o \
|
||||||
|
heartbeat.o \
|
||||||
|
inode.o \
|
||||||
|
journal.o \
|
||||||
|
localalloc.o \
|
||||||
|
mmap.o \
|
||||||
|
namei.o \
|
||||||
|
slot_map.o \
|
||||||
|
suballoc.o \
|
||||||
|
super.o \
|
||||||
|
symlink.o \
|
||||||
|
sysfile.o \
|
||||||
|
uptodate.o \
|
||||||
|
ver.o \
|
||||||
|
vote.o
|
||||||
|
|
||||||
|
obj-$(CONFIG_OCFS2_FS) += cluster/
|
||||||
|
obj-$(CONFIG_OCFS2_FS) += dlm/
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,82 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* alloc.h
|
||||||
|
*
|
||||||
|
* Function prototypes
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef OCFS2_ALLOC_H
|
||||||
|
#define OCFS2_ALLOC_H
|
||||||
|
|
||||||
|
struct ocfs2_alloc_context;
|
||||||
|
int ocfs2_insert_extent(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_journal_handle *handle,
|
||||||
|
struct inode *inode,
|
||||||
|
struct buffer_head *fe_bh,
|
||||||
|
u64 blkno,
|
||||||
|
u32 new_clusters,
|
||||||
|
struct ocfs2_alloc_context *meta_ac);
|
||||||
|
int ocfs2_num_free_extents(struct ocfs2_super *osb,
|
||||||
|
struct inode *inode,
|
||||||
|
struct ocfs2_dinode *fe);
|
||||||
|
/* how many new metadata chunks would an allocation need at maximum? */
|
||||||
|
static inline int ocfs2_extend_meta_needed(struct ocfs2_dinode *fe)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Rather than do all the work of determining how much we need
|
||||||
|
* (involves a ton of reads and locks), just ask for the
|
||||||
|
* maximal limit. That's a tree depth shift. So, one block for
|
||||||
|
* level of the tree (current l_tree_depth), one block for the
|
||||||
|
* new tree_depth==0 extent_block, and one block at the new
|
||||||
|
* top-of-the tree.
|
||||||
|
*/
|
||||||
|
return le16_to_cpu(fe->id2.i_list.l_tree_depth) + 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
int ocfs2_truncate_log_init(struct ocfs2_super *osb);
|
||||||
|
void ocfs2_truncate_log_shutdown(struct ocfs2_super *osb);
|
||||||
|
void ocfs2_schedule_truncate_log_flush(struct ocfs2_super *osb,
|
||||||
|
int cancel);
|
||||||
|
int ocfs2_flush_truncate_log(struct ocfs2_super *osb);
|
||||||
|
int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
|
||||||
|
int slot_num,
|
||||||
|
struct ocfs2_dinode **tl_copy);
|
||||||
|
int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_dinode *tl_copy);
|
||||||
|
|
||||||
|
/*
 * Truncate state: ocfs2_prepare_truncate() hands one back through its
 * struct ocfs2_truncate_context ** argument and ocfs2_commit_truncate()
 * takes it as input.
 * NOTE(review): the tc_ext_alloc_* members presumably describe the extent
 * allocator inode, its buffer and its cluster lock state - confirm against
 * alloc.c.
 */
struct ocfs2_truncate_context {
|
||||||
|
struct inode *tc_ext_alloc_inode;
|
||||||
|
struct buffer_head *tc_ext_alloc_bh;
|
||||||
|
int tc_ext_alloc_locked; /* is it cluster locked? */
|
||||||
|
/* these get destroyed once it's passed to ocfs2_commit_truncate. */
|
||||||
|
struct buffer_head *tc_last_eb_bh;
|
||||||
|
};
|
||||||
|
|
||||||
|
int ocfs2_prepare_truncate(struct ocfs2_super *osb,
|
||||||
|
struct inode *inode,
|
||||||
|
struct buffer_head *fe_bh,
|
||||||
|
struct ocfs2_truncate_context **tc);
|
||||||
|
int ocfs2_commit_truncate(struct ocfs2_super *osb,
|
||||||
|
struct inode *inode,
|
||||||
|
struct buffer_head *fe_bh,
|
||||||
|
struct ocfs2_truncate_context *tc);
|
||||||
|
|
||||||
|
#endif /* OCFS2_ALLOC_H */
|
|
@ -0,0 +1,643 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/fs.h>
|
||||||
|
#include <linux/slab.h>
|
||||||
|
#include <linux/highmem.h>
|
||||||
|
#include <linux/pagemap.h>
|
||||||
|
#include <asm/byteorder.h>
|
||||||
|
|
||||||
|
#define MLOG_MASK_PREFIX ML_FILE_IO
|
||||||
|
#include <cluster/masklog.h>
|
||||||
|
|
||||||
|
#include "ocfs2.h"
|
||||||
|
|
||||||
|
#include "alloc.h"
|
||||||
|
#include "aops.h"
|
||||||
|
#include "dlmglue.h"
|
||||||
|
#include "extent_map.h"
|
||||||
|
#include "file.h"
|
||||||
|
#include "inode.h"
|
||||||
|
#include "journal.h"
|
||||||
|
#include "super.h"
|
||||||
|
#include "symlink.h"
|
||||||
|
|
||||||
|
#include "buffer_head_io.h"
|
||||||
|
|
||||||
|
/*
 * get_block callback for symlinks whose target is stored in allocated
 * blocks; fast symlinks must never get here (see the BUG_ON below).
 *
 * Maps logical block @iblock of @inode into @bh_result, using the first
 * extent record of the on-disk inode as the base block. If @bh_result is
 * not up to date and the inode is still new, the block contents are first
 * copied over from the buffer cache, because symlink data is not created
 * through the page cache.
 *
 * @create is part of the get_block signature and is only logged.
 *
 * Returns 0 on success and -EIO on every failure path.
 */
static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
				   struct buffer_head *bh_result, int create)
{
	int err = -EIO;
	int status;
	struct ocfs2_dinode *fe = NULL;
	struct buffer_head *bh = NULL;
	struct buffer_head *buffer_cache_bh = NULL;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	void *kaddr;

	mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode,
		   (unsigned long long)iblock, bh_result, create);

	BUG_ON(ocfs2_inode_is_fast_symlink(inode));

	if ((iblock << inode->i_sb->s_blocksize_bits) > PATH_MAX + 1) {
		mlog(ML_ERROR, "block offset > PATH_MAX: %llu\n",
		     (unsigned long long)iblock);
		goto bail;
	}

	status = ocfs2_read_block(OCFS2_SB(inode->i_sb),
				  OCFS2_I(inode)->ip_blkno,
				  &bh, OCFS2_BH_CACHED, inode);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}
	fe = (struct ocfs2_dinode *) bh->b_data;

	if (!OCFS2_IS_VALID_DINODE(fe)) {
		/* i_blkno is an on-disk field: convert it like the others. */
		mlog(ML_ERROR, "Invalid dinode #%"MLFu64": signature = %.*s\n",
		     le64_to_cpu(fe->i_blkno), 7, fe->i_signature);
		goto bail;
	}

	if ((u64)iblock >= ocfs2_clusters_to_blocks(inode->i_sb,
						    le32_to_cpu(fe->i_clusters))) {
		mlog(ML_ERROR, "block offset is outside the allocated size: "
		     "%llu\n", (unsigned long long)iblock);
		goto bail;
	}

	/* We don't use the page cache to create symlink data, so if
	 * need be, copy it over from the buffer cache. */
	if (!buffer_uptodate(bh_result) && ocfs2_inode_is_new(inode)) {
		u64 blkno = le64_to_cpu(fe->id2.i_list.l_recs[0].e_blkno) +
			    iblock;
		buffer_cache_bh = sb_getblk(osb->sb, blkno);
		if (!buffer_cache_bh) {
			mlog(ML_ERROR, "couldn't getblock for symlink!\n");
			goto bail;
		}

		/* we haven't locked out transactions, so a commit
		 * could've happened. Since we've got a reference on
		 * the bh, even if it commits while we're doing the
		 * copy, the data is still good. */
		if (buffer_jbd(buffer_cache_bh)
		    && ocfs2_inode_is_new(inode)) {
			kaddr = kmap_atomic(bh_result->b_page, KM_USER0);
			if (!kaddr) {
				mlog(ML_ERROR, "couldn't kmap!\n");
				/* drop the sb_getblk() reference before
				 * bailing out; bail only releases bh */
				brelse(buffer_cache_bh);
				goto bail;
			}
			memcpy(kaddr + (bh_result->b_size * iblock),
			       buffer_cache_bh->b_data,
			       bh_result->b_size);
			kunmap_atomic(kaddr, KM_USER0);
			set_buffer_uptodate(bh_result);
		}
		brelse(buffer_cache_bh);
	}

	map_bh(bh_result, inode->i_sb,
	       le64_to_cpu(fe->id2.i_list.l_recs[0].e_blkno) + iblock);

	err = 0;

bail:
	if (bh)
		brelse(bh);

	mlog_exit(err);
	return err;
}
|
||||||
|
|
||||||
|
/*
 * get_block callback handed to the generic block_* page helpers in this
 * file.
 *
 * - Logs a notice when invoked on a system-file inode.
 * - Delegates symlinks to ocfs2_symlink_get_block().
 * - Fails if @iblock lies beyond the inode's ip_clusters (sampled under
 *   ip_lock), which can happen if another node truncates after our extend.
 * - Otherwise looks up the physical block via
 *   ocfs2_extent_map_get_blocks() and maps it into @bh_result; a mapping
 *   to block 0 is reported as an error.
 * - Marks @bh_result new when @create is set and @iblock is at or past
 *   the block count covering i_size.
 *
 * Returns 0 on success; every negative error is collapsed to -EIO.
 */
static int ocfs2_get_block(struct inode *inode, sector_t iblock,
			   struct buffer_head *bh_result, int create)
{
	int err = 0;
	/* Start at 0 so the error log below never prints an indeterminate
	 * value if the extent lookup fails before storing a result. */
	u64 p_blkno = 0, past_eof;

	mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode,
		   (unsigned long long)iblock, bh_result, create);

	if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE)
		mlog(ML_NOTICE, "get_block on system inode 0x%p (%lu)\n",
		     inode, inode->i_ino);

	if (S_ISLNK(inode->i_mode)) {
		/* this always does I/O for some reason. */
		err = ocfs2_symlink_get_block(inode, iblock, bh_result, create);
		goto bail;
	}

	/* this can happen if another node truncs after our extend! */
	spin_lock(&OCFS2_I(inode)->ip_lock);
	if (iblock >= ocfs2_clusters_to_blocks(inode->i_sb,
					       OCFS2_I(inode)->ip_clusters))
		err = -EIO;
	spin_unlock(&OCFS2_I(inode)->ip_lock);
	if (err)
		goto bail;

	err = ocfs2_extent_map_get_blocks(inode, iblock, 1, &p_blkno,
					  NULL);
	if (err) {
		mlog(ML_ERROR, "Error %d from get_blocks(0x%p, %llu, 1, "
		     "%"MLFu64", NULL)\n", err, inode,
		     (unsigned long long)iblock, p_blkno);
		goto bail;
	}

	map_bh(bh_result, inode->i_sb, p_blkno);

	if (bh_result->b_blocknr == 0) {
		err = -EIO;
		mlog(ML_ERROR, "iblock = %llu p_blkno = %"MLFu64" "
		     "blkno=(%"MLFu64")\n", (unsigned long long)iblock,
		     p_blkno, OCFS2_I(inode)->ip_blkno);
	}

	past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
	mlog(0, "Inode %lu, past_eof = %"MLFu64"\n", inode->i_ino, past_eof);

	if (create && (iblock >= past_eof))
		set_buffer_new(bh_result);

bail:
	if (err < 0)
		err = -EIO;

	mlog_exit(err);
	return err;
}
|
||||||
|
|
||||||
|
static int ocfs2_readpage(struct file *file, struct page *page)
|
||||||
|
{
|
||||||
|
struct inode *inode = page->mapping->host;
|
||||||
|
loff_t start = (loff_t)page->index << PAGE_CACHE_SHIFT;
|
||||||
|
int ret, unlock = 1;
|
||||||
|
|
||||||
|
mlog_entry("(0x%p, %lu)\n", file, (page ? page->index : 0));
|
||||||
|
|
||||||
|
ret = ocfs2_meta_lock_with_page(inode, NULL, NULL, 0, page);
|
||||||
|
if (ret != 0) {
|
||||||
|
if (ret == AOP_TRUNCATED_PAGE)
|
||||||
|
unlock = 0;
|
||||||
|
mlog_errno(ret);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
down_read(&OCFS2_I(inode)->ip_alloc_sem);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* i_size might have just been updated as we grabed the meta lock. We
|
||||||
|
* might now be discovering a truncate that hit on another node.
|
||||||
|
* block_read_full_page->get_block freaks out if it is asked to read
|
||||||
|
* beyond the end of a file, so we check here. Callers
|
||||||
|
* (generic_file_read, fault->nopage) are clever enough to check i_size
|
||||||
|
* and notice that the page they just read isn't needed.
|
||||||
|
*
|
||||||
|
* XXX sys_readahead() seems to get that wrong?
|
||||||
|
*/
|
||||||
|
if (start >= i_size_read(inode)) {
|
||||||
|
char *addr = kmap(page);
|
||||||
|
memset(addr, 0, PAGE_SIZE);
|
||||||
|
flush_dcache_page(page);
|
||||||
|
kunmap(page);
|
||||||
|
SetPageUptodate(page);
|
||||||
|
ret = 0;
|
||||||
|
goto out_alloc;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = ocfs2_data_lock_with_page(inode, 0, page);
|
||||||
|
if (ret != 0) {
|
||||||
|
if (ret == AOP_TRUNCATED_PAGE)
|
||||||
|
unlock = 0;
|
||||||
|
mlog_errno(ret);
|
||||||
|
goto out_alloc;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = block_read_full_page(page, ocfs2_get_block);
|
||||||
|
unlock = 0;
|
||||||
|
|
||||||
|
ocfs2_data_unlock(inode, 0);
|
||||||
|
out_alloc:
|
||||||
|
up_read(&OCFS2_I(inode)->ip_alloc_sem);
|
||||||
|
ocfs2_meta_unlock(inode, 0);
|
||||||
|
out:
|
||||||
|
if (unlock)
|
||||||
|
unlock_page(page);
|
||||||
|
mlog_exit(ret);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Note: Because we don't support holes, our allocation has
|
||||||
|
* already happened (allocation writes zeros to the file data)
|
||||||
|
* so we don't have to worry about ordered writes in
|
||||||
|
* ocfs2_writepage.
|
||||||
|
*
|
||||||
|
* ->writepage is called during the process of invalidating the page cache
|
||||||
|
* during blocked lock processing. It can't block on any cluster locks
|
||||||
|
* to during block mapping. It's relying on the fact that the block
|
||||||
|
* mapping can't have disappeared under the dirty pages that it is
|
||||||
|
* being asked to write back.
|
||||||
|
*/
|
||||||
|
static int ocfs2_writepage(struct page *page, struct writeback_control *wbc)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
mlog_entry("(0x%p)\n", page);
|
||||||
|
|
||||||
|
ret = block_write_full_page(page, ocfs2_get_block, wbc);
|
||||||
|
|
||||||
|
mlog_exit(ret);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ocfs2_prepare_write() can be an outer-most ocfs2 call when it is called
|
||||||
|
* from loopback. It must be able to perform its own locking around
|
||||||
|
* ocfs2_get_block().
|
||||||
|
*/
|
||||||
|
int ocfs2_prepare_write(struct file *file, struct page *page,
|
||||||
|
unsigned from, unsigned to)
|
||||||
|
{
|
||||||
|
struct inode *inode = page->mapping->host;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
mlog_entry("(0x%p, 0x%p, %u, %u)\n", file, page, from, to);
|
||||||
|
|
||||||
|
ret = ocfs2_meta_lock_with_page(inode, NULL, NULL, 0, page);
|
||||||
|
if (ret != 0) {
|
||||||
|
mlog_errno(ret);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
down_read(&OCFS2_I(inode)->ip_alloc_sem);
|
||||||
|
|
||||||
|
ret = block_prepare_write(page, from, to, ocfs2_get_block);
|
||||||
|
|
||||||
|
up_read(&OCFS2_I(inode)->ip_alloc_sem);
|
||||||
|
|
||||||
|
ocfs2_meta_unlock(inode, 0);
|
||||||
|
out:
|
||||||
|
mlog_exit(ret);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Taken from ext3. We don't necessarily need the full blown
|
||||||
|
* functionality yet, but IMHO it's better to cut and paste the whole
|
||||||
|
* thing so we can avoid introducing our own bugs (and easily pick up
|
||||||
|
* their fixes when they happen) --Mark */
|
||||||
|
static int walk_page_buffers( handle_t *handle,
|
||||||
|
struct buffer_head *head,
|
||||||
|
unsigned from,
|
||||||
|
unsigned to,
|
||||||
|
int *partial,
|
||||||
|
int (*fn)( handle_t *handle,
|
||||||
|
struct buffer_head *bh))
|
||||||
|
{
|
||||||
|
struct buffer_head *bh;
|
||||||
|
unsigned block_start, block_end;
|
||||||
|
unsigned blocksize = head->b_size;
|
||||||
|
int err, ret = 0;
|
||||||
|
struct buffer_head *next;
|
||||||
|
|
||||||
|
for ( bh = head, block_start = 0;
|
||||||
|
ret == 0 && (bh != head || !block_start);
|
||||||
|
block_start = block_end, bh = next)
|
||||||
|
{
|
||||||
|
next = bh->b_this_page;
|
||||||
|
block_end = block_start + blocksize;
|
||||||
|
if (block_end <= from || block_start >= to) {
|
||||||
|
if (partial && !buffer_uptodate(bh))
|
||||||
|
*partial = 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
err = (*fn)(handle, bh);
|
||||||
|
if (!ret)
|
||||||
|
ret = err;
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ocfs2_journal_handle *ocfs2_start_walk_page_trans(struct inode *inode,
|
||||||
|
struct page *page,
|
||||||
|
unsigned from,
|
||||||
|
unsigned to)
|
||||||
|
{
|
||||||
|
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
|
||||||
|
struct ocfs2_journal_handle *handle = NULL;
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
handle = ocfs2_start_trans(osb, NULL, OCFS2_INODE_UPDATE_CREDITS);
|
||||||
|
if (!handle) {
|
||||||
|
ret = -ENOMEM;
|
||||||
|
mlog_errno(ret);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ocfs2_should_order_data(inode)) {
|
||||||
|
ret = walk_page_buffers(handle->k_handle,
|
||||||
|
page_buffers(page),
|
||||||
|
from, to, NULL,
|
||||||
|
ocfs2_journal_dirty_data);
|
||||||
|
if (ret < 0)
|
||||||
|
mlog_errno(ret);
|
||||||
|
}
|
||||||
|
out:
|
||||||
|
if (ret) {
|
||||||
|
if (handle)
|
||||||
|
ocfs2_commit_trans(handle);
|
||||||
|
handle = ERR_PTR(ret);
|
||||||
|
}
|
||||||
|
return handle;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int ocfs2_commit_write(struct file *file, struct page *page,
|
||||||
|
unsigned from, unsigned to)
|
||||||
|
{
|
||||||
|
int ret, extending = 0, locklevel = 0;
|
||||||
|
loff_t new_i_size;
|
||||||
|
struct buffer_head *di_bh = NULL;
|
||||||
|
struct inode *inode = page->mapping->host;
|
||||||
|
struct ocfs2_journal_handle *handle = NULL;
|
||||||
|
|
||||||
|
mlog_entry("(0x%p, 0x%p, %u, %u)\n", file, page, from, to);
|
||||||
|
|
||||||
|
/* NOTE: ocfs2_file_aio_write has ensured that it's safe for
|
||||||
|
* us to sample inode->i_size here without the metadata lock:
|
||||||
|
*
|
||||||
|
* 1) We're currently holding the inode alloc lock, so no
|
||||||
|
* nodes can change it underneath us.
|
||||||
|
*
|
||||||
|
* 2) We've had to take the metadata lock at least once
|
||||||
|
* already to check for extending writes, hence insuring
|
||||||
|
* that our current copy is also up to date.
|
||||||
|
*/
|
||||||
|
new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
|
||||||
|
if (new_i_size > i_size_read(inode)) {
|
||||||
|
extending = 1;
|
||||||
|
locklevel = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = ocfs2_meta_lock_with_page(inode, NULL, &di_bh, locklevel, page);
|
||||||
|
if (ret != 0) {
|
||||||
|
mlog_errno(ret);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = ocfs2_data_lock_with_page(inode, 1, page);
|
||||||
|
if (ret != 0) {
|
||||||
|
mlog_errno(ret);
|
||||||
|
goto out_unlock_meta;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (extending) {
|
||||||
|
handle = ocfs2_start_walk_page_trans(inode, page, from, to);
|
||||||
|
if (IS_ERR(handle)) {
|
||||||
|
ret = PTR_ERR(handle);
|
||||||
|
handle = NULL;
|
||||||
|
goto out_unlock_data;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Mark our buffer early. We'd rather catch this error up here
|
||||||
|
* as opposed to after a successful commit_write which would
|
||||||
|
* require us to set back inode->i_size. */
|
||||||
|
ret = ocfs2_journal_access(handle, inode, di_bh,
|
||||||
|
OCFS2_JOURNAL_ACCESS_WRITE);
|
||||||
|
if (ret < 0) {
|
||||||
|
mlog_errno(ret);
|
||||||
|
goto out_commit;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* might update i_size */
|
||||||
|
ret = generic_commit_write(file, page, from, to);
|
||||||
|
if (ret < 0) {
|
||||||
|
mlog_errno(ret);
|
||||||
|
goto out_commit;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (extending) {
|
||||||
|
loff_t size = (u64) i_size_read(inode);
|
||||||
|
struct ocfs2_dinode *di =
|
||||||
|
(struct ocfs2_dinode *)di_bh->b_data;
|
||||||
|
|
||||||
|
/* ocfs2_mark_inode_dirty is too heavy to use here. */
|
||||||
|
inode->i_blocks = ocfs2_align_bytes_to_sectors(size);
|
||||||
|
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
|
||||||
|
|
||||||
|
di->i_size = cpu_to_le64(size);
|
||||||
|
di->i_ctime = di->i_mtime =
|
||||||
|
cpu_to_le64(inode->i_mtime.tv_sec);
|
||||||
|
di->i_ctime_nsec = di->i_mtime_nsec =
|
||||||
|
cpu_to_le32(inode->i_mtime.tv_nsec);
|
||||||
|
|
||||||
|
ret = ocfs2_journal_dirty(handle, di_bh);
|
||||||
|
if (ret < 0) {
|
||||||
|
mlog_errno(ret);
|
||||||
|
goto out_commit;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
BUG_ON(extending && (i_size_read(inode) != new_i_size));
|
||||||
|
|
||||||
|
out_commit:
|
||||||
|
if (handle)
|
||||||
|
ocfs2_commit_trans(handle);
|
||||||
|
out_unlock_data:
|
||||||
|
ocfs2_data_unlock(inode, 1);
|
||||||
|
out_unlock_meta:
|
||||||
|
ocfs2_meta_unlock(inode, locklevel);
|
||||||
|
out:
|
||||||
|
if (di_bh)
|
||||||
|
brelse(di_bh);
|
||||||
|
|
||||||
|
mlog_exit(ret);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * ->bmap for OCFS2: translate logical block @block of the mapping's inode
 * into a physical block number via ocfs2_extent_map_get_blocks().
 *
 * Returns the physical block number, or 0 if locking or the lookup fails.
 */
static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
{
	struct inode *inode = mapping->host;
	u64 phys_blkno = 0;
	sector_t result;
	int err = 0;

	mlog_entry("(block = %llu)\n", (unsigned long long)block);

	/* We don't need to lock journal system files, since they aren't
	 * accessed concurrently from multiple nodes.
	 */
	if (!INODE_JOURNAL(inode)) {
		err = ocfs2_meta_lock(inode, NULL, NULL, 0);
		if (err) {
			if (err != -ENOENT)
				mlog_errno(err);
			goto bail;
		}
		down_read(&OCFS2_I(inode)->ip_alloc_sem);
	}

	err = ocfs2_extent_map_get_blocks(inode, block, 1, &phys_blkno,
					  NULL);

	if (!INODE_JOURNAL(inode)) {
		up_read(&OCFS2_I(inode)->ip_alloc_sem);
		ocfs2_meta_unlock(inode, 0);
	}

	if (err) {
		mlog(ML_ERROR, "get_blocks() failed, block = %llu\n",
		     (unsigned long long)block);
		mlog_errno(err);
	}

bail:
	/* any failure is reported to the caller as block 0 */
	if (err)
		result = 0;
	else
		result = phys_blkno;

	mlog_exit((int)result);

	return result;
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* TODO: Make this into a generic get_blocks function.
|
||||||
|
*
|
||||||
|
* From do_direct_io in direct-io.c:
|
||||||
|
* "So what we do is to permit the ->get_blocks function to populate
|
||||||
|
* bh.b_size with the size of IO which is permitted at this offset and
|
||||||
|
* this i_blkbits."
|
||||||
|
*
|
||||||
|
* This function is called directly from get_more_blocks in direct-io.c.
|
||||||
|
*
|
||||||
|
* called like this: dio->get_blocks(dio->inode, fs_startblk,
|
||||||
|
* fs_count, map_bh, dio->rw == WRITE);
|
||||||
|
*/
|
||||||
|
static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
|
||||||
|
unsigned long max_blocks,
|
||||||
|
struct buffer_head *bh_result, int create)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
u64 vbo_max; /* file offset, max_blocks from iblock */
|
||||||
|
u64 p_blkno;
|
||||||
|
int contig_blocks;
|
||||||
|
unsigned char blocksize_bits;
|
||||||
|
|
||||||
|
if (!inode || !bh_result) {
|
||||||
|
mlog(ML_ERROR, "inode or bh_result is null\n");
|
||||||
|
return -EIO;
|
||||||
|
}
|
||||||
|
|
||||||
|
blocksize_bits = inode->i_sb->s_blocksize_bits;
|
||||||
|
|
||||||
|
/* This function won't even be called if the request isn't all
|
||||||
|
* nicely aligned and of the right size, so there's no need
|
||||||
|
* for us to check any of that. */
|
||||||
|
|
||||||
|
vbo_max = ((u64)iblock + max_blocks) << blocksize_bits;
|
||||||
|
|
||||||
|
spin_lock(&OCFS2_I(inode)->ip_lock);
|
||||||
|
if ((iblock + max_blocks) >
|
||||||
|
ocfs2_clusters_to_blocks(inode->i_sb,
|
||||||
|
OCFS2_I(inode)->ip_clusters)) {
|
||||||
|
spin_unlock(&OCFS2_I(inode)->ip_lock);
|
||||||
|
ret = -EIO;
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
spin_unlock(&OCFS2_I(inode)->ip_lock);
|
||||||
|
|
||||||
|
/* This figures out the size of the next contiguous block, and
|
||||||
|
* our logical offset */
|
||||||
|
ret = ocfs2_extent_map_get_blocks(inode, iblock, 1, &p_blkno,
|
||||||
|
&contig_blocks);
|
||||||
|
if (ret) {
|
||||||
|
mlog(ML_ERROR, "get_blocks() failed iblock=%llu\n",
|
||||||
|
(unsigned long long)iblock);
|
||||||
|
ret = -EIO;
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
map_bh(bh_result, inode->i_sb, p_blkno);
|
||||||
|
|
||||||
|
/* make sure we don't map more than max_blocks blocks here as
|
||||||
|
that's all the kernel will handle at this point. */
|
||||||
|
if (max_blocks < contig_blocks)
|
||||||
|
contig_blocks = max_blocks;
|
||||||
|
bh_result->b_size = contig_blocks << blocksize_bits;
|
||||||
|
bail:
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ocfs2_dio_end_io is called by the dio core when a dio is finished. We're
|
||||||
|
* particularly interested in the aio/dio case. Like the core uses
|
||||||
|
* i_alloc_sem, we use the rw_lock DLM lock to protect io on one node from
|
||||||
|
* truncation on another.
|
||||||
|
*/
|
||||||
|
static void ocfs2_dio_end_io(struct kiocb *iocb,
|
||||||
|
loff_t offset,
|
||||||
|
ssize_t bytes,
|
||||||
|
void *private)
|
||||||
|
{
|
||||||
|
struct inode *inode = iocb->ki_filp->f_dentry->d_inode;
|
||||||
|
|
||||||
|
/* this io's submitter should not have unlocked this before we could */
|
||||||
|
BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
|
||||||
|
ocfs2_iocb_clear_rw_locked(iocb);
|
||||||
|
up_read(&inode->i_alloc_sem);
|
||||||
|
ocfs2_rw_unlock(inode, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
static ssize_t ocfs2_direct_IO(int rw,
|
||||||
|
struct kiocb *iocb,
|
||||||
|
const struct iovec *iov,
|
||||||
|
loff_t offset,
|
||||||
|
unsigned long nr_segs)
|
||||||
|
{
|
||||||
|
struct file *file = iocb->ki_filp;
|
||||||
|
struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
mlog_entry_void();
|
||||||
|
ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
|
||||||
|
inode->i_sb->s_bdev, iov, offset,
|
||||||
|
nr_segs,
|
||||||
|
ocfs2_direct_IO_get_blocks,
|
||||||
|
ocfs2_dio_end_io);
|
||||||
|
mlog_exit(ret);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Address space operations table for OCFS2. Every hook points at a
 * function defined in this file, except ->sync_page which uses
 * block_sync_page.
 */
struct address_space_operations ocfs2_aops = {
|
||||||
|
.readpage = ocfs2_readpage,
|
||||||
|
.writepage = ocfs2_writepage,
|
||||||
|
.prepare_write = ocfs2_prepare_write,
|
||||||
|
.commit_write = ocfs2_commit_write,
|
||||||
|
.bmap = ocfs2_bmap,
|
||||||
|
.sync_page = block_sync_page,
|
||||||
|
.direct_IO = ocfs2_direct_IO
|
||||||
|
};
|
|
@ -0,0 +1,41 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002, 2004, 2005 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef OCFS2_AOPS_H
|
||||||
|
#define OCFS2_AOPS_H
|
||||||
|
|
||||||
|
int ocfs2_prepare_write(struct file *file, struct page *page,
|
||||||
|
unsigned from, unsigned to);
|
||||||
|
|
||||||
|
struct ocfs2_journal_handle *ocfs2_start_walk_page_trans(struct inode *inode,
|
||||||
|
struct page *page,
|
||||||
|
unsigned from,
|
||||||
|
unsigned to);
|
||||||
|
|
||||||
|
/* all ocfs2_dio_end_io()'s fault */
/*
 * Bit 0 of iocb->private is used as an "rw locked" flag for the iocb.
 * These helpers test, set and clear that bit.  The macro argument is
 * parenthesized so that any pointer expression (not only a plain
 * identifier) can be passed without changing how '&' and '->' bind.
 */
#define ocfs2_iocb_is_rw_locked(iocb) \
	test_bit(0, (unsigned long *)&(iocb)->private)
#define ocfs2_iocb_set_rw_locked(iocb) \
	set_bit(0, (unsigned long *)&(iocb)->private)
#define ocfs2_iocb_clear_rw_locked(iocb) \
	clear_bit(0, (unsigned long *)&(iocb)->private)
|
||||||
|
|
||||||
|
#endif /* OCFS2_AOPS_H */
|
|
@ -0,0 +1,232 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* io.c
|
||||||
|
*
|
||||||
|
* Buffer cache handling
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/fs.h>
|
||||||
|
#include <linux/types.h>
|
||||||
|
#include <linux/slab.h>
|
||||||
|
#include <linux/highmem.h>
|
||||||
|
|
||||||
|
#include <cluster/masklog.h>
|
||||||
|
|
||||||
|
#include "ocfs2.h"
|
||||||
|
|
||||||
|
#include "alloc.h"
|
||||||
|
#include "inode.h"
|
||||||
|
#include "journal.h"
|
||||||
|
#include "uptodate.h"
|
||||||
|
|
||||||
|
#include "buffer_head_io.h"
|
||||||
|
|
||||||
|
int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
|
||||||
|
struct inode *inode)
|
||||||
|
{
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
mlog_entry("(bh->b_blocknr = %llu, inode=%p)\n",
|
||||||
|
(unsigned long long)bh->b_blocknr, inode);
|
||||||
|
|
||||||
|
BUG_ON(bh->b_blocknr < OCFS2_SUPER_BLOCK_BLKNO);
|
||||||
|
BUG_ON(buffer_jbd(bh));
|
||||||
|
|
||||||
|
/* No need to check for a soft readonly file system here. non
|
||||||
|
* journalled writes are only ever done on system files which
|
||||||
|
* can get modified during recovery even if read-only. */
|
||||||
|
if (ocfs2_is_hard_readonly(osb)) {
|
||||||
|
ret = -EROFS;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
down(&OCFS2_I(inode)->ip_io_sem);
|
||||||
|
|
||||||
|
lock_buffer(bh);
|
||||||
|
set_buffer_uptodate(bh);
|
||||||
|
|
||||||
|
/* remove from dirty list before I/O. */
|
||||||
|
clear_buffer_dirty(bh);
|
||||||
|
|
||||||
|
get_bh(bh); /* for end_buffer_write_sync() */
|
||||||
|
bh->b_end_io = end_buffer_write_sync;
|
||||||
|
submit_bh(WRITE, bh);
|
||||||
|
|
||||||
|
wait_on_buffer(bh);
|
||||||
|
|
||||||
|
if (buffer_uptodate(bh)) {
|
||||||
|
ocfs2_set_buffer_uptodate(inode, bh);
|
||||||
|
} else {
|
||||||
|
/* We don't need to remove the clustered uptodate
|
||||||
|
* information for this bh as it's not marked locally
|
||||||
|
* uptodate. */
|
||||||
|
ret = -EIO;
|
||||||
|
brelse(bh);
|
||||||
|
}
|
||||||
|
|
||||||
|
up(&OCFS2_I(inode)->ip_io_sem);
|
||||||
|
out:
|
||||||
|
mlog_exit(ret);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Read up to @nr blocks into the buffer heads in @bhs.
 *
 * @osb:   super block info; osb->sb is the device that gets read
 * @block: first block number handed to sb_getblk()
 * @nr:    number of slots in @bhs (negative is rejected, 0 is a no-op)
 * @bhs:   array of @nr buffer heads.  NULL slots are filled through
 *         sb_getblk(), each one consuming the next block number
 *         counted up from @block; non-NULL slots are used as supplied
 *         and do not consume a block number.
 * @flags: OCFS2_BH_CACHED skips the disk read for buffers that
 *         ocfs2_buffer_uptodate() reports as current (the flag is
 *         dropped when @inode is NULL); OCFS2_BH_READAHEAD submits the
 *         reads with READA instead of READ.
 * @inode: optional inode whose ip_io_sem is held across the I/O and
 *         whose uptodate tracking is updated for buffers that end up
 *         uptodate.
 *
 * Buffers owned by JBD and dirty buffers are never re-read.  Returns 0
 * on success, -EINVAL for bad arguments, and -EIO if sb_getblk() fails
 * or any buffer is not uptodate afterwards; such a buffer is released
 * and its slot in @bhs is set to NULL.
 */
int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
		      struct buffer_head *bhs[], int flags,
		      struct inode *inode)
{
	struct super_block *sb;
	struct buffer_head *bh;
	int status = 0;
	int ignore_cache = 0;
	int need_io;
	int idx;

	mlog_entry("(block=(%"MLFu64"), nr=(%d), flags=%d, inode=%p)\n",
		   block, nr, flags, inode);

	if (osb == NULL || osb->sb == NULL || bhs == NULL) {
		status = -EINVAL;
		mlog_errno(status);
		goto bail;
	}

	if (nr < 0) {
		mlog(ML_ERROR, "asked to read %d blocks!\n", nr);
		status = -EINVAL;
		mlog_errno(status);
		goto bail;
	}

	if (nr == 0) {
		mlog(ML_BH_IO, "No buffers will be read!\n");
		status = 0;
		goto bail;
	}

	sb = osb->sb;

	/* The uptodate cache is per inode; without one we cannot use it. */
	if ((flags & OCFS2_BH_CACHED) && !inode)
		flags &= ~OCFS2_BH_CACHED;

	if (inode)
		down(&OCFS2_I(inode)->ip_io_sem);

	/* Pass one: obtain the buffers and submit whatever needs reading. */
	for (idx = 0; idx < nr; idx++) {
		if (bhs[idx] == NULL) {
			bhs[idx] = sb_getblk(sb, block++);
			if (bhs[idx] == NULL) {
				if (inode)
					up(&OCFS2_I(inode)->ip_io_sem);
				status = -EIO;
				mlog_errno(status);
				goto bail;
			}
		}
		bh = bhs[idx];

		ignore_cache = 0;
		if ((flags & OCFS2_BH_CACHED) &&
		    !ocfs2_buffer_uptodate(inode, bh)) {
			mlog(ML_UPTODATE,
			     "bh (%llu), inode %"MLFu64" not uptodate\n",
			     (unsigned long long)bh->b_blocknr,
			     OCFS2_I(inode)->ip_blkno);
			ignore_cache = 1;
		}
		need_io = !(flags & OCFS2_BH_CACHED) || ignore_cache;

		/* XXX: Can we ever get this and *not* have the cached
		 * flag set? */
		if (buffer_jbd(bh)) {
			if (need_io) {
				mlog(ML_BH_IO, "trying to sync read a jbd "
					       "managed bh (blocknr = %llu)\n",
				     (unsigned long long)bh->b_blocknr);
			}
			continue;
		}

		if (!need_io)
			continue;

		if (buffer_dirty(bh)) {
			/* This should probably be a BUG, or
			 * at least return an error. */
			mlog(ML_BH_IO, "asking me to sync read a dirty "
				       "buffer! (blocknr = %llu)\n",
			     (unsigned long long)bh->b_blocknr);
			continue;
		}

		lock_buffer(bh);
		if (buffer_jbd(bh)) {
#ifdef CATCH_BH_JBD_RACES
			mlog(ML_ERROR, "block %llu had the JBD bit set "
				       "while I was in lock_buffer!",
			     (unsigned long long)bh->b_blocknr);
			BUG();
#else
			unlock_buffer(bh);
			continue;
#endif
		}
		clear_buffer_uptodate(bh);
		get_bh(bh); /* for end_buffer_read_sync() */
		bh->b_end_io = end_buffer_read_sync;
		submit_bh((flags & OCFS2_BH_READAHEAD) ? READA : READ, bh);
	}

	status = 0;

	/* Pass two: wait for the reads, walking the array backwards. */
	for (idx = nr - 1; idx >= 0; idx--) {
		bh = bhs[idx];

		/* We know this can't have changed as we hold the
		 * inode sem. Avoid doing any work on the bh if the
		 * journal has it. */
		if (!buffer_jbd(bh))
			wait_on_buffer(bh);

		if (buffer_uptodate(bh)) {
			if (inode)
				ocfs2_set_buffer_uptodate(inode, bh);
		} else {
			/* Status won't be cleared from here on out,
			 * so we can safely record this and keep looping
			 * to clean up the other buffers. Don't need to
			 * remove the clustered uptodate information
			 * for this bh as it's not marked locally
			 * uptodate. */
			status = -EIO;
			brelse(bh);
			bhs[idx] = NULL;
		}
	}

	if (inode)
		up(&OCFS2_I(inode)->ip_io_sem);

	mlog(ML_BH_IO, "block=(%"MLFu64"), nr=(%d), cached=%s\n", block, nr,
	     (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes");

bail:
	mlog_exit(status);
	return status;
}
|
|
@ -0,0 +1,73 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* ocfs2_buffer_head.h
|
||||||
|
*
|
||||||
|
* Buffer cache handling functions defined
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef OCFS2_BUFFER_HEAD_IO_H
|
||||||
|
#define OCFS2_BUFFER_HEAD_IO_H
|
||||||
|
|
||||||
|
#include <linux/buffer_head.h>
|
||||||
|
|
||||||
|
void ocfs2_end_buffer_io_sync(struct buffer_head *bh,
|
||||||
|
int uptodate);
|
||||||
|
|
||||||
|
static inline int ocfs2_read_block(struct ocfs2_super *osb,
|
||||||
|
u64 off,
|
||||||
|
struct buffer_head **bh,
|
||||||
|
int flags,
|
||||||
|
struct inode *inode);
|
||||||
|
|
||||||
|
int ocfs2_write_block(struct ocfs2_super *osb,
|
||||||
|
struct buffer_head *bh,
|
||||||
|
struct inode *inode);
|
||||||
|
int ocfs2_read_blocks(struct ocfs2_super *osb,
|
||||||
|
u64 block,
|
||||||
|
int nr,
|
||||||
|
struct buffer_head *bhs[],
|
||||||
|
int flags,
|
||||||
|
struct inode *inode);
|
||||||
|
|
||||||
|
|
||||||
|
/* Flag bits for the 'flags' argument of ocfs2_read_block() and
 * ocfs2_read_blocks(). */
/* OCFS2_BH_CACHED: do not re-read a buffer that ocfs2_buffer_uptodate()
 * reports as current for the inode; ocfs2_read_blocks() drops the flag
 * when it is called without an inode. */
#define OCFS2_BH_CACHED 1
|
||||||
|
#define OCFS2_BH_READAHEAD 8 /* use this to pass READA down to submit_bh */
|
||||||
|
|
||||||
|
/*
 * Single-block convenience wrapper around ocfs2_read_blocks().
 *
 * @bh must point at the caller's buffer_head pointer; a NULL @bh is
 * reported with printk() and rejected with -EINVAL.  Otherwise the
 * result of ocfs2_read_blocks() for one block at @off is returned.
 */
static inline int ocfs2_read_block(struct ocfs2_super * osb, u64 off,
				   struct buffer_head **bh, int flags,
				   struct inode *inode)
{
	if (bh == NULL) {
		printk("ocfs2: bh == NULL\n");
		return -EINVAL;
	}

	return ocfs2_read_blocks(osb, off, 1, bh, flags, inode);
}
|
||||||
|
|
||||||
|
#endif /* OCFS2_BUFFER_HEAD_IO_H */
|
|
@ -0,0 +1,91 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* dcache.c
|
||||||
|
*
|
||||||
|
* dentry cache handling code
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/fs.h>
|
||||||
|
#include <linux/types.h>
|
||||||
|
#include <linux/slab.h>
|
||||||
|
#include <linux/namei.h>
|
||||||
|
|
||||||
|
#define MLOG_MASK_PREFIX ML_DCACHE
|
||||||
|
#include <cluster/masklog.h>
|
||||||
|
|
||||||
|
#include "ocfs2.h"
|
||||||
|
|
||||||
|
#include "alloc.h"
|
||||||
|
#include "dcache.h"
|
||||||
|
#include "file.h"
|
||||||
|
#include "inode.h"
|
||||||
|
|
||||||
|
static int ocfs2_dentry_revalidate(struct dentry *dentry,
|
||||||
|
struct nameidata *nd)
|
||||||
|
{
|
||||||
|
struct inode *inode = dentry->d_inode;
|
||||||
|
int ret = 0; /* if all else fails, just return false */
|
||||||
|
struct ocfs2_super *osb;
|
||||||
|
|
||||||
|
mlog_entry("(0x%p, '%.*s')\n", dentry,
|
||||||
|
dentry->d_name.len, dentry->d_name.name);
|
||||||
|
|
||||||
|
/* Never trust a negative dentry - force a new lookup. */
|
||||||
|
if (inode == NULL) {
|
||||||
|
mlog(0, "negative dentry: %.*s\n", dentry->d_name.len,
|
||||||
|
dentry->d_name.name);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
osb = OCFS2_SB(inode->i_sb);
|
||||||
|
|
||||||
|
BUG_ON(!osb);
|
||||||
|
|
||||||
|
if (inode != osb->root_inode) {
|
||||||
|
spin_lock(&OCFS2_I(inode)->ip_lock);
|
||||||
|
/* did we or someone else delete this inode? */
|
||||||
|
if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
|
||||||
|
spin_unlock(&OCFS2_I(inode)->ip_lock);
|
||||||
|
mlog(0, "inode (%"MLFu64") deleted, returning false\n",
|
||||||
|
OCFS2_I(inode)->ip_blkno);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
spin_unlock(&OCFS2_I(inode)->ip_lock);
|
||||||
|
|
||||||
|
if (!inode->i_nlink) {
|
||||||
|
mlog(0, "Inode %"MLFu64" orphaned, returning false "
|
||||||
|
"dir = %d\n", OCFS2_I(inode)->ip_blkno,
|
||||||
|
S_ISDIR(inode->i_mode));
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = 1;
|
||||||
|
|
||||||
|
bail:
|
||||||
|
mlog_exit(ret);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Dentry operations for OCFS2; only d_revalidate is provided, all
 * other callbacks are left unset. */
struct dentry_operations ocfs2_dentry_ops = {
|
||||||
|
.d_revalidate = ocfs2_dentry_revalidate,
|
||||||
|
};
|
|
@ -0,0 +1,31 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* dcache.h
|
||||||
|
*
|
||||||
|
* Function prototypes
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef OCFS2_DCACHE_H
|
||||||
|
#define OCFS2_DCACHE_H
|
||||||
|
|
||||||
|
extern struct dentry_operations ocfs2_dentry_ops;
|
||||||
|
|
||||||
|
#endif /* OCFS2_DCACHE_H */
|
|
@ -0,0 +1,618 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* dir.c
|
||||||
|
*
|
||||||
|
* Creates, reads, walks and deletes directory-nodes
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* Portions of this code from linux/fs/ext3/dir.c
|
||||||
|
*
|
||||||
|
* Copyright (C) 1992, 1993, 1994, 1995
|
||||||
|
* Remy Card (card@masi.ibp.fr)
|
||||||
|
* Laboratoire MASI - Institut Blaise Pascal
|
||||||
|
* Universite Pierre et Marie Curie (Paris VI)
|
||||||
|
*
|
||||||
|
* from
|
||||||
|
*
|
||||||
|
* linux/fs/minix/dir.c
|
||||||
|
*
|
||||||
|
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/fs.h>
|
||||||
|
#include <linux/types.h>
|
||||||
|
#include <linux/slab.h>
|
||||||
|
#include <linux/highmem.h>
|
||||||
|
|
||||||
|
#define MLOG_MASK_PREFIX ML_NAMEI
|
||||||
|
#include <cluster/masklog.h>
|
||||||
|
|
||||||
|
#include "ocfs2.h"
|
||||||
|
|
||||||
|
#include "alloc.h"
|
||||||
|
#include "dir.h"
|
||||||
|
#include "dlmglue.h"
|
||||||
|
#include "extent_map.h"
|
||||||
|
#include "file.h"
|
||||||
|
#include "inode.h"
|
||||||
|
#include "journal.h"
|
||||||
|
#include "namei.h"
|
||||||
|
#include "suballoc.h"
|
||||||
|
#include "uptodate.h"
|
||||||
|
|
||||||
|
#include "buffer_head_io.h"
|
||||||
|
|
||||||
|
/* Maps the file_type byte of a directory entry (used as the index) to
 * the DT_* code passed to filldir.  ocfs2_readdir() only indexes this
 * table when file_type < OCFS2_FT_MAX and reports DT_UNKNOWN otherwise. */
static unsigned char ocfs2_filetype_table[] = {
|
||||||
|
DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
|
||||||
|
};
|
||||||
|
|
||||||
|
static int ocfs2_extend_dir(struct ocfs2_super *osb,
|
||||||
|
struct inode *dir,
|
||||||
|
struct buffer_head *parent_fe_bh,
|
||||||
|
struct buffer_head **new_de_bh);
|
||||||
|
/*
|
||||||
|
* ocfs2_readdir()
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
|
||||||
|
{
|
||||||
|
int error = 0;
|
||||||
|
unsigned long offset, blk;
|
||||||
|
int i, num, stored;
|
||||||
|
struct buffer_head * bh, * tmp;
|
||||||
|
struct ocfs2_dir_entry * de;
|
||||||
|
int err;
|
||||||
|
struct inode *inode = filp->f_dentry->d_inode;
|
||||||
|
struct super_block * sb = inode->i_sb;
|
||||||
|
int have_disk_lock = 0;
|
||||||
|
|
||||||
|
mlog_entry("dirino=%"MLFu64"\n", OCFS2_I(inode)->ip_blkno);
|
||||||
|
|
||||||
|
stored = 0;
|
||||||
|
bh = NULL;
|
||||||
|
|
||||||
|
error = ocfs2_meta_lock(inode, NULL, NULL, 0);
|
||||||
|
if (error < 0) {
|
||||||
|
if (error != -ENOENT)
|
||||||
|
mlog_errno(error);
|
||||||
|
/* we haven't got any yet, so propagate the error. */
|
||||||
|
stored = error;
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
have_disk_lock = 1;
|
||||||
|
|
||||||
|
offset = filp->f_pos & (sb->s_blocksize - 1);
|
||||||
|
|
||||||
|
while (!error && !stored && filp->f_pos < i_size_read(inode)) {
|
||||||
|
blk = (filp->f_pos) >> sb->s_blocksize_bits;
|
||||||
|
bh = ocfs2_bread(inode, blk, &err, 0);
|
||||||
|
if (!bh) {
|
||||||
|
mlog(ML_ERROR, "directory #%"MLFu64" contains a hole "
|
||||||
|
"at offset %lld\n",
|
||||||
|
OCFS2_I(inode)->ip_blkno,
|
||||||
|
filp->f_pos);
|
||||||
|
filp->f_pos += sb->s_blocksize - offset;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Do the readahead (8k)
|
||||||
|
*/
|
||||||
|
if (!offset) {
|
||||||
|
for (i = 16 >> (sb->s_blocksize_bits - 9), num = 0;
|
||||||
|
i > 0; i--) {
|
||||||
|
tmp = ocfs2_bread(inode, ++blk, &err, 1);
|
||||||
|
if (tmp)
|
||||||
|
brelse(tmp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
revalidate:
|
||||||
|
/* If the dir block has changed since the last call to
|
||||||
|
* readdir(2), then we might be pointing to an invalid
|
||||||
|
* dirent right now. Scan from the start of the block
|
||||||
|
* to make sure. */
|
||||||
|
if (filp->f_version != inode->i_version) {
|
||||||
|
for (i = 0; i < sb->s_blocksize && i < offset; ) {
|
||||||
|
de = (struct ocfs2_dir_entry *) (bh->b_data + i);
|
||||||
|
/* It's too expensive to do a full
|
||||||
|
* dirent test each time round this
|
||||||
|
* loop, but we do have to test at
|
||||||
|
* least that it is non-zero. A
|
||||||
|
* failure will be detected in the
|
||||||
|
* dirent test below. */
|
||||||
|
if (le16_to_cpu(de->rec_len) <
|
||||||
|
OCFS2_DIR_REC_LEN(1))
|
||||||
|
break;
|
||||||
|
i += le16_to_cpu(de->rec_len);
|
||||||
|
}
|
||||||
|
offset = i;
|
||||||
|
filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1))
|
||||||
|
| offset;
|
||||||
|
filp->f_version = inode->i_version;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (!error && filp->f_pos < i_size_read(inode)
|
||||||
|
&& offset < sb->s_blocksize) {
|
||||||
|
de = (struct ocfs2_dir_entry *) (bh->b_data + offset);
|
||||||
|
if (!ocfs2_check_dir_entry(inode, de, bh, offset)) {
|
||||||
|
/* On error, skip the f_pos to the
|
||||||
|
next block. */
|
||||||
|
filp->f_pos = (filp->f_pos |
|
||||||
|
(sb->s_blocksize - 1)) + 1;
|
||||||
|
brelse(bh);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
offset += le16_to_cpu(de->rec_len);
|
||||||
|
if (le64_to_cpu(de->inode)) {
|
||||||
|
/* We might block in the next section
|
||||||
|
* if the data destination is
|
||||||
|
* currently swapped out. So, use a
|
||||||
|
* version stamp to detect whether or
|
||||||
|
* not the directory has been modified
|
||||||
|
* during the copy operation.
|
||||||
|
*/
|
||||||
|
unsigned long version = filp->f_version;
|
||||||
|
unsigned char d_type = DT_UNKNOWN;
|
||||||
|
|
||||||
|
if (de->file_type < OCFS2_FT_MAX)
|
||||||
|
d_type = ocfs2_filetype_table[de->file_type];
|
||||||
|
error = filldir(dirent, de->name,
|
||||||
|
de->name_len,
|
||||||
|
filp->f_pos,
|
||||||
|
ino_from_blkno(sb, le64_to_cpu(de->inode)),
|
||||||
|
d_type);
|
||||||
|
if (error)
|
||||||
|
break;
|
||||||
|
if (version != filp->f_version)
|
||||||
|
goto revalidate;
|
||||||
|
stored ++;
|
||||||
|
}
|
||||||
|
filp->f_pos += le16_to_cpu(de->rec_len);
|
||||||
|
}
|
||||||
|
offset = 0;
|
||||||
|
brelse(bh);
|
||||||
|
}
|
||||||
|
|
||||||
|
stored = 0;
|
||||||
|
bail:
|
||||||
|
if (have_disk_lock)
|
||||||
|
ocfs2_meta_unlock(inode, 0);
|
||||||
|
|
||||||
|
mlog_exit(stored);
|
||||||
|
|
||||||
|
return stored;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* NOTE: this should always be called with parent dir i_sem taken.
|
||||||
|
*/
|
||||||
|
int ocfs2_find_files_on_disk(const char *name,
|
||||||
|
int namelen,
|
||||||
|
u64 *blkno,
|
||||||
|
struct inode *inode,
|
||||||
|
struct buffer_head **dirent_bh,
|
||||||
|
struct ocfs2_dir_entry **dirent)
|
||||||
|
{
|
||||||
|
int status = -ENOENT;
|
||||||
|
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
|
||||||
|
|
||||||
|
mlog_entry("(osb=%p, parent=%"MLFu64", name='%.*s', blkno=%p, "
|
||||||
|
"inode=%p)\n",
|
||||||
|
osb, OCFS2_I(inode)->ip_blkno, namelen, name, blkno, inode);
|
||||||
|
|
||||||
|
*dirent_bh = ocfs2_find_entry(name, namelen, inode, dirent);
|
||||||
|
if (!*dirent_bh || !*dirent) {
|
||||||
|
status = -ENOENT;
|
||||||
|
goto leave;
|
||||||
|
}
|
||||||
|
|
||||||
|
*blkno = le64_to_cpu((*dirent)->inode);
|
||||||
|
|
||||||
|
status = 0;
|
||||||
|
leave:
|
||||||
|
if (status < 0) {
|
||||||
|
*dirent = NULL;
|
||||||
|
if (*dirent_bh) {
|
||||||
|
brelse(*dirent_bh);
|
||||||
|
*dirent_bh = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
mlog_exit(status);
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check for a name within a directory.
|
||||||
|
*
|
||||||
|
* Return 0 if the name does not exist
|
||||||
|
* Return -EEXIST if the directory contains the name
|
||||||
|
*
|
||||||
|
* Callers should have i_sem + a cluster lock on dir
|
||||||
|
*/
|
||||||
|
int ocfs2_check_dir_for_entry(struct inode *dir,
|
||||||
|
const char *name,
|
||||||
|
int namelen)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
struct buffer_head *dirent_bh = NULL;
|
||||||
|
struct ocfs2_dir_entry *dirent = NULL;
|
||||||
|
|
||||||
|
mlog_entry("dir %"MLFu64", name '%.*s'\n", OCFS2_I(dir)->ip_blkno,
|
||||||
|
namelen, name);
|
||||||
|
|
||||||
|
ret = -EEXIST;
|
||||||
|
dirent_bh = ocfs2_find_entry(name, namelen, dir, &dirent);
|
||||||
|
if (dirent_bh)
|
||||||
|
goto bail;
|
||||||
|
|
||||||
|
ret = 0;
|
||||||
|
bail:
|
||||||
|
if (dirent_bh)
|
||||||
|
brelse(dirent_bh);
|
||||||
|
|
||||||
|
mlog_exit(ret);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* routine to check that the specified directory is empty (for rmdir)
|
||||||
|
*/
|
||||||
|
int ocfs2_empty_dir(struct inode *inode)
|
||||||
|
{
|
||||||
|
unsigned long offset;
|
||||||
|
struct buffer_head * bh;
|
||||||
|
struct ocfs2_dir_entry * de, * de1;
|
||||||
|
struct super_block * sb;
|
||||||
|
int err;
|
||||||
|
|
||||||
|
sb = inode->i_sb;
|
||||||
|
if ((i_size_read(inode) <
|
||||||
|
(OCFS2_DIR_REC_LEN(1) + OCFS2_DIR_REC_LEN(2))) ||
|
||||||
|
!(bh = ocfs2_bread(inode, 0, &err, 0))) {
|
||||||
|
mlog(ML_ERROR, "bad directory (dir #%"MLFu64") - "
|
||||||
|
"no data block\n",
|
||||||
|
OCFS2_I(inode)->ip_blkno);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
de = (struct ocfs2_dir_entry *) bh->b_data;
|
||||||
|
de1 = (struct ocfs2_dir_entry *)
|
||||||
|
((char *)de + le16_to_cpu(de->rec_len));
|
||||||
|
if ((le64_to_cpu(de->inode) != OCFS2_I(inode)->ip_blkno) ||
|
||||||
|
!le64_to_cpu(de1->inode) ||
|
||||||
|
strcmp(".", de->name) ||
|
||||||
|
strcmp("..", de1->name)) {
|
||||||
|
mlog(ML_ERROR, "bad directory (dir #%"MLFu64") - "
|
||||||
|
"no `.' or `..'\n",
|
||||||
|
OCFS2_I(inode)->ip_blkno);
|
||||||
|
brelse(bh);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
offset = le16_to_cpu(de->rec_len) + le16_to_cpu(de1->rec_len);
|
||||||
|
de = (struct ocfs2_dir_entry *)((char *)de1 + le16_to_cpu(de1->rec_len));
|
||||||
|
while (offset < i_size_read(inode) ) {
|
||||||
|
if (!bh || (void *)de >= (void *)(bh->b_data + sb->s_blocksize)) {
|
||||||
|
brelse(bh);
|
||||||
|
bh = ocfs2_bread(inode,
|
||||||
|
offset >> sb->s_blocksize_bits, &err, 0);
|
||||||
|
if (!bh) {
|
||||||
|
mlog(ML_ERROR, "directory #%"MLFu64" contains "
|
||||||
|
"a hole at offset %lu\n",
|
||||||
|
OCFS2_I(inode)->ip_blkno, offset);
|
||||||
|
offset += sb->s_blocksize;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
de = (struct ocfs2_dir_entry *) bh->b_data;
|
||||||
|
}
|
||||||
|
if (!ocfs2_check_dir_entry(inode, de, bh, offset)) {
|
||||||
|
brelse(bh);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
if (le64_to_cpu(de->inode)) {
|
||||||
|
brelse(bh);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
offset += le16_to_cpu(de->rec_len);
|
||||||
|
de = (struct ocfs2_dir_entry *)
|
||||||
|
((char *)de + le16_to_cpu(de->rec_len));
|
||||||
|
}
|
||||||
|
brelse(bh);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* returns a bh of the 1st new block in the allocation. */
|
||||||
|
int ocfs2_do_extend_dir(struct super_block *sb,
|
||||||
|
struct ocfs2_journal_handle *handle,
|
||||||
|
struct inode *dir,
|
||||||
|
struct buffer_head *parent_fe_bh,
|
||||||
|
struct ocfs2_alloc_context *data_ac,
|
||||||
|
struct ocfs2_alloc_context *meta_ac,
|
||||||
|
struct buffer_head **new_bh)
|
||||||
|
{
|
||||||
|
int status;
|
||||||
|
int extend;
|
||||||
|
u64 p_blkno;
|
||||||
|
|
||||||
|
spin_lock(&OCFS2_I(dir)->ip_lock);
|
||||||
|
extend = (i_size_read(dir) == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters));
|
||||||
|
spin_unlock(&OCFS2_I(dir)->ip_lock);
|
||||||
|
|
||||||
|
if (extend) {
|
||||||
|
status = ocfs2_do_extend_allocation(OCFS2_SB(sb), dir, 1,
|
||||||
|
parent_fe_bh, handle,
|
||||||
|
data_ac, meta_ac, NULL);
|
||||||
|
BUG_ON(status == -EAGAIN);
|
||||||
|
if (status < 0) {
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
status = ocfs2_extent_map_get_blocks(dir, (dir->i_blocks >>
|
||||||
|
(sb->s_blocksize_bits - 9)),
|
||||||
|
1, &p_blkno, NULL);
|
||||||
|
if (status < 0) {
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
*new_bh = sb_getblk(sb, p_blkno);
|
||||||
|
if (!*new_bh) {
|
||||||
|
status = -EIO;
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
status = 0;
|
||||||
|
bail:
|
||||||
|
mlog_exit(status);
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* assumes you already have a cluster lock on the directory. */
|
||||||
|
static int ocfs2_extend_dir(struct ocfs2_super *osb,
|
||||||
|
struct inode *dir,
|
||||||
|
struct buffer_head *parent_fe_bh,
|
||||||
|
struct buffer_head **new_de_bh)
|
||||||
|
{
|
||||||
|
int status = 0;
|
||||||
|
int credits, num_free_extents;
|
||||||
|
loff_t dir_i_size;
|
||||||
|
struct ocfs2_dinode *fe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
|
||||||
|
struct ocfs2_alloc_context *data_ac = NULL;
|
||||||
|
struct ocfs2_alloc_context *meta_ac = NULL;
|
||||||
|
struct ocfs2_journal_handle *handle = NULL;
|
||||||
|
struct buffer_head *new_bh = NULL;
|
||||||
|
struct ocfs2_dir_entry * de;
|
||||||
|
struct super_block *sb = osb->sb;
|
||||||
|
|
||||||
|
mlog_entry_void();
|
||||||
|
|
||||||
|
dir_i_size = i_size_read(dir);
|
||||||
|
mlog(0, "extending dir %"MLFu64" (i_size = %lld)\n",
|
||||||
|
OCFS2_I(dir)->ip_blkno, dir_i_size);
|
||||||
|
|
||||||
|
handle = ocfs2_alloc_handle(osb);
|
||||||
|
if (handle == NULL) {
|
||||||
|
status = -ENOMEM;
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* dir->i_size is always block aligned. */
|
||||||
|
spin_lock(&OCFS2_I(dir)->ip_lock);
|
||||||
|
if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)) {
|
||||||
|
spin_unlock(&OCFS2_I(dir)->ip_lock);
|
||||||
|
num_free_extents = ocfs2_num_free_extents(osb, dir, fe);
|
||||||
|
if (num_free_extents < 0) {
|
||||||
|
status = num_free_extents;
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!num_free_extents) {
|
||||||
|
status = ocfs2_reserve_new_metadata(osb, handle,
|
||||||
|
fe, &meta_ac);
|
||||||
|
if (status < 0) {
|
||||||
|
if (status != -ENOSPC)
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
status = ocfs2_reserve_clusters(osb, handle, 1, &data_ac);
|
||||||
|
if (status < 0) {
|
||||||
|
if (status != -ENOSPC)
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
credits = ocfs2_calc_extend_credits(sb, fe, 1);
|
||||||
|
} else {
|
||||||
|
spin_unlock(&OCFS2_I(dir)->ip_lock);
|
||||||
|
credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS;
|
||||||
|
}
|
||||||
|
|
||||||
|
handle = ocfs2_start_trans(osb, handle, credits);
|
||||||
|
if (IS_ERR(handle)) {
|
||||||
|
status = PTR_ERR(handle);
|
||||||
|
handle = NULL;
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
status = ocfs2_do_extend_dir(osb->sb, handle, dir, parent_fe_bh,
|
||||||
|
data_ac, meta_ac, &new_bh);
|
||||||
|
if (status < 0) {
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
ocfs2_set_new_buffer_uptodate(dir, new_bh);
|
||||||
|
|
||||||
|
status = ocfs2_journal_access(handle, dir, new_bh,
|
||||||
|
OCFS2_JOURNAL_ACCESS_CREATE);
|
||||||
|
if (status < 0) {
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
memset(new_bh->b_data, 0, sb->s_blocksize);
|
||||||
|
de = (struct ocfs2_dir_entry *) new_bh->b_data;
|
||||||
|
de->inode = 0;
|
||||||
|
de->rec_len = cpu_to_le16(sb->s_blocksize);
|
||||||
|
status = ocfs2_journal_dirty(handle, new_bh);
|
||||||
|
if (status < 0) {
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
dir_i_size += dir->i_sb->s_blocksize;
|
||||||
|
i_size_write(dir, dir_i_size);
|
||||||
|
dir->i_blocks = ocfs2_align_bytes_to_sectors(dir_i_size);
|
||||||
|
status = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh);
|
||||||
|
if (status < 0) {
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
*new_de_bh = new_bh;
|
||||||
|
get_bh(*new_de_bh);
|
||||||
|
bail:
|
||||||
|
if (handle)
|
||||||
|
ocfs2_commit_trans(handle);
|
||||||
|
|
||||||
|
if (data_ac)
|
||||||
|
ocfs2_free_alloc_context(data_ac);
|
||||||
|
if (meta_ac)
|
||||||
|
ocfs2_free_alloc_context(meta_ac);
|
||||||
|
|
||||||
|
if (new_bh)
|
||||||
|
brelse(new_bh);
|
||||||
|
|
||||||
|
mlog_exit(status);
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Search the dir for a good spot, extending it if necessary. The
|
||||||
|
* block containing an appropriate record is returned in ret_de_bh.
|
||||||
|
*/
|
||||||
|
int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
|
||||||
|
struct inode *dir,
|
||||||
|
struct buffer_head *parent_fe_bh,
|
||||||
|
const char *name,
|
||||||
|
int namelen,
|
||||||
|
struct buffer_head **ret_de_bh)
|
||||||
|
{
|
||||||
|
unsigned long offset;
|
||||||
|
struct buffer_head * bh = NULL;
|
||||||
|
unsigned short rec_len;
|
||||||
|
struct ocfs2_dinode *fe;
|
||||||
|
struct ocfs2_dir_entry *de;
|
||||||
|
struct super_block *sb;
|
||||||
|
int status;
|
||||||
|
|
||||||
|
mlog_entry_void();
|
||||||
|
|
||||||
|
mlog(0, "getting ready to insert namelen %d into dir %"MLFu64"\n",
|
||||||
|
namelen, OCFS2_I(dir)->ip_blkno);
|
||||||
|
|
||||||
|
BUG_ON(!S_ISDIR(dir->i_mode));
|
||||||
|
fe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
|
||||||
|
BUG_ON(le64_to_cpu(fe->i_size) != i_size_read(dir));
|
||||||
|
|
||||||
|
sb = dir->i_sb;
|
||||||
|
|
||||||
|
if (!namelen) {
|
||||||
|
status = -EINVAL;
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
bh = ocfs2_bread(dir, 0, &status, 0);
|
||||||
|
if (!bh) {
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
rec_len = OCFS2_DIR_REC_LEN(namelen);
|
||||||
|
offset = 0;
|
||||||
|
de = (struct ocfs2_dir_entry *) bh->b_data;
|
||||||
|
while (1) {
|
||||||
|
if ((char *)de >= sb->s_blocksize + bh->b_data) {
|
||||||
|
brelse(bh);
|
||||||
|
bh = NULL;
|
||||||
|
|
||||||
|
if (i_size_read(dir) <= offset) {
|
||||||
|
status = ocfs2_extend_dir(osb,
|
||||||
|
dir,
|
||||||
|
parent_fe_bh,
|
||||||
|
&bh);
|
||||||
|
if (status < 0) {
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
BUG_ON(!bh);
|
||||||
|
*ret_de_bh = bh;
|
||||||
|
get_bh(*ret_de_bh);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
bh = ocfs2_bread(dir,
|
||||||
|
offset >> sb->s_blocksize_bits,
|
||||||
|
&status,
|
||||||
|
0);
|
||||||
|
if (!bh) {
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
/* move to next block */
|
||||||
|
de = (struct ocfs2_dir_entry *) bh->b_data;
|
||||||
|
}
|
||||||
|
if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
|
||||||
|
status = -ENOENT;
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
if (ocfs2_match(namelen, name, de)) {
|
||||||
|
status = -EEXIST;
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
if (((le64_to_cpu(de->inode) == 0) &&
|
||||||
|
(le16_to_cpu(de->rec_len) >= rec_len)) ||
|
||||||
|
(le16_to_cpu(de->rec_len) >=
|
||||||
|
(OCFS2_DIR_REC_LEN(de->name_len) + rec_len))) {
|
||||||
|
/* Ok, we found a spot. Return this bh and let
|
||||||
|
* the caller actually fill it in. */
|
||||||
|
*ret_de_bh = bh;
|
||||||
|
get_bh(*ret_de_bh);
|
||||||
|
status = 0;
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
offset += le16_to_cpu(de->rec_len);
|
||||||
|
de = (struct ocfs2_dir_entry *)((char *) de + le16_to_cpu(de->rec_len));
|
||||||
|
}
|
||||||
|
|
||||||
|
status = 0;
|
||||||
|
bail:
|
||||||
|
if (bh)
|
||||||
|
brelse(bh);
|
||||||
|
|
||||||
|
mlog_exit(status);
|
||||||
|
return status;
|
||||||
|
}
|
|
@ -0,0 +1,54 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* dir.h
|
||||||
|
*
|
||||||
|
* Function prototypes
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef OCFS2_DIR_H
|
||||||
|
#define OCFS2_DIR_H
|
||||||
|
|
||||||
|
int ocfs2_check_dir_for_entry(struct inode *dir,
|
||||||
|
const char *name,
|
||||||
|
int namelen);
|
||||||
|
int ocfs2_empty_dir(struct inode *inode); /* FIXME: to namei.c */
|
||||||
|
int ocfs2_find_files_on_disk(const char *name,
|
||||||
|
int namelen,
|
||||||
|
u64 *blkno,
|
||||||
|
struct inode *inode,
|
||||||
|
struct buffer_head **dirent_bh,
|
||||||
|
struct ocfs2_dir_entry **dirent);
|
||||||
|
int ocfs2_readdir(struct file *filp, void *dirent, filldir_t filldir);
|
||||||
|
int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
|
||||||
|
struct inode *dir,
|
||||||
|
struct buffer_head *parent_fe_bh,
|
||||||
|
const char *name,
|
||||||
|
int namelen,
|
||||||
|
struct buffer_head **ret_de_bh);
|
||||||
|
struct ocfs2_alloc_context;
|
||||||
|
int ocfs2_do_extend_dir(struct super_block *sb,
|
||||||
|
struct ocfs2_journal_handle *handle,
|
||||||
|
struct inode *dir,
|
||||||
|
struct buffer_head *parent_fe_bh,
|
||||||
|
struct ocfs2_alloc_context *data_ac,
|
||||||
|
struct ocfs2_alloc_context *meta_ac,
|
||||||
|
struct buffer_head **new_bh);
|
||||||
|
#endif /* OCFS2_DIR_H */
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,111 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* dlmglue.h
|
||||||
|
*
|
||||||
|
* description here
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef DLMGLUE_H
|
||||||
|
#define DLMGLUE_H
|
||||||
|
|
||||||
|
#define OCFS2_LVB_VERSION 2
|
||||||
|
|
||||||
|
struct ocfs2_meta_lvb {
|
||||||
|
__be32 lvb_version;
|
||||||
|
__be32 lvb_iclusters;
|
||||||
|
__be32 lvb_iuid;
|
||||||
|
__be32 lvb_igid;
|
||||||
|
__be64 lvb_iatime_packed;
|
||||||
|
__be64 lvb_ictime_packed;
|
||||||
|
__be64 lvb_imtime_packed;
|
||||||
|
__be64 lvb_isize;
|
||||||
|
__be16 lvb_imode;
|
||||||
|
__be16 lvb_inlink;
|
||||||
|
__be32 lvb_reserved[3];
|
||||||
|
};
|
||||||
|
|
||||||
|
/* ocfs2_meta_lock_full() and ocfs2_data_lock_full() 'arg_flags' flags */
|
||||||
|
/* don't wait on recovery. */
|
||||||
|
#define OCFS2_META_LOCK_RECOVERY (0x01)
|
||||||
|
/* Instruct the dlm not to queue ourselves on the other node. */
|
||||||
|
#define OCFS2_META_LOCK_NOQUEUE (0x02)
|
||||||
|
/* don't block waiting for the vote thread, instead return -EAGAIN */
|
||||||
|
#define OCFS2_LOCK_NONBLOCK (0x04)
|
||||||
|
|
||||||
|
int ocfs2_dlm_init(struct ocfs2_super *osb);
|
||||||
|
void ocfs2_dlm_shutdown(struct ocfs2_super *osb);
|
||||||
|
void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res);
|
||||||
|
void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
|
||||||
|
enum ocfs2_lock_type type,
|
||||||
|
struct inode *inode);
|
||||||
|
void ocfs2_lock_res_free(struct ocfs2_lock_res *res);
|
||||||
|
int ocfs2_create_new_inode_locks(struct inode *inode);
|
||||||
|
int ocfs2_drop_inode_locks(struct inode *inode);
|
||||||
|
int ocfs2_data_lock_full(struct inode *inode,
|
||||||
|
int write,
|
||||||
|
int arg_flags);
|
||||||
|
#define ocfs2_data_lock(inode, write) ocfs2_data_lock_full(inode, write, 0)
|
||||||
|
int ocfs2_data_lock_with_page(struct inode *inode,
|
||||||
|
int write,
|
||||||
|
struct page *page);
|
||||||
|
void ocfs2_data_unlock(struct inode *inode,
|
||||||
|
int write);
|
||||||
|
int ocfs2_rw_lock(struct inode *inode, int write);
|
||||||
|
void ocfs2_rw_unlock(struct inode *inode, int write);
|
||||||
|
int ocfs2_meta_lock_full(struct inode *inode,
|
||||||
|
struct ocfs2_journal_handle *handle,
|
||||||
|
struct buffer_head **ret_bh,
|
||||||
|
int ex,
|
||||||
|
int arg_flags);
|
||||||
|
int ocfs2_meta_lock_with_page(struct inode *inode,
|
||||||
|
struct ocfs2_journal_handle *handle,
|
||||||
|
struct buffer_head **ret_bh,
|
||||||
|
int ex,
|
||||||
|
struct page *page);
|
||||||
|
/* 99% of the time we don't want to supply any additional flags --
|
||||||
|
* those are for very specific cases only. */
|
||||||
|
#define ocfs2_meta_lock(i, h, b, e) ocfs2_meta_lock_full(i, h, b, e, 0)
|
||||||
|
void ocfs2_meta_unlock(struct inode *inode,
|
||||||
|
int ex);
|
||||||
|
int ocfs2_super_lock(struct ocfs2_super *osb,
|
||||||
|
int ex);
|
||||||
|
void ocfs2_super_unlock(struct ocfs2_super *osb,
|
||||||
|
int ex);
|
||||||
|
int ocfs2_rename_lock(struct ocfs2_super *osb);
|
||||||
|
void ocfs2_rename_unlock(struct ocfs2_super *osb);
|
||||||
|
void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres);
|
||||||
|
|
||||||
|
/* for the vote thread */
|
||||||
|
void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_lock_res *lockres);
|
||||||
|
|
||||||
|
struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void);
|
||||||
|
void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug);
|
||||||
|
|
||||||
|
/* aids in debugging and tracking lvbs */
|
||||||
|
void ocfs2_dump_meta_lvb_info(u64 level,
|
||||||
|
const char *function,
|
||||||
|
unsigned int line,
|
||||||
|
struct ocfs2_lock_res *lockres);
|
||||||
|
#define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres)
|
||||||
|
|
||||||
|
#endif /* DLMGLUE_H */
|
|
@ -0,0 +1,45 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* Copyright (C) 2005 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef OCFS2_ENDIAN_H
|
||||||
|
#define OCFS2_ENDIAN_H
|
||||||
|
|
||||||
|
/* Add a CPU-order value to a little-endian 16-bit field, in place. */
static inline void le16_add_cpu(__le16 *var, u16 val)
{
	u16 sum = le16_to_cpu(*var) + val;

	*var = cpu_to_le16(sum);
}
|
||||||
|
|
||||||
|
/* Add a CPU-order value to a little-endian 32-bit field, in place. */
static inline void le32_add_cpu(__le32 *var, u32 val)
{
	u32 sum = le32_to_cpu(*var) + val;

	*var = cpu_to_le32(sum);
}
|
||||||
|
|
||||||
|
/* Mask a little-endian 32-bit field with a CPU-order value, in place. */
static inline void le32_and_cpu(__le32 *var, u32 val)
{
	u32 masked = le32_to_cpu(*var) & val;

	*var = cpu_to_le32(masked);
}
|
||||||
|
|
||||||
|
/* Add a CPU-order value to a big-endian 32-bit field, in place. */
static inline void be32_add_cpu(__be32 *var, u32 val)
{
	u32 sum = be32_to_cpu(*var) + val;

	*var = cpu_to_be32(sum);
}
|
||||||
|
|
||||||
|
#endif /* OCFS2_ENDIAN_H */
|
|
@ -0,0 +1,248 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* export.c
|
||||||
|
*
|
||||||
|
* Functions to facilitate NFS exporting
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002, 2005 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/fs.h>
|
||||||
|
#include <linux/types.h>
|
||||||
|
|
||||||
|
#define MLOG_MASK_PREFIX ML_EXPORT
|
||||||
|
#include <cluster/masklog.h>
|
||||||
|
|
||||||
|
#include "ocfs2.h"
|
||||||
|
|
||||||
|
#include "dir.h"
|
||||||
|
#include "dlmglue.h"
|
||||||
|
#include "export.h"
|
||||||
|
#include "inode.h"
|
||||||
|
|
||||||
|
#include "buffer_head_io.h"
|
||||||
|
|
||||||
|
struct ocfs2_inode_handle
|
||||||
|
{
|
||||||
|
u64 ih_blkno;
|
||||||
|
u32 ih_generation;
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct dentry *ocfs2_get_dentry(struct super_block *sb, void *vobjp)
|
||||||
|
{
|
||||||
|
struct ocfs2_inode_handle *handle = vobjp;
|
||||||
|
struct inode *inode;
|
||||||
|
struct dentry *result;
|
||||||
|
|
||||||
|
mlog_entry("(0x%p, 0x%p)\n", sb, handle);
|
||||||
|
|
||||||
|
if (handle->ih_blkno == 0) {
|
||||||
|
mlog_errno(-ESTALE);
|
||||||
|
return ERR_PTR(-ESTALE);
|
||||||
|
}
|
||||||
|
|
||||||
|
inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno);
|
||||||
|
|
||||||
|
if (IS_ERR(inode)) {
|
||||||
|
mlog_errno(PTR_ERR(inode));
|
||||||
|
return (void *)inode;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (handle->ih_generation != inode->i_generation) {
|
||||||
|
iput(inode);
|
||||||
|
mlog_errno(-ESTALE);
|
||||||
|
return ERR_PTR(-ESTALE);
|
||||||
|
}
|
||||||
|
|
||||||
|
result = d_alloc_anon(inode);
|
||||||
|
|
||||||
|
if (!result) {
|
||||||
|
iput(inode);
|
||||||
|
mlog_errno(-ENOMEM);
|
||||||
|
return ERR_PTR(-ENOMEM);
|
||||||
|
}
|
||||||
|
|
||||||
|
mlog_exit_ptr(result);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct dentry *ocfs2_get_parent(struct dentry *child)
|
||||||
|
{
|
||||||
|
int status;
|
||||||
|
u64 blkno;
|
||||||
|
struct dentry *parent;
|
||||||
|
struct inode *inode;
|
||||||
|
struct inode *dir = child->d_inode;
|
||||||
|
struct buffer_head *dirent_bh = NULL;
|
||||||
|
struct ocfs2_dir_entry *dirent;
|
||||||
|
|
||||||
|
mlog_entry("(0x%p, '%.*s')\n", child,
|
||||||
|
child->d_name.len, child->d_name.name);
|
||||||
|
|
||||||
|
mlog(0, "find parent of directory %"MLFu64"\n",
|
||||||
|
OCFS2_I(dir)->ip_blkno);
|
||||||
|
|
||||||
|
status = ocfs2_meta_lock(dir, NULL, NULL, 0);
|
||||||
|
if (status < 0) {
|
||||||
|
if (status != -ENOENT)
|
||||||
|
mlog_errno(status);
|
||||||
|
parent = ERR_PTR(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
status = ocfs2_find_files_on_disk("..", 2, &blkno, dir, &dirent_bh,
|
||||||
|
&dirent);
|
||||||
|
if (status < 0) {
|
||||||
|
parent = ERR_PTR(-ENOENT);
|
||||||
|
goto bail_unlock;
|
||||||
|
}
|
||||||
|
|
||||||
|
inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno);
|
||||||
|
if (IS_ERR(inode)) {
|
||||||
|
mlog(ML_ERROR, "Unable to create inode %"MLFu64"\n", blkno);
|
||||||
|
parent = ERR_PTR(-EACCES);
|
||||||
|
goto bail_unlock;
|
||||||
|
}
|
||||||
|
|
||||||
|
parent = d_alloc_anon(inode);
|
||||||
|
if (!parent) {
|
||||||
|
iput(inode);
|
||||||
|
parent = ERR_PTR(-ENOMEM);
|
||||||
|
}
|
||||||
|
|
||||||
|
bail_unlock:
|
||||||
|
ocfs2_meta_unlock(dir, 0);
|
||||||
|
|
||||||
|
if (dirent_bh)
|
||||||
|
brelse(dirent_bh);
|
||||||
|
|
||||||
|
bail:
|
||||||
|
mlog_exit_ptr(parent);
|
||||||
|
|
||||||
|
return parent;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Encode 'dentry' into the file handle words at 'fh'.
 *
 * Words 0-2 hold the inode's block number (high half, low half) and
 * generation.  For a connectable handle of a non-directory, words 3-5
 * hold the same triple for the parent directory.
 *
 * *max_len is the buffer size in 32-bit words on entry and the number of
 * words written on success.  Returns the handle type: 1 (inode only),
 * 2 (inode plus parent), or 255 when the buffer is too small, in which
 * case *max_len is left untouched.
 */
static int ocfs2_encode_fh(struct dentry *dentry, __be32 *fh, int *max_len,
			   int connectable)
{
	struct inode *inode = dentry->d_inode;
	int avail = *max_len;
	int fh_type = 1;
	u64 blkno;
	u32 generation;

	mlog_entry("(0x%p, '%.*s', 0x%p, %d, %d)\n", dentry,
		   dentry->d_name.len, dentry->d_name.name,
		   fh, avail, connectable);

	if (avail < 3 || (connectable && avail < 6)) {
		mlog(ML_ERROR, "fh buffer is too small for encoding\n");
		fh_type = 255;
		goto bail;
	}

	blkno = OCFS2_I(inode)->ip_blkno;
	generation = inode->i_generation;

	mlog(0, "Encoding fh: blkno: %"MLFu64", generation: %u\n",
	     blkno, generation);

	avail = 3;
	fh[0] = cpu_to_le32((u32)(blkno >> 32));
	fh[1] = cpu_to_le32((u32)(blkno & 0xffffffff));
	fh[2] = cpu_to_le32(generation);

	if (connectable && !S_ISDIR(inode->i_mode)) {
		struct inode *parent;

		/* d_lock is held while d_parent is sampled. */
		spin_lock(&dentry->d_lock);

		parent = dentry->d_parent->d_inode;
		blkno = OCFS2_I(parent)->ip_blkno;
		generation = parent->i_generation;

		fh[3] = cpu_to_le32((u32)(blkno >> 32));
		fh[4] = cpu_to_le32((u32)(blkno & 0xffffffff));
		fh[5] = cpu_to_le32(generation);

		spin_unlock(&dentry->d_lock);

		avail = 6;
		fh_type = 2;

		mlog(0, "Encoding parent: blkno: %"MLFu64", generation: %u\n",
		     blkno, generation);
	}

	*max_len = avail;

bail:
	mlog_exit(fh_type);
	return fh_type;
}
|
||||||
|
|
||||||
|
/*
 * Decode a file handle produced by ocfs2_encode_fh() and resolve it to a
 * dentry through find_exported_dentry().
 *
 * fh:          handle words; 0-2 describe the inode, 3-5 (type 2 only)
 *              describe its parent directory.
 * fh_len:      number of 32-bit words available in fh.
 * fileid_type: handle type as returned by ocfs2_encode_fh().
 * acceptable / context: passed through to find_exported_dentry().
 *
 * Returns NULL when the handle is too short or of an unknown type,
 * otherwise whatever find_exported_dentry() returns.
 */
static struct dentry *ocfs2_decode_fh(struct super_block *sb, __be32 *fh,
				      int fh_len, int fileid_type,
				      int (*acceptable)(void *context,
							struct dentry *de),
				      void *context)
{
	struct ocfs2_inode_handle handle, parent;
	struct dentry *ret = NULL;

	mlog_entry("(0x%p, 0x%p, %d, %d, 0x%p, 0x%p)\n",
		   sb, fh, fh_len, fileid_type, acceptable, context);

	if (fh_len < 3 || fileid_type > 2)
		goto bail;

	/*
	 * &parent is handed to find_exported_dentry() for every handle
	 * type, so it must never hold stack garbage.  A zero block number
	 * makes ocfs2_get_dentry() answer -ESTALE instead of looking up an
	 * arbitrary block.
	 */
	parent.ih_blkno = 0;
	parent.ih_generation = 0;

	if (fileid_type == 2) {
		if (fh_len < 6)
			goto bail;

		parent.ih_blkno = (u64)le32_to_cpu(fh[3]) << 32;
		parent.ih_blkno |= (u64)le32_to_cpu(fh[4]);
		parent.ih_generation = le32_to_cpu(fh[5]);

		mlog(0, "Decoding parent: blkno: %"MLFu64", generation: %u\n",
		     parent.ih_blkno, parent.ih_generation);
	}

	handle.ih_blkno = (u64)le32_to_cpu(fh[0]) << 32;
	handle.ih_blkno |= (u64)le32_to_cpu(fh[1]);
	handle.ih_generation = le32_to_cpu(fh[2]);

	mlog(0, "Decoding fh: blkno: %"MLFu64", generation: %u\n",
	     handle.ih_blkno, handle.ih_generation);

	ret = ocfs2_export_ops.find_exported_dentry(sb, &handle, &parent,
						    acceptable, context);

bail:
	mlog_exit_ptr(ret);
	return ret;
}
|
||||||
|
|
||||||
|
struct export_operations ocfs2_export_ops = {
|
||||||
|
.decode_fh = ocfs2_decode_fh,
|
||||||
|
.encode_fh = ocfs2_encode_fh,
|
||||||
|
|
||||||
|
.get_parent = ocfs2_get_parent,
|
||||||
|
.get_dentry = ocfs2_get_dentry,
|
||||||
|
};
|
|
@ -0,0 +1,31 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* export.h
|
||||||
|
*
|
||||||
|
* Function prototypes
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002, 2005 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef OCFS2_EXPORT_H
|
||||||
|
#define OCFS2_EXPORT_H
|
||||||
|
|
||||||
|
/* Export operations table for OCFS2; defined in export.c. */
extern struct export_operations ocfs2_export_ops;
|
||||||
|
|
||||||
|
#endif /* OCFS2_EXPORT_H */
|
|
@ -0,0 +1,994 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* extent_map.c
|
||||||
|
*
|
||||||
|
* In-memory extent map for OCFS2. Man, this code was prettier in
|
||||||
|
* the library.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2004 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License, version 2, as published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/fs.h>
|
||||||
|
#include <linux/init.h>
|
||||||
|
#include <linux/types.h>
|
||||||
|
#include <linux/slab.h>
|
||||||
|
#include <linux/rbtree.h>
|
||||||
|
|
||||||
|
#define MLOG_MASK_PREFIX ML_EXTENT_MAP
|
||||||
|
#include <cluster/masklog.h>
|
||||||
|
|
||||||
|
#include "ocfs2.h"
|
||||||
|
|
||||||
|
#include "extent_map.h"
|
||||||
|
#include "inode.h"
|
||||||
|
#include "super.h"
|
||||||
|
|
||||||
|
#include "buffer_head_io.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* SUCK SUCK SUCK
|
||||||
|
* Our headers are so bad that struct ocfs2_extent_map is in ocfs.h
|
||||||
|
*/
|
||||||
|
|
||||||
|
struct ocfs2_extent_map_entry {
|
||||||
|
struct rb_node e_node;
|
||||||
|
int e_tree_depth;
|
||||||
|
struct ocfs2_extent_rec e_rec;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
 * Scratch state passed to ocfs2_extent_map_try_insert().
 * NOTE(review): the field meanings below are inferred from the names only;
 * the code that uses them is outside this view -- confirm before relying
 * on them.
 */
struct ocfs2_em_insert_context {
	int				need_left;	/* presumably: a left split entry is required */
	int				need_right;	/* presumably: a right split entry is required */
	struct ocfs2_extent_map_entry	*new_ent;
	struct ocfs2_extent_map_entry	*old_ent;
	struct ocfs2_extent_map_entry	*left_ent;
	struct ocfs2_extent_map_entry	*right_ent;
};
|
||||||
|
|
||||||
|
/* Slab cache handle; presumably backs struct ocfs2_extent_map_entry
 * allocations -- it is set up outside this part of the file, confirm there. */
static kmem_cache_t *ocfs2_em_ent_cachep = NULL;
|
||||||
|
|
||||||
|
|
||||||
|
static struct ocfs2_extent_map_entry *
|
||||||
|
ocfs2_extent_map_lookup(struct ocfs2_extent_map *em,
|
||||||
|
u32 cpos, u32 clusters,
|
||||||
|
struct rb_node ***ret_p,
|
||||||
|
struct rb_node **ret_parent);
|
||||||
|
static int ocfs2_extent_map_insert(struct inode *inode,
|
||||||
|
struct ocfs2_extent_rec *rec,
|
||||||
|
int tree_depth);
|
||||||
|
static int ocfs2_extent_map_insert_entry(struct ocfs2_extent_map *em,
|
||||||
|
struct ocfs2_extent_map_entry *ent);
|
||||||
|
static int ocfs2_extent_map_find_leaf(struct inode *inode,
|
||||||
|
u32 cpos, u32 clusters,
|
||||||
|
struct ocfs2_extent_list *el);
|
||||||
|
static int ocfs2_extent_map_lookup_read(struct inode *inode,
|
||||||
|
u32 cpos, u32 clusters,
|
||||||
|
struct ocfs2_extent_map_entry **ret_ent);
|
||||||
|
static int ocfs2_extent_map_try_insert(struct inode *inode,
|
||||||
|
struct ocfs2_extent_rec *rec,
|
||||||
|
int tree_depth,
|
||||||
|
struct ocfs2_em_insert_context *ctxt);
|
||||||
|
|
||||||
|
/* returns 1 only if the rec contains all the given clusters -- that is that
|
||||||
|
* rec's cpos is <= the cluster cpos and that the rec endpoint (cpos +
|
||||||
|
* clusters) is >= the argument's endpoint */
|
||||||
|
static int ocfs2_extent_rec_contains_clusters(struct ocfs2_extent_rec *rec,
|
||||||
|
u32 cpos, u32 clusters)
|
||||||
|
{
|
||||||
|
if (le32_to_cpu(rec->e_cpos) > cpos)
|
||||||
|
return 0;
|
||||||
|
if (cpos + clusters > le32_to_cpu(rec->e_cpos) +
|
||||||
|
le32_to_cpu(rec->e_clusters))
|
||||||
|
return 0;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Find an entry in the tree that intersects the region passed in.
|
||||||
|
* Note that this will find straddled intervals, it is up to the
|
||||||
|
* callers to enforce any boundary conditions.
|
||||||
|
*
|
||||||
|
* Callers must hold ip_lock. This lookup is not guaranteed to return
|
||||||
|
* a tree_depth 0 match, and as such can race inserts if the lock
|
||||||
|
* were not held.
|
||||||
|
*
|
||||||
|
* The rb_node garbage lets insertion share the search. Trivial
|
||||||
|
* callers pass NULL.
|
||||||
|
*/
|
||||||
|
static struct ocfs2_extent_map_entry *
|
||||||
|
ocfs2_extent_map_lookup(struct ocfs2_extent_map *em,
|
||||||
|
u32 cpos, u32 clusters,
|
||||||
|
struct rb_node ***ret_p,
|
||||||
|
struct rb_node **ret_parent)
|
||||||
|
{
|
||||||
|
struct rb_node **p = &em->em_extents.rb_node;
|
||||||
|
struct rb_node *parent = NULL;
|
||||||
|
struct ocfs2_extent_map_entry *ent = NULL;
|
||||||
|
|
||||||
|
while (*p)
|
||||||
|
{
|
||||||
|
parent = *p;
|
||||||
|
ent = rb_entry(parent, struct ocfs2_extent_map_entry,
|
||||||
|
e_node);
|
||||||
|
if ((cpos + clusters) <= le32_to_cpu(ent->e_rec.e_cpos)) {
|
||||||
|
p = &(*p)->rb_left;
|
||||||
|
ent = NULL;
|
||||||
|
} else if (cpos >= (le32_to_cpu(ent->e_rec.e_cpos) +
|
||||||
|
le32_to_cpu(ent->e_rec.e_clusters))) {
|
||||||
|
p = &(*p)->rb_right;
|
||||||
|
ent = NULL;
|
||||||
|
} else
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ret_p != NULL)
|
||||||
|
*ret_p = p;
|
||||||
|
if (ret_parent != NULL)
|
||||||
|
*ret_parent = parent;
|
||||||
|
return ent;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Find the leaf containing the interval we want. While we're on our
|
||||||
|
* way down the tree, fill in every record we see at any depth, because
|
||||||
|
* we might want it later.
|
||||||
|
*
|
||||||
|
* Note that this code is run without ip_lock. That's because it
|
||||||
|
* sleeps while reading. If someone is also filling the extent list at
|
||||||
|
* the same time we are, we might have to restart.
|
||||||
|
*/
|
||||||
|
static int ocfs2_extent_map_find_leaf(struct inode *inode,
|
||||||
|
u32 cpos, u32 clusters,
|
||||||
|
struct ocfs2_extent_list *el)
|
||||||
|
{
|
||||||
|
int i, ret;
|
||||||
|
struct buffer_head *eb_bh = NULL;
|
||||||
|
u64 blkno;
|
||||||
|
u32 rec_end;
|
||||||
|
struct ocfs2_extent_block *eb;
|
||||||
|
struct ocfs2_extent_rec *rec;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The bh data containing the el cannot change here, because
|
||||||
|
* we hold alloc_sem. So we can do this without other
|
||||||
|
* locks.
|
||||||
|
*/
|
||||||
|
while (el->l_tree_depth)
|
||||||
|
{
|
||||||
|
blkno = 0;
|
||||||
|
for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
|
||||||
|
rec = &el->l_recs[i];
|
||||||
|
rec_end = (le32_to_cpu(rec->e_cpos) +
|
||||||
|
le32_to_cpu(rec->e_clusters));
|
||||||
|
|
||||||
|
ret = -EBADR;
|
||||||
|
if (rec_end > OCFS2_I(inode)->ip_clusters) {
|
||||||
|
mlog_errno(ret);
|
||||||
|
goto out_free;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rec_end <= cpos) {
|
||||||
|
ret = ocfs2_extent_map_insert(inode, rec,
|
||||||
|
le16_to_cpu(el->l_tree_depth));
|
||||||
|
if (ret && (ret != -EEXIST)) {
|
||||||
|
mlog_errno(ret);
|
||||||
|
goto out_free;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if ((cpos + clusters) <= le32_to_cpu(rec->e_cpos)) {
|
||||||
|
ret = ocfs2_extent_map_insert(inode, rec,
|
||||||
|
le16_to_cpu(el->l_tree_depth));
|
||||||
|
if (ret && (ret != -EEXIST)) {
|
||||||
|
mlog_errno(ret);
|
||||||
|
goto out_free;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We've found a record that matches our
|
||||||
|
* interval. We don't insert it because we're
|
||||||
|
* about to traverse it.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Check to see if we're stradling */
|
||||||
|
ret = -ESRCH;
|
||||||
|
if (!ocfs2_extent_rec_contains_clusters(rec,
|
||||||
|
cpos,
|
||||||
|
clusters)) {
|
||||||
|
mlog_errno(ret);
|
||||||
|
goto out_free;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we've already found a record, the el has
|
||||||
|
* two records covering the same interval.
|
||||||
|
* EEEK!
|
||||||
|
*/
|
||||||
|
ret = -EBADR;
|
||||||
|
if (blkno) {
|
||||||
|
mlog_errno(ret);
|
||||||
|
goto out_free;
|
||||||
|
}
|
||||||
|
|
||||||
|
blkno = le64_to_cpu(rec->e_blkno);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We don't support holes, and we're still up
|
||||||
|
* in the branches, so we'd better have found someone
|
||||||
|
*/
|
||||||
|
ret = -EBADR;
|
||||||
|
if (!blkno) {
|
||||||
|
mlog_errno(ret);
|
||||||
|
goto out_free;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (eb_bh) {
|
||||||
|
brelse(eb_bh);
|
||||||
|
eb_bh = NULL;
|
||||||
|
}
|
||||||
|
ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
|
||||||
|
blkno, &eb_bh, OCFS2_BH_CACHED,
|
||||||
|
inode);
|
||||||
|
if (ret) {
|
||||||
|
mlog_errno(ret);
|
||||||
|
goto out_free;
|
||||||
|
}
|
||||||
|
eb = (struct ocfs2_extent_block *)eb_bh->b_data;
|
||||||
|
if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
|
||||||
|
OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
|
||||||
|
ret = -EIO;
|
||||||
|
goto out_free;
|
||||||
|
}
|
||||||
|
el = &eb->h_list;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (el->l_tree_depth)
|
||||||
|
BUG();
|
||||||
|
|
||||||
|
for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
|
||||||
|
rec = &el->l_recs[i];
|
||||||
|
ret = ocfs2_extent_map_insert(inode, rec,
|
||||||
|
le16_to_cpu(el->l_tree_depth));
|
||||||
|
if (ret) {
|
||||||
|
mlog_errno(ret);
|
||||||
|
goto out_free;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = 0;
|
||||||
|
|
||||||
|
out_free:
|
||||||
|
if (eb_bh)
|
||||||
|
brelse(eb_bh);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This lookup actually will read from disk. It has one invariant:
|
||||||
|
* It will never re-traverse blocks. This means that all inserts should
|
||||||
|
* be new regions or more granular regions (both allowed by insert).
|
||||||
|
*/
|
||||||
|
static int ocfs2_extent_map_lookup_read(struct inode *inode,
|
||||||
|
u32 cpos,
|
||||||
|
u32 clusters,
|
||||||
|
struct ocfs2_extent_map_entry **ret_ent)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
u64 blkno;
|
||||||
|
struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map;
|
||||||
|
struct ocfs2_extent_map_entry *ent;
|
||||||
|
struct buffer_head *bh = NULL;
|
||||||
|
struct ocfs2_extent_block *eb;
|
||||||
|
struct ocfs2_dinode *di;
|
||||||
|
struct ocfs2_extent_list *el;
|
||||||
|
|
||||||
|
spin_lock(&OCFS2_I(inode)->ip_lock);
|
||||||
|
ent = ocfs2_extent_map_lookup(em, cpos, clusters, NULL, NULL);
|
||||||
|
if (ent) {
|
||||||
|
if (!ent->e_tree_depth) {
|
||||||
|
spin_unlock(&OCFS2_I(inode)->ip_lock);
|
||||||
|
*ret_ent = ent;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
blkno = le64_to_cpu(ent->e_rec.e_blkno);
|
||||||
|
spin_unlock(&OCFS2_I(inode)->ip_lock);
|
||||||
|
|
||||||
|
ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), blkno, &bh,
|
||||||
|
OCFS2_BH_CACHED, inode);
|
||||||
|
if (ret) {
|
||||||
|
mlog_errno(ret);
|
||||||
|
if (bh)
|
||||||
|
brelse(bh);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
eb = (struct ocfs2_extent_block *)bh->b_data;
|
||||||
|
if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
|
||||||
|
OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
|
||||||
|
brelse(bh);
|
||||||
|
return -EIO;
|
||||||
|
}
|
||||||
|
el = &eb->h_list;
|
||||||
|
} else {
|
||||||
|
spin_unlock(&OCFS2_I(inode)->ip_lock);
|
||||||
|
|
||||||
|
ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
|
||||||
|
OCFS2_I(inode)->ip_blkno, &bh,
|
||||||
|
OCFS2_BH_CACHED, inode);
|
||||||
|
if (ret) {
|
||||||
|
mlog_errno(ret);
|
||||||
|
if (bh)
|
||||||
|
brelse(bh);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
di = (struct ocfs2_dinode *)bh->b_data;
|
||||||
|
if (!OCFS2_IS_VALID_DINODE(di)) {
|
||||||
|
brelse(bh);
|
||||||
|
OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, di);
|
||||||
|
return -EIO;
|
||||||
|
}
|
||||||
|
el = &di->id2.i_list;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = ocfs2_extent_map_find_leaf(inode, cpos, clusters, el);
|
||||||
|
brelse(bh);
|
||||||
|
if (ret) {
|
||||||
|
mlog_errno(ret);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
ent = ocfs2_extent_map_lookup(em, cpos, clusters, NULL, NULL);
|
||||||
|
if (!ent) {
|
||||||
|
ret = -ESRCH;
|
||||||
|
mlog_errno(ret);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ent->e_tree_depth)
|
||||||
|
BUG(); /* FIXME: Make sure this isn't a corruption */
|
||||||
|
|
||||||
|
*ret_ent = ent;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Callers must hold ip_lock. This can insert pieces of the tree,
|
||||||
|
* thus racing lookup if the lock weren't held.
|
||||||
|
*/
|
||||||
|
static int ocfs2_extent_map_insert_entry(struct ocfs2_extent_map *em,
|
||||||
|
struct ocfs2_extent_map_entry *ent)
|
||||||
|
{
|
||||||
|
struct rb_node **p, *parent;
|
||||||
|
struct ocfs2_extent_map_entry *old_ent;
|
||||||
|
|
||||||
|
old_ent = ocfs2_extent_map_lookup(em, le32_to_cpu(ent->e_rec.e_cpos),
|
||||||
|
le32_to_cpu(ent->e_rec.e_clusters),
|
||||||
|
&p, &parent);
|
||||||
|
if (old_ent)
|
||||||
|
return -EEXIST;
|
||||||
|
|
||||||
|
rb_link_node(&ent->e_node, parent, p);
|
||||||
|
rb_insert_color(&ent->e_node, &em->em_extents);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Simple rule: on any return code other than -EAGAIN, anything left
|
||||||
|
* in the insert_context will be freed.
|
||||||
|
*/
|
||||||
|
static int ocfs2_extent_map_try_insert(struct inode *inode,
|
||||||
|
struct ocfs2_extent_rec *rec,
|
||||||
|
int tree_depth,
|
||||||
|
struct ocfs2_em_insert_context *ctxt)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map;
|
||||||
|
struct ocfs2_extent_map_entry *old_ent;
|
||||||
|
|
||||||
|
ctxt->need_left = 0;
|
||||||
|
ctxt->need_right = 0;
|
||||||
|
ctxt->old_ent = NULL;
|
||||||
|
|
||||||
|
spin_lock(&OCFS2_I(inode)->ip_lock);
|
||||||
|
ret = ocfs2_extent_map_insert_entry(em, ctxt->new_ent);
|
||||||
|
if (!ret) {
|
||||||
|
ctxt->new_ent = NULL;
|
||||||
|
goto out_unlock;
|
||||||
|
}
|
||||||
|
|
||||||
|
old_ent = ocfs2_extent_map_lookup(em, le32_to_cpu(rec->e_cpos),
|
||||||
|
le32_to_cpu(rec->e_clusters), NULL,
|
||||||
|
NULL);
|
||||||
|
|
||||||
|
if (!old_ent)
|
||||||
|
BUG();
|
||||||
|
|
||||||
|
ret = -EEXIST;
|
||||||
|
if (old_ent->e_tree_depth < tree_depth)
|
||||||
|
goto out_unlock;
|
||||||
|
|
||||||
|
if (old_ent->e_tree_depth == tree_depth) {
|
||||||
|
if (!memcmp(rec, &old_ent->e_rec,
|
||||||
|
sizeof(struct ocfs2_extent_rec)))
|
||||||
|
ret = 0;
|
||||||
|
|
||||||
|
/* FIXME: Should this be ESRCH/EBADR??? */
|
||||||
|
goto out_unlock;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We do it in this order specifically so that no actual tree
|
||||||
|
* changes occur until we have all the pieces we need. We
|
||||||
|
* don't want malloc failures to leave an inconsistent tree.
|
||||||
|
* Whenever we drop the lock, another process could be
|
||||||
|
* inserting. Also note that, if another process just beat us
|
||||||
|
* to an insert, we might not need the same pieces we needed
|
||||||
|
* the first go round. In the end, the pieces we need will
|
||||||
|
* be used, and the pieces we don't will be freed.
|
||||||
|
*/
|
||||||
|
ctxt->need_left = !!(le32_to_cpu(rec->e_cpos) >
|
||||||
|
le32_to_cpu(old_ent->e_rec.e_cpos));
|
||||||
|
ctxt->need_right = !!((le32_to_cpu(old_ent->e_rec.e_cpos) +
|
||||||
|
le32_to_cpu(old_ent->e_rec.e_clusters)) >
|
||||||
|
(le32_to_cpu(rec->e_cpos) + le32_to_cpu(rec->e_clusters)));
|
||||||
|
ret = -EAGAIN;
|
||||||
|
if (ctxt->need_left) {
|
||||||
|
if (!ctxt->left_ent)
|
||||||
|
goto out_unlock;
|
||||||
|
*(ctxt->left_ent) = *old_ent;
|
||||||
|
ctxt->left_ent->e_rec.e_clusters =
|
||||||
|
cpu_to_le32(le32_to_cpu(rec->e_cpos) -
|
||||||
|
le32_to_cpu(ctxt->left_ent->e_rec.e_cpos));
|
||||||
|
}
|
||||||
|
if (ctxt->need_right) {
|
||||||
|
if (!ctxt->right_ent)
|
||||||
|
goto out_unlock;
|
||||||
|
*(ctxt->right_ent) = *old_ent;
|
||||||
|
ctxt->right_ent->e_rec.e_cpos =
|
||||||
|
cpu_to_le32(le32_to_cpu(rec->e_cpos) +
|
||||||
|
le32_to_cpu(rec->e_clusters));
|
||||||
|
ctxt->right_ent->e_rec.e_clusters =
|
||||||
|
cpu_to_le32((le32_to_cpu(old_ent->e_rec.e_cpos) +
|
||||||
|
le32_to_cpu(old_ent->e_rec.e_clusters)) -
|
||||||
|
le32_to_cpu(ctxt->right_ent->e_rec.e_cpos));
|
||||||
|
}
|
||||||
|
|
||||||
|
rb_erase(&old_ent->e_node, &em->em_extents);
|
||||||
|
/* Now that he's erased, set him up for deletion */
|
||||||
|
ctxt->old_ent = old_ent;
|
||||||
|
|
||||||
|
if (ctxt->need_left) {
|
||||||
|
ret = ocfs2_extent_map_insert_entry(em,
|
||||||
|
ctxt->left_ent);
|
||||||
|
if (ret)
|
||||||
|
goto out_unlock;
|
||||||
|
ctxt->left_ent = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ctxt->need_right) {
|
||||||
|
ret = ocfs2_extent_map_insert_entry(em,
|
||||||
|
ctxt->right_ent);
|
||||||
|
if (ret)
|
||||||
|
goto out_unlock;
|
||||||
|
ctxt->right_ent = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = ocfs2_extent_map_insert_entry(em, ctxt->new_ent);
|
||||||
|
|
||||||
|
if (!ret)
|
||||||
|
ctxt->new_ent = NULL;
|
||||||
|
|
||||||
|
out_unlock:
|
||||||
|
spin_unlock(&OCFS2_I(inode)->ip_lock);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static int ocfs2_extent_map_insert(struct inode *inode,
|
||||||
|
struct ocfs2_extent_rec *rec,
|
||||||
|
int tree_depth)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
struct ocfs2_em_insert_context ctxt = {0, };
|
||||||
|
|
||||||
|
if ((le32_to_cpu(rec->e_cpos) + le32_to_cpu(rec->e_clusters)) >
|
||||||
|
OCFS2_I(inode)->ip_map.em_clusters) {
|
||||||
|
ret = -EBADR;
|
||||||
|
mlog_errno(ret);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Zero e_clusters means a truncated tail record. It better be EOF */
|
||||||
|
if (!rec->e_clusters) {
|
||||||
|
if ((le32_to_cpu(rec->e_cpos) + le32_to_cpu(rec->e_clusters)) !=
|
||||||
|
OCFS2_I(inode)->ip_map.em_clusters) {
|
||||||
|
ret = -EBADR;
|
||||||
|
mlog_errno(ret);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Ignore the truncated tail */
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = -ENOMEM;
|
||||||
|
ctxt.new_ent = kmem_cache_alloc(ocfs2_em_ent_cachep,
|
||||||
|
GFP_KERNEL);
|
||||||
|
if (!ctxt.new_ent) {
|
||||||
|
mlog_errno(ret);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
ctxt.new_ent->e_rec = *rec;
|
||||||
|
ctxt.new_ent->e_tree_depth = tree_depth;
|
||||||
|
|
||||||
|
do {
|
||||||
|
ret = -ENOMEM;
|
||||||
|
if (ctxt.need_left && !ctxt.left_ent) {
|
||||||
|
ctxt.left_ent =
|
||||||
|
kmem_cache_alloc(ocfs2_em_ent_cachep,
|
||||||
|
GFP_KERNEL);
|
||||||
|
if (!ctxt.left_ent)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (ctxt.need_right && !ctxt.right_ent) {
|
||||||
|
ctxt.right_ent =
|
||||||
|
kmem_cache_alloc(ocfs2_em_ent_cachep,
|
||||||
|
GFP_KERNEL);
|
||||||
|
if (!ctxt.right_ent)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = ocfs2_extent_map_try_insert(inode, rec,
|
||||||
|
tree_depth, &ctxt);
|
||||||
|
} while (ret == -EAGAIN);
|
||||||
|
|
||||||
|
if (ret < 0)
|
||||||
|
mlog_errno(ret);
|
||||||
|
|
||||||
|
if (ctxt.left_ent)
|
||||||
|
kmem_cache_free(ocfs2_em_ent_cachep, ctxt.left_ent);
|
||||||
|
if (ctxt.right_ent)
|
||||||
|
kmem_cache_free(ocfs2_em_ent_cachep, ctxt.right_ent);
|
||||||
|
if (ctxt.old_ent)
|
||||||
|
kmem_cache_free(ocfs2_em_ent_cachep, ctxt.old_ent);
|
||||||
|
if (ctxt.new_ent)
|
||||||
|
kmem_cache_free(ocfs2_em_ent_cachep, ctxt.new_ent);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Append this record to the tail of the extent map. It must be
|
||||||
|
* tree_depth 0. The record might be an extension of an existing
|
||||||
|
* record, and as such that needs to be handled. eg:
|
||||||
|
*
|
||||||
|
* Existing record in the extent map:
|
||||||
|
*
|
||||||
|
* cpos = 10, len = 10
|
||||||
|
* |---------|
|
||||||
|
*
|
||||||
|
* New Record:
|
||||||
|
*
|
||||||
|
* cpos = 10, len = 20
|
||||||
|
* |------------------|
|
||||||
|
*
|
||||||
|
* The passed record is the new on-disk record. The new_clusters value
|
||||||
|
* is how many clusters were added to the file. If the append is a
|
||||||
|
* contiguous append, the new_clusters has been added to
|
||||||
|
* rec->e_clusters. If the append is an entirely new extent, then
|
||||||
|
* rec->e_clusters is == new_clusters.
|
||||||
|
*/
|
||||||
|
int ocfs2_extent_map_append(struct inode *inode,
|
||||||
|
struct ocfs2_extent_rec *rec,
|
||||||
|
u32 new_clusters)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map;
|
||||||
|
struct ocfs2_extent_map_entry *ent;
|
||||||
|
struct ocfs2_extent_rec *old;
|
||||||
|
|
||||||
|
BUG_ON(!new_clusters);
|
||||||
|
BUG_ON(le32_to_cpu(rec->e_clusters) < new_clusters);
|
||||||
|
|
||||||
|
if (em->em_clusters < OCFS2_I(inode)->ip_clusters) {
|
||||||
|
/*
|
||||||
|
* Size changed underneath us on disk. Drop any
|
||||||
|
* straddling records and update our idea of
|
||||||
|
* i_clusters
|
||||||
|
*/
|
||||||
|
ocfs2_extent_map_drop(inode, em->em_clusters - 1);
|
||||||
|
em->em_clusters = OCFS2_I(inode)->ip_clusters;
|
||||||
|
}
|
||||||
|
|
||||||
|
mlog_bug_on_msg((le32_to_cpu(rec->e_cpos) +
|
||||||
|
le32_to_cpu(rec->e_clusters)) !=
|
||||||
|
(em->em_clusters + new_clusters),
|
||||||
|
"Inode %"MLFu64":\n"
|
||||||
|
"rec->e_cpos = %u + rec->e_clusters = %u = %u\n"
|
||||||
|
"em->em_clusters = %u + new_clusters = %u = %u\n",
|
||||||
|
OCFS2_I(inode)->ip_blkno,
|
||||||
|
le32_to_cpu(rec->e_cpos), le32_to_cpu(rec->e_clusters),
|
||||||
|
le32_to_cpu(rec->e_cpos) + le32_to_cpu(rec->e_clusters),
|
||||||
|
em->em_clusters, new_clusters,
|
||||||
|
em->em_clusters + new_clusters);
|
||||||
|
|
||||||
|
em->em_clusters += new_clusters;
|
||||||
|
|
||||||
|
ret = -ENOENT;
|
||||||
|
if (le32_to_cpu(rec->e_clusters) > new_clusters) {
|
||||||
|
/* This is a contiguous append */
|
||||||
|
ent = ocfs2_extent_map_lookup(em, le32_to_cpu(rec->e_cpos), 1,
|
||||||
|
NULL, NULL);
|
||||||
|
if (ent) {
|
||||||
|
old = &ent->e_rec;
|
||||||
|
BUG_ON((le32_to_cpu(rec->e_cpos) +
|
||||||
|
le32_to_cpu(rec->e_clusters)) !=
|
||||||
|
(le32_to_cpu(old->e_cpos) +
|
||||||
|
le32_to_cpu(old->e_clusters) +
|
||||||
|
new_clusters));
|
||||||
|
if (ent->e_tree_depth == 0) {
|
||||||
|
BUG_ON(le32_to_cpu(old->e_cpos) !=
|
||||||
|
le32_to_cpu(rec->e_cpos));
|
||||||
|
BUG_ON(le64_to_cpu(old->e_blkno) !=
|
||||||
|
le64_to_cpu(rec->e_blkno));
|
||||||
|
ret = 0;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* Let non-leafs fall through as -ENOENT to
|
||||||
|
* force insertion of the new leaf.
|
||||||
|
*/
|
||||||
|
le32_add_cpu(&old->e_clusters, new_clusters);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ret == -ENOENT)
|
||||||
|
ret = ocfs2_extent_map_insert(inode, rec, 0);
|
||||||
|
if (ret < 0)
|
||||||
|
mlog_errno(ret);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
/* Code here is included but defined out as it completes the extent
|
||||||
|
* map api and may be used in the future. */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Look up the record containing this cluster offset. This record is
|
||||||
|
* part of the extent map. Do not free it. Any changes you make to
|
||||||
|
* it will reflect in the extent map. So, if your last extent
|
||||||
|
* is (cpos = 10, clusters = 10) and you truncate the file by 5
|
||||||
|
* clusters, you can do:
|
||||||
|
*
|
||||||
|
* ret = ocfs2_extent_map_get_rec(em, orig_size - 5, &rec);
|
||||||
|
* rec->e_clusters -= 5;
|
||||||
|
*
|
||||||
|
* The lookup does not read from disk. If the map isn't filled in for
|
||||||
|
* an entry, you won't find it.
|
||||||
|
*
|
||||||
|
* Also note that the returned record is valid until alloc_sem is
|
||||||
|
* dropped. After that, truncate and extend can happen. Caveat Emptor.
|
||||||
|
*/
|
||||||
|
int ocfs2_extent_map_get_rec(struct inode *inode, u32 cpos,
|
||||||
|
struct ocfs2_extent_rec **rec,
|
||||||
|
int *tree_depth)
|
||||||
|
{
|
||||||
|
int ret = -ENOENT;
|
||||||
|
struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map;
|
||||||
|
struct ocfs2_extent_map_entry *ent;
|
||||||
|
|
||||||
|
*rec = NULL;
|
||||||
|
|
||||||
|
if (cpos >= OCFS2_I(inode)->ip_clusters)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
if (cpos >= em->em_clusters) {
|
||||||
|
/*
|
||||||
|
* Size changed underneath us on disk. Drop any
|
||||||
|
* straddling records and update our idea of
|
||||||
|
* i_clusters
|
||||||
|
*/
|
||||||
|
ocfs2_extent_map_drop(inode, em->em_clusters - 1);
|
||||||
|
em->em_clusters = OCFS2_I(inode)->ip_clusters ;
|
||||||
|
}
|
||||||
|
|
||||||
|
ent = ocfs2_extent_map_lookup(&OCFS2_I(inode)->ip_map, cpos, 1,
|
||||||
|
NULL, NULL);
|
||||||
|
|
||||||
|
if (ent) {
|
||||||
|
*rec = &ent->e_rec;
|
||||||
|
if (tree_depth)
|
||||||
|
*tree_depth = ent->e_tree_depth;
|
||||||
|
ret = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
int ocfs2_extent_map_get_clusters(struct inode *inode,
|
||||||
|
u32 v_cpos, int count,
|
||||||
|
u32 *p_cpos, int *ret_count)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
u32 coff, ccount;
|
||||||
|
struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map;
|
||||||
|
struct ocfs2_extent_map_entry *ent = NULL;
|
||||||
|
|
||||||
|
*p_cpos = ccount = 0;
|
||||||
|
|
||||||
|
if ((v_cpos + count) > OCFS2_I(inode)->ip_clusters)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
if ((v_cpos + count) > em->em_clusters) {
|
||||||
|
/*
|
||||||
|
* Size changed underneath us on disk. Drop any
|
||||||
|
* straddling records and update our idea of
|
||||||
|
* i_clusters
|
||||||
|
*/
|
||||||
|
ocfs2_extent_map_drop(inode, em->em_clusters - 1);
|
||||||
|
em->em_clusters = OCFS2_I(inode)->ip_clusters;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
ret = ocfs2_extent_map_lookup_read(inode, v_cpos, count, &ent);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
if (ent) {
|
||||||
|
/* We should never find ourselves straddling an interval */
|
||||||
|
if (!ocfs2_extent_rec_contains_clusters(&ent->e_rec,
|
||||||
|
v_cpos,
|
||||||
|
count))
|
||||||
|
return -ESRCH;
|
||||||
|
|
||||||
|
coff = v_cpos - le32_to_cpu(ent->e_rec.e_cpos);
|
||||||
|
*p_cpos = ocfs2_blocks_to_clusters(inode->i_sb,
|
||||||
|
le64_to_cpu(ent->e_rec.e_blkno)) +
|
||||||
|
coff;
|
||||||
|
|
||||||
|
if (ret_count)
|
||||||
|
*ret_count = le32_to_cpu(ent->e_rec.e_clusters) - coff;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
return -ENOENT;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* 0 */
|
||||||
|
|
||||||
|
int ocfs2_extent_map_get_blocks(struct inode *inode,
|
||||||
|
u64 v_blkno, int count,
|
||||||
|
u64 *p_blkno, int *ret_count)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
u64 boff;
|
||||||
|
u32 cpos, clusters;
|
||||||
|
int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
|
||||||
|
struct ocfs2_extent_map_entry *ent = NULL;
|
||||||
|
struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map;
|
||||||
|
struct ocfs2_extent_rec *rec;
|
||||||
|
|
||||||
|
*p_blkno = 0;
|
||||||
|
|
||||||
|
cpos = ocfs2_blocks_to_clusters(inode->i_sb, v_blkno);
|
||||||
|
clusters = ocfs2_blocks_to_clusters(inode->i_sb,
|
||||||
|
(u64)count + bpc - 1);
|
||||||
|
if ((cpos + clusters) > OCFS2_I(inode)->ip_clusters) {
|
||||||
|
ret = -EINVAL;
|
||||||
|
mlog_errno(ret);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((cpos + clusters) > em->em_clusters) {
|
||||||
|
/*
|
||||||
|
* Size changed underneath us on disk. Drop any
|
||||||
|
* straddling records and update our idea of
|
||||||
|
* i_clusters
|
||||||
|
*/
|
||||||
|
ocfs2_extent_map_drop(inode, em->em_clusters - 1);
|
||||||
|
em->em_clusters = OCFS2_I(inode)->ip_clusters;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = ocfs2_extent_map_lookup_read(inode, cpos, clusters, &ent);
|
||||||
|
if (ret) {
|
||||||
|
mlog_errno(ret);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ent)
|
||||||
|
{
|
||||||
|
rec = &ent->e_rec;
|
||||||
|
|
||||||
|
/* We should never find ourselves straddling an interval */
|
||||||
|
if (!ocfs2_extent_rec_contains_clusters(rec, cpos, clusters)) {
|
||||||
|
ret = -ESRCH;
|
||||||
|
mlog_errno(ret);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
boff = ocfs2_clusters_to_blocks(inode->i_sb, cpos -
|
||||||
|
le32_to_cpu(rec->e_cpos));
|
||||||
|
boff += (v_blkno & (u64)(bpc - 1));
|
||||||
|
*p_blkno = le64_to_cpu(rec->e_blkno) + boff;
|
||||||
|
|
||||||
|
if (ret_count) {
|
||||||
|
*ret_count = ocfs2_clusters_to_blocks(inode->i_sb,
|
||||||
|
le32_to_cpu(rec->e_clusters)) - boff;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return -ENOENT;
|
||||||
|
}
|
||||||
|
|
||||||
|
int ocfs2_extent_map_init(struct inode *inode)
|
||||||
|
{
|
||||||
|
struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map;
|
||||||
|
|
||||||
|
em->em_extents = RB_ROOT;
|
||||||
|
em->em_clusters = 0;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Needs the lock */
|
||||||
|
static void __ocfs2_extent_map_drop(struct inode *inode,
|
||||||
|
u32 new_clusters,
|
||||||
|
struct rb_node **free_head,
|
||||||
|
struct ocfs2_extent_map_entry **tail_ent)
|
||||||
|
{
|
||||||
|
struct rb_node *node, *next;
|
||||||
|
struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map;
|
||||||
|
struct ocfs2_extent_map_entry *ent;
|
||||||
|
|
||||||
|
*free_head = NULL;
|
||||||
|
|
||||||
|
ent = NULL;
|
||||||
|
node = rb_last(&em->em_extents);
|
||||||
|
while (node)
|
||||||
|
{
|
||||||
|
next = rb_prev(node);
|
||||||
|
|
||||||
|
ent = rb_entry(node, struct ocfs2_extent_map_entry,
|
||||||
|
e_node);
|
||||||
|
if (le32_to_cpu(ent->e_rec.e_cpos) < new_clusters)
|
||||||
|
break;
|
||||||
|
|
||||||
|
rb_erase(&ent->e_node, &em->em_extents);
|
||||||
|
|
||||||
|
node->rb_right = *free_head;
|
||||||
|
*free_head = node;
|
||||||
|
|
||||||
|
ent = NULL;
|
||||||
|
node = next;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Do we have an entry straddling new_clusters? */
|
||||||
|
if (tail_ent) {
|
||||||
|
if (ent &&
|
||||||
|
((le32_to_cpu(ent->e_rec.e_cpos) +
|
||||||
|
le32_to_cpu(ent->e_rec.e_clusters)) > new_clusters))
|
||||||
|
*tail_ent = ent;
|
||||||
|
else
|
||||||
|
*tail_ent = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __ocfs2_extent_map_drop_cleanup(struct rb_node *free_head)
|
||||||
|
{
|
||||||
|
struct rb_node *node;
|
||||||
|
struct ocfs2_extent_map_entry *ent;
|
||||||
|
|
||||||
|
while (free_head) {
|
||||||
|
node = free_head;
|
||||||
|
free_head = node->rb_right;
|
||||||
|
|
||||||
|
ent = rb_entry(node, struct ocfs2_extent_map_entry,
|
||||||
|
e_node);
|
||||||
|
kmem_cache_free(ocfs2_em_ent_cachep, ent);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Remove all entries past new_clusters, inclusive of an entry that
|
||||||
|
* contains new_clusters. This is effectively a cache forget.
|
||||||
|
*
|
||||||
|
* If you want to also clip the last extent by some number of clusters,
|
||||||
|
* you need to call ocfs2_extent_map_trunc().
|
||||||
|
* This code does not check or modify ip_clusters.
|
||||||
|
*/
|
||||||
|
int ocfs2_extent_map_drop(struct inode *inode, u32 new_clusters)
|
||||||
|
{
|
||||||
|
struct rb_node *free_head = NULL;
|
||||||
|
struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map;
|
||||||
|
struct ocfs2_extent_map_entry *ent;
|
||||||
|
|
||||||
|
spin_lock(&OCFS2_I(inode)->ip_lock);
|
||||||
|
|
||||||
|
__ocfs2_extent_map_drop(inode, new_clusters, &free_head, &ent);
|
||||||
|
|
||||||
|
if (ent) {
|
||||||
|
rb_erase(&ent->e_node, &em->em_extents);
|
||||||
|
ent->e_node.rb_right = free_head;
|
||||||
|
free_head = &ent->e_node;
|
||||||
|
}
|
||||||
|
|
||||||
|
spin_unlock(&OCFS2_I(inode)->ip_lock);
|
||||||
|
|
||||||
|
if (free_head)
|
||||||
|
__ocfs2_extent_map_drop_cleanup(free_head);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Remove all entries past new_clusters and also clip any extent
|
||||||
|
* straddling new_clusters, if there is one. This does not check
|
||||||
|
* or modify ip_clusters
|
||||||
|
*/
|
||||||
|
int ocfs2_extent_map_trunc(struct inode *inode, u32 new_clusters)
|
||||||
|
{
|
||||||
|
struct rb_node *free_head = NULL;
|
||||||
|
struct ocfs2_extent_map_entry *ent = NULL;
|
||||||
|
|
||||||
|
spin_lock(&OCFS2_I(inode)->ip_lock);
|
||||||
|
|
||||||
|
__ocfs2_extent_map_drop(inode, new_clusters, &free_head, &ent);
|
||||||
|
|
||||||
|
if (ent)
|
||||||
|
ent->e_rec.e_clusters = cpu_to_le32(new_clusters -
|
||||||
|
le32_to_cpu(ent->e_rec.e_cpos));
|
||||||
|
|
||||||
|
OCFS2_I(inode)->ip_map.em_clusters = new_clusters;
|
||||||
|
|
||||||
|
spin_unlock(&OCFS2_I(inode)->ip_lock);
|
||||||
|
|
||||||
|
if (free_head)
|
||||||
|
__ocfs2_extent_map_drop_cleanup(free_head);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Create the slab cache that extent map entries are allocated from.
 * Returns 0 on success or -ENOMEM if the cache could not be created.
 */
int __init init_ocfs2_extent_maps(void)
{
	ocfs2_em_ent_cachep = kmem_cache_create("ocfs2_em_ent",
				sizeof(struct ocfs2_extent_map_entry),
				0, SLAB_HWCACHE_ALIGN, NULL, NULL);

	return ocfs2_em_ent_cachep ? 0 : -ENOMEM;
}
|
||||||
|
|
||||||
|
/* Destroy the slab cache set up by init_ocfs2_extent_maps(). */
void __exit exit_ocfs2_extent_maps(void)
{
	kmem_cache_destroy(ocfs2_em_ent_cachep);
}
|
|
@ -0,0 +1,46 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* extent_map.h
|
||||||
|
*
|
||||||
|
* In-memory file extent mappings for OCFS2.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2004 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License, version 2, as published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _EXTENT_MAP_H
|
||||||
|
#define _EXTENT_MAP_H
|
||||||
|
|
||||||
|
int init_ocfs2_extent_maps(void);
|
||||||
|
void exit_ocfs2_extent_maps(void);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* EVERY CALL here except _init, _trunc, and _drop expects alloc_sem
|
||||||
|
* to be held. The allocation cannot change at all while the map is
|
||||||
|
* in the process of being updated.
|
||||||
|
*/
|
||||||
|
int ocfs2_extent_map_init(struct inode *inode);
|
||||||
|
int ocfs2_extent_map_append(struct inode *inode,
|
||||||
|
struct ocfs2_extent_rec *rec,
|
||||||
|
u32 new_clusters);
|
||||||
|
int ocfs2_extent_map_get_blocks(struct inode *inode,
|
||||||
|
u64 v_blkno, int count,
|
||||||
|
u64 *p_blkno, int *ret_count);
|
||||||
|
int ocfs2_extent_map_drop(struct inode *inode, u32 new_clusters);
|
||||||
|
int ocfs2_extent_map_trunc(struct inode *inode, u32 new_clusters);
|
||||||
|
|
||||||
|
#endif /* _EXTENT_MAP_H */
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,57 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* file.h
|
||||||
|
*
|
||||||
|
* Function prototypes
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef OCFS2_FILE_H
|
||||||
|
#define OCFS2_FILE_H
|
||||||
|
|
||||||
|
extern struct file_operations ocfs2_fops;
|
||||||
|
extern struct file_operations ocfs2_dops;
|
||||||
|
extern struct inode_operations ocfs2_file_iops;
|
||||||
|
extern struct inode_operations ocfs2_special_file_iops;
|
||||||
|
struct ocfs2_alloc_context;
|
||||||
|
|
||||||
|
/*
 * Passed by pointer as the "reason" argument of
 * ocfs2_do_extend_allocation().
 * NOTE(review): the names suggest "no restart needed", "restart the
 * transaction" and "restart after reserving more metadata" -- confirm
 * against the implementation.
 */
enum ocfs2_alloc_restarted {
	RESTART_NONE	= 0,
	RESTART_TRANS	= 1,
	RESTART_META	= 2
};
|
||||||
|
int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
|
||||||
|
struct inode *inode,
|
||||||
|
u32 clusters_to_add,
|
||||||
|
struct buffer_head *fe_bh,
|
||||||
|
struct ocfs2_journal_handle *handle,
|
||||||
|
struct ocfs2_alloc_context *data_ac,
|
||||||
|
struct ocfs2_alloc_context *meta_ac,
|
||||||
|
enum ocfs2_alloc_restarted *reason);
|
||||||
|
int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
|
||||||
|
int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
|
||||||
|
struct kstat *stat);
|
||||||
|
|
||||||
|
int ocfs2_set_inode_size(struct ocfs2_journal_handle *handle,
|
||||||
|
struct inode *inode,
|
||||||
|
struct buffer_head *fe_bh,
|
||||||
|
u64 new_i_size);
|
||||||
|
|
||||||
|
#endif /* OCFS2_FILE_H */
|
|
@ -0,0 +1,378 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* heartbeat.c
|
||||||
|
*
|
||||||
|
* Register ourselves with the heartbeat service, keep our node maps
|
||||||
|
* up to date, and fire off recovery when needed.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/fs.h>
|
||||||
|
#include <linux/types.h>
|
||||||
|
#include <linux/slab.h>
|
||||||
|
#include <linux/highmem.h>
|
||||||
|
#include <linux/kmod.h>
|
||||||
|
|
||||||
|
#include <cluster/heartbeat.h>
|
||||||
|
#include <cluster/nodemanager.h>
|
||||||
|
|
||||||
|
#include <dlm/dlmapi.h>
|
||||||
|
|
||||||
|
#define MLOG_MASK_PREFIX ML_SUPER
|
||||||
|
#include <cluster/masklog.h>
|
||||||
|
|
||||||
|
#include "ocfs2.h"
|
||||||
|
|
||||||
|
#include "alloc.h"
|
||||||
|
#include "heartbeat.h"
|
||||||
|
#include "inode.h"
|
||||||
|
#include "journal.h"
|
||||||
|
#include "vote.h"
|
||||||
|
|
||||||
|
#include "buffer_head_io.h"
|
||||||
|
|
||||||
|
#define OCFS2_HB_NODE_DOWN_PRI (0x0000002)
|
||||||
|
#define OCFS2_HB_NODE_UP_PRI OCFS2_HB_NODE_DOWN_PRI
|
||||||
|
|
||||||
|
static inline void __ocfs2_node_map_set_bit(struct ocfs2_node_map *map,
|
||||||
|
int bit);
|
||||||
|
static inline void __ocfs2_node_map_clear_bit(struct ocfs2_node_map *map,
|
||||||
|
int bit);
|
||||||
|
static inline int __ocfs2_node_map_is_empty(struct ocfs2_node_map *map);
|
||||||
|
static void __ocfs2_node_map_dup(struct ocfs2_node_map *target,
|
||||||
|
struct ocfs2_node_map *from);
|
||||||
|
static void __ocfs2_node_map_set(struct ocfs2_node_map *target,
|
||||||
|
struct ocfs2_node_map *from);
|
||||||
|
|
||||||
|
void ocfs2_init_node_maps(struct ocfs2_super *osb)
|
||||||
|
{
|
||||||
|
spin_lock_init(&osb->node_map_lock);
|
||||||
|
ocfs2_node_map_init(&osb->mounted_map);
|
||||||
|
ocfs2_node_map_init(&osb->recovery_map);
|
||||||
|
ocfs2_node_map_init(&osb->umount_map);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ocfs2_do_node_down(int node_num,
|
||||||
|
struct ocfs2_super *osb)
|
||||||
|
{
|
||||||
|
BUG_ON(osb->node_num == node_num);
|
||||||
|
|
||||||
|
mlog(0, "ocfs2: node down event for %d\n", node_num);
|
||||||
|
|
||||||
|
if (!osb->dlm) {
|
||||||
|
/*
|
||||||
|
* No DLM means we're not even ready to participate yet.
|
||||||
|
* We check the slots after the DLM comes up, so we will
|
||||||
|
* notice the node death then. We can safely ignore it
|
||||||
|
* here.
|
||||||
|
*/
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ocfs2_node_map_test_bit(osb, &osb->umount_map, node_num)) {
|
||||||
|
/* If a node is in the umount map, then we've been
|
||||||
|
* expecting him to go down and we know ahead of time
|
||||||
|
* that recovery is not necessary. */
|
||||||
|
ocfs2_node_map_clear_bit(osb, &osb->umount_map, node_num);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
ocfs2_recovery_thread(osb, node_num);
|
||||||
|
|
||||||
|
ocfs2_remove_node_from_vote_queues(osb, node_num);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Heartbeat node-down callback. 'data' is the ocfs2_super that was
 * supplied when the callback was set up; 'node' is unused here.
 */
static void ocfs2_hb_node_down_cb(struct o2nm_node *node,
				  int node_num,
				  void *data)
{
	struct ocfs2_super *osb = (struct ocfs2_super *) data;

	ocfs2_do_node_down(node_num, osb);
}
|
||||||
|
|
||||||
|
/* Called from the dlm when it's about to evict a node. We may also
|
||||||
|
* get a heartbeat callback later. */
|
||||||
|
static void ocfs2_dlm_eviction_cb(int node_num,
|
||||||
|
void *data)
|
||||||
|
{
|
||||||
|
struct ocfs2_super *osb = (struct ocfs2_super *) data;
|
||||||
|
struct super_block *sb = osb->sb;
|
||||||
|
|
||||||
|
mlog(ML_NOTICE, "device (%u,%u): dlm has evicted node %d\n",
|
||||||
|
MAJOR(sb->s_dev), MINOR(sb->s_dev), node_num);
|
||||||
|
|
||||||
|
ocfs2_do_node_down(node_num, osb);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ocfs2_hb_node_up_cb(struct o2nm_node *node,
|
||||||
|
int node_num,
|
||||||
|
void *data)
|
||||||
|
{
|
||||||
|
struct ocfs2_super *osb = data;
|
||||||
|
|
||||||
|
BUG_ON(osb->node_num == node_num);
|
||||||
|
|
||||||
|
mlog(0, "node up event for %d\n", node_num);
|
||||||
|
ocfs2_node_map_clear_bit(osb, &osb->umount_map, node_num);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ocfs2_setup_hb_callbacks(struct ocfs2_super *osb)
|
||||||
|
{
|
||||||
|
o2hb_setup_callback(&osb->osb_hb_down, O2HB_NODE_DOWN_CB,
|
||||||
|
ocfs2_hb_node_down_cb, osb,
|
||||||
|
OCFS2_HB_NODE_DOWN_PRI);
|
||||||
|
|
||||||
|
o2hb_setup_callback(&osb->osb_hb_up, O2HB_NODE_UP_CB,
|
||||||
|
ocfs2_hb_node_up_cb, osb, OCFS2_HB_NODE_UP_PRI);
|
||||||
|
|
||||||
|
/* Not exactly a heartbeat callback, but leads to essentially
|
||||||
|
* the same path so we set it up here. */
|
||||||
|
dlm_setup_eviction_cb(&osb->osb_eviction_cb,
|
||||||
|
ocfs2_dlm_eviction_cb,
|
||||||
|
osb);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Most functions here are just stubs for now... */
|
||||||
|
int ocfs2_register_hb_callbacks(struct ocfs2_super *osb)
|
||||||
|
{
|
||||||
|
int status;
|
||||||
|
|
||||||
|
status = o2hb_register_callback(&osb->osb_hb_down);
|
||||||
|
if (status < 0) {
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
status = o2hb_register_callback(&osb->osb_hb_up);
|
||||||
|
if (status < 0)
|
||||||
|
mlog_errno(status);
|
||||||
|
|
||||||
|
bail:
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ocfs2_clear_hb_callbacks(struct ocfs2_super *osb)
|
||||||
|
{
|
||||||
|
int status;
|
||||||
|
|
||||||
|
status = o2hb_unregister_callback(&osb->osb_hb_down);
|
||||||
|
if (status < 0)
|
||||||
|
mlog_errno(status);
|
||||||
|
|
||||||
|
status = o2hb_unregister_callback(&osb->osb_hb_up);
|
||||||
|
if (status < 0)
|
||||||
|
mlog_errno(status);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ocfs2_stop_heartbeat(struct ocfs2_super *osb)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
char *argv[5], *envp[3];
|
||||||
|
|
||||||
|
if (!osb->uuid_str) {
|
||||||
|
/* This can happen if we don't get far enough in mount... */
|
||||||
|
mlog(0, "No UUID with which to stop heartbeat!\n\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
argv[0] = (char *)o2nm_get_hb_ctl_path();
|
||||||
|
argv[1] = "-K";
|
||||||
|
argv[2] = "-u";
|
||||||
|
argv[3] = osb->uuid_str;
|
||||||
|
argv[4] = NULL;
|
||||||
|
|
||||||
|
mlog(0, "Run: %s %s %s %s\n", argv[0], argv[1], argv[2], argv[3]);
|
||||||
|
|
||||||
|
/* minimal command environment taken from cpu_run_sbin_hotplug */
|
||||||
|
envp[0] = "HOME=/";
|
||||||
|
envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
|
||||||
|
envp[2] = NULL;
|
||||||
|
|
||||||
|
ret = call_usermodehelper(argv[0], argv, envp, 1);
|
||||||
|
if (ret < 0)
|
||||||
|
mlog_errno(ret);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* special case -1 for now
|
||||||
|
* TODO: should *really* make sure the calling func never passes -1!! */
|
||||||
|
void ocfs2_node_map_init(struct ocfs2_node_map *map)
|
||||||
|
{
|
||||||
|
map->num_nodes = OCFS2_NODE_MAP_MAX_NODES;
|
||||||
|
memset(map->map, 0, BITS_TO_LONGS(OCFS2_NODE_MAP_MAX_NODES) *
|
||||||
|
sizeof(unsigned long));
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void __ocfs2_node_map_set_bit(struct ocfs2_node_map *map,
|
||||||
|
int bit)
|
||||||
|
{
|
||||||
|
set_bit(bit, map->map);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ocfs2_node_map_set_bit(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_node_map *map,
|
||||||
|
int bit)
|
||||||
|
{
|
||||||
|
if (bit==-1)
|
||||||
|
return;
|
||||||
|
BUG_ON(bit >= map->num_nodes);
|
||||||
|
spin_lock(&osb->node_map_lock);
|
||||||
|
__ocfs2_node_map_set_bit(map, bit);
|
||||||
|
spin_unlock(&osb->node_map_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void __ocfs2_node_map_clear_bit(struct ocfs2_node_map *map,
|
||||||
|
int bit)
|
||||||
|
{
|
||||||
|
clear_bit(bit, map->map);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ocfs2_node_map_clear_bit(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_node_map *map,
|
||||||
|
int bit)
|
||||||
|
{
|
||||||
|
if (bit==-1)
|
||||||
|
return;
|
||||||
|
BUG_ON(bit >= map->num_nodes);
|
||||||
|
spin_lock(&osb->node_map_lock);
|
||||||
|
__ocfs2_node_map_clear_bit(map, bit);
|
||||||
|
spin_unlock(&osb->node_map_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
int ocfs2_node_map_test_bit(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_node_map *map,
|
||||||
|
int bit)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
if (bit >= map->num_nodes) {
|
||||||
|
mlog(ML_ERROR, "bit=%d map->num_nodes=%d\n", bit, map->num_nodes);
|
||||||
|
BUG();
|
||||||
|
}
|
||||||
|
spin_lock(&osb->node_map_lock);
|
||||||
|
ret = test_bit(bit, map->map);
|
||||||
|
spin_unlock(&osb->node_map_lock);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int __ocfs2_node_map_is_empty(struct ocfs2_node_map *map)
|
||||||
|
{
|
||||||
|
int bit;
|
||||||
|
bit = find_next_bit(map->map, map->num_nodes, 0);
|
||||||
|
if (bit < map->num_nodes)
|
||||||
|
return 0;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int ocfs2_node_map_is_empty(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_node_map *map)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
BUG_ON(map->num_nodes == 0);
|
||||||
|
spin_lock(&osb->node_map_lock);
|
||||||
|
ret = __ocfs2_node_map_is_empty(map);
|
||||||
|
spin_unlock(&osb->node_map_lock);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __ocfs2_node_map_dup(struct ocfs2_node_map *target,
|
||||||
|
struct ocfs2_node_map *from)
|
||||||
|
{
|
||||||
|
BUG_ON(from->num_nodes == 0);
|
||||||
|
ocfs2_node_map_init(target);
|
||||||
|
__ocfs2_node_map_set(target, from);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* returns 1 if bit is the only bit set in target, 0 otherwise */
|
||||||
|
int ocfs2_node_map_is_only(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_node_map *target,
|
||||||
|
int bit)
|
||||||
|
{
|
||||||
|
struct ocfs2_node_map temp;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
spin_lock(&osb->node_map_lock);
|
||||||
|
__ocfs2_node_map_dup(&temp, target);
|
||||||
|
__ocfs2_node_map_clear_bit(&temp, bit);
|
||||||
|
ret = __ocfs2_node_map_is_empty(&temp);
|
||||||
|
spin_unlock(&osb->node_map_lock);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __ocfs2_node_map_set(struct ocfs2_node_map *target,
|
||||||
|
struct ocfs2_node_map *from)
|
||||||
|
{
|
||||||
|
int num_longs, i;
|
||||||
|
|
||||||
|
BUG_ON(target->num_nodes != from->num_nodes);
|
||||||
|
BUG_ON(target->num_nodes == 0);
|
||||||
|
|
||||||
|
num_longs = BITS_TO_LONGS(target->num_nodes);
|
||||||
|
for (i = 0; i < num_longs; i++)
|
||||||
|
target->map[i] = from->map[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Returns whether the recovery bit was actually set - it may not be
|
||||||
|
* if a node is still marked as needing recovery */
|
||||||
|
int ocfs2_recovery_map_set(struct ocfs2_super *osb,
|
||||||
|
int num)
|
||||||
|
{
|
||||||
|
int set = 0;
|
||||||
|
|
||||||
|
spin_lock(&osb->node_map_lock);
|
||||||
|
|
||||||
|
__ocfs2_node_map_clear_bit(&osb->mounted_map, num);
|
||||||
|
|
||||||
|
if (!test_bit(num, osb->recovery_map.map)) {
|
||||||
|
__ocfs2_node_map_set_bit(&osb->recovery_map, num);
|
||||||
|
set = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
spin_unlock(&osb->node_map_lock);
|
||||||
|
|
||||||
|
return set;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ocfs2_recovery_map_clear(struct ocfs2_super *osb,
|
||||||
|
int num)
|
||||||
|
{
|
||||||
|
ocfs2_node_map_clear_bit(osb, &osb->recovery_map, num);
|
||||||
|
}
|
||||||
|
|
||||||
|
int ocfs2_node_map_iterate(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_node_map *map,
|
||||||
|
int idx)
|
||||||
|
{
|
||||||
|
int i = idx;
|
||||||
|
|
||||||
|
idx = O2NM_INVALID_NODE_NUM;
|
||||||
|
spin_lock(&osb->node_map_lock);
|
||||||
|
if ((i != O2NM_INVALID_NODE_NUM) &&
|
||||||
|
(i >= 0) &&
|
||||||
|
(i < map->num_nodes)) {
|
||||||
|
while(i < map->num_nodes) {
|
||||||
|
if (test_bit(i, map->map)) {
|
||||||
|
idx = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
spin_unlock(&osb->node_map_lock);
|
||||||
|
return idx;
|
||||||
|
}
|
|
@ -0,0 +1,67 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* heartbeat.h
|
||||||
|
*
|
||||||
|
* Function prototypes
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef OCFS2_HEARTBEAT_H
|
||||||
|
#define OCFS2_HEARTBEAT_H
|
||||||
|
|
||||||
|
void ocfs2_init_node_maps(struct ocfs2_super *osb);
|
||||||
|
|
||||||
|
void ocfs2_setup_hb_callbacks(struct ocfs2_super *osb);
|
||||||
|
int ocfs2_register_hb_callbacks(struct ocfs2_super *osb);
|
||||||
|
void ocfs2_clear_hb_callbacks(struct ocfs2_super *osb);
|
||||||
|
void ocfs2_stop_heartbeat(struct ocfs2_super *osb);
|
||||||
|
|
||||||
|
/* node map functions - used to keep track of mounted and in-recovery
|
||||||
|
* nodes. */
|
||||||
|
void ocfs2_node_map_init(struct ocfs2_node_map *map);
|
||||||
|
int ocfs2_node_map_is_empty(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_node_map *map);
|
||||||
|
void ocfs2_node_map_set_bit(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_node_map *map,
|
||||||
|
int bit);
|
||||||
|
void ocfs2_node_map_clear_bit(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_node_map *map,
|
||||||
|
int bit);
|
||||||
|
int ocfs2_node_map_test_bit(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_node_map *map,
|
||||||
|
int bit);
|
||||||
|
int ocfs2_node_map_iterate(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_node_map *map,
|
||||||
|
int idx);
|
||||||
|
/*
 * Return the lowest set bit in 'map', i.e. the result of iterating
 * from index 0 with ocfs2_node_map_iterate().
 */
static inline int ocfs2_node_map_first_set_bit(struct ocfs2_super *osb,
					       struct ocfs2_node_map *map)
{
	const int start = 0;

	return ocfs2_node_map_iterate(osb, map, start);
}
|
||||||
|
int ocfs2_recovery_map_set(struct ocfs2_super *osb,
|
||||||
|
int num);
|
||||||
|
void ocfs2_recovery_map_clear(struct ocfs2_super *osb,
|
||||||
|
int num);
|
||||||
|
/* returns 1 if bit is the only bit set in target, 0 otherwise */
|
||||||
|
int ocfs2_node_map_is_only(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_node_map *target,
|
||||||
|
int bit);
|
||||||
|
|
||||||
|
#endif /* OCFS2_HEARTBEAT_H */
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,145 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* inode.h
|
||||||
|
*
|
||||||
|
* Function prototypes
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef OCFS2_INODE_H
|
||||||
|
#define OCFS2_INODE_H
|
||||||
|
|
||||||
|
/* OCFS2 Inode Private Data */
|
||||||
|
struct ocfs2_inode_info
|
||||||
|
{
|
||||||
|
u64 ip_blkno;
|
||||||
|
|
||||||
|
struct ocfs2_lock_res ip_rw_lockres;
|
||||||
|
struct ocfs2_lock_res ip_meta_lockres;
|
||||||
|
struct ocfs2_lock_res ip_data_lockres;
|
||||||
|
|
||||||
|
/* protects allocation changes on this inode. */
|
||||||
|
struct rw_semaphore ip_alloc_sem;
|
||||||
|
|
||||||
|
/* These fields are protected by ip_lock */
|
||||||
|
spinlock_t ip_lock;
|
||||||
|
u32 ip_open_count;
|
||||||
|
u32 ip_clusters;
|
||||||
|
struct ocfs2_extent_map ip_map;
|
||||||
|
struct list_head ip_io_markers;
|
||||||
|
int ip_orphaned_slot;
|
||||||
|
|
||||||
|
struct semaphore ip_io_sem;
|
||||||
|
|
||||||
|
/* Used by the journalling code to attach an inode to a
|
||||||
|
* handle. These are protected by ip_io_sem in order to lock
|
||||||
|
* out other I/O to the inode until we either commit or
|
||||||
|
* abort. */
|
||||||
|
struct list_head ip_handle_list;
|
||||||
|
struct ocfs2_journal_handle *ip_handle;
|
||||||
|
|
||||||
|
u32 ip_flags; /* see below */
|
||||||
|
|
||||||
|
/* protected by recovery_lock. */
|
||||||
|
struct inode *ip_next_orphan;
|
||||||
|
|
||||||
|
u32 ip_dir_start_lookup;
|
||||||
|
|
||||||
|
/* next two are protected by trans_inc_lock */
|
||||||
|
/* which transaction were we created on? Zero if none. */
|
||||||
|
unsigned long ip_created_trans;
|
||||||
|
/* last transaction we were a part of. */
|
||||||
|
unsigned long ip_last_trans;
|
||||||
|
|
||||||
|
struct ocfs2_caching_info ip_metadata_cache;
|
||||||
|
|
||||||
|
struct inode vfs_inode;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Flags for the ip_flags field
|
||||||
|
*/
|
||||||
|
/* System file inodes */
|
||||||
|
#define OCFS2_INODE_SYSTEM_FILE 0x00000001
|
||||||
|
#define OCFS2_INODE_JOURNAL 0x00000002
|
||||||
|
#define OCFS2_INODE_BITMAP 0x00000004
|
||||||
|
/* This inode has been wiped from disk */
|
||||||
|
#define OCFS2_INODE_DELETED 0x00000008
|
||||||
|
/* Another node is deleting, so our delete is a nop */
|
||||||
|
#define OCFS2_INODE_SKIP_DELETE 0x00000010
|
||||||
|
/* Has the inode been orphaned on another node?
|
||||||
|
*
|
||||||
|
* This hints to ocfs2_drop_inode that it should clear i_nlink before
|
||||||
|
* continuing.
|
||||||
|
*
|
||||||
|
* We *only* set this on unlink vote from another node. If the inode
|
||||||
|
* was locally orphaned, then we're sure of the state and don't need
|
||||||
|
* to twiddle i_nlink later - it's either zero or not depending on
|
||||||
|
* whether our unlink succeeded. Otherwise we got this from a node
|
||||||
|
* whose intention was to orphan the inode, however he may have
|
||||||
|
* crashed, failed etc, so we let ocfs2_drop_inode zero the value and
|
||||||
|
* rely on ocfs2_delete_inode to sort things out under the proper
|
||||||
|
* cluster locks.
|
||||||
|
*/
|
||||||
|
#define OCFS2_INODE_MAYBE_ORPHANED 0x00000020
|
||||||
|
/* Does someone have the file open O_DIRECT */
|
||||||
|
#define OCFS2_INODE_OPEN_DIRECT 0x00000040
|
||||||
|
/* Indicates that the metadata cache should be used as an array. */
|
||||||
|
#define OCFS2_INODE_CACHE_INLINE 0x00000080
|
||||||
|
|
||||||
|
static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode)
|
||||||
|
{
|
||||||
|
return container_of(inode, struct ocfs2_inode_info, vfs_inode);
|
||||||
|
}
|
||||||
|
|
||||||
|
#define INODE_JOURNAL(i) (OCFS2_I(i)->ip_flags & OCFS2_INODE_JOURNAL)
|
||||||
|
#define SET_INODE_JOURNAL(i) (OCFS2_I(i)->ip_flags |= OCFS2_INODE_JOURNAL)
|
||||||
|
|
||||||
|
extern kmem_cache_t *ocfs2_inode_cache;
|
||||||
|
|
||||||
|
extern struct address_space_operations ocfs2_aops;
|
||||||
|
|
||||||
|
struct buffer_head *ocfs2_bread(struct inode *inode, int block,
|
||||||
|
int *err, int reada);
|
||||||
|
void ocfs2_clear_inode(struct inode *inode);
|
||||||
|
void ocfs2_delete_inode(struct inode *inode);
|
||||||
|
void ocfs2_drop_inode(struct inode *inode);
|
||||||
|
struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff);
|
||||||
|
struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb,
|
||||||
|
u64 blkno,
|
||||||
|
int delete_vote);
|
||||||
|
int ocfs2_inode_init_private(struct inode *inode);
|
||||||
|
int ocfs2_inode_revalidate(struct dentry *dentry);
|
||||||
|
int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
|
||||||
|
int create_ino);
|
||||||
|
void ocfs2_read_inode(struct inode *inode);
|
||||||
|
void ocfs2_read_inode2(struct inode *inode, void *opaque);
|
||||||
|
ssize_t ocfs2_rw_direct(int rw, struct file *filp, char *buf,
|
||||||
|
size_t size, loff_t *offp);
|
||||||
|
void ocfs2_sync_blockdev(struct super_block *sb);
|
||||||
|
void ocfs2_refresh_inode(struct inode *inode,
|
||||||
|
struct ocfs2_dinode *fe);
|
||||||
|
int ocfs2_mark_inode_dirty(struct ocfs2_journal_handle *handle,
|
||||||
|
struct inode *inode,
|
||||||
|
struct buffer_head *bh);
|
||||||
|
int ocfs2_aio_read(struct file *file, struct kiocb *req, struct iocb *iocb);
|
||||||
|
int ocfs2_aio_write(struct file *file, struct kiocb *req, struct iocb *iocb);
|
||||||
|
|
||||||
|
#endif /* OCFS2_INODE_H */
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,457 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* journal.h
|
||||||
|
*
|
||||||
|
* Defines journalling api and structures.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2003, 2005 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef OCFS2_JOURNAL_H
|
||||||
|
#define OCFS2_JOURNAL_H
|
||||||
|
|
||||||
|
#include <linux/fs.h>
|
||||||
|
#include <linux/jbd.h>
|
||||||
|
|
||||||
|
#define OCFS2_CHECKPOINT_INTERVAL (8 * HZ)
|
||||||
|
|
||||||
|
/* Lifecycle states of an ocfs2 journal; OCFS2_JOURNAL_FREE is zero. */
enum ocfs2_journal_state {
	OCFS2_JOURNAL_FREE = 0,
	OCFS2_JOURNAL_LOADED,
	OCFS2_JOURNAL_IN_SHUTDOWN,
};
|
||||||
|
|
||||||
|
struct ocfs2_super;
|
||||||
|
struct ocfs2_dinode;
|
||||||
|
struct ocfs2_journal_handle;
|
||||||
|
|
||||||
|
struct ocfs2_journal {
|
||||||
|
enum ocfs2_journal_state j_state; /* Journals current state */
|
||||||
|
|
||||||
|
journal_t *j_journal; /* The kernels journal type */
|
||||||
|
struct inode *j_inode; /* Kernel inode pointing to
|
||||||
|
* this journal */
|
||||||
|
struct ocfs2_super *j_osb; /* pointer to the super
|
||||||
|
* block for the node
|
||||||
|
* we're currently
|
||||||
|
* running on -- not
|
||||||
|
* necessarily the super
|
||||||
|
* block from the node
|
||||||
|
* which we usually run
|
||||||
|
* from (recovery,
|
||||||
|
* etc) */
|
||||||
|
struct buffer_head *j_bh; /* Journal disk inode block */
|
||||||
|
atomic_t j_num_trans; /* Number of transactions
|
||||||
|
* currently in the system. */
|
||||||
|
unsigned long j_trans_id;
|
||||||
|
struct rw_semaphore j_trans_barrier;
|
||||||
|
wait_queue_head_t j_checkpointed;
|
||||||
|
|
||||||
|
spinlock_t j_lock;
|
||||||
|
struct list_head j_la_cleanups;
|
||||||
|
struct work_struct j_recovery_work;
|
||||||
|
};
|
||||||
|
|
||||||
|
extern spinlock_t trans_inc_lock;
|
||||||
|
|
||||||
|
/* wrap j_trans_id so we never have it equal to zero. */
|
||||||
|
static inline unsigned long ocfs2_inc_trans_id(struct ocfs2_journal *j)
|
||||||
|
{
|
||||||
|
unsigned long old_id;
|
||||||
|
spin_lock(&trans_inc_lock);
|
||||||
|
old_id = j->j_trans_id++;
|
||||||
|
if (unlikely(!j->j_trans_id))
|
||||||
|
j->j_trans_id = 1;
|
||||||
|
spin_unlock(&trans_inc_lock);
|
||||||
|
return old_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void ocfs2_set_inode_lock_trans(struct ocfs2_journal *journal,
|
||||||
|
struct inode *inode)
|
||||||
|
{
|
||||||
|
spin_lock(&trans_inc_lock);
|
||||||
|
OCFS2_I(inode)->ip_last_trans = journal->j_trans_id;
|
||||||
|
spin_unlock(&trans_inc_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Used to figure out whether it's safe to drop a metadata lock on an
|
||||||
|
* inode. Returns true if all the inodes changes have been
|
||||||
|
* checkpointed to disk. You should be holding the spinlock on the
|
||||||
|
* metadata lock while calling this to be sure that nobody can take
|
||||||
|
* the lock and put it on another transaction. */
|
||||||
|
static inline int ocfs2_inode_fully_checkpointed(struct inode *inode)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
struct ocfs2_journal *journal = OCFS2_SB(inode->i_sb)->journal;
|
||||||
|
|
||||||
|
spin_lock(&trans_inc_lock);
|
||||||
|
ret = time_after(journal->j_trans_id, OCFS2_I(inode)->ip_last_trans);
|
||||||
|
spin_unlock(&trans_inc_lock);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* convenience function to check if an inode is still new (has never
|
||||||
|
* hit disk) Will do you a favor and set created_trans = 0 when you've
|
||||||
|
* been checkpointed. returns '1' if the inode is still new. */
|
||||||
|
static inline int ocfs2_inode_is_new(struct inode *inode)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
/* System files are never "new" as they're written out by
|
||||||
|
* mkfs. This helps us early during mount, before we have the
|
||||||
|
* journal open and j_trans_id could be junk. */
|
||||||
|
if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE)
|
||||||
|
return 0;
|
||||||
|
spin_lock(&trans_inc_lock);
|
||||||
|
ret = !(time_after(OCFS2_SB(inode->i_sb)->journal->j_trans_id,
|
||||||
|
OCFS2_I(inode)->ip_created_trans));
|
||||||
|
if (!ret)
|
||||||
|
OCFS2_I(inode)->ip_created_trans = 0;
|
||||||
|
spin_unlock(&trans_inc_lock);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void ocfs2_inode_set_new(struct ocfs2_super *osb,
|
||||||
|
struct inode *inode)
|
||||||
|
{
|
||||||
|
spin_lock(&trans_inc_lock);
|
||||||
|
OCFS2_I(inode)->ip_created_trans = osb->journal->j_trans_id;
|
||||||
|
spin_unlock(&trans_inc_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
extern kmem_cache_t *ocfs2_lock_cache;
|
||||||
|
|
||||||
|
struct ocfs2_journal_lock {
|
||||||
|
struct inode *jl_inode;
|
||||||
|
struct list_head jl_lock_list;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ocfs2_journal_handle {
|
||||||
|
handle_t *k_handle; /* kernel handle. */
|
||||||
|
struct ocfs2_journal *journal;
|
||||||
|
u32 flags; /* see flags below. */
|
||||||
|
int max_buffs; /* Buffs reserved by this handle */
|
||||||
|
|
||||||
|
/* The following two fields are for ocfs2_handle_add_lock */
|
||||||
|
int num_locks;
|
||||||
|
struct list_head locks; /* A bunch of locks to
|
||||||
|
* release on commit. This
|
||||||
|
* should be a list_head */
|
||||||
|
|
||||||
|
struct list_head inode_list;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define OCFS2_HANDLE_STARTED 1
|
||||||
|
/* should we sync-commit this handle? */
|
||||||
|
#define OCFS2_HANDLE_SYNC 2
|
||||||
|
static inline int ocfs2_handle_started(struct ocfs2_journal_handle *handle)
|
||||||
|
{
|
||||||
|
return handle->flags & OCFS2_HANDLE_STARTED;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void ocfs2_handle_set_sync(struct ocfs2_journal_handle *handle, int sync)
|
||||||
|
{
|
||||||
|
if (sync)
|
||||||
|
handle->flags |= OCFS2_HANDLE_SYNC;
|
||||||
|
else
|
||||||
|
handle->flags &= ~OCFS2_HANDLE_SYNC;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Exported only for the journal struct init code in super.c. Do not call. */
|
||||||
|
void ocfs2_complete_recovery(void *data);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Journal Control:
|
||||||
|
* Initialize, Load, Shutdown, Wipe a journal.
|
||||||
|
*
|
||||||
|
* ocfs2_journal_init - Initialize journal structures in the OSB.
|
||||||
|
* ocfs2_journal_load - Load the given journal off disk. Replay it if
|
||||||
|
* there's transactions still in there.
|
||||||
|
* ocfs2_journal_shutdown - Shutdown a journal, this will flush all
|
||||||
|
* uncommitted, uncheckpointed transactions.
|
||||||
|
* ocfs2_journal_wipe - Wipe transactions from a journal. Optionally
|
||||||
|
* zero out each block.
|
||||||
|
* ocfs2_recovery_thread - Perform recovery on a node. osb is our own osb.
|
||||||
|
* ocfs2_mark_dead_nodes - Start recovery on nodes we won't get a heartbeat
|
||||||
|
* event on.
|
||||||
|
* ocfs2_start_checkpoint - Kick the commit thread to do a checkpoint.
|
||||||
|
*/
|
||||||
|
void ocfs2_set_journal_params(struct ocfs2_super *osb);
|
||||||
|
int ocfs2_journal_init(struct ocfs2_journal *journal,
|
||||||
|
int *dirty);
|
||||||
|
void ocfs2_journal_shutdown(struct ocfs2_super *osb);
|
||||||
|
int ocfs2_journal_wipe(struct ocfs2_journal *journal,
|
||||||
|
int full);
|
||||||
|
int ocfs2_journal_load(struct ocfs2_journal *journal);
|
||||||
|
int ocfs2_check_journals_nolocks(struct ocfs2_super *osb);
|
||||||
|
void ocfs2_recovery_thread(struct ocfs2_super *osb,
|
||||||
|
int node_num);
|
||||||
|
int ocfs2_mark_dead_nodes(struct ocfs2_super *osb);
|
||||||
|
void ocfs2_complete_mount_recovery(struct ocfs2_super *osb);
|
||||||
|
|
||||||
|
static inline void ocfs2_start_checkpoint(struct ocfs2_super *osb)
|
||||||
|
{
|
||||||
|
atomic_set(&osb->needs_checkpoint, 1);
|
||||||
|
wake_up(&osb->checkpoint_event);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void ocfs2_checkpoint_inode(struct inode *inode)
|
||||||
|
{
|
||||||
|
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
|
||||||
|
|
||||||
|
if (!ocfs2_inode_fully_checkpointed(inode)) {
|
||||||
|
/* WARNING: This only kicks off a single
|
||||||
|
* checkpoint. If someone races you and adds more
|
||||||
|
* metadata to the journal, you won't know, and will
|
||||||
|
* wind up waiting *alot* longer than necessary. Right
|
||||||
|
* now we only use this in clear_inode so that's
|
||||||
|
* OK. */
|
||||||
|
ocfs2_start_checkpoint(osb);
|
||||||
|
|
||||||
|
wait_event(osb->journal->j_checkpointed,
|
||||||
|
ocfs2_inode_fully_checkpointed(inode));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Transaction Handling:
|
||||||
|
* Manage the lifetime of a transaction handle.
|
||||||
|
*
|
||||||
|
* ocfs2_alloc_handle - Only allocate a handle so we can start putting
|
||||||
|
* cluster locks on it. To actually change blocks,
|
||||||
|
* call ocfs2_start_trans with the handle returned
|
||||||
|
* from this function. You may call ocfs2_commit_trans
|
||||||
|
* at any time in the lifetime of a handle.
|
||||||
|
* ocfs2_start_trans - Begin a transaction. Give it an upper estimate of
|
||||||
|
* the number of blocks that will be changed during
|
||||||
|
* this handle.
|
||||||
|
* ocfs2_commit_trans - Complete a handle.
|
||||||
|
* ocfs2_extend_trans - Extend a handle by nblocks credits. This may
|
||||||
|
* commit the handle to disk in the process, but will
|
||||||
|
* not release any locks taken during the transaction.
|
||||||
|
* ocfs2_journal_access - Notify the handle that we want to journal this
|
||||||
|
* buffer. Will have to call ocfs2_journal_dirty once
|
||||||
|
* we've actually dirtied it. Type is one of the OCFS2_JOURNAL_ACCESS_* values (CREATE, WRITE or UNDO).
|
||||||
|
* ocfs2_journal_dirty - Mark a journalled buffer as having dirty data.
|
||||||
|
* ocfs2_journal_dirty_data - Indicate that a data buffer should go out before
|
||||||
|
* the current handle commits.
|
||||||
|
* ocfs2_handle_add_lock - Sometimes we need to delay lock release
|
||||||
|
* until after a transaction has been completed. Use
|
||||||
|
* ocfs2_handle_add_lock to indicate that a lock needs
|
||||||
|
* to be released at the end of that handle. Locks
|
||||||
|
* will be released in the order that they are added.
|
||||||
|
* ocfs2_handle_add_inode - Add a locked inode to a transaction.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* You must always start_trans with a number of buffs > 0, but it's
|
||||||
|
* perfectly legal to go through an entire transaction without having
|
||||||
|
* dirtied any buffers. */
|
||||||
|
struct ocfs2_journal_handle *ocfs2_alloc_handle(struct ocfs2_super *osb);
|
||||||
|
struct ocfs2_journal_handle *ocfs2_start_trans(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_journal_handle *handle,
|
||||||
|
int max_buffs);
|
||||||
|
void ocfs2_commit_trans(struct ocfs2_journal_handle *handle);
|
||||||
|
int ocfs2_extend_trans(struct ocfs2_journal_handle *handle,
|
||||||
|
int nblocks);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Create access is for when we get a newly created buffer and we're
|
||||||
|
* not gonna read it off disk, but rather fill it ourselves. Right
|
||||||
|
* now, we don't do anything special with this (it turns into a write
|
||||||
|
* request), but this is a good placeholder in case we do...
|
||||||
|
*
|
||||||
|
* Write access is for when we read a block off disk and are going to
|
||||||
|
* modify it. This way the journalling layer knows it may need to make
|
||||||
|
* a copy of that block (if it's part of another, uncommitted
|
||||||
|
* transaction) before we do so.
|
||||||
|
*/
|
||||||
|
#define OCFS2_JOURNAL_ACCESS_CREATE 0
|
||||||
|
#define OCFS2_JOURNAL_ACCESS_WRITE 1
|
||||||
|
#define OCFS2_JOURNAL_ACCESS_UNDO 2
|
||||||
|
|
||||||
|
int ocfs2_journal_access(struct ocfs2_journal_handle *handle,
|
||||||
|
struct inode *inode,
|
||||||
|
struct buffer_head *bh,
|
||||||
|
int type);
|
||||||
|
/*
|
||||||
|
* A word about the journal_access/journal_dirty "dance". It is
|
||||||
|
* entirely legal to journal_access a buffer more than once (as long
|
||||||
|
* as the access type is the same -- I'm not sure what will happen if
|
||||||
|
* access type is different but this should never happen anyway) It is
|
||||||
|
* also legal to journal_dirty a buffer more than once. In fact, you
|
||||||
|
* can even journal_access a buffer after you've done a
|
||||||
|
* journal_access/journal_dirty pair. The only thing you cannot do
|
||||||
|
* however, is journal_dirty a buffer which you haven't yet passed to
|
||||||
|
* journal_access at least once.
|
||||||
|
*
|
||||||
|
* That said, 99% of the time this doesn't matter and this is what the
|
||||||
|
* path looks like:
|
||||||
|
*
|
||||||
|
* <read a bh>
|
||||||
|
* ocfs2_journal_access(handle, inode, bh, OCFS2_JOURNAL_ACCESS_WRITE);
|
||||||
|
* <modify the bh>
|
||||||
|
* ocfs2_journal_dirty(handle, bh);
|
||||||
|
*/
|
||||||
|
int ocfs2_journal_dirty(struct ocfs2_journal_handle *handle,
|
||||||
|
struct buffer_head *bh);
|
||||||
|
int ocfs2_journal_dirty_data(handle_t *handle,
|
||||||
|
struct buffer_head *bh);
|
||||||
|
int ocfs2_handle_add_lock(struct ocfs2_journal_handle *handle,
|
||||||
|
struct inode *inode);
|
||||||
|
/*
|
||||||
|
* Use this to protect from other processes reading buffer state while
|
||||||
|
* it's in flight.
|
||||||
|
*/
|
||||||
|
void ocfs2_handle_add_inode(struct ocfs2_journal_handle *handle,
|
||||||
|
struct inode *inode);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Credit Macros:
|
||||||
|
* Convenience macros to calculate number of credits needed.
|
||||||
|
*
|
||||||
|
* For convenience sake, I have a set of macros here which calculate
|
||||||
|
* the *maximum* number of sectors which will be changed for various
|
||||||
|
* metadata updates.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* simple file updates like chmod, etc. */
|
||||||
|
#define OCFS2_INODE_UPDATE_CREDITS 1
|
||||||
|
|
||||||
|
/* get one bit out of a suballocator: dinode + group descriptor +
|
||||||
|
* prev. group desc. if we relink. */
|
||||||
|
#define OCFS2_SUBALLOC_ALLOC (3)
|
||||||
|
|
||||||
|
/* dinode + group descriptor update. We don't relink on free yet. */
|
||||||
|
#define OCFS2_SUBALLOC_FREE (2)
|
||||||
|
|
||||||
|
#define OCFS2_TRUNCATE_LOG_UPDATE OCFS2_INODE_UPDATE_CREDITS
|
||||||
|
#define OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC (OCFS2_SUBALLOC_FREE \
|
||||||
|
+ OCFS2_TRUNCATE_LOG_UPDATE)
|
||||||
|
|
||||||
|
/* data block for new dir/symlink, 2 for bitmap updates (bitmap fe +
|
||||||
|
* bitmap block for the new bit) */
|
||||||
|
#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2)
|
||||||
|
|
||||||
|
/* parent fe, parent block, new file entry, inode alloc fe, inode alloc
|
||||||
|
* group descriptor + mkdir/symlink blocks */
|
||||||
|
#define OCFS2_MKNOD_CREDITS (3 + OCFS2_SUBALLOC_ALLOC \
|
||||||
|
+ OCFS2_DIR_LINK_ADDITIONAL_CREDITS)
|
||||||
|
|
||||||
|
/* local alloc metadata change + main bitmap updates */
|
||||||
|
#define OCFS2_WINDOW_MOVE_CREDITS (OCFS2_INODE_UPDATE_CREDITS \
|
||||||
|
+ OCFS2_SUBALLOC_ALLOC + OCFS2_SUBALLOC_FREE)
|
||||||
|
|
||||||
|
/* used when we don't need an allocation change for a dir extend. One
|
||||||
|
* for the dinode, one for the new block. */
|
||||||
|
#define OCFS2_SIMPLE_DIR_EXTEND_CREDITS (2)
|
||||||
|
|
||||||
|
/* file update (nlink, etc) + dir entry block */
|
||||||
|
#define OCFS2_LINK_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
|
||||||
|
|
||||||
|
/* inode + dir inode (if we unlink a dir), + dir entry block + orphan
|
||||||
|
* dir inode link */
|
||||||
|
#define OCFS2_UNLINK_CREDITS (2 * OCFS2_INODE_UPDATE_CREDITS + 1 \
|
||||||
|
+ OCFS2_LINK_CREDITS)
|
||||||
|
|
||||||
|
/* dinode + orphan dir dinode + inode alloc dinode + orphan dir entry +
|
||||||
|
* inode alloc group descriptor */
|
||||||
|
#define OCFS2_DELETE_INODE_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 1 + 1)
|
||||||
|
|
||||||
|
/* dinode update, old dir dinode update, new dir dinode update, old
|
||||||
|
* dir dir entry, new dir dir entry, dir entry update for renaming
|
||||||
|
* directory + target unlink */
|
||||||
|
#define OCFS2_RENAME_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 3 \
|
||||||
|
+ OCFS2_UNLINK_CREDITS)
|
||||||
|
|
||||||
|
static inline int ocfs2_calc_extend_credits(struct super_block *sb,
|
||||||
|
struct ocfs2_dinode *fe,
|
||||||
|
u32 bits_wanted)
|
||||||
|
{
|
||||||
|
int bitmap_blocks, sysfile_bitmap_blocks, dinode_blocks;
|
||||||
|
|
||||||
|
/* bitmap dinode, group desc. + relinked group. */
|
||||||
|
bitmap_blocks = OCFS2_SUBALLOC_ALLOC;
|
||||||
|
|
||||||
|
/* we might need to shift tree depth so lets assume an
|
||||||
|
* absolute worst case of complete fragmentation. Even with
|
||||||
|
* that, we only need one update for the dinode, and then
|
||||||
|
* however many metadata chunks needed * a remaining suballoc
|
||||||
|
* alloc. */
|
||||||
|
sysfile_bitmap_blocks = 1 +
|
||||||
|
(OCFS2_SUBALLOC_ALLOC - 1) * ocfs2_extend_meta_needed(fe);
|
||||||
|
|
||||||
|
/* this does not include *new* metadata blocks, which are
|
||||||
|
* accounted for in sysfile_bitmap_blocks. fe +
|
||||||
|
* prev. last_eb_blk + blocks along edge of tree.
|
||||||
|
* calc_symlink_credits passes because we just need 1
|
||||||
|
* credit for the dinode there. */
|
||||||
|
dinode_blocks = 1 + 1 + le16_to_cpu(fe->id2.i_list.l_tree_depth);
|
||||||
|
|
||||||
|
return bitmap_blocks + sysfile_bitmap_blocks + dinode_blocks;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int ocfs2_calc_symlink_credits(struct super_block *sb)
|
||||||
|
{
|
||||||
|
int blocks = OCFS2_MKNOD_CREDITS;
|
||||||
|
|
||||||
|
/* links can be longer than one block so we may update many
|
||||||
|
* within our single allocated extent. */
|
||||||
|
blocks += ocfs2_clusters_to_blocks(sb, 1);
|
||||||
|
|
||||||
|
return blocks;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int ocfs2_calc_group_alloc_credits(struct super_block *sb,
|
||||||
|
unsigned int cpg)
|
||||||
|
{
|
||||||
|
int blocks;
|
||||||
|
int bitmap_blocks = OCFS2_SUBALLOC_ALLOC + 1;
|
||||||
|
/* parent inode update + new block group header + bitmap inode update
|
||||||
|
+ bitmap blocks affected */
|
||||||
|
blocks = 1 + 1 + 1 + bitmap_blocks;
|
||||||
|
return blocks;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int ocfs2_calc_tree_trunc_credits(struct super_block *sb,
|
||||||
|
unsigned int clusters_to_del,
|
||||||
|
struct ocfs2_dinode *fe,
|
||||||
|
struct ocfs2_extent_list *last_el)
|
||||||
|
{
|
||||||
|
/* for dinode + all headers in this pass + update to next leaf */
|
||||||
|
u16 next_free = le16_to_cpu(last_el->l_next_free_rec);
|
||||||
|
u16 tree_depth = le16_to_cpu(fe->id2.i_list.l_tree_depth);
|
||||||
|
int credits = 1 + tree_depth + 1;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
i = next_free - 1;
|
||||||
|
BUG_ON(i < 0);
|
||||||
|
|
||||||
|
/* We may be deleting metadata blocks, so metadata alloc dinode +
|
||||||
|
one desc. block for each possible delete. */
|
||||||
|
if (tree_depth && next_free == 1 &&
|
||||||
|
le32_to_cpu(last_el->l_recs[i].e_clusters) == clusters_to_del)
|
||||||
|
credits += 1 + tree_depth;
|
||||||
|
|
||||||
|
/* update to the truncate log. */
|
||||||
|
credits += OCFS2_TRUNCATE_LOG_UPDATE;
|
||||||
|
|
||||||
|
return credits;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* OCFS2_JOURNAL_H */
|
|
@ -0,0 +1,983 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* localalloc.c
|
||||||
|
*
|
||||||
|
* Node local data allocation
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/fs.h>
|
||||||
|
#include <linux/types.h>
|
||||||
|
#include <linux/slab.h>
|
||||||
|
#include <linux/highmem.h>
|
||||||
|
#include <linux/bitops.h>
|
||||||
|
|
||||||
|
#define MLOG_MASK_PREFIX ML_DISK_ALLOC
|
||||||
|
#include <cluster/masklog.h>
|
||||||
|
|
||||||
|
#include "ocfs2.h"
|
||||||
|
|
||||||
|
#include "alloc.h"
|
||||||
|
#include "dlmglue.h"
|
||||||
|
#include "inode.h"
|
||||||
|
#include "journal.h"
|
||||||
|
#include "localalloc.h"
|
||||||
|
#include "suballoc.h"
|
||||||
|
#include "super.h"
|
||||||
|
#include "sysfile.h"
|
||||||
|
|
||||||
|
#include "buffer_head_io.h"
|
||||||
|
|
||||||
|
#define OCFS2_LOCAL_ALLOC(dinode) (&((dinode)->id2.i_lab))
|
||||||
|
|
||||||
|
static inline int ocfs2_local_alloc_window_bits(struct ocfs2_super *osb);
|
||||||
|
|
||||||
|
static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc);
|
||||||
|
|
||||||
|
static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_dinode *alloc,
|
||||||
|
u32 numbits);
|
||||||
|
|
||||||
|
static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc);
|
||||||
|
|
||||||
|
static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_journal_handle *handle,
|
||||||
|
struct ocfs2_dinode *alloc,
|
||||||
|
struct inode *main_bm_inode,
|
||||||
|
struct buffer_head *main_bm_bh);
|
||||||
|
|
||||||
|
static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_journal_handle *handle,
|
||||||
|
struct ocfs2_alloc_context **ac,
|
||||||
|
struct inode **bitmap_inode,
|
||||||
|
struct buffer_head **bitmap_bh);
|
||||||
|
|
||||||
|
static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_journal_handle *handle,
|
||||||
|
struct ocfs2_alloc_context *ac);
|
||||||
|
|
||||||
|
static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
|
||||||
|
struct inode *local_alloc_inode);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Determine how large our local alloc window should be, in bits.
|
||||||
|
*
|
||||||
|
* These values (and the behavior in ocfs2_alloc_should_use_local) have
|
||||||
|
* been chosen so that most allocations, including new block groups go
|
||||||
|
* through local alloc.
|
||||||
|
*/
|
||||||
|
static inline int ocfs2_local_alloc_window_bits(struct ocfs2_super *osb)
|
||||||
|
{
|
||||||
|
BUG_ON(osb->s_clustersize_bits < 12);
|
||||||
|
|
||||||
|
return 2048 >> (osb->s_clustersize_bits - 12);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Tell us whether a given allocation should use the local alloc
|
||||||
|
* file. Otherwise, it has to go to the main bitmap.
|
||||||
|
*/
|
||||||
|
int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits)
|
||||||
|
{
|
||||||
|
int la_bits = ocfs2_local_alloc_window_bits(osb);
|
||||||
|
|
||||||
|
if (osb->local_alloc_state != OCFS2_LA_ENABLED)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* la_bits should be at least twice the size (in clusters) of
|
||||||
|
* a new block group. We want to be sure block group
|
||||||
|
* allocations go through the local alloc, so allow an
|
||||||
|
* allocation to take up to half the bitmap. */
|
||||||
|
if (bits > (la_bits / 2))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int ocfs2_load_local_alloc(struct ocfs2_super *osb)
|
||||||
|
{
|
||||||
|
int status = 0;
|
||||||
|
struct ocfs2_dinode *alloc = NULL;
|
||||||
|
struct buffer_head *alloc_bh = NULL;
|
||||||
|
u32 num_used;
|
||||||
|
struct inode *inode = NULL;
|
||||||
|
struct ocfs2_local_alloc *la;
|
||||||
|
|
||||||
|
mlog_entry_void();
|
||||||
|
|
||||||
|
/* read the alloc off disk */
|
||||||
|
inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE,
|
||||||
|
osb->slot_num);
|
||||||
|
if (!inode) {
|
||||||
|
status = -EINVAL;
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno,
|
||||||
|
&alloc_bh, 0, inode);
|
||||||
|
if (status < 0) {
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
alloc = (struct ocfs2_dinode *) alloc_bh->b_data;
|
||||||
|
la = OCFS2_LOCAL_ALLOC(alloc);
|
||||||
|
|
||||||
|
if (!(le32_to_cpu(alloc->i_flags) &
|
||||||
|
(OCFS2_LOCAL_ALLOC_FL|OCFS2_BITMAP_FL))) {
|
||||||
|
mlog(ML_ERROR, "Invalid local alloc inode, %"MLFu64"\n",
|
||||||
|
OCFS2_I(inode)->ip_blkno);
|
||||||
|
status = -EINVAL;
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((la->la_size == 0) ||
|
||||||
|
(le16_to_cpu(la->la_size) > ocfs2_local_alloc_size(inode->i_sb))) {
|
||||||
|
mlog(ML_ERROR, "Local alloc size is invalid (la_size = %u)\n",
|
||||||
|
le16_to_cpu(la->la_size));
|
||||||
|
status = -EINVAL;
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* do a little verification. */
|
||||||
|
num_used = ocfs2_local_alloc_count_bits(alloc);
|
||||||
|
|
||||||
|
/* hopefully the local alloc has always been recovered before
|
||||||
|
* we load it. */
|
||||||
|
if (num_used
|
||||||
|
|| alloc->id1.bitmap1.i_used
|
||||||
|
|| alloc->id1.bitmap1.i_total
|
||||||
|
|| la->la_bm_off)
|
||||||
|
mlog(ML_ERROR, "Local alloc hasn't been recovered!\n"
|
||||||
|
"found = %u, set = %u, taken = %u, off = %u\n",
|
||||||
|
num_used, le32_to_cpu(alloc->id1.bitmap1.i_used),
|
||||||
|
le32_to_cpu(alloc->id1.bitmap1.i_total),
|
||||||
|
OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
|
||||||
|
|
||||||
|
osb->local_alloc_bh = alloc_bh;
|
||||||
|
osb->local_alloc_state = OCFS2_LA_ENABLED;
|
||||||
|
|
||||||
|
bail:
|
||||||
|
if (status < 0)
|
||||||
|
if (alloc_bh)
|
||||||
|
brelse(alloc_bh);
|
||||||
|
if (inode)
|
||||||
|
iput(inode);
|
||||||
|
|
||||||
|
mlog_exit(status);
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* return any unused bits to the bitmap and write out a clean
|
||||||
|
* local_alloc.
|
||||||
|
*
|
||||||
|
* local_alloc_bh is optional. If not passed, we will simply use the
|
||||||
|
* one off osb. If you do pass it however, be warned that it *will* be
|
||||||
|
* returned brelse'd and NULL'd out.*/
|
||||||
|
void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
|
||||||
|
{
|
||||||
|
int status;
|
||||||
|
struct ocfs2_journal_handle *handle = NULL;
|
||||||
|
struct inode *local_alloc_inode = NULL;
|
||||||
|
struct buffer_head *bh = NULL;
|
||||||
|
struct buffer_head *main_bm_bh = NULL;
|
||||||
|
struct inode *main_bm_inode = NULL;
|
||||||
|
struct ocfs2_dinode *alloc_copy = NULL;
|
||||||
|
struct ocfs2_dinode *alloc = NULL;
|
||||||
|
|
||||||
|
mlog_entry_void();
|
||||||
|
|
||||||
|
if (osb->local_alloc_state == OCFS2_LA_UNUSED)
|
||||||
|
goto bail;
|
||||||
|
|
||||||
|
local_alloc_inode =
|
||||||
|
ocfs2_get_system_file_inode(osb,
|
||||||
|
LOCAL_ALLOC_SYSTEM_INODE,
|
||||||
|
osb->slot_num);
|
||||||
|
if (!local_alloc_inode) {
|
||||||
|
status = -ENOENT;
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
osb->local_alloc_state = OCFS2_LA_DISABLED;
|
||||||
|
|
||||||
|
handle = ocfs2_alloc_handle(osb);
|
||||||
|
if (!handle) {
|
||||||
|
status = -ENOMEM;
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
main_bm_inode = ocfs2_get_system_file_inode(osb,
|
||||||
|
GLOBAL_BITMAP_SYSTEM_INODE,
|
||||||
|
OCFS2_INVALID_SLOT);
|
||||||
|
if (!main_bm_inode) {
|
||||||
|
status = -EINVAL;
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
ocfs2_handle_add_inode(handle, main_bm_inode);
|
||||||
|
status = ocfs2_meta_lock(main_bm_inode, handle, &main_bm_bh, 1);
|
||||||
|
if (status < 0) {
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* WINDOW_MOVE_CREDITS is a bit heavy... */
|
||||||
|
handle = ocfs2_start_trans(osb, handle, OCFS2_WINDOW_MOVE_CREDITS);
|
||||||
|
if (IS_ERR(handle)) {
|
||||||
|
mlog_errno(PTR_ERR(handle));
|
||||||
|
handle = NULL;
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
bh = osb->local_alloc_bh;
|
||||||
|
alloc = (struct ocfs2_dinode *) bh->b_data;
|
||||||
|
|
||||||
|
alloc_copy = kmalloc(bh->b_size, GFP_KERNEL);
|
||||||
|
if (!alloc_copy) {
|
||||||
|
status = -ENOMEM;
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
memcpy(alloc_copy, alloc, bh->b_size);
|
||||||
|
|
||||||
|
status = ocfs2_journal_access(handle, local_alloc_inode, bh,
|
||||||
|
OCFS2_JOURNAL_ACCESS_WRITE);
|
||||||
|
if (status < 0) {
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
ocfs2_clear_local_alloc(alloc);
|
||||||
|
|
||||||
|
status = ocfs2_journal_dirty(handle, bh);
|
||||||
|
if (status < 0) {
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
brelse(bh);
|
||||||
|
osb->local_alloc_bh = NULL;
|
||||||
|
osb->local_alloc_state = OCFS2_LA_UNUSED;
|
||||||
|
|
||||||
|
status = ocfs2_sync_local_to_main(osb, handle, alloc_copy,
|
||||||
|
main_bm_inode, main_bm_bh);
|
||||||
|
if (status < 0)
|
||||||
|
mlog_errno(status);
|
||||||
|
|
||||||
|
bail:
|
||||||
|
if (handle)
|
||||||
|
ocfs2_commit_trans(handle);
|
||||||
|
|
||||||
|
if (main_bm_bh)
|
||||||
|
brelse(main_bm_bh);
|
||||||
|
|
||||||
|
if (main_bm_inode)
|
||||||
|
iput(main_bm_inode);
|
||||||
|
|
||||||
|
if (local_alloc_inode)
|
||||||
|
iput(local_alloc_inode);
|
||||||
|
|
||||||
|
if (alloc_copy)
|
||||||
|
kfree(alloc_copy);
|
||||||
|
|
||||||
|
mlog_exit_void();
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We want to free the bitmap bits outside of any recovery context as
|
||||||
|
* we'll need a cluster lock to do so, but we must clear the local
|
||||||
|
* alloc before giving up the recovered nodes journal. To solve this,
|
||||||
|
* we kmalloc a copy of the local alloc before it's change for the
|
||||||
|
* caller to process with ocfs2_complete_local_alloc_recovery
|
||||||
|
*/
|
||||||
|
int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
|
||||||
|
int slot_num,
|
||||||
|
struct ocfs2_dinode **alloc_copy)
|
||||||
|
{
|
||||||
|
int status = 0;
|
||||||
|
struct buffer_head *alloc_bh = NULL;
|
||||||
|
struct inode *inode = NULL;
|
||||||
|
struct ocfs2_dinode *alloc;
|
||||||
|
|
||||||
|
mlog_entry("(slot_num = %d)\n", slot_num);
|
||||||
|
|
||||||
|
*alloc_copy = NULL;
|
||||||
|
|
||||||
|
inode = ocfs2_get_system_file_inode(osb,
|
||||||
|
LOCAL_ALLOC_SYSTEM_INODE,
|
||||||
|
slot_num);
|
||||||
|
if (!inode) {
|
||||||
|
status = -EINVAL;
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
down(&inode->i_sem);
|
||||||
|
|
||||||
|
status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno,
|
||||||
|
&alloc_bh, 0, inode);
|
||||||
|
if (status < 0) {
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
*alloc_copy = kmalloc(alloc_bh->b_size, GFP_KERNEL);
|
||||||
|
if (!(*alloc_copy)) {
|
||||||
|
status = -ENOMEM;
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
memcpy((*alloc_copy), alloc_bh->b_data, alloc_bh->b_size);
|
||||||
|
|
||||||
|
alloc = (struct ocfs2_dinode *) alloc_bh->b_data;
|
||||||
|
ocfs2_clear_local_alloc(alloc);
|
||||||
|
|
||||||
|
status = ocfs2_write_block(osb, alloc_bh, inode);
|
||||||
|
if (status < 0)
|
||||||
|
mlog_errno(status);
|
||||||
|
|
||||||
|
bail:
|
||||||
|
if ((status < 0) && (*alloc_copy)) {
|
||||||
|
kfree(*alloc_copy);
|
||||||
|
*alloc_copy = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (alloc_bh)
|
||||||
|
brelse(alloc_bh);
|
||||||
|
|
||||||
|
if (inode) {
|
||||||
|
up(&inode->i_sem);
|
||||||
|
iput(inode);
|
||||||
|
}
|
||||||
|
|
||||||
|
mlog_exit(status);
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Step 2: By now, we've completed the journal recovery, we've stamped
|
||||||
|
* a clean local alloc on disk and dropped the node out of the
|
||||||
|
* recovery map. Dlm locks will no longer stall, so lets clear out the
|
||||||
|
* main bitmap.
|
||||||
|
*/
|
||||||
|
int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_dinode *alloc)
|
||||||
|
{
|
||||||
|
int status;
|
||||||
|
struct ocfs2_journal_handle *handle = NULL;
|
||||||
|
struct buffer_head *main_bm_bh = NULL;
|
||||||
|
struct inode *main_bm_inode = NULL;
|
||||||
|
|
||||||
|
mlog_entry_void();
|
||||||
|
|
||||||
|
handle = ocfs2_alloc_handle(osb);
|
||||||
|
if (!handle) {
|
||||||
|
status = -ENOMEM;
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
main_bm_inode = ocfs2_get_system_file_inode(osb,
|
||||||
|
GLOBAL_BITMAP_SYSTEM_INODE,
|
||||||
|
OCFS2_INVALID_SLOT);
|
||||||
|
if (!main_bm_inode) {
|
||||||
|
status = -EINVAL;
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
ocfs2_handle_add_inode(handle, main_bm_inode);
|
||||||
|
status = ocfs2_meta_lock(main_bm_inode, handle, &main_bm_bh, 1);
|
||||||
|
if (status < 0) {
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
handle = ocfs2_start_trans(osb, handle, OCFS2_WINDOW_MOVE_CREDITS);
|
||||||
|
if (IS_ERR(handle)) {
|
||||||
|
status = PTR_ERR(handle);
|
||||||
|
handle = NULL;
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* we want the bitmap change to be recorded on disk asap */
|
||||||
|
ocfs2_handle_set_sync(handle, 1);
|
||||||
|
|
||||||
|
status = ocfs2_sync_local_to_main(osb, handle, alloc,
|
||||||
|
main_bm_inode, main_bm_bh);
|
||||||
|
if (status < 0)
|
||||||
|
mlog_errno(status);
|
||||||
|
|
||||||
|
bail:
|
||||||
|
if (handle)
|
||||||
|
ocfs2_commit_trans(handle);
|
||||||
|
|
||||||
|
if (main_bm_bh)
|
||||||
|
brelse(main_bm_bh);
|
||||||
|
|
||||||
|
if (main_bm_inode)
|
||||||
|
iput(main_bm_inode);
|
||||||
|
|
||||||
|
mlog_exit(status);
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* make sure we've got at least bitswanted contiguous bits in the
|
||||||
|
* local alloc. You lose them when you drop i_sem.
|
||||||
|
*
|
||||||
|
* We will add ourselves to the transaction passed in, but may start
|
||||||
|
* our own in order to shift windows.
|
||||||
|
*/
|
||||||
|
int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_journal_handle *passed_handle,
|
||||||
|
u32 bits_wanted,
|
||||||
|
struct ocfs2_alloc_context *ac)
|
||||||
|
{
|
||||||
|
int status;
|
||||||
|
struct ocfs2_dinode *alloc;
|
||||||
|
struct inode *local_alloc_inode;
|
||||||
|
unsigned int free_bits;
|
||||||
|
|
||||||
|
mlog_entry_void();
|
||||||
|
|
||||||
|
BUG_ON(!passed_handle);
|
||||||
|
BUG_ON(!ac);
|
||||||
|
BUG_ON(passed_handle->flags & OCFS2_HANDLE_STARTED);
|
||||||
|
|
||||||
|
local_alloc_inode =
|
||||||
|
ocfs2_get_system_file_inode(osb,
|
||||||
|
LOCAL_ALLOC_SYSTEM_INODE,
|
||||||
|
osb->slot_num);
|
||||||
|
if (!local_alloc_inode) {
|
||||||
|
status = -ENOENT;
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
ocfs2_handle_add_inode(passed_handle, local_alloc_inode);
|
||||||
|
|
||||||
|
if (osb->local_alloc_state != OCFS2_LA_ENABLED) {
|
||||||
|
status = -ENOSPC;
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (bits_wanted > ocfs2_local_alloc_window_bits(osb)) {
|
||||||
|
mlog(0, "Asking for more than my max window size!\n");
|
||||||
|
status = -ENOSPC;
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
|
||||||
|
|
||||||
|
if (le32_to_cpu(alloc->id1.bitmap1.i_used) !=
|
||||||
|
ocfs2_local_alloc_count_bits(alloc)) {
|
||||||
|
ocfs2_error(osb->sb, "local alloc inode %"MLFu64" says it has "
|
||||||
|
"%u free bits, but a count shows %u",
|
||||||
|
le64_to_cpu(alloc->i_blkno),
|
||||||
|
le32_to_cpu(alloc->id1.bitmap1.i_used),
|
||||||
|
ocfs2_local_alloc_count_bits(alloc));
|
||||||
|
status = -EIO;
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) -
|
||||||
|
le32_to_cpu(alloc->id1.bitmap1.i_used);
|
||||||
|
if (bits_wanted > free_bits) {
|
||||||
|
/* uhoh, window change time. */
|
||||||
|
status =
|
||||||
|
ocfs2_local_alloc_slide_window(osb, local_alloc_inode);
|
||||||
|
if (status < 0) {
|
||||||
|
if (status != -ENOSPC)
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ac->ac_inode = igrab(local_alloc_inode);
|
||||||
|
get_bh(osb->local_alloc_bh);
|
||||||
|
ac->ac_bh = osb->local_alloc_bh;
|
||||||
|
ac->ac_which = OCFS2_AC_USE_LOCAL;
|
||||||
|
status = 0;
|
||||||
|
bail:
|
||||||
|
if (local_alloc_inode)
|
||||||
|
iput(local_alloc_inode);
|
||||||
|
|
||||||
|
mlog_exit(status);
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_journal_handle *handle,
|
||||||
|
struct ocfs2_alloc_context *ac,
|
||||||
|
u32 min_bits,
|
||||||
|
u32 *bit_off,
|
||||||
|
u32 *num_bits)
|
||||||
|
{
|
||||||
|
int status, start;
|
||||||
|
struct inode *local_alloc_inode;
|
||||||
|
u32 bits_wanted;
|
||||||
|
void *bitmap;
|
||||||
|
struct ocfs2_dinode *alloc;
|
||||||
|
struct ocfs2_local_alloc *la;
|
||||||
|
|
||||||
|
mlog_entry_void();
|
||||||
|
BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL);
|
||||||
|
|
||||||
|
bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
|
||||||
|
local_alloc_inode = ac->ac_inode;
|
||||||
|
alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
|
||||||
|
la = OCFS2_LOCAL_ALLOC(alloc);
|
||||||
|
|
||||||
|
start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
|
||||||
|
if (start == -1) {
|
||||||
|
/* TODO: Shouldn't we just BUG here? */
|
||||||
|
status = -ENOSPC;
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
bitmap = la->la_bitmap;
|
||||||
|
*bit_off = le32_to_cpu(la->la_bm_off) + start;
|
||||||
|
/* local alloc is always contiguous by nature -- we never
|
||||||
|
* delete bits from it! */
|
||||||
|
*num_bits = bits_wanted;
|
||||||
|
|
||||||
|
status = ocfs2_journal_access(handle, local_alloc_inode,
|
||||||
|
osb->local_alloc_bh,
|
||||||
|
OCFS2_JOURNAL_ACCESS_WRITE);
|
||||||
|
if (status < 0) {
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
while(bits_wanted--)
|
||||||
|
ocfs2_set_bit(start++, bitmap);
|
||||||
|
|
||||||
|
alloc->id1.bitmap1.i_used = cpu_to_le32(*num_bits +
|
||||||
|
le32_to_cpu(alloc->id1.bitmap1.i_used));
|
||||||
|
|
||||||
|
status = ocfs2_journal_dirty(handle, osb->local_alloc_bh);
|
||||||
|
if (status < 0) {
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
status = 0;
|
||||||
|
bail:
|
||||||
|
mlog_exit(status);
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
u8 *buffer;
|
||||||
|
u32 count = 0;
|
||||||
|
struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
|
||||||
|
|
||||||
|
mlog_entry_void();
|
||||||
|
|
||||||
|
buffer = la->la_bitmap;
|
||||||
|
for (i = 0; i < le16_to_cpu(la->la_size); i++)
|
||||||
|
count += hweight8(buffer[i]);
|
||||||
|
|
||||||
|
mlog_exit(count);
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_dinode *alloc,
|
||||||
|
u32 numbits)
|
||||||
|
{
|
||||||
|
int numfound, bitoff, left, startoff, lastzero;
|
||||||
|
void *bitmap = NULL;
|
||||||
|
|
||||||
|
mlog_entry("(numbits wanted = %u)\n", numbits);
|
||||||
|
|
||||||
|
if (!alloc->id1.bitmap1.i_total) {
|
||||||
|
mlog(0, "No bits in my window!\n");
|
||||||
|
bitoff = -1;
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap;
|
||||||
|
|
||||||
|
numfound = bitoff = startoff = 0;
|
||||||
|
lastzero = -1;
|
||||||
|
left = le32_to_cpu(alloc->id1.bitmap1.i_total);
|
||||||
|
while ((bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff)) != -1) {
|
||||||
|
if (bitoff == left) {
|
||||||
|
/* mlog(0, "bitoff (%d) == left", bitoff); */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
/* mlog(0, "Found a zero: bitoff = %d, startoff = %d, "
|
||||||
|
"numfound = %d\n", bitoff, startoff, numfound);*/
|
||||||
|
|
||||||
|
/* Ok, we found a zero bit... is it contig. or do we
|
||||||
|
* start over?*/
|
||||||
|
if (bitoff == startoff) {
|
||||||
|
/* we found a zero */
|
||||||
|
numfound++;
|
||||||
|
startoff++;
|
||||||
|
} else {
|
||||||
|
/* got a zero after some ones */
|
||||||
|
numfound = 1;
|
||||||
|
startoff = bitoff+1;
|
||||||
|
}
|
||||||
|
/* we got everything we needed */
|
||||||
|
if (numfound == numbits) {
|
||||||
|
/* mlog(0, "Found it all!\n"); */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
mlog(0, "Exiting loop, bitoff = %d, numfound = %d\n", bitoff,
|
||||||
|
numfound);
|
||||||
|
|
||||||
|
if (numfound == numbits)
|
||||||
|
bitoff = startoff - numfound;
|
||||||
|
else
|
||||||
|
bitoff = -1;
|
||||||
|
|
||||||
|
bail:
|
||||||
|
mlog_exit(bitoff);
|
||||||
|
return bitoff;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc)
|
||||||
|
{
|
||||||
|
struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
|
||||||
|
int i;
|
||||||
|
mlog_entry_void();
|
||||||
|
|
||||||
|
alloc->id1.bitmap1.i_total = 0;
|
||||||
|
alloc->id1.bitmap1.i_used = 0;
|
||||||
|
la->la_bm_off = 0;
|
||||||
|
for(i = 0; i < le16_to_cpu(la->la_size); i++)
|
||||||
|
la->la_bitmap[i] = 0;
|
||||||
|
|
||||||
|
mlog_exit_void();
|
||||||
|
}
|
||||||
|
|
||||||
|
#if 0
/*
 * Debugging aid for window shifts (compiled out): BUG if any of the
 * 'count' bits starting at 'start' is set. Turn this on and uncomment
 * the call sites to use it.
 */
static void ocfs2_verify_zero_bits(unsigned long *bitmap,
				   unsigned int start,
				   unsigned int count)
{
	unsigned int idx;

	/* walk the range from its last bit down to its first */
	for (idx = count; idx-- > 0; ) {
		if (!ocfs2_test_bit(start + idx, bitmap))
			continue;

		printk("ocfs2_verify_zero_bits: start = %u, count = "
		       "%u\n", start, count);
		printk("ocfs2_verify_zero_bits: bit %u is set!",
		       start + idx);
		BUG();
	}
}
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* sync the local alloc to main bitmap.
|
||||||
|
*
|
||||||
|
* assumes you've already locked the main bitmap -- the bitmap inode
|
||||||
|
* passed is used for caching.
|
||||||
|
*/
|
||||||
|
static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_journal_handle *handle,
|
||||||
|
struct ocfs2_dinode *alloc,
|
||||||
|
struct inode *main_bm_inode,
|
||||||
|
struct buffer_head *main_bm_bh)
|
||||||
|
{
|
||||||
|
int status = 0;
|
||||||
|
int bit_off, left, count, start;
|
||||||
|
u64 la_start_blk;
|
||||||
|
u64 blkno;
|
||||||
|
void *bitmap;
|
||||||
|
struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
|
||||||
|
|
||||||
|
mlog_entry("total = %u, COUNT = %u, used = %u\n",
|
||||||
|
le32_to_cpu(alloc->id1.bitmap1.i_total),
|
||||||
|
ocfs2_local_alloc_count_bits(alloc),
|
||||||
|
le32_to_cpu(alloc->id1.bitmap1.i_used));
|
||||||
|
|
||||||
|
if (!alloc->id1.bitmap1.i_total) {
|
||||||
|
mlog(0, "nothing to sync!\n");
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (le32_to_cpu(alloc->id1.bitmap1.i_used) ==
|
||||||
|
le32_to_cpu(alloc->id1.bitmap1.i_total)) {
|
||||||
|
mlog(0, "all bits were taken!\n");
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
la_start_blk = ocfs2_clusters_to_blocks(osb->sb,
|
||||||
|
le32_to_cpu(la->la_bm_off));
|
||||||
|
bitmap = la->la_bitmap;
|
||||||
|
start = count = bit_off = 0;
|
||||||
|
left = le32_to_cpu(alloc->id1.bitmap1.i_total);
|
||||||
|
|
||||||
|
while ((bit_off = ocfs2_find_next_zero_bit(bitmap, left, start))
|
||||||
|
!= -1) {
|
||||||
|
if ((bit_off < left) && (bit_off == start)) {
|
||||||
|
count++;
|
||||||
|
start++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (count) {
|
||||||
|
blkno = la_start_blk +
|
||||||
|
ocfs2_clusters_to_blocks(osb->sb,
|
||||||
|
start - count);
|
||||||
|
|
||||||
|
mlog(0, "freeing %u bits starting at local "
|
||||||
|
"alloc bit %u (la_start_blk = %"MLFu64", "
|
||||||
|
"blkno = %"MLFu64")\n", count, start - count,
|
||||||
|
la_start_blk, blkno);
|
||||||
|
|
||||||
|
status = ocfs2_free_clusters(handle, main_bm_inode,
|
||||||
|
main_bm_bh, blkno, count);
|
||||||
|
if (status < 0) {
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (bit_off >= left)
|
||||||
|
break;
|
||||||
|
count = 1;
|
||||||
|
start = bit_off + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
bail:
|
||||||
|
mlog_exit(status);
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_journal_handle *handle,
|
||||||
|
struct ocfs2_alloc_context **ac,
|
||||||
|
struct inode **bitmap_inode,
|
||||||
|
struct buffer_head **bitmap_bh)
|
||||||
|
{
|
||||||
|
int status;
|
||||||
|
|
||||||
|
*ac = kcalloc(1, sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
|
||||||
|
if (!(*ac)) {
|
||||||
|
status = -ENOMEM;
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
(*ac)->ac_handle = handle;
|
||||||
|
(*ac)->ac_bits_wanted = ocfs2_local_alloc_window_bits(osb);
|
||||||
|
|
||||||
|
status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
|
||||||
|
if (status < 0) {
|
||||||
|
if (status != -ENOSPC)
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
*bitmap_inode = (*ac)->ac_inode;
|
||||||
|
igrab(*bitmap_inode);
|
||||||
|
*bitmap_bh = (*ac)->ac_bh;
|
||||||
|
get_bh(*bitmap_bh);
|
||||||
|
status = 0;
|
||||||
|
bail:
|
||||||
|
if ((status < 0) && *ac) {
|
||||||
|
ocfs2_free_alloc_context(*ac);
|
||||||
|
*ac = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
mlog_exit(status);
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* pass it the bitmap lock in lock_bh if you have it.
|
||||||
|
*/
|
||||||
|
static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_journal_handle *handle,
|
||||||
|
struct ocfs2_alloc_context *ac)
|
||||||
|
{
|
||||||
|
int status = 0;
|
||||||
|
u32 cluster_off, cluster_count;
|
||||||
|
struct ocfs2_dinode *alloc = NULL;
|
||||||
|
struct ocfs2_local_alloc *la;
|
||||||
|
|
||||||
|
mlog_entry_void();
|
||||||
|
|
||||||
|
alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
|
||||||
|
la = OCFS2_LOCAL_ALLOC(alloc);
|
||||||
|
|
||||||
|
if (alloc->id1.bitmap1.i_total)
|
||||||
|
mlog(0, "asking me to alloc a new window over a non-empty "
|
||||||
|
"one\n");
|
||||||
|
|
||||||
|
mlog(0, "Allocating %u clusters for a new window.\n",
|
||||||
|
ocfs2_local_alloc_window_bits(osb));
|
||||||
|
/* we used the generic suballoc reserve function, but we set
|
||||||
|
* everything up nicely, so there's no reason why we can't use
|
||||||
|
* the more specific cluster api to claim bits. */
|
||||||
|
status = ocfs2_claim_clusters(osb, handle, ac,
|
||||||
|
ocfs2_local_alloc_window_bits(osb),
|
||||||
|
&cluster_off, &cluster_count);
|
||||||
|
if (status < 0) {
|
||||||
|
if (status != -ENOSPC)
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
la->la_bm_off = cpu_to_le32(cluster_off);
|
||||||
|
alloc->id1.bitmap1.i_total = cpu_to_le32(cluster_count);
|
||||||
|
/* just in case... In the future when we find space ourselves,
|
||||||
|
* we don't have to get all contiguous -- but we'll have to
|
||||||
|
* set all previously used bits in bitmap and update
|
||||||
|
* la_bits_set before setting the bits in the main bitmap. */
|
||||||
|
alloc->id1.bitmap1.i_used = 0;
|
||||||
|
memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0,
|
||||||
|
le16_to_cpu(la->la_size));
|
||||||
|
|
||||||
|
mlog(0, "New window allocated:\n");
|
||||||
|
mlog(0, "window la_bm_off = %u\n",
|
||||||
|
OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
|
||||||
|
mlog(0, "window bits = %u\n", le32_to_cpu(alloc->id1.bitmap1.i_total));
|
||||||
|
|
||||||
|
bail:
|
||||||
|
mlog_exit(status);
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Note that we do *NOT* lock the local alloc inode here as
|
||||||
|
* it's been locked already for us. */
|
||||||
|
static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
|
||||||
|
struct inode *local_alloc_inode)
|
||||||
|
{
|
||||||
|
int status = 0;
|
||||||
|
struct buffer_head *main_bm_bh = NULL;
|
||||||
|
struct inode *main_bm_inode = NULL;
|
||||||
|
struct ocfs2_journal_handle *handle = NULL;
|
||||||
|
struct ocfs2_dinode *alloc;
|
||||||
|
struct ocfs2_dinode *alloc_copy = NULL;
|
||||||
|
struct ocfs2_alloc_context *ac = NULL;
|
||||||
|
|
||||||
|
mlog_entry_void();
|
||||||
|
|
||||||
|
handle = ocfs2_alloc_handle(osb);
|
||||||
|
if (!handle) {
|
||||||
|
status = -ENOMEM;
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This will lock the main bitmap for us. */
|
||||||
|
status = ocfs2_local_alloc_reserve_for_window(osb,
|
||||||
|
handle,
|
||||||
|
&ac,
|
||||||
|
&main_bm_inode,
|
||||||
|
&main_bm_bh);
|
||||||
|
if (status < 0) {
|
||||||
|
if (status != -ENOSPC)
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
handle = ocfs2_start_trans(osb, handle, OCFS2_WINDOW_MOVE_CREDITS);
|
||||||
|
if (IS_ERR(handle)) {
|
||||||
|
status = PTR_ERR(handle);
|
||||||
|
handle = NULL;
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
|
||||||
|
|
||||||
|
/* We want to clear the local alloc before doing anything
|
||||||
|
* else, so that if we error later during this operation,
|
||||||
|
* local alloc shutdown won't try to double free main bitmap
|
||||||
|
* bits. Make a copy so the sync function knows which bits to
|
||||||
|
* free. */
|
||||||
|
alloc_copy = kmalloc(osb->local_alloc_bh->b_size, GFP_KERNEL);
|
||||||
|
if (!alloc_copy) {
|
||||||
|
status = -ENOMEM;
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
memcpy(alloc_copy, alloc, osb->local_alloc_bh->b_size);
|
||||||
|
|
||||||
|
status = ocfs2_journal_access(handle, local_alloc_inode,
|
||||||
|
osb->local_alloc_bh,
|
||||||
|
OCFS2_JOURNAL_ACCESS_WRITE);
|
||||||
|
if (status < 0) {
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
ocfs2_clear_local_alloc(alloc);
|
||||||
|
|
||||||
|
status = ocfs2_journal_dirty(handle, osb->local_alloc_bh);
|
||||||
|
if (status < 0) {
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
status = ocfs2_sync_local_to_main(osb, handle, alloc_copy,
|
||||||
|
main_bm_inode, main_bm_bh);
|
||||||
|
if (status < 0) {
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
status = ocfs2_local_alloc_new_window(osb, handle, ac);
|
||||||
|
if (status < 0) {
|
||||||
|
if (status != -ENOSPC)
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
atomic_inc(&osb->alloc_stats.moves);
|
||||||
|
|
||||||
|
status = 0;
|
||||||
|
bail:
|
||||||
|
if (handle)
|
||||||
|
ocfs2_commit_trans(handle);
|
||||||
|
|
||||||
|
if (main_bm_bh)
|
||||||
|
brelse(main_bm_bh);
|
||||||
|
|
||||||
|
if (main_bm_inode)
|
||||||
|
iput(main_bm_inode);
|
||||||
|
|
||||||
|
if (alloc_copy)
|
||||||
|
kfree(alloc_copy);
|
||||||
|
|
||||||
|
if (ac)
|
||||||
|
ocfs2_free_alloc_context(ac);
|
||||||
|
|
||||||
|
mlog_exit(status);
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,56 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* localalloc.h
|
||||||
|
*
|
||||||
|
* Function prototypes
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef OCFS2_LOCALALLOC_H
|
||||||
|
#define OCFS2_LOCALALLOC_H
|
||||||
|
|
||||||
|
int ocfs2_load_local_alloc(struct ocfs2_super *osb);
|
||||||
|
|
||||||
|
void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb);
|
||||||
|
|
||||||
|
int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
|
||||||
|
int node_num,
|
||||||
|
struct ocfs2_dinode **alloc_copy);
|
||||||
|
|
||||||
|
int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_dinode *alloc);
|
||||||
|
|
||||||
|
int ocfs2_alloc_should_use_local(struct ocfs2_super *osb,
|
||||||
|
u64 bits);
|
||||||
|
|
||||||
|
struct ocfs2_alloc_context;
|
||||||
|
int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_journal_handle *passed_handle,
|
||||||
|
u32 bits_wanted,
|
||||||
|
struct ocfs2_alloc_context *ac);
|
||||||
|
|
||||||
|
int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_journal_handle *handle,
|
||||||
|
struct ocfs2_alloc_context *ac,
|
||||||
|
u32 min_bits,
|
||||||
|
u32 *bit_off,
|
||||||
|
u32 *num_bits);
|
||||||
|
|
||||||
|
#endif /* OCFS2_LOCALALLOC_H */
|
|
@ -0,0 +1,102 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* mmap.c
|
||||||
|
*
|
||||||
|
* Code to deal with the mess that is clustered mmap.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/fs.h>
|
||||||
|
#include <linux/types.h>
|
||||||
|
#include <linux/slab.h>
|
||||||
|
#include <linux/highmem.h>
|
||||||
|
#include <linux/pagemap.h>
|
||||||
|
#include <linux/uio.h>
|
||||||
|
#include <linux/signal.h>
|
||||||
|
#include <linux/rbtree.h>
|
||||||
|
|
||||||
|
#define MLOG_MASK_PREFIX ML_FILE_IO
|
||||||
|
#include <cluster/masklog.h>
|
||||||
|
|
||||||
|
#include "ocfs2.h"
|
||||||
|
|
||||||
|
#include "dlmglue.h"
|
||||||
|
#include "file.h"
|
||||||
|
#include "inode.h"
|
||||||
|
#include "mmap.h"
|
||||||
|
|
||||||
|
static struct page *ocfs2_nopage(struct vm_area_struct * area,
|
||||||
|
unsigned long address,
|
||||||
|
int *type)
|
||||||
|
{
|
||||||
|
struct inode *inode = area->vm_file->f_dentry->d_inode;
|
||||||
|
struct page *page = NOPAGE_SIGBUS;
|
||||||
|
sigset_t blocked, oldset;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
mlog_entry("(inode %lu, address %lu)\n", inode->i_ino, address);
|
||||||
|
|
||||||
|
/* The best way to deal with signals in this path is
|
||||||
|
* to block them upfront, rather than allowing the
|
||||||
|
* locking paths to return -ERESTARTSYS. */
|
||||||
|
sigfillset(&blocked);
|
||||||
|
|
||||||
|
/* We should technically never get a bad ret return
|
||||||
|
* from sigprocmask */
|
||||||
|
ret = sigprocmask(SIG_BLOCK, &blocked, &oldset);
|
||||||
|
if (ret < 0) {
|
||||||
|
mlog_errno(ret);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
page = filemap_nopage(area, address, type);
|
||||||
|
|
||||||
|
ret = sigprocmask(SIG_SETMASK, &oldset, NULL);
|
||||||
|
if (ret < 0)
|
||||||
|
mlog_errno(ret);
|
||||||
|
out:
|
||||||
|
mlog_exit_ptr(page);
|
||||||
|
return page;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct vm_operations_struct ocfs2_file_vm_ops = {
|
||||||
|
.nopage = ocfs2_nopage,
|
||||||
|
};
|
||||||
|
|
||||||
|
int ocfs2_mmap(struct file *file,
|
||||||
|
struct vm_area_struct *vma)
|
||||||
|
{
|
||||||
|
struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
|
||||||
|
struct inode *inode = mapping->host;
|
||||||
|
|
||||||
|
/* We don't want to support shared writable mappings yet. */
|
||||||
|
if (((vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_MAYSHARE))
|
||||||
|
&& ((vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_MAYWRITE))) {
|
||||||
|
mlog(0, "disallow shared writable mmaps %lx\n", vma->vm_flags);
|
||||||
|
/* This is -EINVAL because generic_file_readonly_mmap
|
||||||
|
* returns it in a similar situation. */
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
update_atime(inode);
|
||||||
|
vma->vm_ops = &ocfs2_file_vm_ops;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,6 @@
|
||||||
|
#ifndef OCFS2_MMAP_H
#define OCFS2_MMAP_H

/*
 * Forward declarations: both types are only used through pointers, and
 * declaring them here keeps this header usable no matter which headers
 * were included before it.
 */
struct file;
struct vm_area_struct;

/* mmap handler for ocfs2 files; returns 0 or a negative errno. */
int ocfs2_mmap(struct file *file, struct vm_area_struct *vma);

#endif /* OCFS2_MMAP_H */
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,58 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* namei.h
|
||||||
|
*
|
||||||
|
* Function prototypes
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef OCFS2_NAMEI_H
|
||||||
|
#define OCFS2_NAMEI_H
|
||||||
|
|
||||||
|
/* Inode operations table for directories (defined outside this header). */
extern struct inode_operations ocfs2_dir_iops;

struct dentry *ocfs2_get_parent(struct dentry *child);

/*
 * Directory entry helpers.
 * NOTE(review): semantics are not visible from this header; see the
 * definitions for the exact contracts.
 */
int ocfs2_check_dir_entry(struct inode *dir,
			  struct ocfs2_dir_entry *de,
			  struct buffer_head *bh,
			  unsigned long offset);
struct buffer_head *ocfs2_find_entry(const char *name,
				     int namelen,
				     struct inode *dir,
				     struct ocfs2_dir_entry **res_dir);

int ocfs2_orphan_del(struct ocfs2_super *osb,
		     struct ocfs2_journal_handle *handle,
		     struct inode *orphan_dir_inode,
		     struct inode *inode,
		     struct buffer_head *orphan_dir_bh);
|
||||||
|
|
||||||
|
static inline int ocfs2_match(int len,
|
||||||
|
const char * const name,
|
||||||
|
struct ocfs2_dir_entry *de)
|
||||||
|
{
|
||||||
|
if (len != de->name_len)
|
||||||
|
return 0;
|
||||||
|
if (!de->inode)
|
||||||
|
return 0;
|
||||||
|
return !memcmp(name, de->name, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* OCFS2_NAMEI_H */
|
|
@ -0,0 +1,109 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* ocfs1_fs_compat.h
|
||||||
|
*
|
||||||
|
* OCFS1 volume header definitions. OCFS2 creates valid but unmountable
|
||||||
|
* OCFS1 volume headers on the first two sectors of an OCFS2 volume.
|
||||||
|
* This allows an OCFS1 volume to see the partition and cleanly fail to
|
||||||
|
* mount it.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License, version 2, as published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _OCFS1_FS_COMPAT_H
|
||||||
|
#define _OCFS1_FS_COMPAT_H
|
||||||
|
|
||||||
|
/* Sizes, in bytes, of the fixed-length byte arrays in the OCFS1
 * on-disk structures below. */
#define OCFS1_MAX_VOL_SIGNATURE_LEN	128
#define OCFS1_MAX_MOUNT_POINT_LEN	128
#define OCFS1_MAX_VOL_ID_LENGTH		16
#define OCFS1_MAX_VOL_LABEL_LEN		64
#define OCFS1_MAX_CLUSTER_NAME_LEN	64

/* Version numbers and signature string for the OCFS1 volume header. */
#define OCFS1_MAJOR_VERSION		(2)
#define OCFS1_MINOR_VERSION		(0)
#define OCFS1_VOLUME_SIGNATURE		"OracleCFS"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* OCFS1 superblock. Lives at sector 0.
|
||||||
|
*/
|
||||||
|
struct ocfs1_vol_disk_hdr
|
||||||
|
{
|
||||||
|
/*00*/ __u32 minor_version;
|
||||||
|
__u32 major_version;
|
||||||
|
/*08*/ __u8 signature[OCFS1_MAX_VOL_SIGNATURE_LEN];
|
||||||
|
/*88*/ __u8 mount_point[OCFS1_MAX_MOUNT_POINT_LEN];
|
||||||
|
/*108*/ __u64 serial_num;
|
||||||
|
/*110*/ __u64 device_size;
|
||||||
|
__u64 start_off;
|
||||||
|
/*120*/ __u64 bitmap_off;
|
||||||
|
__u64 publ_off;
|
||||||
|
/*130*/ __u64 vote_off;
|
||||||
|
__u64 root_bitmap_off;
|
||||||
|
/*140*/ __u64 data_start_off;
|
||||||
|
__u64 root_bitmap_size;
|
||||||
|
/*150*/ __u64 root_off;
|
||||||
|
__u64 root_size;
|
||||||
|
/*160*/ __u64 cluster_size;
|
||||||
|
__u64 num_nodes;
|
||||||
|
/*170*/ __u64 num_clusters;
|
||||||
|
__u64 dir_node_size;
|
||||||
|
/*180*/ __u64 file_node_size;
|
||||||
|
__u64 internal_off;
|
||||||
|
/*190*/ __u64 node_cfg_off;
|
||||||
|
__u64 node_cfg_size;
|
||||||
|
/*1A0*/ __u64 new_cfg_off;
|
||||||
|
__u32 prot_bits;
|
||||||
|
__s32 excl_mount;
|
||||||
|
/*1B0*/
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
struct ocfs1_disk_lock
|
||||||
|
{
|
||||||
|
/*00*/ __u32 curr_master;
|
||||||
|
__u8 file_lock;
|
||||||
|
__u8 compat_pad[3]; /* Not in orignal definition. Used to
|
||||||
|
make the already existing alignment
|
||||||
|
explicit */
|
||||||
|
__u64 last_write_time;
|
||||||
|
/*10*/ __u64 last_read_time;
|
||||||
|
__u32 writer_node_num;
|
||||||
|
__u32 reader_node_num;
|
||||||
|
/*20*/ __u64 oin_node_map;
|
||||||
|
__u64 dlock_seq_num;
|
||||||
|
/*30*/
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* OCFS1 volume label. Lives at sector 1.
|
||||||
|
*/
|
||||||
|
struct ocfs1_vol_label
|
||||||
|
{
|
||||||
|
/*00*/ struct ocfs1_disk_lock disk_lock;
|
||||||
|
/*30*/ __u8 label[OCFS1_MAX_VOL_LABEL_LEN];
|
||||||
|
/*70*/ __u16 label_len;
|
||||||
|
/*72*/ __u8 vol_id[OCFS1_MAX_VOL_ID_LENGTH];
|
||||||
|
/*82*/ __u16 vol_id_len;
|
||||||
|
/*84*/ __u8 cluster_name[OCFS1_MAX_CLUSTER_NAME_LEN];
|
||||||
|
/*A4*/ __u16 cluster_name_len;
|
||||||
|
/*A6*/
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
#endif /* _OCFS1_FS_COMPAT_H */
|
||||||
|
|
|
@ -0,0 +1,464 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* ocfs2.h
|
||||||
|
*
|
||||||
|
* Defines macros and structures used in OCFS2
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef OCFS2_H
|
||||||
|
#define OCFS2_H
|
||||||
|
|
||||||
|
#include <linux/spinlock.h>
|
||||||
|
#include <linux/sched.h>
|
||||||
|
#include <linux/wait.h>
|
||||||
|
#include <linux/list.h>
|
||||||
|
#include <linux/rbtree.h>
|
||||||
|
#include <linux/workqueue.h>
|
||||||
|
#include <linux/kref.h>
|
||||||
|
|
||||||
|
#include "cluster/nodemanager.h"
|
||||||
|
#include "cluster/heartbeat.h"
|
||||||
|
#include "cluster/tcp.h"
|
||||||
|
|
||||||
|
#include "dlm/dlmapi.h"
|
||||||
|
|
||||||
|
#include "ocfs2_fs.h"
|
||||||
|
#include "endian.h"
|
||||||
|
#include "ocfs2_lockid.h"
|
||||||
|
|
||||||
|
struct ocfs2_extent_map {
|
||||||
|
u32 em_clusters;
|
||||||
|
struct rb_root em_extents;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Most user visible OCFS2 inodes will have very few pieces of
|
||||||
|
* metadata, but larger files (including bitmaps, etc) must be taken
|
||||||
|
* into account when designing an access scheme. We allow a small
|
||||||
|
* amount of inlined blocks to be stored on an array and grow the
|
||||||
|
* structure into a rb tree when necessary. */
|
||||||
|
#define OCFS2_INODE_MAX_CACHE_ARRAY 2
|
||||||
|
|
||||||
|
struct ocfs2_caching_info {
|
||||||
|
unsigned int ci_num_cached;
|
||||||
|
union {
|
||||||
|
sector_t ci_array[OCFS2_INODE_MAX_CACHE_ARRAY];
|
||||||
|
struct rb_root ci_tree;
|
||||||
|
} ci_cache;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* this limits us to 256 nodes
|
||||||
|
* if we need more, we can do a kmalloc for the map */
|
||||||
|
#define OCFS2_NODE_MAP_MAX_NODES 256
|
||||||
|
struct ocfs2_node_map {
|
||||||
|
u16 num_nodes;
|
||||||
|
unsigned long map[BITS_TO_LONGS(OCFS2_NODE_MAP_MAX_NODES)];
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Values for ocfs2_lock_res->l_action; used from AST/BAST funcs. */
enum ocfs2_ast_action {
	OCFS2_AST_INVALID	= 0,
	OCFS2_AST_ATTACH	= 1,
	OCFS2_AST_CONVERT	= 2,
	OCFS2_AST_DOWNCONVERT	= 3,
};
|
||||||
|
|
||||||
|
/* Actions for an unlockast function to take
 * (ocfs2_lock_res->l_unlock_action). */
enum ocfs2_unlock_action {
	OCFS2_UNLOCK_INVALID		= 0,
	OCFS2_UNLOCK_CANCEL_CONVERT	= 1,
	OCFS2_UNLOCK_DROP_LOCK		= 2,
};
|
||||||
|
|
||||||
|
/*
 * ocfs2_lock_res->l_flags flags.
 */

/* have we initialized the lvb */
#define OCFS2_LOCK_ATTACHED      (0x00000001)

/* we are currently in dlm_lock */
#define OCFS2_LOCK_BUSY          (0x00000002)

/* blocked waiting to downconvert */
#define OCFS2_LOCK_BLOCKED       (0x00000004)

/* newly created inode */
#define OCFS2_LOCK_LOCAL         (0x00000008)

#define OCFS2_LOCK_NEEDS_REFRESH (0x00000010)
#define OCFS2_LOCK_REFRESHING    (0x00000020)

/* track initialization for shutdown paths */
#define OCFS2_LOCK_INITIALIZED   (0x00000040)

/* help dlmglue track when to skip queueing a lock because it's about
 * to be dropped. */
#define OCFS2_LOCK_FREEING       (0x00000080)

/* queued for downconvert */
#define OCFS2_LOCK_QUEUED        (0x00000100)
|
||||||
|
|
||||||
|
/* Defined elsewhere; only referenced through a pointer in
 * struct ocfs2_lock_res. */
struct ocfs2_lock_res_ops;

/* Callback type taking a status code and an opaque data word. */
typedef void (*ocfs2_lock_callback)(int status,
				    unsigned long data);
|
||||||
|
|
||||||
|
struct ocfs2_lock_res {
|
||||||
|
void *l_priv;
|
||||||
|
struct ocfs2_lock_res_ops *l_ops;
|
||||||
|
spinlock_t l_lock;
|
||||||
|
|
||||||
|
struct list_head l_blocked_list;
|
||||||
|
struct list_head l_mask_waiters;
|
||||||
|
|
||||||
|
enum ocfs2_lock_type l_type;
|
||||||
|
unsigned long l_flags;
|
||||||
|
char l_name[OCFS2_LOCK_ID_MAX_LEN];
|
||||||
|
int l_level;
|
||||||
|
unsigned int l_ro_holders;
|
||||||
|
unsigned int l_ex_holders;
|
||||||
|
struct dlm_lockstatus l_lksb;
|
||||||
|
|
||||||
|
/* used from AST/BAST funcs. */
|
||||||
|
enum ocfs2_ast_action l_action;
|
||||||
|
enum ocfs2_unlock_action l_unlock_action;
|
||||||
|
int l_requested;
|
||||||
|
int l_blocking;
|
||||||
|
|
||||||
|
wait_queue_head_t l_event;
|
||||||
|
|
||||||
|
struct list_head l_debug_list;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ocfs2_dlm_debug {
|
||||||
|
struct kref d_refcnt;
|
||||||
|
struct dentry *d_locking_state;
|
||||||
|
struct list_head d_lockres_tracking;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Volume states (see ocfs2_super->vol_state). */
enum ocfs2_vol_state {
	VOLUME_INIT		= 0,
	VOLUME_MOUNTED		= 1,
	VOLUME_DISMOUNTED	= 2,
	VOLUME_DISABLED		= 3
};
|
||||||
|
|
||||||
|
struct ocfs2_alloc_stats
|
||||||
|
{
|
||||||
|
atomic_t moves;
|
||||||
|
atomic_t local_data;
|
||||||
|
atomic_t bitmap_data;
|
||||||
|
atomic_t bg_allocs;
|
||||||
|
atomic_t bg_extends;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Local alloc states (see ocfs2_super->local_alloc_state). */
enum ocfs2_local_alloc_state {
	OCFS2_LA_UNUSED		= 0,
	OCFS2_LA_ENABLED	= 1,
	OCFS2_LA_DISABLED	= 2
};
|
||||||
|
|
||||||
|
/* Mount option bits (tested against ocfs2_super->s_mount_opt). */
enum ocfs2_mount_options {
	/* Heartbeat started in local mode */
	OCFS2_MOUNT_HB_LOCAL		= 1 << 0,
	/* Use block barriers */
	OCFS2_MOUNT_BARRIER		= 1 << 1,
	/* Don't catch signals */
	OCFS2_MOUNT_NOINTR		= 1 << 2,
	/* Panic on errors */
	OCFS2_MOUNT_ERRORS_PANIC	= 1 << 3,
	/* No data ordering */
	OCFS2_MOUNT_DATA_WRITEBACK	= 1 << 4,
#ifdef OCFS2_ORACORE_WORKAROUNDS
	/* ocfs1 compatibility mode */
	OCFS2_MOUNT_COMPAT_OCFS		= 1 << 30,
#endif
};
|
||||||
|
|
||||||
|
/* Bits kept in ocfs2_super->osb_flags. */
#define OCFS2_OSB_SOFT_RO	0x0001
#define OCFS2_OSB_HARD_RO	0x0002
#define OCFS2_OSB_ERROR_FS	0x0004
|
||||||
|
|
||||||
|
struct ocfs2_journal;
|
||||||
|
struct ocfs2_journal_handle;
|
||||||
|
struct ocfs2_super
|
||||||
|
{
|
||||||
|
u32 osb_id; /* id used by the proc interface */
|
||||||
|
struct task_struct *commit_task;
|
||||||
|
struct super_block *sb;
|
||||||
|
struct inode *root_inode;
|
||||||
|
struct inode *sys_root_inode;
|
||||||
|
struct inode *system_inodes[NUM_SYSTEM_INODES];
|
||||||
|
|
||||||
|
struct ocfs2_slot_info *slot_info;
|
||||||
|
|
||||||
|
spinlock_t node_map_lock;
|
||||||
|
struct ocfs2_node_map mounted_map;
|
||||||
|
struct ocfs2_node_map recovery_map;
|
||||||
|
struct ocfs2_node_map umount_map;
|
||||||
|
|
||||||
|
u32 num_clusters;
|
||||||
|
u64 root_blkno;
|
||||||
|
u64 system_dir_blkno;
|
||||||
|
u64 bitmap_blkno;
|
||||||
|
u32 bitmap_cpg;
|
||||||
|
u8 *uuid;
|
||||||
|
char *uuid_str;
|
||||||
|
u8 *vol_label;
|
||||||
|
u64 first_cluster_group_blkno;
|
||||||
|
u32 fs_generation;
|
||||||
|
|
||||||
|
u32 s_feature_compat;
|
||||||
|
u32 s_feature_incompat;
|
||||||
|
u32 s_feature_ro_compat;
|
||||||
|
|
||||||
|
/* Protects s_next_generaion, osb_flags. Could protect more on
|
||||||
|
* osb as it's very short lived. */
|
||||||
|
spinlock_t osb_lock;
|
||||||
|
u32 s_next_generation;
|
||||||
|
unsigned long osb_flags;
|
||||||
|
|
||||||
|
unsigned long s_mount_opt;
|
||||||
|
|
||||||
|
u16 max_slots;
|
||||||
|
u16 num_nodes;
|
||||||
|
s16 node_num;
|
||||||
|
s16 slot_num;
|
||||||
|
int s_sectsize_bits;
|
||||||
|
int s_clustersize;
|
||||||
|
int s_clustersize_bits;
|
||||||
|
struct proc_dir_entry *proc_sub_dir; /* points to /proc/fs/ocfs2/<maj_min> */
|
||||||
|
|
||||||
|
atomic_t vol_state;
|
||||||
|
struct semaphore recovery_lock;
|
||||||
|
struct task_struct *recovery_thread_task;
|
||||||
|
int disable_recovery;
|
||||||
|
wait_queue_head_t checkpoint_event;
|
||||||
|
atomic_t needs_checkpoint;
|
||||||
|
struct ocfs2_journal *journal;
|
||||||
|
|
||||||
|
enum ocfs2_local_alloc_state local_alloc_state;
|
||||||
|
struct buffer_head *local_alloc_bh;
|
||||||
|
|
||||||
|
/* Next two fields are for local node slot recovery during
|
||||||
|
* mount. */
|
||||||
|
int dirty;
|
||||||
|
struct ocfs2_dinode *local_alloc_copy;
|
||||||
|
|
||||||
|
struct ocfs2_alloc_stats alloc_stats;
|
||||||
|
char dev_str[20]; /* "major,minor" of the device */
|
||||||
|
|
||||||
|
struct dlm_ctxt *dlm;
|
||||||
|
struct ocfs2_lock_res osb_super_lockres;
|
||||||
|
struct ocfs2_lock_res osb_rename_lockres;
|
||||||
|
struct dlm_eviction_cb osb_eviction_cb;
|
||||||
|
struct ocfs2_dlm_debug *osb_dlm_debug;
|
||||||
|
|
||||||
|
struct dentry *osb_debug_root;
|
||||||
|
|
||||||
|
wait_queue_head_t recovery_event;
|
||||||
|
|
||||||
|
spinlock_t vote_task_lock;
|
||||||
|
struct task_struct *vote_task;
|
||||||
|
wait_queue_head_t vote_event;
|
||||||
|
unsigned long vote_wake_sequence;
|
||||||
|
unsigned long vote_work_sequence;
|
||||||
|
|
||||||
|
struct list_head blocked_lock_list;
|
||||||
|
unsigned long blocked_lock_count;
|
||||||
|
|
||||||
|
struct list_head vote_list;
|
||||||
|
int vote_count;
|
||||||
|
|
||||||
|
u32 net_key;
|
||||||
|
spinlock_t net_response_lock;
|
||||||
|
unsigned int net_response_ids;
|
||||||
|
struct list_head net_response_list;
|
||||||
|
|
||||||
|
struct o2hb_callback_func osb_hb_up;
|
||||||
|
struct o2hb_callback_func osb_hb_down;
|
||||||
|
|
||||||
|
struct list_head osb_net_handlers;
|
||||||
|
|
||||||
|
wait_queue_head_t osb_mount_event;
|
||||||
|
|
||||||
|
/* Truncate log info */
|
||||||
|
struct inode *osb_tl_inode;
|
||||||
|
struct buffer_head *osb_tl_bh;
|
||||||
|
struct work_struct osb_truncate_log_wq;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Fetch the struct ocfs2_super stored in a VFS super_block's s_fs_info. */
#define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info)
|
||||||
|
#define OCFS2_MAX_OSB_ID 65536
|
||||||
|
|
||||||
|
static inline int ocfs2_should_order_data(struct inode *inode)
|
||||||
|
{
|
||||||
|
if (!S_ISREG(inode->i_mode))
|
||||||
|
return 0;
|
||||||
|
if (OCFS2_SB(inode->i_sb)->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK)
|
||||||
|
return 0;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* set / clear functions because cluster events can make these happen
|
||||||
|
* in parallel so we want the transitions to be atomic. this also
|
||||||
|
* means that any future flags osb_flags must be protected by spinlock
|
||||||
|
* too! */
|
||||||
|
static inline void ocfs2_set_osb_flag(struct ocfs2_super *osb,
|
||||||
|
unsigned long flag)
|
||||||
|
{
|
||||||
|
spin_lock(&osb->osb_lock);
|
||||||
|
osb->osb_flags |= flag;
|
||||||
|
spin_unlock(&osb->osb_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb,
|
||||||
|
int hard)
|
||||||
|
{
|
||||||
|
spin_lock(&osb->osb_lock);
|
||||||
|
osb->osb_flags &= ~(OCFS2_OSB_SOFT_RO|OCFS2_OSB_HARD_RO);
|
||||||
|
if (hard)
|
||||||
|
osb->osb_flags |= OCFS2_OSB_HARD_RO;
|
||||||
|
else
|
||||||
|
osb->osb_flags |= OCFS2_OSB_SOFT_RO;
|
||||||
|
spin_unlock(&osb->osb_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int ocfs2_is_hard_readonly(struct ocfs2_super *osb)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
spin_lock(&osb->osb_lock);
|
||||||
|
ret = osb->osb_flags & OCFS2_OSB_HARD_RO;
|
||||||
|
spin_unlock(&osb->osb_lock);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int ocfs2_is_soft_readonly(struct ocfs2_super *osb)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
spin_lock(&osb->osb_lock);
|
||||||
|
ret = osb->osb_flags & OCFS2_OSB_SOFT_RO;
|
||||||
|
spin_unlock(&osb->osb_lock);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Non-zero when (ptr)->i_signature equals OCFS2_INODE_SIGNATURE. */
#define OCFS2_IS_VALID_DINODE(ptr)					\
	(strcmp((ptr)->i_signature, OCFS2_INODE_SIGNATURE) == 0)
|
||||||
|
|
||||||
|
/*
 * Report a dinode with a bad signature through ocfs2_error(), printing
 * its i_blkno and at most 7 characters of i_signature.  @__di is
 * evaluated exactly once.
 * NOTE(review): the name implies ocfs2_error() turns the volume
 * read-only; that is not visible from this header -- confirm there.
 *
 * The expansion deliberately has no trailing semicolon, so that
 * "OCFS2_RO_ON_INVALID_DINODE(sb, di);" is a single statement and stays
 * valid as the body of an unbraced if/else.
 */
#define OCFS2_RO_ON_INVALID_DINODE(__sb, __di)	do {			\
	typeof(__di) ____di = (__di);					\
	ocfs2_error((__sb),						\
		"Dinode # %"MLFu64" has bad signature %.*s",		\
		(____di)->i_blkno, 7,					\
		(____di)->i_signature);					\
} while (0)
|
||||||
|
|
||||||
|
/* Non-zero when (ptr)->h_signature equals OCFS2_EXTENT_BLOCK_SIGNATURE. */
#define OCFS2_IS_VALID_EXTENT_BLOCK(ptr)				\
	(strcmp((ptr)->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE) == 0)
|
||||||
|
|
||||||
|
/*
 * Report an extent block with a bad signature through ocfs2_error(),
 * printing its h_blkno and at most 7 characters of h_signature.  @__eb
 * is evaluated exactly once.
 *
 * The expansion deliberately has no trailing semicolon, so that the
 * invocation plus its own ';' forms a single statement and stays valid
 * as the body of an unbraced if/else.
 */
#define OCFS2_RO_ON_INVALID_EXTENT_BLOCK(__sb, __eb)	do {		\
	typeof(__eb) ____eb = (__eb);					\
	ocfs2_error((__sb),						\
		"Extent Block # %"MLFu64" has bad signature %.*s",	\
		(____eb)->h_blkno, 7,					\
		(____eb)->h_signature);					\
} while (0)
|
||||||
|
|
||||||
|
/* Non-zero when (ptr)->bg_signature equals OCFS2_GROUP_DESC_SIGNATURE. */
#define OCFS2_IS_VALID_GROUP_DESC(ptr)					\
	(strcmp((ptr)->bg_signature, OCFS2_GROUP_DESC_SIGNATURE) == 0)
|
||||||
|
|
||||||
|
/*
 * Report a group descriptor with a bad signature through ocfs2_error(),
 * printing its bg_blkno and at most 7 characters of bg_signature.
 * @__gd is evaluated exactly once.
 *
 * The expansion deliberately has no trailing semicolon, so that the
 * invocation plus its own ';' forms a single statement and stays valid
 * as the body of an unbraced if/else.
 */
#define OCFS2_RO_ON_INVALID_GROUP_DESC(__sb, __gd)	do {		\
	typeof(__gd) ____gd = (__gd);					\
	ocfs2_error((__sb),						\
		"Group Descriptor # %"MLFu64" has bad signature %.*s",	\
		(____gd)->bg_blkno, 7,					\
		(____gd)->bg_signature);				\
} while (0)
|
||||||
|
|
||||||
|
static inline unsigned long ino_from_blkno(struct super_block *sb,
|
||||||
|
u64 blkno)
|
||||||
|
{
|
||||||
|
return (unsigned long)(blkno & (u64)ULONG_MAX);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline u64 ocfs2_clusters_to_blocks(struct super_block *sb,
|
||||||
|
u32 clusters)
|
||||||
|
{
|
||||||
|
int c_to_b_bits = OCFS2_SB(sb)->s_clustersize_bits -
|
||||||
|
sb->s_blocksize_bits;
|
||||||
|
|
||||||
|
return (u64)clusters << c_to_b_bits;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline u32 ocfs2_blocks_to_clusters(struct super_block *sb,
|
||||||
|
u64 blocks)
|
||||||
|
{
|
||||||
|
int b_to_c_bits = OCFS2_SB(sb)->s_clustersize_bits -
|
||||||
|
sb->s_blocksize_bits;
|
||||||
|
|
||||||
|
return (u32)(blocks >> b_to_c_bits);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline unsigned int ocfs2_clusters_for_bytes(struct super_block *sb,
|
||||||
|
u64 bytes)
|
||||||
|
{
|
||||||
|
int cl_bits = OCFS2_SB(sb)->s_clustersize_bits;
|
||||||
|
unsigned int clusters;
|
||||||
|
|
||||||
|
bytes += OCFS2_SB(sb)->s_clustersize - 1;
|
||||||
|
/* OCFS2 just cannot have enough clusters to overflow this */
|
||||||
|
clusters = (unsigned int)(bytes >> cl_bits);
|
||||||
|
|
||||||
|
return clusters;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Number of blocks needed to hold @bytes, rounding up to a whole block. */
static inline u64 ocfs2_blocks_for_bytes(struct super_block *sb,
					 u64 bytes)
{
	u64 rounded = bytes + (sb->s_blocksize - 1);

	return rounded >> sb->s_blocksize_bits;
}
|
||||||
|
|
||||||
|
static inline u64 ocfs2_clusters_to_bytes(struct super_block *sb,
|
||||||
|
u32 clusters)
|
||||||
|
{
|
||||||
|
return (u64)clusters << OCFS2_SB(sb)->s_clustersize_bits;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Round @bytes up to a multiple of the cluster size: count the clusters
 * needed with ocfs2_clusters_for_bytes(), then convert back to bytes.
 */
static inline u64 ocfs2_align_bytes_to_clusters(struct super_block *sb,
						u64 bytes)
{
	int shift = OCFS2_SB(sb)->s_clustersize_bits;
	unsigned int needed = ocfs2_clusters_for_bytes(sb, bytes);

	return (u64)needed << shift;
}
|
||||||
|
|
||||||
|
/*
 * Round @bytes up to a multiple of the block size: count the blocks
 * needed with ocfs2_blocks_for_bytes(), then convert back to bytes.
 */
static inline u64 ocfs2_align_bytes_to_blocks(struct super_block *sb,
					      u64 bytes)
{
	u64 needed = ocfs2_blocks_for_bytes(sb, bytes);

	return needed << sb->s_blocksize_bits;
}
|
||||||
|
|
||||||
|
/*
 * Number of 512-byte sectors needed to hold @bytes, rounding up.
 * Note that the result is a sector count, not a byte count.
 */
static inline unsigned long ocfs2_align_bytes_to_sectors(u64 bytes)
{
	u64 rounded = bytes + 511;

	return (unsigned long)(rounded >> 9);
}
|
||||||
|
|
||||||
|
#define ocfs2_set_bit ext2_set_bit
|
||||||
|
#define ocfs2_clear_bit ext2_clear_bit
|
||||||
|
#define ocfs2_test_bit ext2_test_bit
|
||||||
|
#define ocfs2_find_next_zero_bit ext2_find_next_zero_bit
|
||||||
|
#endif /* OCFS2_H */
|
||||||
|
|
|
@ -0,0 +1,638 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* ocfs2_fs.h
|
||||||
|
*
|
||||||
|
* On-disk structures for OCFS2.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License, version 2, as published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _OCFS2_FS_H
|
||||||
|
#define _OCFS2_FS_H
|
||||||
|
|
||||||
|
/* Version */
|
||||||
|
#define OCFS2_MAJOR_REV_LEVEL 0
|
||||||
|
#define OCFS2_MINOR_REV_LEVEL 90
|
||||||
|
|
||||||
|
/*
|
||||||
|
* An OCFS2 volume starts this way:
|
||||||
|
* Sector 0: Valid ocfs1_vol_disk_hdr that cleanly fails to mount OCFS.
|
||||||
|
* Sector 1: Valid ocfs1_vol_label that cleanly fails to mount OCFS.
|
||||||
|
* Block OCFS2_SUPER_BLOCK_BLKNO: OCFS2 superblock.
|
||||||
|
*
|
||||||
|
* All other structures are found from the superblock information.
|
||||||
|
*
|
||||||
|
* OCFS2_SUPER_BLOCK_BLKNO is in blocks, not sectors. eg, for a
|
||||||
|
* blocksize of 2K, it is 4096 bytes into disk.
|
||||||
|
*/
|
||||||
|
#define OCFS2_SUPER_BLOCK_BLKNO 2
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Cluster size limits. The maximum is kept arbitrarily at 1 MB, and could
|
||||||
|
* grow if needed.
|
||||||
|
*/
|
||||||
|
#define OCFS2_MIN_CLUSTERSIZE 4096
|
||||||
|
#define OCFS2_MAX_CLUSTERSIZE 1048576
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Blocks cannot be bigger than clusters, so the maximum blocksize is the
|
||||||
|
* minimum cluster size.
|
||||||
|
*/
|
||||||
|
#define OCFS2_MIN_BLOCKSIZE 512
|
||||||
|
#define OCFS2_MAX_BLOCKSIZE OCFS2_MIN_CLUSTERSIZE
|
||||||
|
|
||||||
|
/* Filesystem magic number */
|
||||||
|
#define OCFS2_SUPER_MAGIC 0x7461636f
|
||||||
|
|
||||||
|
/* Object signatures */
|
||||||
|
#define OCFS2_SUPER_BLOCK_SIGNATURE "OCFSV2"
|
||||||
|
#define OCFS2_INODE_SIGNATURE "INODE01"
|
||||||
|
#define OCFS2_EXTENT_BLOCK_SIGNATURE "EXBLK01"
|
||||||
|
#define OCFS2_GROUP_DESC_SIGNATURE "GROUP01"
|
||||||
|
|
||||||
|
/* Compatibility flags */
|
||||||
|
/*
 * Feature tests: each yields the bits of @mask that are set in the
 * corresponding s_feature_* word of OCFS2_SB(sb) (non-zero if any).
 */
#define OCFS2_HAS_COMPAT_FEATURE(sb,mask)				\
	(OCFS2_SB(sb)->s_feature_compat & (mask))
#define OCFS2_HAS_RO_COMPAT_FEATURE(sb,mask)				\
	(OCFS2_SB(sb)->s_feature_ro_compat & (mask))
#define OCFS2_HAS_INCOMPAT_FEATURE(sb,mask)				\
	(OCFS2_SB(sb)->s_feature_incompat & (mask))
|
||||||
|
/*
 * Set / clear the bits of @mask in the corresponding s_feature_* word
 * of OCFS2_SB(sb).  Each expansion is fully parenthesized so that the
 * assignment cannot absorb operators written next to the invocation.
 * These are plain read-modify-write assignments; no locking is done
 * here.
 */
#define OCFS2_SET_COMPAT_FEATURE(sb,mask)				\
	(OCFS2_SB(sb)->s_feature_compat |= (mask))
#define OCFS2_SET_RO_COMPAT_FEATURE(sb,mask)				\
	(OCFS2_SB(sb)->s_feature_ro_compat |= (mask))
#define OCFS2_SET_INCOMPAT_FEATURE(sb,mask)				\
	(OCFS2_SB(sb)->s_feature_incompat |= (mask))
#define OCFS2_CLEAR_COMPAT_FEATURE(sb,mask)				\
	(OCFS2_SB(sb)->s_feature_compat &= ~(mask))
#define OCFS2_CLEAR_RO_COMPAT_FEATURE(sb,mask)				\
	(OCFS2_SB(sb)->s_feature_ro_compat &= ~(mask))
#define OCFS2_CLEAR_INCOMPAT_FEATURE(sb,mask)				\
	(OCFS2_SB(sb)->s_feature_incompat &= ~(mask))
|
||||||
|
|
||||||
|
#define OCFS2_FEATURE_COMPAT_SUPP 0
|
||||||
|
#define OCFS2_FEATURE_INCOMPAT_SUPP 0
|
||||||
|
#define OCFS2_FEATURE_RO_COMPAT_SUPP 0
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Heartbeat-only devices are missing journals and other files. The
|
||||||
|
* filesystem driver can't load them, but the library can. Never put
|
||||||
|
* this in OCFS2_FEATURE_INCOMPAT_SUPP, *ever*.
|
||||||
|
*/
|
||||||
|
#define OCFS2_FEATURE_INCOMPAT_HEARTBEAT_DEV 0x0002
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Flags on ocfs2_dinode.i_flags
|
||||||
|
*/
|
||||||
|
#define OCFS2_VALID_FL (0x00000001) /* Inode is valid */
|
||||||
|
#define OCFS2_UNUSED2_FL (0x00000002)
|
||||||
|
#define OCFS2_ORPHANED_FL (0x00000004) /* On the orphan list */
|
||||||
|
#define OCFS2_UNUSED3_FL (0x00000008)
|
||||||
|
/* System inode flags */
|
||||||
|
#define OCFS2_SYSTEM_FL (0x00000010) /* System inode */
|
||||||
|
#define OCFS2_SUPER_BLOCK_FL (0x00000020) /* Super block */
|
||||||
|
#define OCFS2_LOCAL_ALLOC_FL (0x00000040) /* Slot local alloc bitmap */
|
||||||
|
#define OCFS2_BITMAP_FL (0x00000080) /* Allocation bitmap */
|
||||||
|
#define OCFS2_JOURNAL_FL (0x00000100) /* Slot local journal */
|
||||||
|
#define OCFS2_HEARTBEAT_FL (0x00000200) /* Heartbeat area */
|
||||||
|
#define OCFS2_CHAIN_FL (0x00000400) /* Chain allocator */
|
||||||
|
#define OCFS2_DEALLOC_FL (0x00000800) /* Truncate log */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Journal Flags (ocfs2_dinode.id1.journal1.ij_flags)
|
||||||
|
*/
|
||||||
|
#define OCFS2_JOURNAL_DIRTY_FL (0x00000001) /* Journal needs recovery */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* superblock s_state flags
|
||||||
|
*/
|
||||||
|
#define OCFS2_ERROR_FS (0x00000001) /* FS saw errors */
|
||||||
|
|
||||||
|
/* Limit of space in ocfs2_dir_entry */
|
||||||
|
#define OCFS2_MAX_FILENAME_LEN 255
|
||||||
|
|
||||||
|
/* Maximum slots on an ocfs2 file system */
|
||||||
|
#define OCFS2_MAX_SLOTS 255
|
||||||
|
|
||||||
|
/* Slot map indicator for an empty slot */
|
||||||
|
#define OCFS2_INVALID_SLOT -1
|
||||||
|
|
||||||
|
#define OCFS2_VOL_UUID_LEN 16
|
||||||
|
#define OCFS2_MAX_VOL_LABEL_LEN 64
|
||||||
|
|
||||||
|
/* Journal limits (in bytes) */
|
||||||
|
#define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024)
|
||||||
|
#define OCFS2_MAX_JOURNAL_SIZE (500 * 1024 * 1024)
|
||||||
|
|
||||||
|
/* Static description of one system inode type (see ocfs2_system_inodes). */
struct ocfs2_system_inode_info {
	char	*si_name;	/* File name; per-slot types hold a
				   printf template taking the slot */
	int	si_iflags;	/* OCFS2_*_FL flags for the inode */
	int	si_mode;	/* File type and permission bits */
};
|
||||||
|
|
||||||
|
/* System file index */
|
||||||
|
enum {
	/* Global system inodes */
	BAD_BLOCK_SYSTEM_INODE = 0,
	GLOBAL_INODE_ALLOC_SYSTEM_INODE,
	SLOT_MAP_SYSTEM_INODE,
	HEARTBEAT_SYSTEM_INODE,
	GLOBAL_BITMAP_SYSTEM_INODE,

	/* Everything after the last global inode has one copy per slot */
	ORPHAN_DIR_SYSTEM_INODE,
	EXTENT_ALLOC_SYSTEM_INODE,
	INODE_ALLOC_SYSTEM_INODE,
	JOURNAL_SYSTEM_INODE,
	LOCAL_ALLOC_SYSTEM_INODE,
	TRUNCATE_LOG_SYSTEM_INODE,

	NUM_SYSTEM_INODES
};

/* Range markers into the index above */
#define OCFS2_FIRST_ONLINE_SYSTEM_INODE SLOT_MAP_SYSTEM_INODE
#define OCFS2_LAST_GLOBAL_SYSTEM_INODE GLOBAL_BITMAP_SYSTEM_INODE
|
||||||
|
|
||||||
|
static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = {
|
||||||
|
/* Global system inodes (single copy) */
|
||||||
|
/* The first two are only used from userspace mfks/tunefs */
|
||||||
|
[BAD_BLOCK_SYSTEM_INODE] = { "bad_blocks", 0, S_IFREG | 0644 },
|
||||||
|
[GLOBAL_INODE_ALLOC_SYSTEM_INODE] = { "global_inode_alloc", OCFS2_BITMAP_FL | OCFS2_CHAIN_FL, S_IFREG | 0644 },
|
||||||
|
|
||||||
|
/* These are used by the running filesystem */
|
||||||
|
[SLOT_MAP_SYSTEM_INODE] = { "slot_map", 0, S_IFREG | 0644 },
|
||||||
|
[HEARTBEAT_SYSTEM_INODE] = { "heartbeat", OCFS2_HEARTBEAT_FL, S_IFREG | 0644 },
|
||||||
|
[GLOBAL_BITMAP_SYSTEM_INODE] = { "global_bitmap", 0, S_IFREG | 0644 },
|
||||||
|
|
||||||
|
/* Slot-specific system inodes (one copy per slot) */
|
||||||
|
[ORPHAN_DIR_SYSTEM_INODE] = { "orphan_dir:%04d", 0, S_IFDIR | 0755 },
|
||||||
|
[EXTENT_ALLOC_SYSTEM_INODE] = { "extent_alloc:%04d", OCFS2_BITMAP_FL | OCFS2_CHAIN_FL, S_IFREG | 0644 },
|
||||||
|
[INODE_ALLOC_SYSTEM_INODE] = { "inode_alloc:%04d", OCFS2_BITMAP_FL | OCFS2_CHAIN_FL, S_IFREG | 0644 },
|
||||||
|
[JOURNAL_SYSTEM_INODE] = { "journal:%04d", OCFS2_JOURNAL_FL, S_IFREG | 0644 },
|
||||||
|
[LOCAL_ALLOC_SYSTEM_INODE] = { "local_alloc:%04d", OCFS2_BITMAP_FL | OCFS2_LOCAL_ALLOC_FL, S_IFREG | 0644 },
|
||||||
|
[TRUNCATE_LOG_SYSTEM_INODE] = { "truncate_log:%04d", OCFS2_DEALLOC_FL, S_IFREG | 0644 }
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Parameter passed from mount.ocfs2 to module */
|
||||||
|
#define OCFS2_HB_NONE "heartbeat=none"
|
||||||
|
#define OCFS2_HB_LOCAL "heartbeat=local"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* OCFS2 directory file types. Only the low 3 bits are used. The
|
||||||
|
* other bits are reserved for now.
|
||||||
|
*/
|
||||||
|
#define OCFS2_FT_UNKNOWN 0
|
||||||
|
#define OCFS2_FT_REG_FILE 1
|
||||||
|
#define OCFS2_FT_DIR 2
|
||||||
|
#define OCFS2_FT_CHRDEV 3
|
||||||
|
#define OCFS2_FT_BLKDEV 4
|
||||||
|
#define OCFS2_FT_FIFO 5
|
||||||
|
#define OCFS2_FT_SOCK 6
|
||||||
|
#define OCFS2_FT_SYMLINK 7
|
||||||
|
|
||||||
|
#define OCFS2_FT_MAX 8
|
||||||
|
|
||||||
|
/*
|
||||||
|
* OCFS2_DIR_PAD defines the directory entries boundaries
|
||||||
|
*
|
||||||
|
* NOTE: It must be a multiple of 4
|
||||||
|
*/
|
||||||
|
#define OCFS2_DIR_PAD 4
|
||||||
|
#define OCFS2_DIR_ROUND (OCFS2_DIR_PAD - 1)
|
||||||
|
#define OCFS2_DIR_MEMBER_LEN offsetof(struct ocfs2_dir_entry, name)
|
||||||
|
#define OCFS2_DIR_REC_LEN(name_len) (((name_len) + OCFS2_DIR_MEMBER_LEN + \
|
||||||
|
OCFS2_DIR_ROUND) & \
|
||||||
|
~OCFS2_DIR_ROUND)
|
||||||
|
|
||||||
|
#define OCFS2_LINK_MAX 32000
|
||||||
|
|
||||||
|
#define S_SHIFT 12
|
||||||
|
static unsigned char ocfs2_type_by_mode[S_IFMT >> S_SHIFT] = {
|
||||||
|
[S_IFREG >> S_SHIFT] = OCFS2_FT_REG_FILE,
|
||||||
|
[S_IFDIR >> S_SHIFT] = OCFS2_FT_DIR,
|
||||||
|
[S_IFCHR >> S_SHIFT] = OCFS2_FT_CHRDEV,
|
||||||
|
[S_IFBLK >> S_SHIFT] = OCFS2_FT_BLKDEV,
|
||||||
|
[S_IFIFO >> S_SHIFT] = OCFS2_FT_FIFO,
|
||||||
|
[S_IFSOCK >> S_SHIFT] = OCFS2_FT_SOCK,
|
||||||
|
[S_IFLNK >> S_SHIFT] = OCFS2_FT_SYMLINK,
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Convenience casts
|
||||||
|
*/
|
||||||
|
#define OCFS2_RAW_SB(dinode) (&((dinode)->id2.i_super))
|
||||||
|
|
||||||
|
/*
|
||||||
|
* On disk extent record for OCFS2
|
||||||
|
* It describes a range of clusters on disk.
|
||||||
|
*/
|
||||||
|
struct ocfs2_extent_rec {
|
||||||
|
/*00*/ __le32 e_cpos; /* Offset into the file, in clusters */
|
||||||
|
__le32 e_clusters; /* Clusters covered by this extent */
|
||||||
|
__le64 e_blkno; /* Physical disk offset, in blocks */
|
||||||
|
/*10*/
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ocfs2_chain_rec {
|
||||||
|
__le32 c_free; /* Number of free bits in this chain. */
|
||||||
|
__le32 c_total; /* Number of total bits in this chain */
|
||||||
|
__le64 c_blkno; /* Physical disk offset (blocks) of 1st group */
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ocfs2_truncate_rec {
|
||||||
|
__le32 t_start; /* 1st cluster in this log */
|
||||||
|
__le32 t_clusters; /* Number of total clusters covered */
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* On disk extent list for OCFS2 (node in the tree). Note that this
|
||||||
|
* is contained inside ocfs2_dinode or ocfs2_extent_block, so the
|
||||||
|
* offsets are relative to ocfs2_dinode.id2.i_list or
|
||||||
|
* ocfs2_extent_block.h_list, respectively.
|
||||||
|
*/
|
||||||
|
struct ocfs2_extent_list {
|
||||||
|
/*00*/ __le16 l_tree_depth; /* Extent tree depth from this
|
||||||
|
point. 0 means data extents
|
||||||
|
hang directly off this
|
||||||
|
header (a leaf) */
|
||||||
|
__le16 l_count; /* Number of extent records */
|
||||||
|
__le16 l_next_free_rec; /* Next unused extent slot */
|
||||||
|
__le16 l_reserved1;
|
||||||
|
__le64 l_reserved2; /* Pad to
|
||||||
|
sizeof(ocfs2_extent_rec) */
|
||||||
|
/*10*/ struct ocfs2_extent_rec l_recs[0]; /* Extent records */
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* On disk allocation chain list for OCFS2. Note that this is
|
||||||
|
* contained inside ocfs2_dinode, so the offsets are relative to
|
||||||
|
* ocfs2_dinode.id2.i_chain.
|
||||||
|
*/
|
||||||
|
struct ocfs2_chain_list {
|
||||||
|
/*00*/ __le16 cl_cpg; /* Clusters per Block Group */
|
||||||
|
__le16 cl_bpc; /* Bits per cluster */
|
||||||
|
__le16 cl_count; /* Total chains in this list */
|
||||||
|
__le16 cl_next_free_rec; /* Next unused chain slot */
|
||||||
|
__le64 cl_reserved1;
|
||||||
|
/*10*/ struct ocfs2_chain_rec cl_recs[0]; /* Chain records */
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* On disk deallocation log for OCFS2. Note that this is
|
||||||
|
* contained inside ocfs2_dinode, so the offsets are relative to
|
||||||
|
* ocfs2_dinode.id2.i_dealloc.
|
||||||
|
*/
|
||||||
|
struct ocfs2_truncate_log {
|
||||||
|
/*00*/ __le16 tl_count; /* Total records in this log */
|
||||||
|
__le16 tl_used; /* Number of records in use */
|
||||||
|
__le32 tl_reserved1;
|
||||||
|
/*08*/ struct ocfs2_truncate_rec tl_recs[0]; /* Truncate records */
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* On disk extent block (indirect block) for OCFS2
|
||||||
|
*/
|
||||||
|
struct ocfs2_extent_block
|
||||||
|
{
|
||||||
|
/*00*/ __u8 h_signature[8]; /* Signature for verification */
|
||||||
|
__le64 h_reserved1;
|
||||||
|
/*10*/ __le16 h_suballoc_slot; /* Slot suballocator this
|
||||||
|
extent_header belongs to */
|
||||||
|
__le16 h_suballoc_bit; /* Bit offset in suballocator
|
||||||
|
block group */
|
||||||
|
__le32 h_fs_generation; /* Must match super block */
|
||||||
|
__le64 h_blkno; /* Offset on disk, in blocks */
|
||||||
|
/*20*/ __le64 h_reserved3;
|
||||||
|
__le64 h_next_leaf_blk; /* Offset on disk, in blocks,
|
||||||
|
of next leaf header pointing
|
||||||
|
to data */
|
||||||
|
/*30*/ struct ocfs2_extent_list h_list; /* Extent record list */
|
||||||
|
/* Actual on-disk size is one block */
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* On disk superblock for OCFS2
|
||||||
|
* Note that it is contained inside an ocfs2_dinode, so all offsets
|
||||||
|
* are relative to the start of ocfs2_dinode.id2.
|
||||||
|
*/
|
||||||
|
struct ocfs2_super_block {
|
||||||
|
/*00*/ __le16 s_major_rev_level;
|
||||||
|
__le16 s_minor_rev_level;
|
||||||
|
__le16 s_mnt_count;
|
||||||
|
__le16 s_max_mnt_count;
|
||||||
|
__le16 s_state; /* File system state */
|
||||||
|
__le16 s_errors; /* Behaviour when detecting errors */
|
||||||
|
__le32 s_checkinterval; /* Max time between checks */
|
||||||
|
/*10*/ __le64 s_lastcheck; /* Time of last check */
|
||||||
|
__le32 s_creator_os; /* OS */
|
||||||
|
__le32 s_feature_compat; /* Compatible feature set */
|
||||||
|
/*20*/ __le32 s_feature_incompat; /* Incompatible feature set */
|
||||||
|
__le32 s_feature_ro_compat; /* Readonly-compatible feature set */
|
||||||
|
__le64 s_root_blkno; /* Offset, in blocks, of root directory
|
||||||
|
dinode */
|
||||||
|
/*30*/ __le64 s_system_dir_blkno; /* Offset, in blocks, of system
|
||||||
|
directory dinode */
|
||||||
|
__le32 s_blocksize_bits; /* Blocksize for this fs */
|
||||||
|
__le32 s_clustersize_bits; /* Clustersize for this fs */
|
||||||
|
/*40*/ __le16 s_max_slots; /* Max number of simultaneous mounts
|
||||||
|
before tunefs required */
|
||||||
|
__le16 s_reserved1;
|
||||||
|
__le32 s_reserved2;
|
||||||
|
__le64 s_first_cluster_group; /* Block offset of 1st cluster
|
||||||
|
* group header */
|
||||||
|
/*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */
|
||||||
|
/*90*/ __u8 s_uuid[OCFS2_VOL_UUID_LEN]; /* 128-bit uuid */
|
||||||
|
/*A0*/
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Local allocation bitmap for OCFS2 slots
|
||||||
|
* Note that it exists inside an ocfs2_dinode, so all offsets are
|
||||||
|
* relative to the start of ocfs2_dinode.id2.
|
||||||
|
*/
|
||||||
|
struct ocfs2_local_alloc
|
||||||
|
{
|
||||||
|
/*00*/ __le32 la_bm_off; /* Starting bit offset in main bitmap */
|
||||||
|
__le16 la_size; /* Size of included bitmap, in bytes */
|
||||||
|
__le16 la_reserved1;
|
||||||
|
__le64 la_reserved2;
|
||||||
|
/*10*/ __u8 la_bitmap[0];
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* On disk inode for OCFS2
|
||||||
|
*/
|
||||||
|
struct ocfs2_dinode {
|
||||||
|
/*00*/ __u8 i_signature[8]; /* Signature for validation */
|
||||||
|
__le32 i_generation; /* Generation number */
|
||||||
|
__le16 i_suballoc_slot; /* Slot suballocator this inode
|
||||||
|
belongs to */
|
||||||
|
__le16 i_suballoc_bit; /* Bit offset in suballocator
|
||||||
|
block group */
|
||||||
|
/*10*/ __le32 i_reserved0;
|
||||||
|
__le32 i_clusters; /* Cluster count */
|
||||||
|
__le32 i_uid; /* Owner UID */
|
||||||
|
__le32 i_gid; /* Owning GID */
|
||||||
|
/*20*/ __le64 i_size; /* Size in bytes */
|
||||||
|
__le16 i_mode; /* File mode */
|
||||||
|
__le16 i_links_count; /* Links count */
|
||||||
|
__le32 i_flags; /* File flags */
|
||||||
|
/*30*/ __le64 i_atime; /* Access time */
|
||||||
|
__le64 i_ctime; /* Creation time */
|
||||||
|
/*40*/ __le64 i_mtime; /* Modification time */
|
||||||
|
__le64 i_dtime; /* Deletion time */
|
||||||
|
/*50*/ __le64 i_blkno; /* Offset on disk, in blocks */
|
||||||
|
__le64 i_last_eb_blk; /* Pointer to last extent
|
||||||
|
block */
|
||||||
|
/*60*/ __le32 i_fs_generation; /* Generation per fs-instance */
|
||||||
|
__le32 i_atime_nsec;
|
||||||
|
__le32 i_ctime_nsec;
|
||||||
|
__le32 i_mtime_nsec;
|
||||||
|
/*70*/ __le64 i_reserved1[9];
|
||||||
|
/*B8*/ union {
|
||||||
|
__le64 i_pad1; /* Generic way to refer to this
|
||||||
|
64bit union */
|
||||||
|
struct {
|
||||||
|
__le64 i_rdev; /* Device number */
|
||||||
|
} dev1;
|
||||||
|
struct { /* Info for bitmap system
|
||||||
|
inodes */
|
||||||
|
__le32 i_used; /* Bits (ie, clusters) used */
|
||||||
|
__le32 i_total; /* Total bits (clusters)
|
||||||
|
available */
|
||||||
|
} bitmap1;
|
||||||
|
struct { /* Info for journal system
|
||||||
|
inodes */
|
||||||
|
__le32 ij_flags; /* Mounted, version, etc. */
|
||||||
|
__le32 ij_pad;
|
||||||
|
} journal1;
|
||||||
|
} id1; /* Inode type dependant 1 */
|
||||||
|
/*C0*/ union {
|
||||||
|
struct ocfs2_super_block i_super;
|
||||||
|
struct ocfs2_local_alloc i_lab;
|
||||||
|
struct ocfs2_chain_list i_chain;
|
||||||
|
struct ocfs2_extent_list i_list;
|
||||||
|
struct ocfs2_truncate_log i_dealloc;
|
||||||
|
__u8 i_symlink[0];
|
||||||
|
} id2;
|
||||||
|
/* Actual on-disk size is one block */
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* On-disk directory entry structure for OCFS2
|
||||||
|
*
|
||||||
|
* Packed as this structure could be accessed unaligned on 64-bit platforms
|
||||||
|
*/
|
||||||
|
struct ocfs2_dir_entry {
|
||||||
|
/*00*/ __le64 inode; /* Inode number */
|
||||||
|
__le16 rec_len; /* Directory entry length */
|
||||||
|
__u8 name_len; /* Name length */
|
||||||
|
__u8 file_type;
|
||||||
|
/*0C*/ char name[OCFS2_MAX_FILENAME_LEN]; /* File name */
|
||||||
|
/* Actual on-disk length specified by rec_len */
|
||||||
|
} __attribute__ ((packed));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* On disk allocator group structure for OCFS2
|
||||||
|
*/
|
||||||
|
struct ocfs2_group_desc
|
||||||
|
{
|
||||||
|
/*00*/ __u8 bg_signature[8]; /* Signature for validation */
|
||||||
|
__le16 bg_size; /* Size of included bitmap in
|
||||||
|
bytes. */
|
||||||
|
__le16 bg_bits; /* Bits represented by this
|
||||||
|
group. */
|
||||||
|
__le16 bg_free_bits_count; /* Free bits count */
|
||||||
|
__le16 bg_chain; /* What chain I am in. */
|
||||||
|
/*10*/ __le32 bg_generation;
|
||||||
|
__le32 bg_reserved1;
|
||||||
|
__le64 bg_next_group; /* Next group in my list, in
|
||||||
|
blocks */
|
||||||
|
/*20*/ __le64 bg_parent_dinode; /* dinode which owns me, in
|
||||||
|
blocks */
|
||||||
|
__le64 bg_blkno; /* Offset on disk, in blocks */
|
||||||
|
/*30*/ __le64 bg_reserved2[2];
|
||||||
|
/*40*/ __u8 bg_bitmap[0];
|
||||||
|
};
|
||||||
|
|
||||||
|
#ifdef __KERNEL__
|
||||||
|
static inline int ocfs2_fast_symlink_chars(struct super_block *sb)
|
||||||
|
{
|
||||||
|
return sb->s_blocksize -
|
||||||
|
offsetof(struct ocfs2_dinode, id2.i_symlink);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int ocfs2_extent_recs_per_inode(struct super_block *sb)
|
||||||
|
{
|
||||||
|
int size;
|
||||||
|
|
||||||
|
size = sb->s_blocksize -
|
||||||
|
offsetof(struct ocfs2_dinode, id2.i_list.l_recs);
|
||||||
|
|
||||||
|
return size / sizeof(struct ocfs2_extent_rec);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int ocfs2_chain_recs_per_inode(struct super_block *sb)
|
||||||
|
{
|
||||||
|
int size;
|
||||||
|
|
||||||
|
size = sb->s_blocksize -
|
||||||
|
offsetof(struct ocfs2_dinode, id2.i_chain.cl_recs);
|
||||||
|
|
||||||
|
return size / sizeof(struct ocfs2_chain_rec);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline u16 ocfs2_extent_recs_per_eb(struct super_block *sb)
|
||||||
|
{
|
||||||
|
int size;
|
||||||
|
|
||||||
|
size = sb->s_blocksize -
|
||||||
|
offsetof(struct ocfs2_extent_block, h_list.l_recs);
|
||||||
|
|
||||||
|
return size / sizeof(struct ocfs2_extent_rec);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline u16 ocfs2_local_alloc_size(struct super_block *sb)
|
||||||
|
{
|
||||||
|
u16 size;
|
||||||
|
|
||||||
|
size = sb->s_blocksize -
|
||||||
|
offsetof(struct ocfs2_dinode, id2.i_lab.la_bitmap);
|
||||||
|
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int ocfs2_group_bitmap_size(struct super_block *sb)
|
||||||
|
{
|
||||||
|
int size;
|
||||||
|
|
||||||
|
size = sb->s_blocksize -
|
||||||
|
offsetof(struct ocfs2_group_desc, bg_bitmap);
|
||||||
|
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int ocfs2_truncate_recs_per_inode(struct super_block *sb)
|
||||||
|
{
|
||||||
|
int size;
|
||||||
|
|
||||||
|
size = sb->s_blocksize -
|
||||||
|
offsetof(struct ocfs2_dinode, id2.i_dealloc.tl_recs);
|
||||||
|
|
||||||
|
return size / sizeof(struct ocfs2_truncate_rec);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
static inline int ocfs2_fast_symlink_chars(int blocksize)
|
||||||
|
{
|
||||||
|
return blocksize - offsetof(struct ocfs2_dinode, id2.i_symlink);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int ocfs2_extent_recs_per_inode(int blocksize)
|
||||||
|
{
|
||||||
|
int size;
|
||||||
|
|
||||||
|
size = blocksize -
|
||||||
|
offsetof(struct ocfs2_dinode, id2.i_list.l_recs);
|
||||||
|
|
||||||
|
return size / sizeof(struct ocfs2_extent_rec);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int ocfs2_chain_recs_per_inode(int blocksize)
|
||||||
|
{
|
||||||
|
int size;
|
||||||
|
|
||||||
|
size = blocksize -
|
||||||
|
offsetof(struct ocfs2_dinode, id2.i_chain.cl_recs);
|
||||||
|
|
||||||
|
return size / sizeof(struct ocfs2_chain_rec);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int ocfs2_extent_recs_per_eb(int blocksize)
|
||||||
|
{
|
||||||
|
int size;
|
||||||
|
|
||||||
|
size = blocksize -
|
||||||
|
offsetof(struct ocfs2_extent_block, h_list.l_recs);
|
||||||
|
|
||||||
|
return size / sizeof(struct ocfs2_extent_rec);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int ocfs2_local_alloc_size(int blocksize)
|
||||||
|
{
|
||||||
|
int size;
|
||||||
|
|
||||||
|
size = blocksize -
|
||||||
|
offsetof(struct ocfs2_dinode, id2.i_lab.la_bitmap);
|
||||||
|
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int ocfs2_group_bitmap_size(int blocksize)
|
||||||
|
{
|
||||||
|
int size;
|
||||||
|
|
||||||
|
size = blocksize -
|
||||||
|
offsetof(struct ocfs2_group_desc, bg_bitmap);
|
||||||
|
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int ocfs2_truncate_recs_per_inode(int blocksize)
|
||||||
|
{
|
||||||
|
int size;
|
||||||
|
|
||||||
|
size = blocksize -
|
||||||
|
offsetof(struct ocfs2_dinode, id2.i_dealloc.tl_recs);
|
||||||
|
|
||||||
|
return size / sizeof(struct ocfs2_truncate_rec);
|
||||||
|
}
|
||||||
|
#endif /* __KERNEL__ */
|
||||||
|
|
||||||
|
|
||||||
|
static inline int ocfs2_system_inode_is_global(int type)
|
||||||
|
{
|
||||||
|
return ((type >= 0) &&
|
||||||
|
(type <= OCFS2_LAST_GLOBAL_SYSTEM_INODE));
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int ocfs2_sprintf_system_inode_name(char *buf, int len,
|
||||||
|
int type, int slot)
|
||||||
|
{
|
||||||
|
int chars;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Global system inodes can only have one copy. Everything
|
||||||
|
* after OCFS2_LAST_GLOBAL_SYSTEM_INODE in the system inode
|
||||||
|
* list has a copy per slot.
|
||||||
|
*/
|
||||||
|
if (type <= OCFS2_LAST_GLOBAL_SYSTEM_INODE)
|
||||||
|
chars = snprintf(buf, len,
|
||||||
|
ocfs2_system_inodes[type].si_name);
|
||||||
|
else
|
||||||
|
chars = snprintf(buf, len,
|
||||||
|
ocfs2_system_inodes[type].si_name,
|
||||||
|
slot);
|
||||||
|
|
||||||
|
return chars;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void ocfs2_set_de_type(struct ocfs2_dir_entry *de,
|
||||||
|
umode_t mode)
|
||||||
|
{
|
||||||
|
de->file_type = ocfs2_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* _OCFS2_FS_H */
|
||||||
|
|
|
@ -0,0 +1,73 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* ocfs2_lockid.h
|
||||||
|
*
|
||||||
|
* Defines OCFS2 lockid bits.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002, 2005 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef OCFS2_LOCKID_H
|
||||||
|
#define OCFS2_LOCKID_H
|
||||||
|
|
||||||
|
/* lock ids are made up in the following manner:
|
||||||
|
* name[0] --> type
|
||||||
|
* name[1-6] --> 6 pad characters, reserved for now
|
||||||
|
* name[7-22] --> block number, expressed in hex as 16 chars
|
||||||
|
* name[23-30] --> i_generation, expressed in hex 8 chars
|
||||||
|
* name[31] --> '\0' */
|
||||||
|
#define OCFS2_LOCK_ID_MAX_LEN 32
|
||||||
|
#define OCFS2_LOCK_ID_PAD "000000"
|
||||||
|
|
||||||
|
/*
 * Lock types.  The type is encoded as the first character of a lock id
 * (see ocfs2_lock_type_char() for the character used for each type).
 */
enum ocfs2_lock_type {
	OCFS2_LOCK_TYPE_META	= 0,
	OCFS2_LOCK_TYPE_DATA	= 1,
	OCFS2_LOCK_TYPE_SUPER	= 2,
	OCFS2_LOCK_TYPE_RENAME	= 3,
	OCFS2_LOCK_TYPE_RW	= 4,
	/* Number of lock types above; must remain the last entry. */
	OCFS2_NUM_LOCK_TYPES	= 5
};
|
||||||
|
|
||||||
|
static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)
|
||||||
|
{
|
||||||
|
char c;
|
||||||
|
switch (type) {
|
||||||
|
case OCFS2_LOCK_TYPE_META:
|
||||||
|
c = 'M';
|
||||||
|
break;
|
||||||
|
case OCFS2_LOCK_TYPE_DATA:
|
||||||
|
c = 'D';
|
||||||
|
break;
|
||||||
|
case OCFS2_LOCK_TYPE_SUPER:
|
||||||
|
c = 'S';
|
||||||
|
break;
|
||||||
|
case OCFS2_LOCK_TYPE_RENAME:
|
||||||
|
c = 'R';
|
||||||
|
break;
|
||||||
|
case OCFS2_LOCK_TYPE_RW:
|
||||||
|
c = 'W';
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
c = '\0';
|
||||||
|
}
|
||||||
|
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* OCFS2_LOCKID_H */
|
|
@ -0,0 +1,303 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* slot_map.c
|
||||||
|
*
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/types.h>
|
||||||
|
#include <linux/slab.h>
|
||||||
|
#include <linux/highmem.h>
|
||||||
|
#include <linux/smp_lock.h>
|
||||||
|
|
||||||
|
#define MLOG_MASK_PREFIX ML_SUPER
|
||||||
|
#include <cluster/masklog.h>
|
||||||
|
|
||||||
|
#include "ocfs2.h"
|
||||||
|
|
||||||
|
#include "dlmglue.h"
|
||||||
|
#include "extent_map.h"
|
||||||
|
#include "heartbeat.h"
|
||||||
|
#include "inode.h"
|
||||||
|
#include "slot_map.h"
|
||||||
|
#include "super.h"
|
||||||
|
#include "sysfile.h"
|
||||||
|
|
||||||
|
#include "buffer_head_io.h"
|
||||||
|
|
||||||
|
static s16 __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
|
||||||
|
s16 global);
|
||||||
|
static void __ocfs2_fill_slot(struct ocfs2_slot_info *si,
|
||||||
|
s16 slot_num,
|
||||||
|
s16 node_num);
|
||||||
|
|
||||||
|
/* Use the slot information we've collected to create a map of mounted
|
||||||
|
* nodes. Should be holding an EX on super block. assumes slot info is
|
||||||
|
* up to date. Note that we call this *after* we find a slot, so our
|
||||||
|
* own node should be set in the map too... */
|
||||||
|
void ocfs2_populate_mounted_map(struct ocfs2_super *osb)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
struct ocfs2_slot_info *si = osb->slot_info;
|
||||||
|
|
||||||
|
spin_lock(&si->si_lock);
|
||||||
|
|
||||||
|
for (i = 0; i < si->si_size; i++)
|
||||||
|
if (si->si_global_node_nums[i] != OCFS2_INVALID_SLOT)
|
||||||
|
ocfs2_node_map_set_bit(osb, &osb->mounted_map,
|
||||||
|
si->si_global_node_nums[i]);
|
||||||
|
|
||||||
|
spin_unlock(&si->si_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* post the slot information on disk into our slot_info struct. */
|
||||||
|
void ocfs2_update_slot_info(struct ocfs2_slot_info *si)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
__le16 *disk_info;
|
||||||
|
|
||||||
|
/* we don't read the slot block here as ocfs2_super_lock
|
||||||
|
* should've made sure we have the most recent copy. */
|
||||||
|
spin_lock(&si->si_lock);
|
||||||
|
disk_info = (__le16 *) si->si_bh->b_data;
|
||||||
|
|
||||||
|
for (i = 0; i < si->si_size; i++)
|
||||||
|
si->si_global_node_nums[i] = le16_to_cpu(disk_info[i]);
|
||||||
|
|
||||||
|
spin_unlock(&si->si_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* post the our slot info stuff into it's destination bh and write it
|
||||||
|
* out. */
|
||||||
|
int ocfs2_update_disk_slots(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_slot_info *si)
|
||||||
|
{
|
||||||
|
int status, i;
|
||||||
|
__le16 *disk_info = (__le16 *) si->si_bh->b_data;
|
||||||
|
|
||||||
|
spin_lock(&si->si_lock);
|
||||||
|
for (i = 0; i < si->si_size; i++)
|
||||||
|
disk_info[i] = cpu_to_le16(si->si_global_node_nums[i]);
|
||||||
|
spin_unlock(&si->si_lock);
|
||||||
|
|
||||||
|
status = ocfs2_write_block(osb, si->si_bh, si->si_inode);
|
||||||
|
if (status < 0)
|
||||||
|
mlog_errno(status);
|
||||||
|
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* try to find global node in the slot info. Returns
|
||||||
|
* OCFS2_INVALID_SLOT if nothing is found. */
|
||||||
|
static s16 __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
|
||||||
|
s16 global)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
s16 ret = OCFS2_INVALID_SLOT;
|
||||||
|
|
||||||
|
for(i = 0; i < si->si_num_slots; i++) {
|
||||||
|
if (global == si->si_global_node_nums[i]) {
|
||||||
|
ret = (s16) i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static s16 __ocfs2_find_empty_slot(struct ocfs2_slot_info *si)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
s16 ret = OCFS2_INVALID_SLOT;
|
||||||
|
|
||||||
|
for(i = 0; i < si->si_num_slots; i++) {
|
||||||
|
if (OCFS2_INVALID_SLOT == si->si_global_node_nums[i]) {
|
||||||
|
ret = (s16) i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Locked wrapper around __ocfs2_node_num_to_slot(): takes si->si_lock for
 * the duration of the lookup.
 *
 * Returns the slot holding node @global, or OCFS2_INVALID_SLOT.
 */
s16 ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
			   s16 global)
{
	s16 slot;

	spin_lock(&si->si_lock);
	slot = __ocfs2_node_num_to_slot(si, global);
	spin_unlock(&si->si_lock);

	return slot;
}
|
||||||
|
|
||||||
|
static void __ocfs2_fill_slot(struct ocfs2_slot_info *si,
|
||||||
|
s16 slot_num,
|
||||||
|
s16 node_num)
|
||||||
|
{
|
||||||
|
BUG_ON(slot_num == OCFS2_INVALID_SLOT);
|
||||||
|
BUG_ON(slot_num >= si->si_num_slots);
|
||||||
|
BUG_ON((node_num != O2NM_INVALID_NODE_NUM) &&
|
||||||
|
(node_num >= O2NM_MAX_NODES));
|
||||||
|
|
||||||
|
si->si_global_node_nums[slot_num] = node_num;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ocfs2_clear_slot(struct ocfs2_slot_info *si,
|
||||||
|
s16 slot_num)
|
||||||
|
{
|
||||||
|
spin_lock(&si->si_lock);
|
||||||
|
__ocfs2_fill_slot(si, slot_num, OCFS2_INVALID_SLOT);
|
||||||
|
spin_unlock(&si->si_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
int ocfs2_init_slot_info(struct ocfs2_super *osb)
|
||||||
|
{
|
||||||
|
int status, i;
|
||||||
|
u64 blkno;
|
||||||
|
struct inode *inode = NULL;
|
||||||
|
struct buffer_head *bh = NULL;
|
||||||
|
struct ocfs2_slot_info *si;
|
||||||
|
|
||||||
|
si = kcalloc(1, sizeof(struct ocfs2_slot_info), GFP_KERNEL);
|
||||||
|
if (!si) {
|
||||||
|
status = -ENOMEM;
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
spin_lock_init(&si->si_lock);
|
||||||
|
si->si_num_slots = osb->max_slots;
|
||||||
|
si->si_size = OCFS2_MAX_SLOTS;
|
||||||
|
|
||||||
|
for(i = 0; i < si->si_num_slots; i++)
|
||||||
|
si->si_global_node_nums[i] = OCFS2_INVALID_SLOT;
|
||||||
|
|
||||||
|
inode = ocfs2_get_system_file_inode(osb, SLOT_MAP_SYSTEM_INODE,
|
||||||
|
OCFS2_INVALID_SLOT);
|
||||||
|
if (!inode) {
|
||||||
|
status = -EINVAL;
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
status = ocfs2_extent_map_get_blocks(inode, 0ULL, 1, &blkno, NULL);
|
||||||
|
if (status < 0) {
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
status = ocfs2_read_block(osb, blkno, &bh, 0, inode);
|
||||||
|
if (status < 0) {
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
si->si_inode = inode;
|
||||||
|
si->si_bh = bh;
|
||||||
|
osb->slot_info = si;
|
||||||
|
bail:
|
||||||
|
if (status < 0 && si)
|
||||||
|
ocfs2_free_slot_info(si);
|
||||||
|
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ocfs2_free_slot_info(struct ocfs2_slot_info *si)
|
||||||
|
{
|
||||||
|
if (si->si_inode)
|
||||||
|
iput(si->si_inode);
|
||||||
|
if (si->si_bh)
|
||||||
|
brelse(si->si_bh);
|
||||||
|
kfree(si);
|
||||||
|
}
|
||||||
|
|
||||||
|
int ocfs2_find_slot(struct ocfs2_super *osb)
|
||||||
|
{
|
||||||
|
int status;
|
||||||
|
s16 slot;
|
||||||
|
struct ocfs2_slot_info *si;
|
||||||
|
|
||||||
|
mlog_entry_void();
|
||||||
|
|
||||||
|
si = osb->slot_info;
|
||||||
|
|
||||||
|
ocfs2_update_slot_info(si);
|
||||||
|
|
||||||
|
spin_lock(&si->si_lock);
|
||||||
|
/* search for ourselves first and take the slot if it already
|
||||||
|
* exists. Perhaps we need to mark this in a variable for our
|
||||||
|
* own journal recovery? Possibly not, though we certainly
|
||||||
|
* need to warn to the user */
|
||||||
|
slot = __ocfs2_node_num_to_slot(si, osb->node_num);
|
||||||
|
if (slot == OCFS2_INVALID_SLOT) {
|
||||||
|
/* if no slot yet, then just take 1st available
|
||||||
|
* one. */
|
||||||
|
slot = __ocfs2_find_empty_slot(si);
|
||||||
|
if (slot == OCFS2_INVALID_SLOT) {
|
||||||
|
spin_unlock(&si->si_lock);
|
||||||
|
mlog(ML_ERROR, "no free slots available!\n");
|
||||||
|
status = -EINVAL;
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
} else
|
||||||
|
mlog(ML_NOTICE, "slot %d is already allocated to this node!\n",
|
||||||
|
slot);
|
||||||
|
|
||||||
|
__ocfs2_fill_slot(si, slot, osb->node_num);
|
||||||
|
osb->slot_num = slot;
|
||||||
|
spin_unlock(&si->si_lock);
|
||||||
|
|
||||||
|
mlog(ML_NOTICE, "taking node slot %d\n", osb->slot_num);
|
||||||
|
|
||||||
|
status = ocfs2_update_disk_slots(osb, si);
|
||||||
|
if (status < 0)
|
||||||
|
mlog_errno(status);
|
||||||
|
|
||||||
|
bail:
|
||||||
|
mlog_exit(status);
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ocfs2_put_slot(struct ocfs2_super *osb)
|
||||||
|
{
|
||||||
|
int status;
|
||||||
|
struct ocfs2_slot_info *si = osb->slot_info;
|
||||||
|
|
||||||
|
if (!si)
|
||||||
|
return;
|
||||||
|
|
||||||
|
ocfs2_update_slot_info(si);
|
||||||
|
|
||||||
|
spin_lock(&si->si_lock);
|
||||||
|
__ocfs2_fill_slot(si, osb->slot_num, OCFS2_INVALID_SLOT);
|
||||||
|
osb->slot_num = OCFS2_INVALID_SLOT;
|
||||||
|
spin_unlock(&si->si_lock);
|
||||||
|
|
||||||
|
status = ocfs2_update_disk_slots(osb, si);
|
||||||
|
if (status < 0) {
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
bail:
|
||||||
|
osb->slot_info = NULL;
|
||||||
|
ocfs2_free_slot_info(si);
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,66 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* slot_map.h
|
||||||
|
*
|
||||||
|
* Slot map data structure and function prototypes
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef SLOTMAP_H
|
||||||
|
#define SLOTMAP_H
|
||||||
|
|
||||||
|
struct ocfs2_slot_info {
|
||||||
|
spinlock_t si_lock;
|
||||||
|
|
||||||
|
struct inode *si_inode;
|
||||||
|
struct buffer_head *si_bh;
|
||||||
|
unsigned int si_num_slots;
|
||||||
|
unsigned int si_size;
|
||||||
|
s16 si_global_node_nums[OCFS2_MAX_SLOTS];
|
||||||
|
};
|
||||||
|
|
||||||
|
int ocfs2_init_slot_info(struct ocfs2_super *osb);
|
||||||
|
void ocfs2_free_slot_info(struct ocfs2_slot_info *si);
|
||||||
|
|
||||||
|
int ocfs2_find_slot(struct ocfs2_super *osb);
|
||||||
|
void ocfs2_put_slot(struct ocfs2_super *osb);
|
||||||
|
|
||||||
|
void ocfs2_update_slot_info(struct ocfs2_slot_info *si);
|
||||||
|
int ocfs2_update_disk_slots(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_slot_info *si);
|
||||||
|
|
||||||
|
s16 ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
|
||||||
|
s16 global);
|
||||||
|
void ocfs2_clear_slot(struct ocfs2_slot_info *si,
|
||||||
|
s16 slot_num);
|
||||||
|
|
||||||
|
void ocfs2_populate_mounted_map(struct ocfs2_super *osb);
|
||||||
|
|
||||||
|
static inline int ocfs2_is_empty_slot(struct ocfs2_slot_info *si,
|
||||||
|
int slot_num)
|
||||||
|
{
|
||||||
|
BUG_ON(slot_num == OCFS2_INVALID_SLOT);
|
||||||
|
assert_spin_locked(&si->si_lock);
|
||||||
|
|
||||||
|
return si->si_global_node_nums[slot_num] == OCFS2_INVALID_SLOT;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,132 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* suballoc.h
|
||||||
|
*
|
||||||
|
* Defines sub allocator api
|
||||||
|
*
|
||||||
|
* Copyright (C) 2003, 2004 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _CHAINALLOC_H_
|
||||||
|
#define _CHAINALLOC_H_
|
||||||
|
|
||||||
|
typedef int (group_search_t)(struct inode *,
|
||||||
|
struct buffer_head *,
|
||||||
|
u32,
|
||||||
|
u32,
|
||||||
|
u16 *,
|
||||||
|
u16 *);
|
||||||
|
|
||||||
|
struct ocfs2_alloc_context {
|
||||||
|
struct inode *ac_inode; /* which bitmap are we allocating from? */
|
||||||
|
struct buffer_head *ac_bh; /* file entry bh */
|
||||||
|
u32 ac_bits_wanted;
|
||||||
|
u32 ac_bits_given;
|
||||||
|
#define OCFS2_AC_USE_LOCAL 1
|
||||||
|
#define OCFS2_AC_USE_MAIN 2
|
||||||
|
#define OCFS2_AC_USE_INODE 3
|
||||||
|
#define OCFS2_AC_USE_META 4
|
||||||
|
u32 ac_which;
|
||||||
|
struct ocfs2_journal_handle *ac_handle;
|
||||||
|
|
||||||
|
/* these are used by the chain search */
|
||||||
|
u16 ac_chain;
|
||||||
|
int ac_allow_chain_relink;
|
||||||
|
group_search_t *ac_group_search;
|
||||||
|
};
|
||||||
|
|
||||||
|
void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac);
|
||||||
|
static inline int ocfs2_alloc_context_bits_left(struct ocfs2_alloc_context *ac)
|
||||||
|
{
|
||||||
|
return ac->ac_bits_wanted - ac->ac_bits_given;
|
||||||
|
}
|
||||||
|
|
||||||
|
int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_journal_handle *handle,
|
||||||
|
struct ocfs2_dinode *fe,
|
||||||
|
struct ocfs2_alloc_context **ac);
|
||||||
|
int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_journal_handle *handle,
|
||||||
|
struct ocfs2_alloc_context **ac);
|
||||||
|
int ocfs2_reserve_clusters(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_journal_handle *handle,
|
||||||
|
u32 bits_wanted,
|
||||||
|
struct ocfs2_alloc_context **ac);
|
||||||
|
|
||||||
|
int ocfs2_claim_metadata(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_journal_handle *handle,
|
||||||
|
struct ocfs2_alloc_context *ac,
|
||||||
|
u32 bits_wanted,
|
||||||
|
u16 *suballoc_bit_start,
|
||||||
|
u32 *num_bits,
|
||||||
|
u64 *blkno_start);
|
||||||
|
int ocfs2_claim_new_inode(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_journal_handle *handle,
|
||||||
|
struct ocfs2_alloc_context *ac,
|
||||||
|
u16 *suballoc_bit,
|
||||||
|
u64 *fe_blkno);
|
||||||
|
int ocfs2_claim_clusters(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_journal_handle *handle,
|
||||||
|
struct ocfs2_alloc_context *ac,
|
||||||
|
u32 min_clusters,
|
||||||
|
u32 *cluster_start,
|
||||||
|
u32 *num_clusters);
|
||||||
|
|
||||||
|
int ocfs2_free_dinode(struct ocfs2_journal_handle *handle,
|
||||||
|
struct inode *inode_alloc_inode,
|
||||||
|
struct buffer_head *inode_alloc_bh,
|
||||||
|
struct ocfs2_dinode *di);
|
||||||
|
int ocfs2_free_extent_block(struct ocfs2_journal_handle *handle,
|
||||||
|
struct inode *eb_alloc_inode,
|
||||||
|
struct buffer_head *eb_alloc_bh,
|
||||||
|
struct ocfs2_extent_block *eb);
|
||||||
|
int ocfs2_free_clusters(struct ocfs2_journal_handle *handle,
|
||||||
|
struct inode *bitmap_inode,
|
||||||
|
struct buffer_head *bitmap_bh,
|
||||||
|
u64 start_blk,
|
||||||
|
unsigned int num_clusters);
|
||||||
|
|
||||||
|
static inline u32 ocfs2_cluster_from_desc(struct ocfs2_super *osb,
|
||||||
|
u64 bg_blkno)
|
||||||
|
{
|
||||||
|
/* This should work for all block group descriptors as only
|
||||||
|
* the 1st group descriptor of the cluster bitmap is
|
||||||
|
* different. */
|
||||||
|
|
||||||
|
if (bg_blkno == osb->first_cluster_group_blkno)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* the rest of the block groups are located at the beginning
|
||||||
|
* of their 1st cluster, so a direct translation just
|
||||||
|
* works. */
|
||||||
|
return ocfs2_blocks_to_clusters(osb->sb, bg_blkno);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int ocfs2_is_cluster_bitmap(struct inode *inode)
|
||||||
|
{
|
||||||
|
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
|
||||||
|
return osb->bitmap_blkno == OCFS2_I(inode)->ip_blkno;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This is for local alloc ONLY. Others should use the task-specific
|
||||||
|
* apis above. */
|
||||||
|
int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb,
|
||||||
|
struct ocfs2_alloc_context *ac);
|
||||||
|
|
||||||
|
#endif /* _CHAINALLOC_H_ */
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,44 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* super.h
|
||||||
|
*
|
||||||
|
* Function prototypes
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef OCFS2_SUPER_H
|
||||||
|
#define OCFS2_SUPER_H
|
||||||
|
|
||||||
|
extern struct workqueue_struct *ocfs2_wq;
|
||||||
|
|
||||||
|
int ocfs2_publish_get_mount_state(struct ocfs2_super *osb,
|
||||||
|
int node_num);
|
||||||
|
|
||||||
|
void __ocfs2_error(struct super_block *sb,
|
||||||
|
const char *function,
|
||||||
|
const char *fmt, ...);
|
||||||
|
#define ocfs2_error(sb, fmt, args...) __ocfs2_error(sb, __PRETTY_FUNCTION__, fmt, ##args)
|
||||||
|
|
||||||
|
void __ocfs2_abort(struct super_block *sb,
|
||||||
|
const char *function,
|
||||||
|
const char *fmt, ...);
|
||||||
|
#define ocfs2_abort(sb, fmt, args...) __ocfs2_abort(sb, __PRETTY_FUNCTION__, fmt, ##args)
|
||||||
|
|
||||||
|
#endif /* OCFS2_SUPER_H */
|
|
@ -0,0 +1,180 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* linux/cluster/ssi/cfs/symlink.c
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License as
|
||||||
|
* published by the Free Software Foundation; either version 2 of
|
||||||
|
* the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE
|
||||||
|
* or NON INFRINGEMENT. See the GNU General Public License for more
|
||||||
|
* details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
*
|
||||||
|
* Questions/Comments/Bugfixes to ssic-linux-devel@lists.sourceforge.net
|
||||||
|
*
|
||||||
|
* Copyright (C) 1992 Rick Sladkey
|
||||||
|
*
|
||||||
|
* Optimization changes Copyright (C) 1994 Florian La Roche
|
||||||
|
*
|
||||||
|
* Jun 7 1999, cache symlink lookups in the page cache. -DaveM
|
||||||
|
*
|
||||||
|
* Portions Copyright (C) 2001 Compaq Computer Corporation
|
||||||
|
*
|
||||||
|
* ocfs2 symlink handling code.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2004, 2005 Oracle.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/fs.h>
|
||||||
|
#include <linux/types.h>
|
||||||
|
#include <linux/slab.h>
|
||||||
|
#include <linux/pagemap.h>
|
||||||
|
#include <linux/utsname.h>
|
||||||
|
|
||||||
|
#define MLOG_MASK_PREFIX ML_NAMEI
|
||||||
|
#include <cluster/masklog.h>
|
||||||
|
|
||||||
|
#include "ocfs2.h"
|
||||||
|
|
||||||
|
#include "alloc.h"
|
||||||
|
#include "file.h"
|
||||||
|
#include "inode.h"
|
||||||
|
#include "journal.h"
|
||||||
|
#include "symlink.h"
|
||||||
|
|
||||||
|
#include "buffer_head_io.h"
|
||||||
|
|
||||||
|
static char *ocfs2_page_getlink(struct dentry * dentry,
|
||||||
|
struct page **ppage);
|
||||||
|
static char *ocfs2_fast_symlink_getlink(struct inode *inode,
|
||||||
|
struct buffer_head **bh);
|
||||||
|
|
||||||
|
/* get the link contents into pagecache */
|
||||||
|
static char *ocfs2_page_getlink(struct dentry * dentry,
|
||||||
|
struct page **ppage)
|
||||||
|
{
|
||||||
|
struct page * page;
|
||||||
|
struct address_space *mapping = dentry->d_inode->i_mapping;
|
||||||
|
page = read_cache_page(mapping, 0,
|
||||||
|
(filler_t *)mapping->a_ops->readpage, NULL);
|
||||||
|
if (IS_ERR(page))
|
||||||
|
goto sync_fail;
|
||||||
|
wait_on_page_locked(page);
|
||||||
|
if (!PageUptodate(page))
|
||||||
|
goto async_fail;
|
||||||
|
*ppage = page;
|
||||||
|
return kmap(page);
|
||||||
|
|
||||||
|
async_fail:
|
||||||
|
page_cache_release(page);
|
||||||
|
return ERR_PTR(-EIO);
|
||||||
|
|
||||||
|
sync_fail:
|
||||||
|
return (char*)page;
|
||||||
|
}
|
||||||
|
|
||||||
|
static char *ocfs2_fast_symlink_getlink(struct inode *inode,
|
||||||
|
struct buffer_head **bh)
|
||||||
|
{
|
||||||
|
int status;
|
||||||
|
char *link = NULL;
|
||||||
|
struct ocfs2_dinode *fe;
|
||||||
|
|
||||||
|
mlog_entry_void();
|
||||||
|
|
||||||
|
status = ocfs2_read_block(OCFS2_SB(inode->i_sb),
|
||||||
|
OCFS2_I(inode)->ip_blkno,
|
||||||
|
bh,
|
||||||
|
OCFS2_BH_CACHED,
|
||||||
|
inode);
|
||||||
|
if (status < 0) {
|
||||||
|
mlog_errno(status);
|
||||||
|
link = ERR_PTR(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
fe = (struct ocfs2_dinode *) (*bh)->b_data;
|
||||||
|
link = (char *) fe->id2.i_symlink;
|
||||||
|
bail:
|
||||||
|
mlog_exit(status);
|
||||||
|
|
||||||
|
return link;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int ocfs2_readlink(struct dentry *dentry,
|
||||||
|
char __user *buffer,
|
||||||
|
int buflen)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
char *link;
|
||||||
|
struct buffer_head *bh = NULL;
|
||||||
|
struct inode *inode = dentry->d_inode;
|
||||||
|
|
||||||
|
mlog_entry_void();
|
||||||
|
|
||||||
|
link = ocfs2_fast_symlink_getlink(inode, &bh);
|
||||||
|
if (IS_ERR(link)) {
|
||||||
|
ret = PTR_ERR(link);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = vfs_readlink(dentry, buffer, buflen, link);
|
||||||
|
|
||||||
|
brelse(bh);
|
||||||
|
out:
|
||||||
|
mlog_exit(ret);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void *ocfs2_follow_link(struct dentry *dentry,
|
||||||
|
struct nameidata *nd)
|
||||||
|
{
|
||||||
|
int status;
|
||||||
|
char *link;
|
||||||
|
struct inode *inode = dentry->d_inode;
|
||||||
|
struct page *page = NULL;
|
||||||
|
struct buffer_head *bh = NULL;
|
||||||
|
|
||||||
|
if (ocfs2_inode_is_fast_symlink(inode))
|
||||||
|
link = ocfs2_fast_symlink_getlink(inode, &bh);
|
||||||
|
else
|
||||||
|
link = ocfs2_page_getlink(dentry, &page);
|
||||||
|
if (IS_ERR(link)) {
|
||||||
|
status = PTR_ERR(link);
|
||||||
|
mlog_errno(status);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
status = vfs_follow_link(nd, link);
|
||||||
|
if (status)
|
||||||
|
mlog_errno(status);
|
||||||
|
bail:
|
||||||
|
if (page) {
|
||||||
|
kunmap(page);
|
||||||
|
page_cache_release(page);
|
||||||
|
}
|
||||||
|
if (bh)
|
||||||
|
brelse(bh);
|
||||||
|
|
||||||
|
return ERR_PTR(status);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct inode_operations ocfs2_symlink_inode_operations = {
|
||||||
|
.readlink = page_readlink,
|
||||||
|
.follow_link = ocfs2_follow_link,
|
||||||
|
.getattr = ocfs2_getattr,
|
||||||
|
};
|
||||||
|
struct inode_operations ocfs2_fast_symlink_inode_operations = {
|
||||||
|
.readlink = ocfs2_readlink,
|
||||||
|
.follow_link = ocfs2_follow_link,
|
||||||
|
.getattr = ocfs2_getattr,
|
||||||
|
};
|
|
@ -0,0 +1,42 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* symlink.h
|
||||||
|
*
|
||||||
|
* Function prototypes
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef OCFS2_SYMLINK_H
|
||||||
|
#define OCFS2_SYMLINK_H
|
||||||
|
|
||||||
|
extern struct inode_operations ocfs2_symlink_inode_operations;
|
||||||
|
extern struct inode_operations ocfs2_fast_symlink_inode_operations;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test whether an inode is a fast symlink.
|
||||||
|
*/
|
||||||
|
static inline int ocfs2_inode_is_fast_symlink(struct inode *inode)
|
||||||
|
{
|
||||||
|
return (S_ISLNK(inode->i_mode) &&
|
||||||
|
inode->i_blocks == 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#endif /* OCFS2_SYMLINK_H */
|
|
@ -0,0 +1,131 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* sysfile.c
|
||||||
|
*
|
||||||
|
* Initialize, read, write, etc. system files.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/fs.h>
|
||||||
|
#include <linux/types.h>
|
||||||
|
#include <linux/slab.h>
|
||||||
|
#include <linux/highmem.h>
|
||||||
|
|
||||||
|
#include "ocfs2.h"
|
||||||
|
|
||||||
|
#define MLOG_MASK_PREFIX ML_INODE
|
||||||
|
#include <cluster/masklog.h>
|
||||||
|
|
||||||
|
#include "alloc.h"
|
||||||
|
#include "dir.h"
|
||||||
|
#include "inode.h"
|
||||||
|
#include "journal.h"
|
||||||
|
#include "sysfile.h"
|
||||||
|
|
||||||
|
#include "buffer_head_io.h"
|
||||||
|
|
||||||
|
static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb,
|
||||||
|
int type,
|
||||||
|
u32 slot);
|
||||||
|
|
||||||
|
static inline int is_global_system_inode(int type);
|
||||||
|
static inline int is_in_system_inode_array(struct ocfs2_super *osb,
|
||||||
|
int type,
|
||||||
|
u32 slot);
|
||||||
|
|
||||||
|
static inline int is_global_system_inode(int type)
|
||||||
|
{
|
||||||
|
return type >= OCFS2_FIRST_ONLINE_SYSTEM_INODE &&
|
||||||
|
type <= OCFS2_LAST_GLOBAL_SYSTEM_INODE;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int is_in_system_inode_array(struct ocfs2_super *osb,
|
||||||
|
int type,
|
||||||
|
u32 slot)
|
||||||
|
{
|
||||||
|
return slot == osb->slot_num || is_global_system_inode(type);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct inode *ocfs2_get_system_file_inode(struct ocfs2_super *osb,
|
||||||
|
int type,
|
||||||
|
u32 slot)
|
||||||
|
{
|
||||||
|
struct inode *inode = NULL;
|
||||||
|
struct inode **arr = NULL;
|
||||||
|
|
||||||
|
/* avoid the lookup if cached in local system file array */
|
||||||
|
if (is_in_system_inode_array(osb, type, slot))
|
||||||
|
arr = &(osb->system_inodes[type]);
|
||||||
|
|
||||||
|
if (arr && ((inode = *arr) != NULL)) {
|
||||||
|
/* get a ref in addition to the array ref */
|
||||||
|
inode = igrab(inode);
|
||||||
|
if (!inode)
|
||||||
|
BUG();
|
||||||
|
|
||||||
|
return inode;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* this gets one ref thru iget */
|
||||||
|
inode = _ocfs2_get_system_file_inode(osb, type, slot);
|
||||||
|
|
||||||
|
/* add one more if putting into array for first time */
|
||||||
|
if (arr && inode) {
|
||||||
|
*arr = igrab(inode);
|
||||||
|
if (!*arr)
|
||||||
|
BUG();
|
||||||
|
}
|
||||||
|
return inode;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb,
|
||||||
|
int type,
|
||||||
|
u32 slot)
|
||||||
|
{
|
||||||
|
char namebuf[40];
|
||||||
|
struct inode *inode = NULL;
|
||||||
|
u64 blkno;
|
||||||
|
struct buffer_head *dirent_bh = NULL;
|
||||||
|
struct ocfs2_dir_entry *de = NULL;
|
||||||
|
int status = 0;
|
||||||
|
|
||||||
|
ocfs2_sprintf_system_inode_name(namebuf,
|
||||||
|
sizeof(namebuf),
|
||||||
|
type, slot);
|
||||||
|
|
||||||
|
status = ocfs2_find_files_on_disk(namebuf, strlen(namebuf),
|
||||||
|
&blkno, osb->sys_root_inode,
|
||||||
|
&dirent_bh, &de);
|
||||||
|
if (status < 0) {
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
inode = ocfs2_iget(osb, blkno);
|
||||||
|
if (IS_ERR(inode)) {
|
||||||
|
mlog_errno(PTR_ERR(inode));
|
||||||
|
inode = NULL;
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
bail:
|
||||||
|
if (dirent_bh)
|
||||||
|
brelse(dirent_bh);
|
||||||
|
return inode;
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,33 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* sysfile.h
|
||||||
|
*
|
||||||
|
* Function prototypes
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef OCFS2_SYSFILE_H
|
||||||
|
#define OCFS2_SYSFILE_H
|
||||||
|
|
||||||
|
struct inode * ocfs2_get_system_file_inode(struct ocfs2_super *osb,
|
||||||
|
int type,
|
||||||
|
u32 slot);
|
||||||
|
|
||||||
|
#endif /* OCFS2_SYSFILE_H */
|
|
@ -0,0 +1,544 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* uptodate.c
|
||||||
|
*
|
||||||
|
* Tracking the up-to-date-ness of a local buffer_head with respect to
|
||||||
|
* the cluster.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002, 2004, 2005 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*
|
||||||
|
* Standard buffer head caching flags (uptodate, etc) are insufficient
|
||||||
|
* in a clustered environment - a buffer may be marked up to date on
|
||||||
|
* our local node but could have been modified by another cluster
|
||||||
|
* member. As a result an additional (and performant) caching scheme
|
||||||
|
* is required. A further requirement is that we consume as little
|
||||||
|
* memory as possible - we never pin buffer_head structures in order
|
||||||
|
* to cache them.
|
||||||
|
*
|
||||||
|
* We track the existence of up to date buffers on the inodes which
|
||||||
|
* are associated with them. Because we don't want to pin
|
||||||
|
* buffer_heads, this is only a (strong) hint and several other checks
|
||||||
|
* are made in the I/O path to ensure that we don't use a stale or
|
||||||
|
* invalid buffer without going to disk:
|
||||||
|
* - buffer_jbd is used liberally - if a bh is in the journal on
|
||||||
|
* this node then it *must* be up to date.
|
||||||
|
* - the standard buffer_uptodate() macro is used to detect buffers
|
||||||
|
* which may be invalid (even if we have an up to date tracking
|
||||||
|
* item for them)
|
||||||
|
*
|
||||||
|
* For a full understanding of how this code works together, one
|
||||||
|
* should read the callers in dlmglue.c, the I/O functions in
|
||||||
|
* buffer_head_io.c and ocfs2_journal_access in journal.c
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/fs.h>
|
||||||
|
#include <linux/types.h>
|
||||||
|
#include <linux/slab.h>
|
||||||
|
#include <linux/highmem.h>
|
||||||
|
#include <linux/buffer_head.h>
|
||||||
|
#include <linux/rbtree.h>
|
||||||
|
#include <linux/jbd.h>
|
||||||
|
|
||||||
|
#define MLOG_MASK_PREFIX ML_UPTODATE
|
||||||
|
|
||||||
|
#include <cluster/masklog.h>
|
||||||
|
|
||||||
|
#include "ocfs2.h"
|
||||||
|
|
||||||
|
#include "inode.h"
|
||||||
|
#include "uptodate.h"
|
||||||
|
|
||||||
|
struct ocfs2_meta_cache_item {
|
||||||
|
struct rb_node c_node;
|
||||||
|
sector_t c_block;
|
||||||
|
};
|
||||||
|
|
||||||
|
static kmem_cache_t *ocfs2_uptodate_cachep = NULL;
|
||||||
|
|
||||||
|
void ocfs2_metadata_cache_init(struct inode *inode)
|
||||||
|
{
|
||||||
|
struct ocfs2_inode_info *oi = OCFS2_I(inode);
|
||||||
|
struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
|
||||||
|
|
||||||
|
oi->ip_flags |= OCFS2_INODE_CACHE_INLINE;
|
||||||
|
ci->ci_num_cached = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* No lock taken here as 'root' is not expected to be visible to other
|
||||||
|
* processes. */
|
||||||
|
static unsigned int ocfs2_purge_copied_metadata_tree(struct rb_root *root)
|
||||||
|
{
|
||||||
|
unsigned int purged = 0;
|
||||||
|
struct rb_node *node;
|
||||||
|
struct ocfs2_meta_cache_item *item;
|
||||||
|
|
||||||
|
while ((node = rb_last(root)) != NULL) {
|
||||||
|
item = rb_entry(node, struct ocfs2_meta_cache_item, c_node);
|
||||||
|
|
||||||
|
mlog(0, "Purge item %llu\n",
|
||||||
|
(unsigned long long) item->c_block);
|
||||||
|
|
||||||
|
rb_erase(&item->c_node, root);
|
||||||
|
kmem_cache_free(ocfs2_uptodate_cachep, item);
|
||||||
|
|
||||||
|
purged++;
|
||||||
|
}
|
||||||
|
return purged;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Called from locking and called from ocfs2_clear_inode. Dump the
|
||||||
|
* cache for a given inode.
|
||||||
|
*
|
||||||
|
* This function is a few more lines longer than necessary due to some
|
||||||
|
* accounting done here, but I think it's worth tracking down those
|
||||||
|
* bugs sooner -- Mark */
|
||||||
|
void ocfs2_metadata_cache_purge(struct inode *inode)
|
||||||
|
{
|
||||||
|
struct ocfs2_inode_info *oi = OCFS2_I(inode);
|
||||||
|
unsigned int tree, to_purge, purged;
|
||||||
|
struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
|
||||||
|
struct rb_root root = RB_ROOT;
|
||||||
|
|
||||||
|
spin_lock(&oi->ip_lock);
|
||||||
|
tree = !(oi->ip_flags & OCFS2_INODE_CACHE_INLINE);
|
||||||
|
to_purge = ci->ci_num_cached;
|
||||||
|
|
||||||
|
mlog(0, "Purge %u %s items from Inode %"MLFu64"\n", to_purge,
|
||||||
|
tree ? "array" : "tree", oi->ip_blkno);
|
||||||
|
|
||||||
|
/* If we're a tree, save off the root so that we can safely
|
||||||
|
* initialize the cache. We do the work to free tree members
|
||||||
|
* without the spinlock. */
|
||||||
|
if (tree)
|
||||||
|
root = ci->ci_cache.ci_tree;
|
||||||
|
|
||||||
|
ocfs2_metadata_cache_init(inode);
|
||||||
|
spin_unlock(&oi->ip_lock);
|
||||||
|
|
||||||
|
purged = ocfs2_purge_copied_metadata_tree(&root);
|
||||||
|
/* If possible, track the number wiped so that we can more
|
||||||
|
* easily detect counting errors. Unfortunately, this is only
|
||||||
|
* meaningful for trees. */
|
||||||
|
if (tree && purged != to_purge)
|
||||||
|
mlog(ML_ERROR, "Inode %"MLFu64", count = %u, purged = %u\n",
|
||||||
|
oi->ip_blkno, to_purge, purged);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Returns the index in the cache array, -1 if not found.
|
||||||
|
* Requires ip_lock. */
|
||||||
|
static int ocfs2_search_cache_array(struct ocfs2_caching_info *ci,
|
||||||
|
sector_t item)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < ci->ci_num_cached; i++) {
|
||||||
|
if (item == ci->ci_cache.ci_array[i])
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Returns the cache item if found, otherwise NULL.
|
||||||
|
* Requires ip_lock. */
|
||||||
|
static struct ocfs2_meta_cache_item *
|
||||||
|
ocfs2_search_cache_tree(struct ocfs2_caching_info *ci,
|
||||||
|
sector_t block)
|
||||||
|
{
|
||||||
|
struct rb_node * n = ci->ci_cache.ci_tree.rb_node;
|
||||||
|
struct ocfs2_meta_cache_item *item = NULL;
|
||||||
|
|
||||||
|
while (n) {
|
||||||
|
item = rb_entry(n, struct ocfs2_meta_cache_item, c_node);
|
||||||
|
|
||||||
|
if (block < item->c_block)
|
||||||
|
n = n->rb_left;
|
||||||
|
else if (block > item->c_block)
|
||||||
|
n = n->rb_right;
|
||||||
|
else
|
||||||
|
return item;
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int ocfs2_buffer_cached(struct ocfs2_inode_info *oi,
|
||||||
|
struct buffer_head *bh)
|
||||||
|
{
|
||||||
|
int index = -1;
|
||||||
|
struct ocfs2_meta_cache_item *item = NULL;
|
||||||
|
|
||||||
|
spin_lock(&oi->ip_lock);
|
||||||
|
|
||||||
|
mlog(0, "Inode %"MLFu64", query block %llu (inline = %u)\n",
|
||||||
|
oi->ip_blkno, (unsigned long long) bh->b_blocknr,
|
||||||
|
!!(oi->ip_flags & OCFS2_INODE_CACHE_INLINE));
|
||||||
|
|
||||||
|
if (oi->ip_flags & OCFS2_INODE_CACHE_INLINE)
|
||||||
|
index = ocfs2_search_cache_array(&oi->ip_metadata_cache,
|
||||||
|
bh->b_blocknr);
|
||||||
|
else
|
||||||
|
item = ocfs2_search_cache_tree(&oi->ip_metadata_cache,
|
||||||
|
bh->b_blocknr);
|
||||||
|
|
||||||
|
spin_unlock(&oi->ip_lock);
|
||||||
|
|
||||||
|
mlog(0, "index = %d, item = %p\n", index, item);
|
||||||
|
|
||||||
|
return (index != -1) || (item != NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Decide whether the contents of 'bh' can be trusted without reading
 * from disk.
 *
 * Warning: even if it returns true, this does *not* guarantee that
 * the block is stored in our inode metadata cache. */
int ocfs2_buffer_uptodate(struct inode *inode,
			  struct buffer_head *bh)
{
	int trusted = 0;

	/* A bh that isn't even marked uptodate locally can't have
	 * correct data, whether or not we are tracking it. */
	if (buffer_uptodate(bh)) {
		/* OCFS2 does not allow multiple nodes to be changing
		 * the same block at the same time, so a journalled
		 * buffer is trusted outright. */
		if (buffer_jbd(bh))
			trusted = 1;
		else
			/* Locally uptodate - our cache says whether
			 * that can be believed. */
			trusted = ocfs2_buffer_cached(OCFS2_I(inode), bh);
	}

	return trusted;
}
|
||||||
|
|
||||||
|
/* Requires ip_lock */
|
||||||
|
static void ocfs2_append_cache_array(struct ocfs2_caching_info *ci,
|
||||||
|
sector_t block)
|
||||||
|
{
|
||||||
|
BUG_ON(ci->ci_num_cached >= OCFS2_INODE_MAX_CACHE_ARRAY);
|
||||||
|
|
||||||
|
mlog(0, "block %llu takes position %u\n", (unsigned long long) block,
|
||||||
|
ci->ci_num_cached);
|
||||||
|
|
||||||
|
ci->ci_cache.ci_array[ci->ci_num_cached] = block;
|
||||||
|
ci->ci_num_cached++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* By now the caller should have checked that the item does *not*
|
||||||
|
* exist in the tree.
|
||||||
|
* Requires ip_lock. */
|
||||||
|
static void __ocfs2_insert_cache_tree(struct ocfs2_caching_info *ci,
|
||||||
|
struct ocfs2_meta_cache_item *new)
|
||||||
|
{
|
||||||
|
sector_t block = new->c_block;
|
||||||
|
struct rb_node *parent = NULL;
|
||||||
|
struct rb_node **p = &ci->ci_cache.ci_tree.rb_node;
|
||||||
|
struct ocfs2_meta_cache_item *tmp;
|
||||||
|
|
||||||
|
mlog(0, "Insert block %llu num = %u\n", (unsigned long long) block,
|
||||||
|
ci->ci_num_cached);
|
||||||
|
|
||||||
|
while(*p) {
|
||||||
|
parent = *p;
|
||||||
|
|
||||||
|
tmp = rb_entry(parent, struct ocfs2_meta_cache_item, c_node);
|
||||||
|
|
||||||
|
if (block < tmp->c_block)
|
||||||
|
p = &(*p)->rb_left;
|
||||||
|
else if (block > tmp->c_block)
|
||||||
|
p = &(*p)->rb_right;
|
||||||
|
else {
|
||||||
|
/* This should never happen! */
|
||||||
|
mlog(ML_ERROR, "Duplicate block %llu cached!\n",
|
||||||
|
(unsigned long long) block);
|
||||||
|
BUG();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
rb_link_node(&new->c_node, parent, p);
|
||||||
|
rb_insert_color(&new->c_node, &ci->ci_cache.ci_tree);
|
||||||
|
ci->ci_num_cached++;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int ocfs2_insert_can_use_array(struct ocfs2_inode_info *oi,
|
||||||
|
struct ocfs2_caching_info *ci)
|
||||||
|
{
|
||||||
|
assert_spin_locked(&oi->ip_lock);
|
||||||
|
|
||||||
|
return (oi->ip_flags & OCFS2_INODE_CACHE_INLINE) &&
|
||||||
|
(ci->ci_num_cached < OCFS2_INODE_MAX_CACHE_ARRAY);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* tree should be exactly OCFS2_INODE_MAX_CACHE_ARRAY wide. NULL the
|
||||||
|
* pointers in tree after we use them - this allows caller to detect
|
||||||
|
* when to free in case of error. */
|
||||||
|
static void ocfs2_expand_cache(struct ocfs2_inode_info *oi,
|
||||||
|
struct ocfs2_meta_cache_item **tree)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
|
||||||
|
|
||||||
|
mlog_bug_on_msg(ci->ci_num_cached != OCFS2_INODE_MAX_CACHE_ARRAY,
|
||||||
|
"Inode %"MLFu64", num cached = %u, should be %u\n",
|
||||||
|
oi->ip_blkno, ci->ci_num_cached,
|
||||||
|
OCFS2_INODE_MAX_CACHE_ARRAY);
|
||||||
|
mlog_bug_on_msg(!(oi->ip_flags & OCFS2_INODE_CACHE_INLINE),
|
||||||
|
"Inode %"MLFu64" not marked as inline anymore!\n",
|
||||||
|
oi->ip_blkno);
|
||||||
|
assert_spin_locked(&oi->ip_lock);
|
||||||
|
|
||||||
|
/* Be careful to initialize the tree members *first* because
|
||||||
|
* once the ci_tree is used, the array is junk... */
|
||||||
|
for(i = 0; i < OCFS2_INODE_MAX_CACHE_ARRAY; i++)
|
||||||
|
tree[i]->c_block = ci->ci_cache.ci_array[i];
|
||||||
|
|
||||||
|
oi->ip_flags &= ~OCFS2_INODE_CACHE_INLINE;
|
||||||
|
ci->ci_cache.ci_tree = RB_ROOT;
|
||||||
|
/* this will be set again by __ocfs2_insert_cache_tree */
|
||||||
|
ci->ci_num_cached = 0;
|
||||||
|
|
||||||
|
for(i = 0; i < OCFS2_INODE_MAX_CACHE_ARRAY; i++) {
|
||||||
|
__ocfs2_insert_cache_tree(ci, tree[i]);
|
||||||
|
tree[i] = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
mlog(0, "Expanded %"MLFu64" to a tree cache: flags 0x%x, num = %u\n",
|
||||||
|
oi->ip_blkno, oi->ip_flags, ci->ci_num_cached);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Slow path function - memory allocation is necessary. See the
|
||||||
|
* comment above ocfs2_set_buffer_uptodate for more information. */
|
||||||
|
static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi,
|
||||||
|
sector_t block,
|
||||||
|
int expand_tree)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
|
||||||
|
struct ocfs2_meta_cache_item *new = NULL;
|
||||||
|
struct ocfs2_meta_cache_item *tree[OCFS2_INODE_MAX_CACHE_ARRAY] =
|
||||||
|
{ NULL, };
|
||||||
|
|
||||||
|
mlog(0, "Inode %"MLFu64", block %llu, expand = %d\n",
|
||||||
|
oi->ip_blkno, (unsigned long long) block, expand_tree);
|
||||||
|
|
||||||
|
new = kmem_cache_alloc(ocfs2_uptodate_cachep, GFP_KERNEL);
|
||||||
|
if (!new) {
|
||||||
|
mlog_errno(-ENOMEM);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
new->c_block = block;
|
||||||
|
|
||||||
|
if (expand_tree) {
|
||||||
|
/* Do *not* allocate an array here - the removal code
|
||||||
|
* has no way of tracking that. */
|
||||||
|
for(i = 0; i < OCFS2_INODE_MAX_CACHE_ARRAY; i++) {
|
||||||
|
tree[i] = kmem_cache_alloc(ocfs2_uptodate_cachep,
|
||||||
|
GFP_KERNEL);
|
||||||
|
if (!tree[i]) {
|
||||||
|
mlog_errno(-ENOMEM);
|
||||||
|
goto out_free;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* These are initialized in ocfs2_expand_cache! */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
spin_lock(&oi->ip_lock);
|
||||||
|
if (ocfs2_insert_can_use_array(oi, ci)) {
|
||||||
|
mlog(0, "Someone cleared the tree underneath us\n");
|
||||||
|
/* Ok, items were removed from the cache in between
|
||||||
|
* locks. Detect this and revert back to the fast path */
|
||||||
|
ocfs2_append_cache_array(ci, block);
|
||||||
|
spin_unlock(&oi->ip_lock);
|
||||||
|
goto out_free;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (expand_tree)
|
||||||
|
ocfs2_expand_cache(oi, tree);
|
||||||
|
|
||||||
|
__ocfs2_insert_cache_tree(ci, new);
|
||||||
|
spin_unlock(&oi->ip_lock);
|
||||||
|
|
||||||
|
new = NULL;
|
||||||
|
out_free:
|
||||||
|
if (new)
|
||||||
|
kmem_cache_free(ocfs2_uptodate_cachep, new);
|
||||||
|
|
||||||
|
/* If these were used, then ocfs2_expand_cache re-set them to
|
||||||
|
* NULL for us. */
|
||||||
|
if (tree[0]) {
|
||||||
|
for(i = 0; i < OCFS2_INODE_MAX_CACHE_ARRAY; i++)
|
||||||
|
if (tree[i])
|
||||||
|
kmem_cache_free(ocfs2_uptodate_cachep,
|
||||||
|
tree[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Item insertion is guarded by ip_io_sem, so the insertion path takes
|
||||||
|
* advantage of this by not rechecking for a duplicate insert during
|
||||||
|
* the slow case. Additionally, if the cache needs to be bumped up to
|
||||||
|
* a tree, the code will not recheck after acquiring the lock --
|
||||||
|
* multiple paths cannot be expanding to a tree at the same time.
|
||||||
|
*
|
||||||
|
* The slow path takes into account that items can be removed
|
||||||
|
* (including the whole tree wiped and reset) when this process it out
|
||||||
|
* allocating memory. In those cases, it reverts back to the fast
|
||||||
|
* path.
|
||||||
|
*
|
||||||
|
* Note that this function may actually fail to insert the block if
|
||||||
|
* memory cannot be allocated. This is not fatal however (but may
|
||||||
|
* result in a performance penalty) */
|
||||||
|
void ocfs2_set_buffer_uptodate(struct inode *inode,
|
||||||
|
struct buffer_head *bh)
|
||||||
|
{
|
||||||
|
int expand;
|
||||||
|
struct ocfs2_inode_info *oi = OCFS2_I(inode);
|
||||||
|
struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
|
||||||
|
|
||||||
|
/* The block may very well exist in our cache already, so avoid
|
||||||
|
* doing any more work in that case. */
|
||||||
|
if (ocfs2_buffer_cached(oi, bh))
|
||||||
|
return;
|
||||||
|
|
||||||
|
mlog(0, "Inode %"MLFu64", inserting block %llu\n", oi->ip_blkno,
|
||||||
|
(unsigned long long) bh->b_blocknr);
|
||||||
|
|
||||||
|
/* No need to recheck under spinlock - insertion is guarded by
|
||||||
|
* ip_io_sem */
|
||||||
|
spin_lock(&oi->ip_lock);
|
||||||
|
if (ocfs2_insert_can_use_array(oi, ci)) {
|
||||||
|
/* Fast case - it's an array and there's a free
|
||||||
|
* spot. */
|
||||||
|
ocfs2_append_cache_array(ci, bh->b_blocknr);
|
||||||
|
spin_unlock(&oi->ip_lock);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
expand = 0;
|
||||||
|
if (oi->ip_flags & OCFS2_INODE_CACHE_INLINE) {
|
||||||
|
/* We need to bump things up to a tree. */
|
||||||
|
expand = 1;
|
||||||
|
}
|
||||||
|
spin_unlock(&oi->ip_lock);
|
||||||
|
|
||||||
|
__ocfs2_set_buffer_uptodate(oi, bh->b_blocknr, expand);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Called against a newly allocated buffer. Most likely nobody should
|
||||||
|
* be able to read this sort of metadata while it's still being
|
||||||
|
* allocated, but this is careful to take ip_io_sem anyway. */
|
||||||
|
void ocfs2_set_new_buffer_uptodate(struct inode *inode,
|
||||||
|
struct buffer_head *bh)
|
||||||
|
{
|
||||||
|
struct ocfs2_inode_info *oi = OCFS2_I(inode);
|
||||||
|
|
||||||
|
/* This should definitely *not* exist in our cache */
|
||||||
|
BUG_ON(ocfs2_buffer_cached(oi, bh));
|
||||||
|
|
||||||
|
set_buffer_uptodate(bh);
|
||||||
|
|
||||||
|
down(&oi->ip_io_sem);
|
||||||
|
ocfs2_set_buffer_uptodate(inode, bh);
|
||||||
|
up(&oi->ip_io_sem);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Requires ip_lock. */
|
||||||
|
static void ocfs2_remove_metadata_array(struct ocfs2_caching_info *ci,
|
||||||
|
int index)
|
||||||
|
{
|
||||||
|
sector_t *array = ci->ci_cache.ci_array;
|
||||||
|
int bytes;
|
||||||
|
|
||||||
|
BUG_ON(index < 0 || index >= OCFS2_INODE_MAX_CACHE_ARRAY);
|
||||||
|
BUG_ON(index >= ci->ci_num_cached);
|
||||||
|
BUG_ON(!ci->ci_num_cached);
|
||||||
|
|
||||||
|
mlog(0, "remove index %d (num_cached = %u\n", index,
|
||||||
|
ci->ci_num_cached);
|
||||||
|
|
||||||
|
ci->ci_num_cached--;
|
||||||
|
|
||||||
|
/* don't need to copy if the array is now empty, or if we
|
||||||
|
* removed at the tail */
|
||||||
|
if (ci->ci_num_cached && index < ci->ci_num_cached) {
|
||||||
|
bytes = sizeof(sector_t) * (ci->ci_num_cached - index);
|
||||||
|
memmove(&array[index], &array[index + 1], bytes);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Requires ip_lock. */
|
||||||
|
static void ocfs2_remove_metadata_tree(struct ocfs2_caching_info *ci,
|
||||||
|
struct ocfs2_meta_cache_item *item)
|
||||||
|
{
|
||||||
|
mlog(0, "remove block %llu from tree\n",
|
||||||
|
(unsigned long long) item->c_block);
|
||||||
|
|
||||||
|
rb_erase(&item->c_node, &ci->ci_cache.ci_tree);
|
||||||
|
ci->ci_num_cached--;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Called when we remove a chunk of metadata from an inode. We don't
|
||||||
|
* bother reverting things to an inlined array in the case of a remove
|
||||||
|
* which moves us back under the limit. */
|
||||||
|
void ocfs2_remove_from_cache(struct inode *inode,
|
||||||
|
struct buffer_head *bh)
|
||||||
|
{
|
||||||
|
int index;
|
||||||
|
sector_t block = bh->b_blocknr;
|
||||||
|
struct ocfs2_meta_cache_item *item = NULL;
|
||||||
|
struct ocfs2_inode_info *oi = OCFS2_I(inode);
|
||||||
|
struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
|
||||||
|
|
||||||
|
spin_lock(&oi->ip_lock);
|
||||||
|
mlog(0, "Inode %"MLFu64", remove %llu, items = %u, array = %u\n",
|
||||||
|
oi->ip_blkno, (unsigned long long) block, ci->ci_num_cached,
|
||||||
|
oi->ip_flags & OCFS2_INODE_CACHE_INLINE);
|
||||||
|
|
||||||
|
if (oi->ip_flags & OCFS2_INODE_CACHE_INLINE) {
|
||||||
|
index = ocfs2_search_cache_array(ci, block);
|
||||||
|
if (index != -1)
|
||||||
|
ocfs2_remove_metadata_array(ci, index);
|
||||||
|
} else {
|
||||||
|
item = ocfs2_search_cache_tree(ci, block);
|
||||||
|
if (item)
|
||||||
|
ocfs2_remove_metadata_tree(ci, item);
|
||||||
|
}
|
||||||
|
spin_unlock(&oi->ip_lock);
|
||||||
|
|
||||||
|
if (item)
|
||||||
|
kmem_cache_free(ocfs2_uptodate_cachep, item);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Create the slab cache for struct ocfs2_meta_cache_item.
 * Returns 0 on success, -ENOMEM if the cache could not be created. */
int __init init_ocfs2_uptodate_cache(void)
{
	kmem_cache_t *cachep;

	cachep = kmem_cache_create("ocfs2_uptodate",
				   sizeof(struct ocfs2_meta_cache_item),
				   0, SLAB_HWCACHE_ALIGN, NULL, NULL);
	ocfs2_uptodate_cachep = cachep;
	if (!cachep)
		return -ENOMEM;

	mlog(0, "%u inlined cache items per inode.\n",
	     OCFS2_INODE_MAX_CACHE_ARRAY);

	return 0;
}
|
||||||
|
|
||||||
|
/* Destroy the item slab cache, if it was ever created. */
void __exit exit_ocfs2_uptodate_cache(void)
{
	if (!ocfs2_uptodate_cachep)
		return;

	kmem_cache_destroy(ocfs2_uptodate_cachep);
}
|
|
@ -0,0 +1,44 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* uptodate.h
|
||||||
|
*
|
||||||
|
* Cluster uptodate tracking
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002, 2004, 2005 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef OCFS2_UPTODATE_H
|
||||||
|
#define OCFS2_UPTODATE_H
|
||||||
|
|
||||||
|
int __init init_ocfs2_uptodate_cache(void);
|
||||||
|
void __exit exit_ocfs2_uptodate_cache(void);
|
||||||
|
|
||||||
|
void ocfs2_metadata_cache_init(struct inode *inode);
|
||||||
|
void ocfs2_metadata_cache_purge(struct inode *inode);
|
||||||
|
|
||||||
|
int ocfs2_buffer_uptodate(struct inode *inode,
|
||||||
|
struct buffer_head *bh);
|
||||||
|
void ocfs2_set_buffer_uptodate(struct inode *inode,
|
||||||
|
struct buffer_head *bh);
|
||||||
|
void ocfs2_set_new_buffer_uptodate(struct inode *inode,
|
||||||
|
struct buffer_head *bh);
|
||||||
|
void ocfs2_remove_from_cache(struct inode *inode,
|
||||||
|
struct buffer_head *bh);
|
||||||
|
|
||||||
|
#endif /* OCFS2_UPTODATE_H */
|
|
@ -0,0 +1,43 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* ver.c
|
||||||
|
*
|
||||||
|
* version string
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002, 2005 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/module.h>
|
||||||
|
#include <linux/string.h>
|
||||||
|
#include <linux/kernel.h>
|
||||||
|
|
||||||
|
#include "ver.h"
|
||||||
|
|
||||||
|
#define OCFS2_BUILD_VERSION "1.3.3"
|
||||||
|
|
||||||
|
#define VERSION_STR "OCFS2 " OCFS2_BUILD_VERSION
|
||||||
|
|
||||||
|
void ocfs2_print_version(void)
|
||||||
|
{
|
||||||
|
printk(KERN_INFO "%s\n", VERSION_STR);
|
||||||
|
}
|
||||||
|
|
||||||
|
MODULE_DESCRIPTION(VERSION_STR);
|
||||||
|
|
||||||
|
MODULE_VERSION(OCFS2_BUILD_VERSION);
|
|
@ -0,0 +1,31 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* ver.h
|
||||||
|
*
|
||||||
|
* Function prototypes
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef OCFS2_VER_H
|
||||||
|
#define OCFS2_VER_H
|
||||||
|
|
||||||
|
void ocfs2_print_version(void);
|
||||||
|
|
||||||
|
#endif /* OCFS2_VER_H */
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,56 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||||
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||||
|
*
|
||||||
|
* vote.h
|
||||||
|
*
|
||||||
|
* Declarations for the vote thread and the vote request helpers
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public
|
||||||
|
* License along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef VOTE_H
|
||||||
|
#define VOTE_H
|
||||||
|
|
||||||
|
int ocfs2_vote_thread(void *arg);
|
||||||
|
static inline void ocfs2_kick_vote_thread(struct ocfs2_super *osb)
|
||||||
|
{
|
||||||
|
spin_lock(&osb->vote_task_lock);
|
||||||
|
/* make sure the voting thread gets a swipe at whatever changes
|
||||||
|
* the caller may have made to the voting state */
|
||||||
|
osb->vote_wake_sequence++;
|
||||||
|
spin_unlock(&osb->vote_task_lock);
|
||||||
|
wake_up(&osb->vote_event);
|
||||||
|
}
|
||||||
|
|
||||||
|
int ocfs2_request_delete_vote(struct inode *inode);
|
||||||
|
int ocfs2_request_unlink_vote(struct inode *inode,
|
||||||
|
struct dentry *dentry,
|
||||||
|
unsigned int nlink);
|
||||||
|
int ocfs2_request_rename_vote(struct inode *inode,
|
||||||
|
struct dentry *dentry);
|
||||||
|
int ocfs2_request_mount_vote(struct ocfs2_super *osb);
|
||||||
|
int ocfs2_request_umount_vote(struct ocfs2_super *osb);
|
||||||
|
int ocfs2_register_net_handlers(struct ocfs2_super *osb);
|
||||||
|
void ocfs2_unregister_net_handlers(struct ocfs2_super *osb);
|
||||||
|
|
||||||
|
void ocfs2_mark_inode_remotely_deleted(struct inode *inode);
|
||||||
|
|
||||||
|
void ocfs2_remove_node_from_vote_queues(struct ocfs2_super *osb,
|
||||||
|
int node_num);
|
||||||
|
#endif
|
Загрузка…
Ссылка в новой задаче