Merge branch 'akpm' (patches from Andrew)
Merge patch-bomb from Andrew Morton:
 - a few misc things
 - Andy's "ambient capabilities"
 - fs/notify updates
 - the ocfs2 queue
 - kernel/watchdog.c updates and feature work
 - some of MM.  Includes Andrea's userfaultfd feature.

   [ Hadn't noticed that userfaultfd was 'default y' when applying the
     patches, so that got fixed in this merge instead.  We do _not_ mark
     new features that nobody uses yet 'default y'   - Linus ]

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (118 commits)
  mm/hugetlb.c: make vma_has_reserves() return bool
  mm/madvise.c: make madvise_behaviour_valid() return bool
  mm/memory.c: make tlb_next_batch() return bool
  mm/dmapool.c: change is_page_busy() return from int to bool
  mm: remove struct node_active_region
  mremap: simplify the "overlap" check in mremap_to()
  mremap: don't do uneccesary checks if new_len == old_len
  mremap: don't do mm_populate(new_addr) on failure
  mm: move ->mremap() from file_operations to vm_operations_struct
  mremap: don't leak new_vma if f_op->mremap() fails
  mm/hugetlb.c: make vma_shareable() return bool
  mm: make GUP handle pfn mapping unless FOLL_GET is requested
  mm: fix status code which move_pages() returns for zero page
  mm: memcontrol: bring back the VM_BUG_ON() in mem_cgroup_swapout()
  genalloc: add support of multiple gen_pools per device
  genalloc: add name arg to gen_pool_get() and devm_gen_pool_create()
  mm/memblock: WARN_ON when nid differs from overlap region
  Documentation/features/vm: add feature description and arch support status for batched TLB flush after unmap
  mm: defer flush of writable TLB entries
  mm: send one IPI per CPU to TLB flush all entries after unmapping pages
  ...
This commit is contained in:
6c0f568e84
@@ -0,0 +1,40 @@
#
# Feature name:          batch-unmap-tlb-flush
#         Kconfig:       ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
#         description:   arch supports deferral of TLB flush until multiple pages are unmapped
#
    -----------------------
    |         arch |status|
    -----------------------
    |       alpha: | TODO |
    |         arc: | TODO |
    |         arm: | TODO |
    |       arm64: | TODO |
    |       avr32: |  ..  |
    |    blackfin: | TODO |
    |         c6x: |  ..  |
    |        cris: |  ..  |
    |         frv: |  ..  |
    |       h8300: |  ..  |
    |     hexagon: | TODO |
    |        ia64: | TODO |
    |        m32r: | TODO |
    |        m68k: |  ..  |
    |       metag: | TODO |
    |  microblaze: |  ..  |
    |        mips: | TODO |
    |     mn10300: | TODO |
    |       nios2: |  ..  |
    |    openrisc: |  ..  |
    |      parisc: | TODO |
    |     powerpc: | TODO |
    |        s390: | TODO |
    |       score: |  ..  |
    |          sh: | TODO |
    |       sparc: | TODO |
    |        tile: | TODO |
    |          um: |  ..  |
    |   unicore32: |  ..  |
    |         x86: |  ok  |
    |      xtensa: | TODO |
    -----------------------
@@ -303,6 +303,7 @@ Code  Seq#(hex)  Include File            Comments
 0xA3   80-8F   Port ACL                in development:
                                        <mailto:tlewis@mindspring.com>
 0xA3   90-9F   linux/dtlk.h
+0xAA   00-3F   linux/uapi/linux/userfaultfd.h
 0xAB   00-1F   linux/nbd.h
 0xAC   00-1F   linux/raw.h
 0xAD   00      Netfilter device        in development:
@@ -0,0 +1,144 @@
= Userfaultfd =

== Objective ==

Userfaults allow the implementation of on-demand paging from userland
and more generally they allow userland to take control of various
memory page faults, something otherwise only the kernel code could do.

For example userfaults allow a proper and more optimal implementation
of the PROT_NONE+SIGSEGV trick.

== Design ==

Userfaults are delivered and resolved through the userfaultfd syscall.

The userfaultfd (aside from registering and unregistering virtual
memory ranges) provides two primary functionalities:

1) a read/POLLIN protocol to notify a userland thread of the faults
   happening

2) various UFFDIO_* ioctls that can manage the virtual memory regions
   registered in the userfaultfd, and that allow userland to efficiently
   resolve the userfaults it receives via 1) or to manage the virtual
   memory in the background

The real advantage of userfaults compared to regular virtual memory
management of mremap/mprotect is that the userfaults in all their
operations never involve heavyweight structures like vmas (in fact the
userfaultfd runtime load never takes the mmap_sem for writing).

Vmas are not suitable for page- (or hugepage-) granular fault tracking
when dealing with virtual address spaces that could span
terabytes. Too many vmas would be needed for that.

The userfaultfd, once opened by invoking the syscall, can also be
passed using unix domain sockets to a manager process, so the same
manager process could handle the userfaults of a multitude of
different processes without them being aware of what is going on
(well of course unless they later try to use the userfaultfd
themselves on the same region the manager is already tracking, which
is a corner case that would currently return -EBUSY).

== API ==

When first opened, the userfaultfd must be enabled by invoking the
UFFDIO_API ioctl with a uffdio_api.api value set to UFFD_API (or
a later API version), which specifies the read/POLLIN protocol
userland intends to speak on the UFFD and the uffdio_api.features
userland requires. The UFFDIO_API ioctl, if successful (i.e. if the
requested uffdio_api.api is spoken also by the running kernel and the
requested features are going to be enabled), will return into
uffdio_api.features and uffdio_api.ioctls two 64bit bitmasks of,
respectively, all the available features of the read(2) protocol and
the generic ioctls available.

Once the userfaultfd has been enabled, the UFFDIO_REGISTER ioctl should
be invoked (if present in the returned uffdio_api.ioctls bitmask) to
register a memory range in the userfaultfd by setting the
uffdio_register structure accordingly. The uffdio_register.mode
bitmask specifies to the kernel which kinds of faults to track for
the range (UFFDIO_REGISTER_MODE_MISSING would track missing
pages). The UFFDIO_REGISTER ioctl will return the
uffdio_register.ioctls bitmask of ioctls that are suitable to resolve
userfaults on the range registered. Not all ioctls will necessarily be
supported for all memory types, depending on the underlying virtual
memory backend (anonymous memory vs tmpfs vs real filebacked
mappings).

Userland can use the uffdio_register.ioctls to manage the virtual
address space in the background (to add or potentially also remove
memory from the userfaultfd registered range). This means a userfault
could be triggering just before userland maps in the background the
user-faulted page.

The primary ioctl to resolve userfaults is UFFDIO_COPY. It
atomically copies a page into the userfault registered range and wakes
up the blocked userfaults (unless uffdio_copy.mode &
UFFDIO_COPY_MODE_DONTWAKE is set). Other ioctls work similarly to
UFFDIO_COPY. They're atomic in the sense of guaranteeing that nothing can see a
half-copied page, since it'll keep userfaulting until the copy has
finished.

== QEMU/KVM ==

QEMU/KVM is using the userfaultfd syscall to implement postcopy live
migration. Postcopy live migration is one form of memory
externalization consisting of a virtual machine running with part or
all of its memory residing on a different node in the cloud. The
userfaultfd abstraction is generic enough that not a single line of
KVM kernel code had to be modified in order to add postcopy live
migration to QEMU.

Guest async page faults, FOLL_NOWAIT and all other GUP features work
just fine in combination with userfaults. Userfaults trigger async
page faults in the guest scheduler so those guest processes that
aren't waiting for userfaults (i.e. network bound) can keep running in
the guest vcpus.

It is generally beneficial to run one pass of precopy live migration
just before starting postcopy live migration, in order to avoid
generating userfaults for readonly guest regions.

The implementation of postcopy live migration currently uses one
single bidirectional socket, but in the future two different sockets
will be used (to reduce the latency of the userfaults to the minimum
possible without having to decrease /proc/sys/net/ipv4/tcp_wmem).

The QEMU in the source node writes all pages that it knows are missing
in the destination node into the socket, and the migration thread of
the QEMU running in the destination node runs UFFDIO_COPY|ZEROPAGE
ioctls on the userfaultfd in order to map the received pages into the
guest (UFFDIO_ZEROPAGE is used if the source page was a zero page).

A different postcopy thread in the destination node listens with
poll() to the userfaultfd in parallel. When a POLLIN event is
generated after a userfault triggers, the postcopy thread read()s from
the userfaultfd and receives the fault address (or -EAGAIN in case the
userfault was already resolved and woken by a UFFDIO_COPY|ZEROPAGE run
by the parallel QEMU migration thread).

After the QEMU postcopy thread (running in the destination node) gets
the userfault address, it writes the information about the missing page
into the socket. The QEMU source node receives the information,
roughly "seeks" to that page address and continues sending all the
remaining missing pages from that new page offset. Soon after that
(just the time to flush the tcp_wmem queue through the network) the
migration thread in the QEMU running in the destination node will
receive the page that triggered the userfault and it'll map it as
usual with UFFDIO_COPY|ZEROPAGE (without actually knowing if it
was spontaneously sent by the source or if it was an urgent page
requested through a userfault).

By the time the userfaults start, the QEMU in the destination node
doesn't need to keep any per-page state bitmap relative to the live
migration around, and a single per-page bitmap has to be maintained in
the QEMU running in the source node to know which pages are still
missing in the destination node. The bitmap in the source node is
checked to find which missing pages to send in round robin, and we seek
over it when receiving incoming userfaults. After sending each page of
course the bitmap is updated accordingly. It's also useful to avoid
sending the same page twice (in case the userfault is read by the
postcopy thread just before UFFDIO_COPY|ZEROPAGE runs in the migration
thread).
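[Editor's sketch] To make the read/POLLIN protocol described above concrete, here is a minimal, hypothetical fault-handling loop in C. It is not part of this patch: the function name handle_range, the zero-filled backing page, the 4 KiB page-size assumption, and the abbreviated error handling are all illustrative.

    #include <fcntl.h>
    #include <poll.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <linux/userfaultfd.h>

    static int handle_range(void *area, size_t len)
    {
            struct uffdio_api api = { .api = UFFD_API };
            struct uffdio_register reg;
            int uffd;

            /* open the userfaultfd and negotiate the API version */
            uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
            if (uffd < 0 || ioctl(uffd, UFFDIO_API, &api) == -1)
                    return -1;

            /* track missing pages in [area, area + len) */
            reg.range.start = (unsigned long)area;
            reg.range.len = len;
            reg.mode = UFFDIO_REGISTER_MODE_MISSING;
            if (ioctl(uffd, UFFDIO_REGISTER, &reg) == -1)
                    return -1;

            for (;;) {
                    struct pollfd pfd = { .fd = uffd, .events = POLLIN };
                    struct uffd_msg msg;
                    struct uffdio_copy copy;
                    static char page[4096];    /* assumes 4 KiB pages */

                    if (poll(&pfd, 1, -1) == -1)
                            return -1;
                    /* a short read means the fault was already resolved */
                    if (read(uffd, &msg, sizeof(msg)) != sizeof(msg))
                            continue;
                    if (msg.event != UFFD_EVENT_PAGEFAULT)
                            continue;

                    /* resolve the fault: copy one (here zero-filled) page
                     * in and wake the blocked faulting thread */
                    memset(page, 0, sizeof(page));
                    copy.dst = msg.arg.pagefault.address & ~(__u64)4095;
                    copy.src = (unsigned long)page;
                    copy.len = 4096;
                    copy.mode = 0;
                    if (ioctl(uffd, UFFDIO_COPY, &copy) == -1)
                            return -1;
            }
    }

In the postcopy scheme sketched in the document, the memset would be replaced by receiving the page from the source node's socket, and uffdio_copy.mode could carry UFFDIO_COPY_MODE_DONTWAKE when the wakeup is to be deferred.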
@@ -369,7 +369,7 @@ static void __init at91_pm_sram_init(void)
                return;
        }
 
-       sram_pool = gen_pool_get(&pdev->dev);
+       sram_pool = gen_pool_get(&pdev->dev, NULL);
        if (!sram_pool) {
                pr_warn("%s: sram pool unavailable!\n", __func__);
                return;
@@ -297,7 +297,7 @@ static int __init imx_suspend_alloc_ocram(
                goto put_node;
        }
 
-       ocram_pool = gen_pool_get(&pdev->dev);
+       ocram_pool = gen_pool_get(&pdev->dev, NULL);
        if (!ocram_pool) {
                pr_warn("%s: ocram pool unavailable!\n", __func__);
                ret = -ENODEV;
@@ -451,7 +451,7 @@ static int __init imx6q_suspend_init(const struct imx6_pm_socdata *socdata)
                goto put_node;
        }
 
-       ocram_pool = gen_pool_get(&pdev->dev);
+       ocram_pool = gen_pool_get(&pdev->dev, NULL);
        if (!ocram_pool) {
                pr_warn("%s: ocram pool unavailable!\n", __func__);
                ret = -ENODEV;
@@ -56,7 +56,7 @@ static int socfpga_setup_ocram_self_refresh(void)
                goto put_node;
        }
 
-       ocram_pool = gen_pool_get(&pdev->dev);
+       ocram_pool = gen_pool_get(&pdev->dev, NULL);
        if (!ocram_pool) {
                pr_warn("%s: ocram pool unavailable!\n", __func__);
                ret = -ENODEV;
@@ -488,7 +488,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 int arch_add_memory(int nid, u64 start, u64 size)
 {
        pg_data_t *pgdat;
-       unsigned long start_pfn = start >> PAGE_SHIFT;
+       unsigned long start_pfn = PFN_DOWN(start);
        unsigned long nr_pages = size >> PAGE_SHIFT;
        int ret;
 
@@ -517,7 +517,7 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 #ifdef CONFIG_MEMORY_HOTREMOVE
 int arch_remove_memory(u64 start, u64 size)
 {
-       unsigned long start_pfn = start >> PAGE_SHIFT;
+       unsigned long start_pfn = PFN_DOWN(start);
        unsigned long nr_pages = size >> PAGE_SHIFT;
        struct zone *zone;
        int ret;
 
@@ -33,8 +33,8 @@ void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end)
        /* Don't allow bogus node assignment */
        BUG_ON(nid >= MAX_NUMNODES || nid <= 0);
 
-       start_pfn = start >> PAGE_SHIFT;
-       end_pfn = end >> PAGE_SHIFT;
+       start_pfn = PFN_DOWN(start);
+       end_pfn = PFN_DOWN(end);
 
        pmb_bolt_mapping((unsigned long)__va(start), start, end - start,
                         PAGE_KERNEL);
@@ -41,6 +41,7 @@ config X86
        select ARCH_USE_CMPXCHG_LOCKREF         if X86_64
        select ARCH_USE_QUEUED_RWLOCKS
        select ARCH_USE_QUEUED_SPINLOCKS
+       select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP
        select ARCH_WANTS_DYNAMIC_TASK_STRUCT
        select ARCH_WANT_FRAME_POINTERS
        select ARCH_WANT_IPC_PARSE_VERSION      if X86_32
@@ -380,3 +380,4 @@
 371    i386    recvfrom                sys_recvfrom    compat_sys_recvfrom
 372    i386    recvmsg                 sys_recvmsg     compat_sys_recvmsg
 373    i386    shutdown                sys_shutdown
+374    i386    userfaultfd             sys_userfaultfd
@@ -329,6 +329,7 @@
 320    common  kexec_file_load         sys_kexec_file_load
 321    common  bpf                     sys_bpf
 322    64      execveat                stub_execveat
+323    common  userfaultfd             sys_userfaultfd
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
@@ -261,6 +261,12 @@ static inline void reset_lazy_tlbstate(void)
 
 #endif /* SMP */
 
+/* Not inlined due to inc_irq_stat not being defined yet */
+#define flush_tlb_local() {            \
+       inc_irq_stat(irq_tlb_count);    \
+       local_flush_tlb();              \
+}
+
 #ifndef CONFIG_PARAVIRT
 #define flush_tlb_others(mask, mm, start, end) \
        native_flush_tlb_others(mask, mm, start, end)
@@ -12,7 +12,7 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/export.h>
-#include <linux/watchdog.h>
+#include <linux/nmi.h>
 
 #include <asm/cpufeature.h>
 #include <asm/hardirq.h>
@@ -3627,7 +3627,10 @@ static __init int fixup_ht_bug(void)
                return 0;
        }
 
-       watchdog_nmi_disable_all();
+       if (lockup_detector_suspend() != 0) {
+               pr_debug("failed to disable PMU erratum BJ122, BV98, HSD29 workaround\n");
+               return 0;
+       }
 
        x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED);
 
@@ -3635,7 +3638,7 @@ static __init int fixup_ht_bug(void)
        x86_pmu.commit_scheduling = NULL;
        x86_pmu.stop_scheduling = NULL;
 
-       watchdog_nmi_enable_all();
+       lockup_detector_resume();
 
        get_online_cpus();
 
@@ -140,6 +140,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
        info.flush_end = end;
 
        count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
+       trace_tlb_flush(TLB_REMOTE_SEND_IPI, end - start);
        if (is_uv_system()) {
                unsigned int cpu;
 
@@ -392,6 +392,16 @@ int register_mem_sect_under_node(struct memory_block *mem_blk, int nid)
        for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
                int page_nid;
 
+               /*
+                * memory block could have several absent sections from start.
+                * skip pfn range from absent section
+                */
+               if (!pfn_present(pfn)) {
+                       pfn = round_down(pfn + PAGES_PER_SECTION,
+                                        PAGES_PER_SECTION) - 1;
+                       continue;
+               }
+
                page_nid = get_nid_for_pfn(pfn);
                if (page_nid < 0)
                        continue;
 
@@ -2157,7 +2157,7 @@ static int coda_probe(struct platform_device *pdev)
        /* Get IRAM pool from device tree or platform data */
        pool = of_gen_pool_get(np, "iram", 0);
        if (!pool && pdata)
-               pool = gen_pool_get(pdata->iram_dev);
+               pool = gen_pool_get(pdata->iram_dev, NULL);
        if (!pool) {
                dev_err(&pdev->dev, "iram pool not available\n");
                return -ENOMEM;
@@ -186,10 +186,10 @@ static int sram_probe(struct platform_device *pdev)
        if (IS_ERR(sram->virt_base))
                return PTR_ERR(sram->virt_base);
 
-       sram->pool = devm_gen_pool_create(sram->dev,
-                                         ilog2(SRAM_GRANULARITY), -1);
-       if (!sram->pool)
-               return -ENOMEM;
+       sram->pool = devm_gen_pool_create(sram->dev, ilog2(SRAM_GRANULARITY),
+                                         NUMA_NO_NODE, NULL);
+       if (IS_ERR(sram->pool))
+               return PTR_ERR(sram->pool);
 
        ret = sram_reserve_regions(sram, res);
        if (ret)
@@ -9,7 +9,7 @@ config VGA_CONSOLE
        depends on !4xx && !8xx && !SPARC && !M68K && !PARISC && !FRV && \
                !SUPERH && !BLACKFIN && !AVR32 && !MN10300 && !CRIS && \
                (!ARM || ARCH_FOOTBRIDGE || ARCH_INTEGRATOR || ARCH_NETWINDER) && \
-               !ARM64
+               !ARM64 && !ARC
        default y
        help
          Saying Y here will allow you to use Linux in text mode through a
@@ -27,6 +27,7 @@ obj-$(CONFIG_ANON_INODES)      += anon_inodes.o
 obj-$(CONFIG_SIGNALFD)         += signalfd.o
 obj-$(CONFIG_TIMERFD)          += timerfd.o
 obj-$(CONFIG_EVENTFD)          += eventfd.o
+obj-$(CONFIG_USERFAULTFD)      += userfaultfd.o
 obj-$(CONFIG_AIO)              += aio.o
 obj-$(CONFIG_FS_DAX)           += dax.o
 obj-$(CONFIG_FILE_LOCKING)     += locks.o
fs/aio.c
@@ -308,15 +308,9 @@ static void aio_free_ring(struct kioctx *ctx)
        }
 }
 
-static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma)
-{
-       vma->vm_flags |= VM_DONTEXPAND;
-       vma->vm_ops = &generic_file_vm_ops;
-       return 0;
-}
-
-static int aio_ring_remap(struct file *file, struct vm_area_struct *vma)
+static int aio_ring_mremap(struct vm_area_struct *vma)
 {
+       struct file *file = vma->vm_file;
        struct mm_struct *mm = vma->vm_mm;
        struct kioctx_table *table;
        int i, res = -EINVAL;
@@ -342,9 +336,24 @@ static int aio_ring_remap(struct file *file, struct vm_area_struct *vma)
        return res;
 }
 
+static const struct vm_operations_struct aio_ring_vm_ops = {
+       .mremap         = aio_ring_mremap,
+#if IS_ENABLED(CONFIG_MMU)
+       .fault          = filemap_fault,
+       .map_pages      = filemap_map_pages,
+       .page_mkwrite   = filemap_page_mkwrite,
+#endif
+};
+
+static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma)
+{
+       vma->vm_flags |= VM_DONTEXPAND;
+       vma->vm_ops = &aio_ring_vm_ops;
+       return 0;
+}
+
 static const struct file_operations aio_ring_fops = {
        .mmap = aio_ring_mmap,
-       .mremap = aio_ring_remap,
 };
 
 #if IS_ENABLED(CONFIG_MIGRATION)
@@ -479,7 +479,7 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
        if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
                seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes);
        if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
-               seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name);
+               seq_show_option(m, "snapdirname", fsopt->snapdir_name);
 
        return 0;
 }
@@ -394,17 +394,17 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
        struct sockaddr *srcaddr;
        srcaddr = (struct sockaddr *)&tcon->ses->server->srcaddr;
 
-       seq_printf(s, ",vers=%s", tcon->ses->server->vals->version_string);
+       seq_show_option(s, "vers", tcon->ses->server->vals->version_string);
        cifs_show_security(s, tcon->ses);
        cifs_show_cache_flavor(s, cifs_sb);
 
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)
                seq_puts(s, ",multiuser");
        else if (tcon->ses->user_name)
-               seq_printf(s, ",username=%s", tcon->ses->user_name);
+               seq_show_option(s, "username", tcon->ses->user_name);
 
        if (tcon->ses->domainName)
-               seq_printf(s, ",domain=%s", tcon->ses->domainName);
+               seq_show_option(s, "domain", tcon->ses->domainName);
 
        if (srcaddr->sa_family != AF_UNSPEC) {
                struct sockaddr_in *saddr4;
@@ -1776,10 +1776,10 @@ static inline void ext4_show_quota_options(struct seq_file *seq,
        }
 
        if (sbi->s_qf_names[USRQUOTA])
-               seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
+               seq_show_option(seq, "usrjquota", sbi->s_qf_names[USRQUOTA]);
 
        if (sbi->s_qf_names[GRPQUOTA])
-               seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);
+               seq_show_option(seq, "grpjquota", sbi->s_qf_names[GRPQUOTA]);
 #endif
 }
 
@@ -1334,11 +1334,11 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root)
        if (is_ancestor(root, sdp->sd_master_dir))
                seq_puts(s, ",meta");
        if (args->ar_lockproto[0])
-               seq_printf(s, ",lockproto=%s", args->ar_lockproto);
+               seq_show_option(s, "lockproto", args->ar_lockproto);
        if (args->ar_locktable[0])
-               seq_printf(s, ",locktable=%s", args->ar_locktable);
+               seq_show_option(s, "locktable", args->ar_locktable);
        if (args->ar_hostdata[0])
-               seq_printf(s, ",hostdata=%s", args->ar_hostdata);
+               seq_show_option(s, "hostdata", args->ar_hostdata);
        if (args->ar_spectator)
                seq_puts(s, ",spectator");
        if (args->ar_localflocks)
@@ -136,9 +136,9 @@ static int hfs_show_options(struct seq_file *seq, struct dentry *root)
        struct hfs_sb_info *sbi = HFS_SB(root->d_sb);
 
        if (sbi->s_creator != cpu_to_be32(0x3f3f3f3f))
-               seq_printf(seq, ",creator=%.4s", (char *)&sbi->s_creator);
+               seq_show_option_n(seq, "creator", (char *)&sbi->s_creator, 4);
        if (sbi->s_type != cpu_to_be32(0x3f3f3f3f))
-               seq_printf(seq, ",type=%.4s", (char *)&sbi->s_type);
+               seq_show_option_n(seq, "type", (char *)&sbi->s_type, 4);
        seq_printf(seq, ",uid=%u,gid=%u",
                   from_kuid_munged(&init_user_ns, sbi->s_uid),
                   from_kgid_munged(&init_user_ns, sbi->s_gid));
@@ -218,9 +218,9 @@ int hfsplus_show_options(struct seq_file *seq, struct dentry *root)
        struct hfsplus_sb_info *sbi = HFSPLUS_SB(root->d_sb);
 
        if (sbi->creator != HFSPLUS_DEF_CR_TYPE)
-               seq_printf(seq, ",creator=%.4s", (char *)&sbi->creator);
+               seq_show_option_n(seq, "creator", (char *)&sbi->creator, 4);
        if (sbi->type != HFSPLUS_DEF_CR_TYPE)
-               seq_printf(seq, ",type=%.4s", (char *)&sbi->type);
+               seq_show_option_n(seq, "type", (char *)&sbi->type, 4);
        seq_printf(seq, ",umask=%o,uid=%u,gid=%u", sbi->umask,
                   from_kuid_munged(&init_user_ns, sbi->uid),
                   from_kgid_munged(&init_user_ns, sbi->gid));
@@ -260,7 +260,7 @@ static int hostfs_show_options(struct seq_file *seq, struct dentry *root)
        size_t offset = strlen(root_ino) + 1;
 
        if (strlen(root_path) > offset)
-               seq_printf(seq, ",%s", root_path + offset);
+               seq_show_option(seq, root_path + offset, NULL);
 
        if (append)
                seq_puts(seq, ",append");
@@ -154,6 +154,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
        struct dnotify_struct *dn;
        struct dnotify_struct **prev;
        struct inode *inode;
+       bool free = false;
 
        inode = file_inode(filp);
        if (!S_ISDIR(inode->i_mode))
@@ -182,11 +183,15 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
 
        /* nothing else could have found us thanks to the dnotify_groups
           mark_mutex */
-       if (dn_mark->dn == NULL)
-               fsnotify_destroy_mark_locked(fsn_mark, dnotify_group);
+       if (dn_mark->dn == NULL) {
+               fsnotify_detach_mark(fsn_mark);
+               free = true;
+       }
 
        mutex_unlock(&dnotify_group->mark_mutex);
 
+       if (free)
+               fsnotify_free_mark(fsn_mark);
        fsnotify_put_mark(fsn_mark);
 }
 
@@ -362,9 +367,10 @@ out:
        spin_unlock(&fsn_mark->lock);
 
        if (destroy)
-               fsnotify_destroy_mark_locked(fsn_mark, dnotify_group);
-
+               fsnotify_detach_mark(fsn_mark);
        mutex_unlock(&dnotify_group->mark_mutex);
+       if (destroy)
+               fsnotify_free_mark(fsn_mark);
        fsnotify_put_mark(fsn_mark);
 out_err:
        if (new_fsn_mark)
@@ -529,8 +529,10 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group,
        removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags,
                                                 &destroy_mark);
        if (destroy_mark)
-               fsnotify_destroy_mark_locked(fsn_mark, group);
+               fsnotify_detach_mark(fsn_mark);
        mutex_unlock(&group->mark_mutex);
+       if (destroy_mark)
+               fsnotify_free_mark(fsn_mark);
 
        fsnotify_put_mark(fsn_mark);
        if (removed & real_mount(mnt)->mnt_fsnotify_mask)
@@ -557,8 +559,10 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group,
        removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags,
                                                 &destroy_mark);
        if (destroy_mark)
-               fsnotify_destroy_mark_locked(fsn_mark, group);
+               fsnotify_detach_mark(fsn_mark);
        mutex_unlock(&group->mark_mutex);
+       if (destroy_mark)
+               fsnotify_free_mark(fsn_mark);
 
        /* matches the fsnotify_find_inode_mark() */
        fsnotify_put_mark(fsn_mark);
@@ -76,7 +76,8 @@ static void inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
        struct inotify_inode_mark *inode_mark;
        struct inode *inode;
 
-       if (!(mark->flags & (FSNOTIFY_MARK_FLAG_ALIVE | FSNOTIFY_MARK_FLAG_INODE)))
+       if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE) ||
+           !(mark->flags & FSNOTIFY_MARK_FLAG_INODE))
                return;
 
        inode_mark = container_of(mark, struct inotify_inode_mark, fsn_mark);
@@ -26,7 +26,6 @@
 
 #include <linux/fsnotify_backend.h>
 #include "fsnotify.h"
-#include "../mount.h"
 
 /*
  * Clear all of the marks on an inode when it is being evicted from core
@@ -204,6 +203,16 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
        else
                mnt = NULL;
 
+       /*
+        * Optimization: srcu_read_lock() has a memory barrier which can
+        * be expensive. It protects walking the *_fsnotify_marks lists.
+        * However, if we do not walk the lists, we do not have to do
+        * SRCU because we have no references to any objects and do not
+        * need SRCU to keep them "alive".
+        */
+       if (hlist_empty(&to_tell->i_fsnotify_marks) &&
+           (!mnt || hlist_empty(&mnt->mnt_fsnotify_marks)))
+               return 0;
        /*
         * if this is a modify event we may need to clear the ignored masks
         * otherwise return if neither the inode nor the vfsmount care about
@@ -6,6 +6,8 @@
 #include <linux/srcu.h>
 #include <linux/types.h>
 
+#include "../mount.h"
+
 /* destroy all events sitting in this groups notification queue */
 extern void fsnotify_flush_notify(struct fsnotify_group *group);
 
@@ -38,15 +40,22 @@ extern int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
 extern void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark);
 /* inode specific destruction of a mark */
 extern void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark);
-/* Destroy all marks in the given list */
-extern void fsnotify_destroy_marks(struct list_head *to_free);
 /* Find mark belonging to given group in the list of marks */
 extern struct fsnotify_mark *fsnotify_find_mark(struct hlist_head *head,
                                                struct fsnotify_group *group);
-/* run the list of all marks associated with inode and flag them to be freed */
-extern void fsnotify_clear_marks_by_inode(struct inode *inode);
-/* run the list of all marks associated with vfsmount and flag them to be freed */
-extern void fsnotify_clear_marks_by_mount(struct vfsmount *mnt);
+/* Destroy all marks in the given list protected by 'lock' */
+extern void fsnotify_destroy_marks(struct hlist_head *head, spinlock_t *lock);
+/* run the list of all marks associated with inode and destroy them */
+static inline void fsnotify_clear_marks_by_inode(struct inode *inode)
+{
+       fsnotify_destroy_marks(&inode->i_fsnotify_marks, &inode->i_lock);
+}
+/* run the list of all marks associated with vfsmount and destroy them */
+static inline void fsnotify_clear_marks_by_mount(struct vfsmount *mnt)
+{
+       fsnotify_destroy_marks(&real_mount(mnt)->mnt_fsnotify_marks,
+                              &mnt->mnt_root->d_lock);
+}
 /*
  * update the dentry->d_flags of all of inode's children to indicate if inode cares
  * about events that happen to its children.
@@ -64,26 +64,6 @@ void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark)
        spin_unlock(&inode->i_lock);
 }
 
-/*
- * Given an inode, destroy all of the marks associated with that inode.
- */
-void fsnotify_clear_marks_by_inode(struct inode *inode)
-{
-       struct fsnotify_mark *mark;
-       struct hlist_node *n;
-       LIST_HEAD(free_list);
-
-       spin_lock(&inode->i_lock);
-       hlist_for_each_entry_safe(mark, n, &inode->i_fsnotify_marks, obj_list) {
-               list_add(&mark->free_list, &free_list);
-               hlist_del_init_rcu(&mark->obj_list);
-               fsnotify_get_mark(mark);
-       }
-       spin_unlock(&inode->i_lock);
-
-       fsnotify_destroy_marks(&free_list);
-}
-
 /*
  * Given a group clear all of the inode marks associated with that group.
  */
fs/notify/mark.c
@@ -122,26 +122,27 @@ u32 fsnotify_recalc_mask(struct hlist_head *head)
 }
 
 /*
- * Any time a mark is getting freed we end up here.
- * The caller had better be holding a reference to this mark so we don't actually
- * do the final put under the mark->lock
+ * Remove mark from inode / vfsmount list, group list, drop inode reference
+ * if we got one.
+ *
+ * Must be called with group->mark_mutex held.
  */
-void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark,
-                                 struct fsnotify_group *group)
+void fsnotify_detach_mark(struct fsnotify_mark *mark)
 {
        struct inode *inode = NULL;
+       struct fsnotify_group *group = mark->group;
 
        BUG_ON(!mutex_is_locked(&group->mark_mutex));
 
        spin_lock(&mark->lock);
 
        /* something else already called this function on this mark */
-       if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) {
+       if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
                spin_unlock(&mark->lock);
                return;
        }
 
-       mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
+       mark->flags &= ~FSNOTIFY_MARK_FLAG_ATTACHED;
 
        if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) {
                inode = mark->inode;
@@ -150,6 +151,12 @@ void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark,
                fsnotify_destroy_vfsmount_mark(mark);
        else
                BUG();
+       /*
+        * Note that we didn't update flags telling whether inode cares about
+        * what's happening with children. We update these flags from
+        * __fsnotify_parent() lazily when next event happens on one of our
+        * children.
+        */
 
        list_del_init(&mark->g_list);
 
@@ -157,18 +164,32 @@ void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark,
 
        if (inode && (mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED))
                iput(inode);
-       /* release lock temporarily */
-       mutex_unlock(&group->mark_mutex);
+
+       atomic_dec(&group->num_marks);
+}
+
+/*
+ * Free fsnotify mark. The freeing is actually happening from a kthread which
+ * first waits for srcu period end. Caller must have a reference to the mark
+ * or be protected by fsnotify_mark_srcu.
+ */
+void fsnotify_free_mark(struct fsnotify_mark *mark)
+{
+       struct fsnotify_group *group = mark->group;
+
+       spin_lock(&mark->lock);
+       /* something else already called this function on this mark */
+       if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) {
+               spin_unlock(&mark->lock);
+               return;
+       }
+       mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
+       spin_unlock(&mark->lock);
 
        spin_lock(&destroy_lock);
        list_add(&mark->g_list, &destroy_list);
        spin_unlock(&destroy_lock);
        wake_up(&destroy_waitq);
-       /*
-        * We don't necessarily have a ref on mark from caller so the above destroy
-        * may have actually freed it, unless this group provides a 'freeing_mark'
-        * function which must be holding a reference.
-        */
 
        /*
         * Some groups like to know that marks are being freed. This is a
@@ -177,50 +198,45 @@ void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark,
         */
        if (group->ops->freeing_mark)
                group->ops->freeing_mark(mark, group);
-
-       /*
-        * __fsnotify_update_child_dentry_flags(inode);
-        *
-        * I really want to call that, but we can't, we have no idea if the inode
-        * still exists the second we drop the mark->lock.
-        *
-        * The next time an event arrive to this inode from one of it's children
-        * __fsnotify_parent will see that the inode doesn't care about it's
-        * children and will update all of these flags then. So really this
-        * is just a lazy update (and could be a perf win...)
-        */
-
-       atomic_dec(&group->num_marks);
-
-       mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
 }
 
 void fsnotify_destroy_mark(struct fsnotify_mark *mark,
                           struct fsnotify_group *group)
 {
        mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
-       fsnotify_destroy_mark_locked(mark, group);
+       fsnotify_detach_mark(mark);
        mutex_unlock(&group->mark_mutex);
+       fsnotify_free_mark(mark);
 }
 
 /*
  * Destroy all marks in the given list. The marks must be already detached from
  * the original inode / vfsmount.
  */
-void fsnotify_destroy_marks(struct list_head *to_free)
+void fsnotify_destroy_marks(struct hlist_head *head, spinlock_t *lock)
 {
-       struct fsnotify_mark *mark, *lmark;
-       struct fsnotify_group *group;
+       struct fsnotify_mark *mark;
 
-       list_for_each_entry_safe(mark, lmark, to_free, free_list) {
-               spin_lock(&mark->lock);
-               fsnotify_get_group(mark->group);
-               group = mark->group;
-               spin_unlock(&mark->lock);
-
-               fsnotify_destroy_mark(mark, group);
+       while (1) {
+               /*
+                * We have to be careful since we can race with e.g.
+                * fsnotify_clear_marks_by_group() and once we drop 'lock',
+                * mark can get removed from the obj_list and destroyed. But
+                * we are holding mark reference so mark cannot be freed and
+                * calling fsnotify_destroy_mark() more than once is fine.
+                */
+               spin_lock(lock);
+               if (hlist_empty(head)) {
+                       spin_unlock(lock);
+                       break;
+               }
+               mark = hlist_entry(head->first, struct fsnotify_mark, obj_list);
+               /*
+                * We don't update i_fsnotify_mask / mnt_fsnotify_mask here
+                * since inode / mount is going away anyway. So just remove
+                * mark from the list.
+                */
+               hlist_del_init_rcu(&mark->obj_list);
+               fsnotify_get_mark(mark);
+               spin_unlock(lock);
+               fsnotify_destroy_mark(mark, mark->group);
                fsnotify_put_mark(mark);
-               fsnotify_put_group(group);
        }
 }
 
@@ -332,7 +348,7 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
         * inode->i_lock
         */
        spin_lock(&mark->lock);
-       mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE;
+       mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE | FSNOTIFY_MARK_FLAG_ATTACHED;
 
        fsnotify_get_group(group);
        mark->group = group;
 
@@ -438,8 +454,9 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group,
                }
                mark = list_first_entry(&to_free, struct fsnotify_mark, g_list);
                fsnotify_get_mark(mark);
-               fsnotify_destroy_mark_locked(mark, group);
+               fsnotify_detach_mark(mark);
                mutex_unlock(&group->mark_mutex);
+               fsnotify_free_mark(mark);
                fsnotify_put_mark(mark);
        }
 }
@@ -28,25 +28,6 @@
 
 #include <linux/fsnotify_backend.h>
 #include "fsnotify.h"
-#include "../mount.h"
-
-void fsnotify_clear_marks_by_mount(struct vfsmount *mnt)
-{
-       struct fsnotify_mark *mark;
-       struct hlist_node *n;
-       struct mount *m = real_mount(mnt);
-       LIST_HEAD(free_list);
-
-       spin_lock(&mnt->mnt_root->d_lock);
-       hlist_for_each_entry_safe(mark, n, &m->mnt_fsnotify_marks, obj_list) {
-               list_add(&mark->free_list, &free_list);
-               hlist_del_init_rcu(&mark->obj_list);
-               fsnotify_get_mark(mark);
-       }
-       spin_unlock(&mnt->mnt_root->d_lock);
-
-       fsnotify_destroy_marks(&free_list);
-}
 
 void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group)
 {
@@ -2204,17 +2204,12 @@ get_ctx_vol_failed:
        return true;
 #ifdef NTFS_RW
 iput_usnjrnl_err_out:
-       if (vol->usnjrnl_j_ino)
-               iput(vol->usnjrnl_j_ino);
-       if (vol->usnjrnl_max_ino)
-               iput(vol->usnjrnl_max_ino);
-       if (vol->usnjrnl_ino)
-               iput(vol->usnjrnl_ino);
+       iput(vol->usnjrnl_j_ino);
+       iput(vol->usnjrnl_max_ino);
+       iput(vol->usnjrnl_ino);
 iput_quota_err_out:
-       if (vol->quota_q_ino)
-               iput(vol->quota_q_ino);
-       if (vol->quota_ino)
-               iput(vol->quota_ino);
+       iput(vol->quota_q_ino);
+       iput(vol->quota_ino);
        iput(vol->extend_ino);
 #endif /* NTFS_RW */
 iput_sec_err_out:
@@ -2223,8 +2218,7 @@ iput_root_err_out:
        iput(vol->root_ino);
 iput_logfile_err_out:
 #ifdef NTFS_RW
-       if (vol->logfile_ino)
-               iput(vol->logfile_ino);
+       iput(vol->logfile_ino);
 iput_vol_err_out:
 #endif /* NTFS_RW */
        iput(vol->vol_ino);
@@ -2254,8 +2248,7 @@ iput_mftbmp_err_out:
        iput(vol->mftbmp_ino);
 iput_mirr_err_out:
 #ifdef NTFS_RW
-       if (vol->mftmirr_ino)
-               iput(vol->mftmirr_ino);
+       iput(vol->mftmirr_ino);
 #endif /* NTFS_RW */
        return false;
 }
@@ -284,7 +284,19 @@ int ocfs2_set_acl(handle_t *handle,
 
 int ocfs2_iop_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 {
-       return ocfs2_set_acl(NULL, inode, NULL, type, acl, NULL, NULL);
+       struct buffer_head *bh = NULL;
+       int status = 0;
+
+       status = ocfs2_inode_lock(inode, &bh, 1);
+       if (status < 0) {
+               if (status != -ENOENT)
+                       mlog_errno(status);
+               return status;
+       }
+       status = ocfs2_set_acl(NULL, inode, bh, type, acl, NULL, NULL);
+       ocfs2_inode_unlock(inode, 1);
+       brelse(bh);
+       return status;
 }
 
 struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type)
@@ -292,19 +304,21 @@ struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type)
        struct ocfs2_super *osb;
        struct buffer_head *di_bh = NULL;
        struct posix_acl *acl;
-       int ret = -EAGAIN;
+       int ret;
 
        osb = OCFS2_SB(inode->i_sb);
        if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
                return NULL;
-
-       ret = ocfs2_read_inode_block(inode, &di_bh);
-       if (ret < 0)
+       ret = ocfs2_inode_lock(inode, &di_bh, 0);
+       if (ret < 0) {
+               if (ret != -ENOENT)
+                       mlog_errno(ret);
                return ERR_PTR(ret);
+       }
 
        acl = ocfs2_get_acl_nolock(inode, type, di_bh);
 
+       ocfs2_inode_unlock(inode, 0);
        brelse(di_bh);
 
        return acl;
 }
fs/ocfs2/alloc.c
@@ -908,32 +908,30 @@ static int ocfs2_validate_extent_block(struct super_block *sb,
         */
 
        if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
-               ocfs2_error(sb,
-                           "Extent block #%llu has bad signature %.*s",
-                           (unsigned long long)bh->b_blocknr, 7,
-                           eb->h_signature);
-               return -EINVAL;
+               rc = ocfs2_error(sb,
+                                "Extent block #%llu has bad signature %.*s\n",
+                                (unsigned long long)bh->b_blocknr, 7,
+                                eb->h_signature);
+               goto bail;
        }
 
        if (le64_to_cpu(eb->h_blkno) != bh->b_blocknr) {
-               ocfs2_error(sb,
-                           "Extent block #%llu has an invalid h_blkno "
-                           "of %llu",
-                           (unsigned long long)bh->b_blocknr,
-                           (unsigned long long)le64_to_cpu(eb->h_blkno));
-               return -EINVAL;
+               rc = ocfs2_error(sb,
+                                "Extent block #%llu has an invalid h_blkno of %llu\n",
+                                (unsigned long long)bh->b_blocknr,
+                                (unsigned long long)le64_to_cpu(eb->h_blkno));
+               goto bail;
        }
 
        if (le32_to_cpu(eb->h_fs_generation) != OCFS2_SB(sb)->fs_generation) {
-               ocfs2_error(sb,
-                           "Extent block #%llu has an invalid "
-                           "h_fs_generation of #%u",
-                           (unsigned long long)bh->b_blocknr,
-                           le32_to_cpu(eb->h_fs_generation));
-               return -EINVAL;
+               rc = ocfs2_error(sb,
+                                "Extent block #%llu has an invalid h_fs_generation of #%u\n",
+                                (unsigned long long)bh->b_blocknr,
+                                le32_to_cpu(eb->h_fs_generation));
+               goto bail;
        }
 
        return 0;
+bail:
+       return rc;
 }
 
 int ocfs2_read_extent_block(struct ocfs2_caching_info *ci, u64 eb_blkno,
@@ -1446,8 +1444,7 @@ static int ocfs2_find_branch_target(struct ocfs2_extent_tree *et,
        while(le16_to_cpu(el->l_tree_depth) > 1) {
                if (le16_to_cpu(el->l_next_free_rec) == 0) {
                        ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
-                                   "Owner %llu has empty "
-                                   "extent list (next_free_rec == 0)",
+                                   "Owner %llu has empty extent list (next_free_rec == 0)\n",
                                    (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci));
                        status = -EIO;
                        goto bail;
@@ -1456,9 +1453,7 @@ static int ocfs2_find_branch_target(struct ocfs2_extent_tree *et,
                blkno = le64_to_cpu(el->l_recs[i].e_blkno);
                if (!blkno) {
                        ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
-                                   "Owner %llu has extent "
-                                   "list where extent # %d has no physical "
-                                   "block start",
+                                   "Owner %llu has extent list where extent # %d has no physical block start\n",
                                    (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), i);
                        status = -EIO;
                        goto bail;
@@ -1788,8 +1783,7 @@ static int __ocfs2_find_path(struct ocfs2_caching_info *ci,
        while (el->l_tree_depth) {
                if (le16_to_cpu(el->l_next_free_rec) == 0) {
                        ocfs2_error(ocfs2_metadata_cache_get_super(ci),
-                                   "Owner %llu has empty extent list at "
-                                   "depth %u\n",
+                                   "Owner %llu has empty extent list at depth %u\n",
                                    (unsigned long long)ocfs2_metadata_cache_owner(ci),
                                    le16_to_cpu(el->l_tree_depth));
                        ret = -EROFS;
@@ -1814,8 +1808,7 @@ static int __ocfs2_find_path(struct ocfs2_caching_info *ci,
                blkno = le64_to_cpu(el->l_recs[i].e_blkno);
                if (blkno == 0) {
                        ocfs2_error(ocfs2_metadata_cache_get_super(ci),
-                                   "Owner %llu has bad blkno in extent list "
-                                   "at depth %u (index %d)\n",
+                                   "Owner %llu has bad blkno in extent list at depth %u (index %d)\n",
                                    (unsigned long long)ocfs2_metadata_cache_owner(ci),
                                    le16_to_cpu(el->l_tree_depth), i);
                        ret = -EROFS;
@@ -1836,8 +1829,7 @@ static int __ocfs2_find_path(struct ocfs2_caching_info *ci,
                if (le16_to_cpu(el->l_next_free_rec) >
                    le16_to_cpu(el->l_count)) {
                        ocfs2_error(ocfs2_metadata_cache_get_super(ci),
-                                   "Owner %llu has bad count in extent list "
-                                   "at block %llu (next free=%u, count=%u)\n",
+                                   "Owner %llu has bad count in extent list at block %llu (next free=%u, count=%u)\n",
                                    (unsigned long long)ocfs2_metadata_cache_owner(ci),
                                    (unsigned long long)bh->b_blocknr,
                                    le16_to_cpu(el->l_next_free_rec),
@@ -2116,8 +2108,7 @@ static int ocfs2_rotate_subtree_right(handle_t *handle,
 
        if (left_el->l_next_free_rec != left_el->l_count) {
                ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
-                           "Inode %llu has non-full interior leaf node %llu"
-                           "(next free = %u)",
+                           "Inode %llu has non-full interior leaf node %llu (next free = %u)\n",
                            (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
                            (unsigned long long)left_leaf_bh->b_blocknr,
                            le16_to_cpu(left_el->l_next_free_rec));
@@ -2256,8 +2247,7 @@ int ocfs2_find_cpos_for_left_leaf(struct super_block *sb,
                         * If we got here, we never found a valid node where
                         * the tree indicated one should be.
                         */
-                       ocfs2_error(sb,
-                                   "Invalid extent tree at extent block %llu\n",
+                       ocfs2_error(sb, "Invalid extent tree at extent block %llu\n",
                                    (unsigned long long)blkno);
                        ret = -EROFS;
                        goto out;
@@ -2872,8 +2862,7 @@ int ocfs2_find_cpos_for_right_leaf(struct super_block *sb,
                         * If we got here, we never found a valid node where
                         * the tree indicated one should be.
                         */
-                       ocfs2_error(sb,
-                                   "Invalid extent tree at extent block %llu\n",
+                       ocfs2_error(sb, "Invalid extent tree at extent block %llu\n",
                                    (unsigned long long)blkno);
                        ret = -EROFS;
                        goto out;
@@ -3131,6 +3120,30 @@ out:
        return ret;
 }
 
+static int ocfs2_remove_rightmost_empty_extent(struct ocfs2_super *osb,
+                               struct ocfs2_extent_tree *et,
+                               struct ocfs2_path *path,
+                               struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+       handle_t *handle;
+       int ret;
+       int credits = path->p_tree_depth * 2 + 1;
+
+       handle = ocfs2_start_trans(osb, credits);
+       if (IS_ERR(handle)) {
+               ret = PTR_ERR(handle);
+               mlog_errno(ret);
+               return ret;
+       }
+
+       ret = ocfs2_remove_rightmost_path(handle, et, path, dealloc);
+       if (ret)
+               mlog_errno(ret);
+
+       ocfs2_commit_trans(osb, handle);
+       return ret;
+}
+
 /*
  * Left rotation of btree records.
  *
@@ -3200,7 +3213,7 @@ rightmost_no_delete:
                if (le16_to_cpu(el->l_next_free_rec) == 0) {
                        ret = -EIO;
                        ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
-                                   "Owner %llu has empty extent block at %llu",
+                                   "Owner %llu has empty extent block at %llu\n",
                                    (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
                                    (unsigned long long)le64_to_cpu(eb->h_blkno));
                        goto out;
@@ -3930,7 +3943,7 @@ static void ocfs2_adjust_rightmost_records(handle_t *handle,
                next_free = le16_to_cpu(el->l_next_free_rec);
                if (next_free == 0) {
                        ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
-                                   "Owner %llu has a bad extent list",
+                                   "Owner %llu has a bad extent list\n",
                                    (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci));
                        ret = -EIO;
                        return;
@@ -4355,10 +4368,7 @@ static int ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
                        bh = path_leaf_bh(left_path);
                        eb = (struct ocfs2_extent_block *)bh->b_data;
                        ocfs2_error(sb,
-                                   "Extent block #%llu has an "
-                                   "invalid l_next_free_rec of "
-                                   "%d. It should have "
-                                   "matched the l_count of %d",
+                                   "Extent block #%llu has an invalid l_next_free_rec of %d. It should have matched the l_count of %d\n",
                                    (unsigned long long)le64_to_cpu(eb->h_blkno),
                                    le16_to_cpu(new_el->l_next_free_rec),
                                    le16_to_cpu(new_el->l_count));
@@ -4413,8 +4423,7 @@ static int ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
                        bh = path_leaf_bh(right_path);
                        eb = (struct ocfs2_extent_block *)bh->b_data;
                        ocfs2_error(sb,
-                                   "Extent block #%llu has an "
-                                   "invalid l_next_free_rec of %d",
+                                   "Extent block #%llu has an invalid l_next_free_rec of %d\n",
                                    (unsigned long long)le64_to_cpu(eb->h_blkno),
                                    le16_to_cpu(new_el->l_next_free_rec));
                        status = -EINVAL;
@@ -4970,10 +4979,9 @@ leftright:
        split_index = ocfs2_search_extent_list(el, cpos);
        if (split_index == -1) {
                ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
-                           "Owner %llu has an extent at cpos %u "
-                           "which can no longer be found.\n",
-                           (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
-                           cpos);
+                           "Owner %llu has an extent at cpos %u which can no longer be found\n",
+                           (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
+                           cpos);
                ret = -EROFS;
                goto out;
        }
@@ -5158,10 +5166,9 @@ int ocfs2_change_extent_flag(handle_t *handle,
        index = ocfs2_search_extent_list(el, cpos);
        if (index == -1) {
                ocfs2_error(sb,
-                           "Owner %llu has an extent at cpos %u which can no "
-                           "longer be found.\n",
-                           (unsigned long long)
-                           ocfs2_metadata_cache_owner(et->et_ci), cpos);
+                           "Owner %llu has an extent at cpos %u which can no longer be found\n",
+                           (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
+                           cpos);
                ret = -EROFS;
                goto out;
        }
@@ -5228,9 +5235,7 @@ int ocfs2_mark_extent_written(struct inode *inode,
             cpos, len, phys);
 
        if (!ocfs2_writes_unwritten_extents(OCFS2_SB(inode->i_sb))) {
-               ocfs2_error(inode->i_sb, "Inode %llu has unwritten extents "
-                           "that are being written to, but the feature bit "
-                           "is not set in the super block.",
+               ocfs2_error(inode->i_sb, "Inode %llu has unwritten extents that are being written to, but the feature bit is not set in the super block\n",
                            (unsigned long long)OCFS2_I(inode)->ip_blkno);
                ret = -EROFS;
                goto out;
@@ -5514,8 +5519,7 @@ int ocfs2_remove_extent(handle_t *handle,
        index = ocfs2_search_extent_list(el, cpos);
        if (index == -1) {
                ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
-                           "Owner %llu has an extent at cpos %u which can no "
-                           "longer be found.\n",
+                           "Owner %llu has an extent at cpos %u which can no longer be found\n",
                            (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
                            cpos);
                ret = -EROFS;
@@ -5580,7 +5584,7 @@ int ocfs2_remove_extent(handle_t *handle,
                index = ocfs2_search_extent_list(el, cpos);
                if (index == -1) {
                        ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
-                                   "Owner %llu: split at cpos %u lost record.",
+                                   "Owner %llu: split at cpos %u lost record\n",
                                    (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
                                    cpos);
                        ret = -EROFS;
@@ -5596,8 +5600,7 @@ int ocfs2_remove_extent(handle_t *handle,
                         ocfs2_rec_clusters(el, rec);
                if (rec_range != trunc_range) {
                        ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
-                                   "Owner %llu: error after split at cpos %u"
-                                   "trunc len %u, existing record is (%u,%u)",
+                                   "Owner %llu: error after split at cpos %u trunc len %u, existing record is (%u,%u)\n",
                                    (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
                                    cpos, len, le32_to_cpu(rec->e_cpos),
                                    ocfs2_rec_clusters(el, rec));
@@ -6175,7 +6178,7 @@ bail:
                iput(tl_inode);
        brelse(tl_bh);
 
-       if (status < 0 && (*tl_copy)) {
+       if (status < 0) {
                kfree(*tl_copy);
                *tl_copy = NULL;
                mlog_errno(status);
@@ -7108,15 +7111,23 @@ start:
                 * to check it up here before changing the tree.
                 */
                if (root_el->l_tree_depth && rec->e_int_clusters == 0) {
-                       ocfs2_error(inode->i_sb, "Inode %lu has an empty "
+                       mlog(ML_ERROR, "Inode %lu has an empty "
                                    "extent record, depth %u\n", inode->i_ino,
                                    le16_to_cpu(root_el->l_tree_depth));
-                       status = -EROFS;
-                       goto bail;
+                       status = ocfs2_remove_rightmost_empty_extent(osb,
+                                       &et, path, &dealloc);
+                       if (status) {
+                               mlog_errno(status);
+                               goto bail;
+                       }
+
+                       ocfs2_reinit_path(path, 1);
+                       goto start;
+               } else {
+                       trunc_cpos = le32_to_cpu(rec->e_cpos);
+                       trunc_len = 0;
+                       blkno = 0;
                }
-
-               trunc_cpos = le32_to_cpu(rec->e_cpos);
-               trunc_len = 0;
-               blkno = 0;
        } else if (le32_to_cpu(rec->e_cpos) >= new_highest_cpos) {
                /*
                 * Truncate entire record.
@@ -7204,8 +7215,7 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
            !(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL) ||
            !ocfs2_supports_inline_data(osb)) {
                ocfs2_error(inode->i_sb,
-                           "Inline data flags for inode %llu don't agree! "
-                           "Disk: 0x%x, Memory: 0x%x, Superblock: 0x%x\n",
+                           "Inline data flags for inode %llu don't agree! Disk: 0x%x, Memory: 0x%x, Superblock: 0x%x\n",
                            (unsigned long long)OCFS2_I(inode)->ip_blkno,
                            le16_to_cpu(di->i_dyn_features),
                            OCFS2_I(inode)->ip_dyn_features,
@@ -227,7 +227,7 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page,
        struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
 
        if (!(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL)) {
-               ocfs2_error(inode->i_sb, "Inode %llu lost inline data flag",
+               ocfs2_error(inode->i_sb, "Inode %llu lost inline data flag\n",
                            (unsigned long long)OCFS2_I(inode)->ip_blkno);
                return -EROFS;
        }
@@ -237,7 +237,7 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page,
        if (size > PAGE_CACHE_SIZE ||
            size > ocfs2_max_inline_data_with_xattr(inode->i_sb, di)) {
                ocfs2_error(inode->i_sb,
-                           "Inode %llu has with inline data has bad size: %Lu",
+                           "Inode %llu has with inline data has bad size: %Lu\n",
                            (unsigned long long)OCFS2_I(inode)->ip_blkno,
                            (unsigned long long)size);
                return -EROFS;
@@ -533,10 +533,14 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
 
        inode_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
 
+       down_read(&OCFS2_I(inode)->ip_alloc_sem);
+
        /* This figures out the size of the next contiguous block, and
         * our logical offset */
        ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno,
                                          &contig_blocks, &ext_flags);
+       up_read(&OCFS2_I(inode)->ip_alloc_sem);
+
        if (ret) {
                mlog(ML_ERROR, "get_blocks() failed iblock=%llu\n",
                     (unsigned long long)iblock);
@@ -557,6 +561,8 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
 
                alloc_locked = 1;
 
+               down_write(&OCFS2_I(inode)->ip_alloc_sem);
+
                /* fill hole, allocate blocks can't be larger than the size
                 * of the hole */
                clusters_to_alloc = ocfs2_clusters_for_bytes(inode->i_sb, len);
@@ -569,6 +575,7 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
                ret = ocfs2_extend_allocation(inode, cpos,
                                clusters_to_alloc, 0);
                if (ret < 0) {
+                       up_write(&OCFS2_I(inode)->ip_alloc_sem);
                        mlog_errno(ret);
                        goto bail;
                }
@@ -576,11 +583,13 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
                ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno,
                                &contig_blocks, &ext_flags);
                if (ret < 0) {
+                       up_write(&OCFS2_I(inode)->ip_alloc_sem);
                        mlog(ML_ERROR, "get_blocks() failed iblock=%llu\n",
                             (unsigned long long)iblock);
                        ret = -EIO;
                        goto bail;
                }
+               up_write(&OCFS2_I(inode)->ip_alloc_sem);
        }
 
        /*
@@ -627,10 +636,13 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
 		mutex_unlock(&OCFS2_I(inode)->ip_unaligned_aio);
 	}

-	ocfs2_iocb_clear_rw_locked(iocb);
+	/* Let rw unlock to be done later to protect append direct io write */
+	if (offset + bytes <= i_size_read(inode)) {
+		ocfs2_iocb_clear_rw_locked(iocb);

-	level = ocfs2_iocb_rw_locked_level(iocb);
-	ocfs2_rw_unlock(inode, level);
+		level = ocfs2_iocb_rw_locked_level(iocb);
+		ocfs2_rw_unlock(inode, level);
+	}
 }

 static int ocfs2_releasepage(struct page *page, gfp_t wait)
@@ -832,12 +844,17 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,

 	/* zeroing out the previously allocated cluster tail
 	 * that but not zeroed */
-	if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
+	if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) {
+		down_read(&OCFS2_I(inode)->ip_alloc_sem);
 		ret = ocfs2_direct_IO_zero_extend(osb, inode, offset,
 				zero_len_tail, cluster_align_tail);
-	else
+		up_read(&OCFS2_I(inode)->ip_alloc_sem);
+	} else {
+		down_write(&OCFS2_I(inode)->ip_alloc_sem);
 		ret = ocfs2_direct_IO_extend_no_holes(osb, inode,
 				offset);
+		up_write(&OCFS2_I(inode)->ip_alloc_sem);
+	}
 	if (ret < 0) {
 		mlog_errno(ret);
 		ocfs2_inode_unlock(inode, 1);
@@ -857,7 +874,8 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
 	written = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
 				       offset, ocfs2_direct_IO_get_blocks,
 				       ocfs2_dio_end_io, NULL, 0);
-	if (unlikely(written < 0)) {
+	/* overwrite aio may return -EIOCBQUEUED, and it is not an error */
+	if ((written < 0) && (written != -EIOCBQUEUED)) {
 		loff_t i_size = i_size_read(inode);

 		if (offset + count > i_size) {

@@ -876,12 +894,14 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
 			ocfs2_inode_unlock(inode, 1);
 			brelse(di_bh);
 			di_bh = NULL;
 			goto clean_orphan;
 		}
 	}

 	ocfs2_inode_unlock(inode, 1);
+	brelse(di_bh);
+	di_bh = NULL;

 	ret = jbd2_journal_force_commit(journal);
 	if (ret < 0)
@@ -936,10 +956,12 @@ clean_orphan:
 	if (tmp_ret < 0) {
 		ret = tmp_ret;
 		mlog_errno(ret);
+		brelse(di_bh);
 		goto out;
 	}

 	ocfs2_inode_unlock(inode, 1);
+	brelse(di_bh);

 	tmp_ret = jbd2_journal_force_commit(journal);
 	if (tmp_ret < 0) {
@@ -2185,10 +2207,7 @@ try_again:
 		if (ret)
 			goto out_commit;
 	}
-	/*
-	 * We don't want this to fail in ocfs2_write_end(), so do it
-	 * here.
-	 */
+
 	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh,
 			OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
@@ -2345,7 +2364,7 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
 			   loff_t pos, unsigned len, unsigned copied,
 			   struct page *page, void *fsdata)
 {
-	int i;
+	int i, ret;
 	unsigned from, to, start = pos & (PAGE_CACHE_SIZE - 1);
 	struct inode *inode = mapping->host;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
@@ -2354,6 +2373,14 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
 	handle_t *handle = wc->w_handle;
 	struct page *tmppage;

+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh,
+			OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		copied = ret;
+		mlog_errno(ret);
+		goto out;
+	}
+
 	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
 		ocfs2_write_end_inline(inode, pos, len, &copied, di, wc);
 		goto out_write_size;
@@ -2409,6 +2436,7 @@ out_write_size:
 	ocfs2_update_inode_fsync_trans(handle, inode, 1);
 	ocfs2_journal_dirty(handle, wc->w_di_bh);

+out:
 	/* unlock pages before dealloc since it needs acquiring j_trans_barrier
 	 * lock, or it will cause a deadlock since journal commit threads holds
 	 * this lock and will ask for the page lock when flushing the data.
@@ -316,6 +316,12 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
 		bh = bhs[i];

 		if (!(flags & OCFS2_BH_READAHEAD)) {
+			if (status) {
+				/* Clear the rest of the buffers on error */
+				put_bh(bh);
+				bhs[i] = NULL;
+				continue;
+			}
 			/* We know this can't have changed as we hold the
 			 * owner sem. Avoid doing any work on the bh if the
 			 * journal has it. */
@@ -36,7 +36,7 @@
 #include <linux/debugfs.h>
 #include <linux/slab.h>
 #include <linux/bitmap.h>
-
+#include <linux/ktime.h>
 #include "heartbeat.h"
 #include "tcp.h"
 #include "nodemanager.h"
@@ -1060,37 +1060,6 @@ bail:
 	return ret;
 }

-/* Subtract b from a, storing the result in a. a *must* have a larger
- * value than b. */
-static void o2hb_tv_subtract(struct timeval *a,
-			     struct timeval *b)
-{
-	/* just return 0 when a is after b */
-	if (a->tv_sec < b->tv_sec ||
-	    (a->tv_sec == b->tv_sec && a->tv_usec < b->tv_usec)) {
-		a->tv_sec = 0;
-		a->tv_usec = 0;
-		return;
-	}
-
-	a->tv_sec -= b->tv_sec;
-	a->tv_usec -= b->tv_usec;
-	while ( a->tv_usec < 0 ) {
-		a->tv_sec--;
-		a->tv_usec += 1000000;
-	}
-}
-
-static unsigned int o2hb_elapsed_msecs(struct timeval *start,
-				       struct timeval *end)
-{
-	struct timeval res = *end;
-
-	o2hb_tv_subtract(&res, start);
-
-	return res.tv_sec * 1000 + res.tv_usec / 1000;
-}
-
 /*
  * we ride the region ref that the region dir holds. before the region
  * dir is removed and drops it ref it will wait to tear down this
@@ -1101,7 +1070,7 @@ static int o2hb_thread(void *data)
 	int i, ret;
 	struct o2hb_region *reg = data;
 	struct o2hb_bio_wait_ctxt write_wc;
-	struct timeval before_hb, after_hb;
+	ktime_t before_hb, after_hb;
 	unsigned int elapsed_msec;

 	mlog(ML_HEARTBEAT|ML_KTHREAD, "hb thread running\n");
@@ -1118,18 +1087,18 @@ static int o2hb_thread(void *data)
 		 * hr_timeout_ms between disk writes. On busy systems
 		 * this should result in a heartbeat which is less
 		 * likely to time itself out. */
-		do_gettimeofday(&before_hb);
+		before_hb = ktime_get_real();

 		ret = o2hb_do_disk_heartbeat(reg);

-		do_gettimeofday(&after_hb);
-		elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb);
+		after_hb = ktime_get_real();
+
+		elapsed_msec = (unsigned int)
+				ktime_ms_delta(after_hb, before_hb);

 		mlog(ML_HEARTBEAT,
-		     "start = %lu.%lu, end = %lu.%lu, msec = %u, ret = %d\n",
-		     before_hb.tv_sec, (unsigned long) before_hb.tv_usec,
-		     after_hb.tv_sec, (unsigned long) after_hb.tv_usec,
-		     elapsed_msec, ret);
+		     "start = %lld, end = %lld, msec = %u, ret = %d\n",
+		     before_hb.tv64, after_hb.tv64, elapsed_msec, ret);

 		if (!kthread_should_stop() &&
 		    elapsed_msec < reg->hr_timeout_ms) {
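The hunk above drops o2hb's hand-rolled timeval subtraction in favour of ktime_get_real()/ktime_ms_delta(). A minimal, runnable userspace analogue of the same measure-elapsed-milliseconds pattern (this is a sketch, not ocfs2 code; ms_delta and the sleep stand-in are illustrative):

#include <stdint.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

/* Mirror of ktime_ms_delta(): signed millisecond difference. */
static int64_t ms_delta(const struct timespec *later,
                        const struct timespec *earlier)
{
    return (later->tv_sec - earlier->tv_sec) * 1000 +
           (later->tv_nsec - earlier->tv_nsec) / 1000000;
}

int main(void)
{
    struct timespec before_hb, after_hb;

    clock_gettime(CLOCK_MONOTONIC, &before_hb);
    usleep(50 * 1000);    /* stand-in for o2hb_do_disk_heartbeat() */
    clock_gettime(CLOCK_MONOTONIC, &after_hb);

    printf("heartbeat took %lld msec\n",
           (long long)ms_delta(&after_hb, &before_hb));
    return 0;
}

The win is the same as in the patch: one signed 64-bit subtraction replaces the carry/borrow loop over seconds and microseconds.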
@@ -1619,17 +1588,13 @@ static int o2hb_map_slot_data(struct o2hb_region *reg)
 	struct o2hb_disk_slot *slot;

 	reg->hr_tmp_block = kmalloc(reg->hr_block_bytes, GFP_KERNEL);
-	if (reg->hr_tmp_block == NULL) {
-		mlog_errno(-ENOMEM);
+	if (reg->hr_tmp_block == NULL)
 		return -ENOMEM;
-	}

 	reg->hr_slots = kcalloc(reg->hr_blocks,
 				sizeof(struct o2hb_disk_slot), GFP_KERNEL);
-	if (reg->hr_slots == NULL) {
-		mlog_errno(-ENOMEM);
+	if (reg->hr_slots == NULL)
 		return -ENOMEM;
-	}

 	for(i = 0; i < reg->hr_blocks; i++) {
 		slot = &reg->hr_slots[i];

@@ -1645,17 +1610,13 @@ static int o2hb_map_slot_data(struct o2hb_region *reg)

 	reg->hr_slot_data = kcalloc(reg->hr_num_pages, sizeof(struct page *),
 				    GFP_KERNEL);
-	if (!reg->hr_slot_data) {
-		mlog_errno(-ENOMEM);
+	if (!reg->hr_slot_data)
 		return -ENOMEM;
-	}

 	for(i = 0; i < reg->hr_num_pages; i++) {
 		page = alloc_page(GFP_KERNEL);
-		if (!page) {
-			mlog_errno(-ENOMEM);
+		if (!page)
 			return -ENOMEM;
-		}

 		reg->hr_slot_data[i] = page;
@@ -1687,10 +1648,8 @@ static int o2hb_populate_slot_data(struct o2hb_region *reg)
 	struct o2hb_disk_heartbeat_block *hb_block;

 	ret = o2hb_read_slots(reg, reg->hr_blocks);
-	if (ret) {
-		mlog_errno(ret);
+	if (ret)
 		goto out;
-	}

 	/* We only want to get an idea of the values initially in each
 	 * slot, so we do no verification - o2hb_check_slot will
@@ -480,33 +480,26 @@ static int ocfs2_check_dir_trailer(struct inode *dir, struct buffer_head *bh)

 	trailer = ocfs2_trailer_from_bh(bh, dir->i_sb);
 	if (!OCFS2_IS_VALID_DIR_TRAILER(trailer)) {
-		rc = -EINVAL;
-		ocfs2_error(dir->i_sb,
-			    "Invalid dirblock #%llu: "
-			    "signature = %.*s\n",
-			    (unsigned long long)bh->b_blocknr, 7,
-			    trailer->db_signature);
+		rc = ocfs2_error(dir->i_sb,
+				 "Invalid dirblock #%llu: signature = %.*s\n",
+				 (unsigned long long)bh->b_blocknr, 7,
+				 trailer->db_signature);
 		goto out;
 	}
 	if (le64_to_cpu(trailer->db_blkno) != bh->b_blocknr) {
-		rc = -EINVAL;
-		ocfs2_error(dir->i_sb,
-			    "Directory block #%llu has an invalid "
-			    "db_blkno of %llu",
-			    (unsigned long long)bh->b_blocknr,
-			    (unsigned long long)le64_to_cpu(trailer->db_blkno));
+		rc = ocfs2_error(dir->i_sb,
+				 "Directory block #%llu has an invalid db_blkno of %llu\n",
+				 (unsigned long long)bh->b_blocknr,
+				 (unsigned long long)le64_to_cpu(trailer->db_blkno));
 		goto out;
 	}
 	if (le64_to_cpu(trailer->db_parent_dinode) !=
 	    OCFS2_I(dir)->ip_blkno) {
-		rc = -EINVAL;
-		ocfs2_error(dir->i_sb,
-			    "Directory block #%llu on dinode "
-			    "#%llu has an invalid parent_dinode "
-			    "of %llu",
-			    (unsigned long long)bh->b_blocknr,
-			    (unsigned long long)OCFS2_I(dir)->ip_blkno,
-			    (unsigned long long)le64_to_cpu(trailer->db_blkno));
+		rc = ocfs2_error(dir->i_sb,
+				 "Directory block #%llu on dinode #%llu has an invalid parent_dinode of %llu\n",
+				 (unsigned long long)bh->b_blocknr,
+				 (unsigned long long)OCFS2_I(dir)->ip_blkno,
+				 (unsigned long long)le64_to_cpu(trailer->db_blkno));
 		goto out;
 	}
 out:
@@ -604,14 +597,13 @@ static int ocfs2_validate_dx_root(struct super_block *sb,
 	}

 	if (!OCFS2_IS_VALID_DX_ROOT(dx_root)) {
-		ocfs2_error(sb,
-			    "Dir Index Root # %llu has bad signature %.*s",
-			    (unsigned long long)le64_to_cpu(dx_root->dr_blkno),
-			    7, dx_root->dr_signature);
-		return -EINVAL;
+		ret = ocfs2_error(sb,
+				  "Dir Index Root # %llu has bad signature %.*s\n",
+				  (unsigned long long)le64_to_cpu(dx_root->dr_blkno),
+				  7, dx_root->dr_signature);
 	}

-	return 0;
+	return ret;
 }

 static int ocfs2_read_dx_root(struct inode *dir, struct ocfs2_dinode *di,
@@ -648,12 +640,11 @@ static int ocfs2_validate_dx_leaf(struct super_block *sb,
 	}

 	if (!OCFS2_IS_VALID_DX_LEAF(dx_leaf)) {
-		ocfs2_error(sb, "Dir Index Leaf has bad signature %.*s",
-			    7, dx_leaf->dl_signature);
-		return -EROFS;
+		ret = ocfs2_error(sb, "Dir Index Leaf has bad signature %.*s\n",
+				  7, dx_leaf->dl_signature);
 	}

-	return 0;
+	return ret;
 }

 static int ocfs2_read_dx_leaf(struct inode *dir, u64 blkno,

@@ -812,11 +803,10 @@ static int ocfs2_dx_dir_lookup_rec(struct inode *inode,
 		el = &eb->h_list;

 		if (el->l_tree_depth) {
-			ocfs2_error(inode->i_sb,
-				    "Inode %lu has non zero tree depth in "
-				    "btree tree block %llu\n", inode->i_ino,
-				    (unsigned long long)eb_bh->b_blocknr);
-			ret = -EROFS;
+			ret = ocfs2_error(inode->i_sb,
+					  "Inode %lu has non zero tree depth in btree tree block %llu\n",
+					  inode->i_ino,
+					  (unsigned long long)eb_bh->b_blocknr);
 			goto out;
 		}
 	}

@@ -832,11 +822,11 @@ static int ocfs2_dx_dir_lookup_rec(struct inode *inode,
 	}

 	if (!found) {
-		ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
-			    "record (%u, %u, 0) in btree", inode->i_ino,
-			    le32_to_cpu(rec->e_cpos),
-			    ocfs2_rec_clusters(el, rec));
-		ret = -EROFS;
+		ret = ocfs2_error(inode->i_sb,
+				  "Inode %lu has bad extent record (%u, %u, 0) in btree\n",
+				  inode->i_ino,
+				  le32_to_cpu(rec->e_cpos),
+				  ocfs2_rec_clusters(el, rec));
 		goto out;
 	}
@@ -1465,39 +1465,46 @@ static int dlm_request_join(struct dlm_ctxt *dlm,
 	if (status == -ENOPROTOOPT) {
 		status = 0;
 		*response = JOIN_OK_NO_MAP;
-	} else if (packet.code == JOIN_DISALLOW ||
-		   packet.code == JOIN_OK_NO_MAP) {
-		*response = packet.code;
-	} else if (packet.code == JOIN_PROTOCOL_MISMATCH) {
-		mlog(ML_NOTICE,
-		     "This node requested DLM locking protocol %u.%u and "
-		     "filesystem locking protocol %u.%u. At least one of "
-		     "the protocol versions on node %d is not compatible, "
-		     "disconnecting\n",
-		     dlm->dlm_locking_proto.pv_major,
-		     dlm->dlm_locking_proto.pv_minor,
-		     dlm->fs_locking_proto.pv_major,
-		     dlm->fs_locking_proto.pv_minor,
-		     node);
-		status = -EPROTO;
-		*response = packet.code;
-	} else if (packet.code == JOIN_OK) {
-		*response = packet.code;
-		/* Use the same locking protocol as the remote node */
-		dlm->dlm_locking_proto.pv_minor = packet.dlm_minor;
-		dlm->fs_locking_proto.pv_minor = packet.fs_minor;
-		mlog(0,
-		     "Node %d responds JOIN_OK with DLM locking protocol "
-		     "%u.%u and fs locking protocol %u.%u\n",
-		     node,
-		     dlm->dlm_locking_proto.pv_major,
-		     dlm->dlm_locking_proto.pv_minor,
-		     dlm->fs_locking_proto.pv_major,
-		     dlm->fs_locking_proto.pv_minor);
 	} else {
-		status = -EINVAL;
-		mlog(ML_ERROR, "invalid response %d from node %u\n",
-		     packet.code, node);
+		*response = packet.code;
+		switch (packet.code) {
+		case JOIN_DISALLOW:
+		case JOIN_OK_NO_MAP:
+			break;
+		case JOIN_PROTOCOL_MISMATCH:
+			mlog(ML_NOTICE,
+			     "This node requested DLM locking protocol %u.%u and "
+			     "filesystem locking protocol %u.%u. At least one of "
+			     "the protocol versions on node %d is not compatible, "
+			     "disconnecting\n",
+			     dlm->dlm_locking_proto.pv_major,
+			     dlm->dlm_locking_proto.pv_minor,
+			     dlm->fs_locking_proto.pv_major,
+			     dlm->fs_locking_proto.pv_minor,
+			     node);
+			status = -EPROTO;
+			break;
+		case JOIN_OK:
+			/* Use the same locking protocol as the remote node */
+			dlm->dlm_locking_proto.pv_minor = packet.dlm_minor;
+			dlm->fs_locking_proto.pv_minor = packet.fs_minor;
+			mlog(0,
+			     "Node %d responds JOIN_OK with DLM locking protocol "
+			     "%u.%u and fs locking protocol %u.%u\n",
+			     node,
+			     dlm->dlm_locking_proto.pv_major,
+			     dlm->dlm_locking_proto.pv_minor,
+			     dlm->fs_locking_proto.pv_major,
+			     dlm->fs_locking_proto.pv_minor);
+			break;
+		default:
+			status = -EINVAL;
+			mlog(ML_ERROR, "invalid response %d from node %u\n",
+			     packet.code, node);
+			/* Reset response to JOIN_DISALLOW */
+			*response = JOIN_DISALLOW;
+			break;
+		}
 	}

 	mlog(0, "status %d, node %d response is %d\n", status, node,
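The conversion above turns the if/else chain into a switch and, importantly, adds a default arm that forces *response back to JOIN_DISALLOW, so an unrecognized wire code can never leak through to the caller. A hedged, runnable sketch of that pattern (codes and errno values illustrative, not the dlm's):

#include <stdio.h>

enum join_code { JOIN_DISALLOW, JOIN_OK, JOIN_OK_NO_MAP, JOIN_PROTOCOL_MISMATCH };

/* Unknown codes fall into the default arm, which resets the response
 * to a safe value instead of propagating garbage. */
static int handle_response(int code, enum join_code *response)
{
    int status = 0;

    *response = code;
    switch (code) {
    case JOIN_DISALLOW:
    case JOIN_OK_NO_MAP:
    case JOIN_OK:
        break;
    case JOIN_PROTOCOL_MISMATCH:
        status = -71;                 /* -EPROTO */
        break;
    default:
        status = -22;                 /* -EINVAL */
        *response = JOIN_DISALLOW;    /* never leak a bogus code */
        break;
    }
    return status;
}

int main(void)
{
    enum join_code resp;
    printf("status=%d resp=%d\n", handle_response(42, &resp), resp);
    return 0;
}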
@@ -1725,12 +1732,13 @@ static int dlm_register_domain_handlers(struct dlm_ctxt *dlm)

 	o2hb_setup_callback(&dlm->dlm_hb_down, O2HB_NODE_DOWN_CB,
 			    dlm_hb_node_down_cb, dlm, DLM_HB_NODE_DOWN_PRI);
-	o2hb_setup_callback(&dlm->dlm_hb_up, O2HB_NODE_UP_CB,
-			    dlm_hb_node_up_cb, dlm, DLM_HB_NODE_UP_PRI);

 	status = o2hb_register_callback(dlm->name, &dlm->dlm_hb_down);
 	if (status)
 		goto bail;

+	o2hb_setup_callback(&dlm->dlm_hb_up, O2HB_NODE_UP_CB,
+			    dlm_hb_node_up_cb, dlm, DLM_HB_NODE_UP_PRI);
 	status = o2hb_register_callback(dlm->name, &dlm->dlm_hb_up);
 	if (status)
 		goto bail;
@@ -1845,8 +1853,6 @@ static int dlm_register_domain_handlers(struct dlm_ctxt *dlm)
 					sizeof(struct dlm_exit_domain),
 					dlm_begin_exit_domain_handler,
 					dlm, NULL, &dlm->dlm_domain_handlers);
-	if (status)
-		goto bail;

 bail:
 	if (status)
@@ -498,16 +498,6 @@ static void dlm_lockres_release(struct kref *kref)
 	mlog(0, "destroying lockres %.*s\n", res->lockname.len,
 	     res->lockname.name);

-	spin_lock(&dlm->track_lock);
-	if (!list_empty(&res->tracking))
-		list_del_init(&res->tracking);
-	else {
-		mlog(ML_ERROR, "Resource %.*s not on the Tracking list\n",
-		     res->lockname.len, res->lockname.name);
-		dlm_print_one_lock_resource(res);
-	}
-	spin_unlock(&dlm->track_lock);
-
 	atomic_dec(&dlm->res_cur_count);

 	if (!hlist_unhashed(&res->hash_node) ||
@@ -795,8 +785,18 @@ lookup:
 		dlm_lockres_grab_inflight_ref(dlm, tmpres);

 		spin_unlock(&tmpres->spinlock);
-		if (res)
+		if (res) {
+			spin_lock(&dlm->track_lock);
+			if (!list_empty(&res->tracking))
+				list_del_init(&res->tracking);
+			else
+				mlog(ML_ERROR, "Resource %.*s not "
+						"on the Tracking list\n",
+						res->lockname.len,
+						res->lockname.name);
+			spin_unlock(&dlm->track_lock);
 			dlm_lockres_put(res);
+		}
 		res = tmpres;
 		goto leave;
 	}
@@ -1776,7 +1776,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
 				     struct dlm_migratable_lockres *mres)
 {
 	struct dlm_migratable_lock *ml;
-	struct list_head *queue, *iter;
+	struct list_head *queue;
 	struct list_head *tmpq = NULL;
 	struct dlm_lock *newlock = NULL;
 	struct dlm_lockstatus *lksb = NULL;

@@ -1821,9 +1821,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
 			spin_lock(&res->spinlock);
 			for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) {
 				tmpq = dlm_list_idx_to_ptr(res, j);
-				list_for_each(iter, tmpq) {
-					lock = list_entry(iter,
-						  struct dlm_lock, list);
+				list_for_each_entry(lock, tmpq, list) {
 					if (lock->ml.cookie == ml->cookie)
 						break;
 					lock = NULL;
@@ -211,6 +211,16 @@ static void dlm_purge_lockres(struct dlm_ctxt *dlm,

 	__dlm_unhash_lockres(dlm, res);

+	spin_lock(&dlm->track_lock);
+	if (!list_empty(&res->tracking))
+		list_del_init(&res->tracking);
+	else {
+		mlog(ML_ERROR, "Resource %.*s not on the Tracking list\n",
+		     res->lockname.len, res->lockname.name);
+		__dlm_print_one_lock_resource(res);
+	}
+	spin_unlock(&dlm->track_lock);
+
 	/* lockres is not in the hash now. drop the flag and wake up
 	 * any processes waiting in dlm_get_lock_resource. */
 	if (!master) {
@@ -3035,8 +3035,6 @@ local:
 	ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb);

 	osb->cconn = conn;
-
-	status = 0;
 bail:
 	if (status < 0) {
 		ocfs2_dlm_shutdown_debug(osb);
@@ -305,8 +305,8 @@ static int ocfs2_last_eb_is_empty(struct inode *inode,

 	if (el->l_tree_depth) {
 		ocfs2_error(inode->i_sb,
-			    "Inode %lu has non zero tree depth in "
-			    "leaf block %llu\n", inode->i_ino,
+			    "Inode %lu has non zero tree depth in leaf block %llu\n",
+			    inode->i_ino,
 			    (unsigned long long)eb_bh->b_blocknr);
 		ret = -EROFS;
 		goto out;

@@ -441,8 +441,8 @@ static int ocfs2_get_clusters_nocache(struct inode *inode,

 	if (el->l_tree_depth) {
 		ocfs2_error(inode->i_sb,
-			    "Inode %lu has non zero tree depth in "
-			    "leaf block %llu\n", inode->i_ino,
+			    "Inode %lu has non zero tree depth in leaf block %llu\n",
+			    inode->i_ino,
 			    (unsigned long long)eb_bh->b_blocknr);
 		ret = -EROFS;
 		goto out;
@@ -475,8 +475,9 @@ static int ocfs2_get_clusters_nocache(struct inode *inode,
 	BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));

 	if (!rec->e_blkno) {
-		ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
-			    "record (%u, %u, 0)", inode->i_ino,
+		ocfs2_error(inode->i_sb,
+			    "Inode %lu has bad extent record (%u, %u, 0)\n",
+			    inode->i_ino,
 			    le32_to_cpu(rec->e_cpos),
 			    ocfs2_rec_clusters(el, rec));
 		ret = -EROFS;

@@ -564,8 +565,8 @@ int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,

 	if (el->l_tree_depth) {
 		ocfs2_error(inode->i_sb,
-			    "Inode %lu has non zero tree depth in "
-			    "xattr leaf block %llu\n", inode->i_ino,
+			    "Inode %lu has non zero tree depth in xattr leaf block %llu\n",
+			    inode->i_ino,
 			    (unsigned long long)eb_bh->b_blocknr);
 		ret = -EROFS;
 		goto out;
@@ -582,8 +583,9 @@ int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
 	BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));

 	if (!rec->e_blkno) {
-		ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
-			    "record (%u, %u, 0) in xattr", inode->i_ino,
+		ocfs2_error(inode->i_sb,
+			    "Inode %lu has bad extent record (%u, %u, 0) in xattr\n",
+			    inode->i_ino,
 			    le32_to_cpu(rec->e_cpos),
 			    ocfs2_rec_clusters(el, rec));
 		ret = -EROFS;
@@ -1130,6 +1130,7 @@ out:
 int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
 {
 	int status = 0, size_change;
+	int inode_locked = 0;
 	struct inode *inode = d_inode(dentry);
 	struct super_block *sb = inode->i_sb;
 	struct ocfs2_super *osb = OCFS2_SB(sb);

@@ -1178,6 +1179,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
 		mlog_errno(status);
 		goto bail_unlock_rw;
 	}
+	inode_locked = 1;

 	if (size_change) {
 		status = inode_newsize_ok(inode, attr->ia_size);
@@ -1258,7 +1260,10 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
 bail_commit:
 	ocfs2_commit_trans(osb, handle);
 bail_unlock:
-	ocfs2_inode_unlock(inode, 1);
+	if (status) {
+		ocfs2_inode_unlock(inode, 1);
+		inode_locked = 0;
+	}
 bail_unlock_rw:
 	if (size_change)
 		ocfs2_rw_unlock(inode, 1);

@@ -1274,6 +1279,8 @@ bail:
 		if (status < 0)
 			mlog_errno(status);
 	}
+	if (inode_locked)
+		ocfs2_inode_unlock(inode, 1);

 	return status;
 }
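The two ocfs2_setattr() hunks above track cluster-lock ownership in inode_locked so that one exit path releases the lock exactly once, whichever way the function bails out. A hedged, runnable pthread sketch of the same ownership-flag pattern (names and errno values illustrative):

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* 'locked' records ownership so every error path funnels through one
 * unlock site without double-unlocking or leaking the lock. */
static int do_op(int fail_early, int fail_late)
{
    int status = 0, locked = 0;

    if (fail_early) {
        status = -EINVAL;
        goto bail;
    }

    pthread_mutex_lock(&lock);
    locked = 1;

    if (fail_late)
        status = -EIO;    /* fall through; unlock still happens */
bail:
    if (locked)
        pthread_mutex_unlock(&lock);
    return status;
}

int main(void)
{
    printf("%d %d %d\n", do_op(0, 0), do_op(1, 0), do_op(0, 1));
    return 0;
}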
@@ -2262,8 +2269,6 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
 	ssize_t written = 0;
 	ssize_t ret;
 	size_t count = iov_iter_count(from), orig_count;
-	loff_t old_size;
-	u32 old_clusters;
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file_inode(file);
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

@@ -2271,6 +2276,8 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
 			       OCFS2_MOUNT_COHERENCY_BUFFERED);
 	int unaligned_dio = 0;
 	int dropped_dio = 0;
+	int append_write = ((iocb->ki_pos + count) >=
+			i_size_read(inode) ? 1 : 0);

 	trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry,
 		(unsigned long long)OCFS2_I(inode)->ip_blkno,

@@ -2290,8 +2297,9 @@ relock:
 	/*
 	 * Concurrent O_DIRECT writes are allowed with
 	 * mount_option "coherency=buffered".
+	 * For append write, we must take rw EX.
 	 */
-	rw_level = (!direct_io || full_coherency);
+	rw_level = (!direct_io || full_coherency || append_write);

 	ret = ocfs2_rw_lock(inode, rw_level);
 	if (ret < 0) {

@@ -2364,13 +2372,6 @@ relock:
 		ocfs2_iocb_set_unaligned_aio(iocb);
 	}

-	/*
-	 * To later detect whether a journal commit for sync writes is
-	 * necessary, we sample i_size, and cluster count here.
-	 */
-	old_size = i_size_read(inode);
-	old_clusters = OCFS2_I(inode)->ip_clusters;
-
 	/* communicate with ocfs2_dio_end_io */
 	ocfs2_iocb_set_rw_locked(iocb, rw_level);

@@ -2378,6 +2379,20 @@ relock:
 	/* buffered aio wouldn't have proper lock coverage today */
 	BUG_ON(written == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT));

+	/*
+	 * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
+	 * function pointer which is called when o_direct io completes so that
+	 * it can unlock our rw lock.
+	 * Unfortunately there are error cases which call end_io and others
+	 * that don't. so we don't have to unlock the rw_lock if either an
+	 * async dio is going to do it in the future or an end_io after an
+	 * error has already done it.
+	 */
+	if ((written == -EIOCBQUEUED) || (!ocfs2_iocb_is_rw_locked(iocb))) {
+		rw_level = -1;
+		unaligned_dio = 0;
+	}
+
 	if (unlikely(written <= 0))
 		goto no_sync;

@@ -2402,21 +2417,7 @@ relock:
 	}

 no_sync:
-	/*
-	 * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
-	 * function pointer which is called when o_direct io completes so that
-	 * it can unlock our rw lock.
-	 * Unfortunately there are error cases which call end_io and others
-	 * that don't. so we don't have to unlock the rw_lock if either an
-	 * async dio is going to do it in the future or an end_io after an
-	 * error has already done it.
-	 */
-	if ((ret == -EIOCBQUEUED) || (!ocfs2_iocb_is_rw_locked(iocb))) {
-		rw_level = -1;
-		unaligned_dio = 0;
-	}
-
-	if (unaligned_dio) {
+	if (unaligned_dio && ocfs2_iocb_is_unaligned_aio(iocb)) {
 		ocfs2_iocb_clear_unaligned_aio(iocb);
 		mutex_unlock(&OCFS2_I(inode)->ip_unaligned_aio);
 	}
@@ -971,6 +971,7 @@ static void ocfs2_delete_inode(struct inode *inode)
 	int wipe, status;
 	sigset_t oldset;
 	struct buffer_head *di_bh = NULL;
+	struct ocfs2_dinode *di = NULL;

 	trace_ocfs2_delete_inode(inode->i_ino,
 				 (unsigned long long)OCFS2_I(inode)->ip_blkno,

@@ -1025,6 +1026,14 @@ static void ocfs2_delete_inode(struct inode *inode)
 		goto bail_unlock_nfs_sync;
 	}

+	di = (struct ocfs2_dinode *)di_bh->b_data;
+	/* Skip inode deletion and wait for dio orphan entry recovered
+	 * first */
+	if (unlikely(di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL))) {
+		ocfs2_cleanup_delete_inode(inode, 0);
+		goto bail_unlock_inode;
+	}
+
 	/* Query the cluster. This will be the final decision made
 	 * before we go ahead and wipe the inode. */
 	status = ocfs2_query_inode_wipe(inode, di_bh, &wipe);
@@ -1191,17 +1200,19 @@ void ocfs2_evict_inode(struct inode *inode)
 int ocfs2_drop_inode(struct inode *inode)
 {
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
-	int res;

 	trace_ocfs2_drop_inode((unsigned long long)oi->ip_blkno,
 				inode->i_nlink, oi->ip_flags);

-	if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)
-		res = 1;
-	else
-		res = generic_drop_inode(inode);
+	assert_spin_locked(&inode->i_lock);
+	inode->i_state |= I_WILL_FREE;
+	spin_unlock(&inode->i_lock);
+	write_inode_now(inode, 1);
+	spin_lock(&inode->i_lock);
+	WARN_ON(inode->i_state & I_NEW);
+	inode->i_state &= ~I_WILL_FREE;

-	return res;
+	return 1;
 }

 /*
@@ -1350,32 +1361,32 @@ int ocfs2_validate_inode_block(struct super_block *sb,
 	rc = -EINVAL;

 	if (!OCFS2_IS_VALID_DINODE(di)) {
-		ocfs2_error(sb, "Invalid dinode #%llu: signature = %.*s\n",
-			    (unsigned long long)bh->b_blocknr, 7,
-			    di->i_signature);
+		rc = ocfs2_error(sb, "Invalid dinode #%llu: signature = %.*s\n",
+				 (unsigned long long)bh->b_blocknr, 7,
+				 di->i_signature);
 		goto bail;
 	}

 	if (le64_to_cpu(di->i_blkno) != bh->b_blocknr) {
-		ocfs2_error(sb, "Invalid dinode #%llu: i_blkno is %llu\n",
-			    (unsigned long long)bh->b_blocknr,
-			    (unsigned long long)le64_to_cpu(di->i_blkno));
+		rc = ocfs2_error(sb, "Invalid dinode #%llu: i_blkno is %llu\n",
+				 (unsigned long long)bh->b_blocknr,
+				 (unsigned long long)le64_to_cpu(di->i_blkno));
 		goto bail;
 	}

 	if (!(di->i_flags & cpu_to_le32(OCFS2_VALID_FL))) {
-		ocfs2_error(sb,
-			    "Invalid dinode #%llu: OCFS2_VALID_FL not set\n",
-			    (unsigned long long)bh->b_blocknr);
+		rc = ocfs2_error(sb,
+				 "Invalid dinode #%llu: OCFS2_VALID_FL not set\n",
+				 (unsigned long long)bh->b_blocknr);
 		goto bail;
 	}

 	if (le32_to_cpu(di->i_fs_generation) !=
 	    OCFS2_SB(sb)->fs_generation) {
-		ocfs2_error(sb,
-			    "Invalid dinode #%llu: fs_generation is %u\n",
-			    (unsigned long long)bh->b_blocknr,
-			    le32_to_cpu(di->i_fs_generation));
+		rc = ocfs2_error(sb,
+				 "Invalid dinode #%llu: fs_generation is %u\n",
+				 (unsigned long long)bh->b_blocknr,
+				 le32_to_cpu(di->i_fs_generation));
 		goto bail;
 	}

@@ -81,8 +81,6 @@ struct ocfs2_inode_info
 	tid_t i_sync_tid;
 	tid_t i_datasync_tid;

-	wait_queue_head_t append_dio_wq;
-
 	struct dquot *i_dquot[MAXQUOTAS];
 };

@@ -374,7 +374,7 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
 		mlog_errno(PTR_ERR(handle));

 		if (is_journal_aborted(journal)) {
-			ocfs2_abort(osb->sb, "Detected aborted journal");
+			ocfs2_abort(osb->sb, "Detected aborted journal\n");
 			handle = ERR_PTR(-EROFS);
 		}
 	} else {
@@ -668,7 +668,23 @@ static int __ocfs2_journal_access(handle_t *handle,
 		mlog(ML_ERROR, "giving me a buffer that's not uptodate!\n");
 		mlog(ML_ERROR, "b_blocknr=%llu\n",
 		     (unsigned long long)bh->b_blocknr);
-		BUG();
+
+		lock_buffer(bh);
+		/*
+		 * A previous attempt to write this buffer head failed.
+		 * Nothing we can do but to retry the write and hope for
+		 * the best.
+		 */
+		if (buffer_write_io_error(bh) && !buffer_uptodate(bh)) {
+			clear_buffer_write_io_error(bh);
+			set_buffer_uptodate(bh);
+		}
+
+		if (!buffer_uptodate(bh)) {
+			unlock_buffer(bh);
+			return -EIO;
+		}
+		unlock_buffer(bh);
 	}

 	/* Set the current transaction information on the ci so
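The hunk above replaces a BUG() with a recovery path: if the buffer's only problem is that an earlier write-back failed, the error bit is cleared and the in-memory copy is marked uptodate again so the write can be retried; any other not-uptodate state becomes -EIO. A hedged userspace sketch of that decision rule, modelling the buffer-head bits as a plain bitmask (flag names and errno values illustrative):

#include <stdio.h>

#define BH_UPTODATE     (1u << 0)
#define BH_WRITE_IO_ERR (1u << 1)

/* A clean "write failed earlier" state is recoverable (retry the
 * write); anything else not-uptodate is a hard -EIO. */
static int journal_access_check(unsigned int *bh_state)
{
    if ((*bh_state & BH_WRITE_IO_ERR) && !(*bh_state & BH_UPTODATE)) {
        *bh_state &= ~BH_WRITE_IO_ERR;
        *bh_state |= BH_UPTODATE;    /* in-memory copy is still valid */
    }

    if (!(*bh_state & BH_UPTODATE))
        return -5;                   /* -EIO */
    return 0;
}

int main(void)
{
    unsigned int failed_write = BH_WRITE_IO_ERR;  /* recoverable */
    unsigned int never_read = 0;                  /* not recoverable */

    printf("%d %d\n", journal_access_check(&failed_write),
           journal_access_check(&never_read));
    return 0;
}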
@@ -2170,6 +2186,7 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
 		iter = oi->ip_next_orphan;
 		oi->ip_next_orphan = NULL;

+		mutex_lock(&inode->i_mutex);
 		ret = ocfs2_rw_lock(inode, 1);
 		if (ret < 0) {
 			mlog_errno(ret);

@@ -2193,7 +2210,9 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
 			 * ocfs2_delete_inode. */
 			oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
 			spin_unlock(&oi->ip_lock);
-		} else if ((orphan_reco_type == ORPHAN_NEED_TRUNCATE) &&
+		}
+
+		if ((orphan_reco_type == ORPHAN_NEED_TRUNCATE) &&
 				(di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL))) {
 			ret = ocfs2_truncate_file(inode, di_bh,
 					i_size_read(inode));

@@ -2206,17 +2225,16 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
 			ret = ocfs2_del_inode_from_orphan(osb, inode, di_bh, 0, 0);
 			if (ret)
 				mlog_errno(ret);
-
-			wake_up(&OCFS2_I(inode)->append_dio_wq);
 		} /* else if ORPHAN_NO_NEED_TRUNCATE, do nothing */
 unlock_inode:
 		ocfs2_inode_unlock(inode, 1);
+		brelse(di_bh);
+		di_bh = NULL;
 unlock_rw:
 		ocfs2_rw_unlock(inode, 1);
 next:
+		mutex_unlock(&inode->i_mutex);
 		iput(inode);
-		brelse(di_bh);
-		di_bh = NULL;
 		inode = iter;
 	}

@@ -665,8 +665,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
 #ifdef CONFIG_OCFS2_DEBUG_FS
 	if (le32_to_cpu(alloc->id1.bitmap1.i_used) !=
 	    ocfs2_local_alloc_count_bits(alloc)) {
-		ocfs2_error(osb->sb, "local alloc inode %llu says it has "
-			    "%u used bits, but a count shows %u",
+		ocfs2_error(osb->sb, "local alloc inode %llu says it has %u used bits, but a count shows %u\n",
 			    (unsigned long long)le64_to_cpu(alloc->i_blkno),
 			    le32_to_cpu(alloc->id1.bitmap1.i_used),
 			    ocfs2_local_alloc_count_bits(alloc));
@@ -99,11 +99,9 @@ static int __ocfs2_move_extent(handle_t *handle,

 	index = ocfs2_search_extent_list(el, cpos);
 	if (index == -1) {
-		ocfs2_error(inode->i_sb,
-			    "Inode %llu has an extent at cpos %u which can no "
-			    "longer be found.\n",
-			    (unsigned long long)ino, cpos);
-		ret = -EROFS;
+		ret = ocfs2_error(inode->i_sb,
+				  "Inode %llu has an extent at cpos %u which can no longer be found\n",
+				  (unsigned long long)ino, cpos);
 		goto out;
 	}

@@ -1035,11 +1035,6 @@ leave:
 	if (handle)
 		ocfs2_commit_trans(osb, handle);

-	if (child_locked)
-		ocfs2_inode_unlock(inode, 1);
-
-	ocfs2_inode_unlock(dir, 1);
-
 	if (orphan_dir) {
 		/* This was locked for us in ocfs2_prepare_orphan_dir() */
 		ocfs2_inode_unlock(orphan_dir, 1);

@@ -1047,6 +1042,11 @@ leave:
 		iput(orphan_dir);
 	}

+	if (child_locked)
+		ocfs2_inode_unlock(inode, 1);
+
+	ocfs2_inode_unlock(dir, 1);
+
 	brelse(fe_bh);
 	brelse(parent_node_bh);

@@ -1309,6 +1309,11 @@ static int ocfs2_rename(struct inode *old_dir,
 	}
 	parents_locked = 1;

+	if (!new_dir->i_nlink) {
+		status = -EACCES;
+		goto bail;
+	}
+
 	/* make sure both dirs have bhs
 	 * get an extra ref on old_dir_bh if old==new */
 	if (!new_dir_bh) {
@@ -1569,12 +1574,25 @@ static int ocfs2_rename(struct inode *old_dir,
 		status = ocfs2_find_entry(old_dentry->d_name.name,
 					  old_dentry->d_name.len, old_dir,
 					  &old_entry_lookup);
-		if (status)
+		if (status) {
+			if (!is_journal_aborted(osb->journal->j_journal)) {
+				ocfs2_error(osb->sb, "new entry %.*s is added, but old entry %.*s "
+						"is not deleted.",
+						new_dentry->d_name.len, new_dentry->d_name.name,
+						old_dentry->d_name.len, old_dentry->d_name.name);
+			}
 			goto bail;
+		}

 		status = ocfs2_delete_entry(handle, old_dir, &old_entry_lookup);
 		if (status < 0) {
 			mlog_errno(status);
+			if (!is_journal_aborted(osb->journal->j_journal)) {
+				ocfs2_error(osb->sb, "new entry %.*s is added, but old entry %.*s "
+						"is not deleted.",
+						new_dentry->d_name.len, new_dentry->d_name.name,
+						old_dentry->d_name.len, old_dentry->d_name.name);
+			}
 			goto bail;
 		}

@@ -1633,21 +1651,9 @@ static int ocfs2_rename(struct inode *old_dir,
 		ocfs2_dentry_move(old_dentry, new_dentry, old_dir, new_dir);
 	status = 0;
 bail:
-	if (rename_lock)
-		ocfs2_rename_unlock(osb);
-
 	if (handle)
 		ocfs2_commit_trans(osb, handle);

-	if (parents_locked)
-		ocfs2_double_unlock(old_dir, new_dir);
-
-	if (old_child_locked)
-		ocfs2_inode_unlock(old_inode, 1);
-
-	if (new_child_locked)
-		ocfs2_inode_unlock(new_inode, 1);
-
 	if (orphan_dir) {
 		/* This was locked for us in ocfs2_prepare_orphan_dir() */
 		ocfs2_inode_unlock(orphan_dir, 1);

@@ -1655,6 +1661,18 @@ bail:
 		iput(orphan_dir);
 	}

+	if (new_child_locked)
+		ocfs2_inode_unlock(new_inode, 1);
+
+	if (old_child_locked)
+		ocfs2_inode_unlock(old_inode, 1);
+
+	if (parents_locked)
+		ocfs2_double_unlock(old_dir, new_dir);
+
+	if (rename_lock)
+		ocfs2_rename_unlock(osb);
+
 	if (new_inode)
 		sync_mapping_buffers(old_inode->i_mapping);

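Both ocfs2_rename() hunks above reorder the bail: path so locks are released in the reverse of their acquisition order (child, then parents, then the rename lock). A hedged, runnable pthread sketch of that LIFO unlock discipline (mutex names illustrative):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t rename_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t parent_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t child_lock  = PTHREAD_MUTEX_INITIALIZER;

/* Acquire outermost first ... release innermost first, the ordering
 * the reworked bail: path enforces. */
int main(void)
{
    pthread_mutex_lock(&rename_lock);
    pthread_mutex_lock(&parent_lock);
    pthread_mutex_lock(&child_lock);

    /* ... do the rename work ... */

    pthread_mutex_unlock(&child_lock);
    pthread_mutex_unlock(&parent_lock);
    pthread_mutex_unlock(&rename_lock);

    puts("unlocked in reverse acquisition order");
    return 0;
}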
@@ -2601,27 +2619,6 @@ leave:
 	return status;
 }

-static int ocfs2_dio_orphan_recovered(struct inode *inode)
-{
-	int ret;
-	struct buffer_head *di_bh = NULL;
-	struct ocfs2_dinode *di = NULL;
-
-	ret = ocfs2_inode_lock(inode, &di_bh, 1);
-	if (ret < 0) {
-		mlog_errno(ret);
-		return 0;
-	}
-
-	di = (struct ocfs2_dinode *) di_bh->b_data;
-	ret = !(di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL));
-	ocfs2_inode_unlock(inode, 1);
-	brelse(di_bh);
-
-	return ret;
-}
-
-#define OCFS2_DIO_ORPHANED_FL_CHECK_INTERVAL 10000
 int ocfs2_add_inode_to_orphan(struct ocfs2_super *osb,
 			      struct inode *inode)
 {
@@ -2633,7 +2630,6 @@ int ocfs2_add_inode_to_orphan(struct ocfs2_super *osb,
 	handle_t *handle = NULL;
 	struct ocfs2_dinode *di = NULL;

-restart:
 	status = ocfs2_inode_lock(inode, &di_bh, 1);
 	if (status < 0) {
 		mlog_errno(status);

@@ -2643,15 +2639,21 @@ restart:
 	di = (struct ocfs2_dinode *) di_bh->b_data;
 	/*
 	 * Another append dio crashed?
-	 * If so, wait for recovery first.
+	 * If so, manually recover it first.
 	 */
 	if (unlikely(di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL))) {
-		ocfs2_inode_unlock(inode, 1);
-		brelse(di_bh);
-		wait_event_interruptible_timeout(OCFS2_I(inode)->append_dio_wq,
-				ocfs2_dio_orphan_recovered(inode),
-				msecs_to_jiffies(OCFS2_DIO_ORPHANED_FL_CHECK_INTERVAL));
-		goto restart;
+		status = ocfs2_truncate_file(inode, di_bh, i_size_read(inode));
+		if (status < 0) {
+			if (status != -ENOSPC)
+				mlog_errno(status);
+			goto bail_unlock_inode;
+		}
+
+		status = ocfs2_del_inode_from_orphan(osb, inode, di_bh, 0, 0);
+		if (status < 0) {
+			mlog_errno(status);
+			goto bail_unlock_inode;
+		}
 	}

 	status = ocfs2_prepare_orphan_dir(osb, &orphan_dir_inode,
@@ -286,6 +286,8 @@ enum ocfs2_mount_options
 	OCFS2_MOUNT_HB_GLOBAL = 1 << 14, /* Global heartbeat */

 	OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT = 1 << 15,  /* Journal Async Commit */
+	OCFS2_MOUNT_ERRORS_CONT = 1 << 16, /* Return EIO to the calling process on error */
+	OCFS2_MOUNT_ERRORS_ROFS = 1 << 17, /* Change filesystem to read-only on error */
 };

 #define OCFS2_OSB_SOFT_RO	0x0001
@@ -138,8 +138,7 @@ static int ocfs2_read_quota_block(struct inode *inode, u64 v_block,

 	if (i_size_read(inode) >> inode->i_sb->s_blocksize_bits <= v_block) {
 		ocfs2_error(inode->i_sb,
-			    "Quota file %llu is probably corrupted! Requested "
-			    "to read block %Lu but file has size only %Lu\n",
+			    "Quota file %llu is probably corrupted! Requested to read block %Lu but file has size only %Lu\n",
 			    (unsigned long long)OCFS2_I(inode)->ip_blkno,
 			    (unsigned long long)v_block,
 			    (unsigned long long)i_size_read(inode));
@@ -102,32 +102,30 @@ static int ocfs2_validate_refcount_block(struct super_block *sb,


 	if (!OCFS2_IS_VALID_REFCOUNT_BLOCK(rb)) {
-		ocfs2_error(sb,
-			    "Refcount block #%llu has bad signature %.*s",
-			    (unsigned long long)bh->b_blocknr, 7,
-			    rb->rf_signature);
-		return -EINVAL;
+		rc = ocfs2_error(sb,
+				 "Refcount block #%llu has bad signature %.*s\n",
+				 (unsigned long long)bh->b_blocknr, 7,
+				 rb->rf_signature);
+		goto out;
 	}

 	if (le64_to_cpu(rb->rf_blkno) != bh->b_blocknr) {
-		ocfs2_error(sb,
-			    "Refcount block #%llu has an invalid rf_blkno "
-			    "of %llu",
-			    (unsigned long long)bh->b_blocknr,
-			    (unsigned long long)le64_to_cpu(rb->rf_blkno));
-		return -EINVAL;
+		rc = ocfs2_error(sb,
+				 "Refcount block #%llu has an invalid rf_blkno of %llu\n",
+				 (unsigned long long)bh->b_blocknr,
+				 (unsigned long long)le64_to_cpu(rb->rf_blkno));
+		goto out;
 	}

 	if (le32_to_cpu(rb->rf_fs_generation) != OCFS2_SB(sb)->fs_generation) {
-		ocfs2_error(sb,
-			    "Refcount block #%llu has an invalid "
-			    "rf_fs_generation of #%u",
-			    (unsigned long long)bh->b_blocknr,
-			    le32_to_cpu(rb->rf_fs_generation));
-		return -EINVAL;
+		rc = ocfs2_error(sb,
+				 "Refcount block #%llu has an invalid rf_fs_generation of #%u\n",
+				 (unsigned long long)bh->b_blocknr,
+				 le32_to_cpu(rb->rf_fs_generation));
+		goto out;
 	}

-	return 0;
+out:
+	return rc;
 }

 static int ocfs2_read_refcount_block(struct ocfs2_caching_info *ci,
@@ -1102,12 +1100,10 @@ static int ocfs2_get_refcount_rec(struct ocfs2_caching_info *ci,
 		el = &eb->h_list;

 		if (el->l_tree_depth) {
-			ocfs2_error(sb,
-				    "refcount tree %llu has non zero tree "
-				    "depth in leaf btree tree block %llu\n",
-				    (unsigned long long)ocfs2_metadata_cache_owner(ci),
-				    (unsigned long long)eb_bh->b_blocknr);
-			ret = -EROFS;
+			ret = ocfs2_error(sb,
+					  "refcount tree %llu has non zero tree depth in leaf btree tree block %llu\n",
+					  (unsigned long long)ocfs2_metadata_cache_owner(ci),
+					  (unsigned long long)eb_bh->b_blocknr);
 			goto out;
 		}
 	}
@@ -2359,10 +2355,8 @@ static int ocfs2_mark_extent_refcounted(struct inode *inode,
 			   cpos, len, phys);

 	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) {
-		ocfs2_error(inode->i_sb, "Inode %lu want to use refcount "
-			    "tree, but the feature bit is not set in the "
-			    "super block.", inode->i_ino);
-		ret = -EROFS;
+		ret = ocfs2_error(inode->i_sb, "Inode %lu want to use refcount tree, but the feature bit is not set in the super block\n",
+				  inode->i_ino);
 		goto out;
 	}

@@ -2545,10 +2539,8 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
 	u64 start_cpos = ocfs2_blocks_to_clusters(inode->i_sb, phys_blkno);

 	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) {
-		ocfs2_error(inode->i_sb, "Inode %lu want to use refcount "
-			    "tree, but the feature bit is not set in the "
-			    "super block.", inode->i_ino);
-		ret = -EROFS;
+		ret = ocfs2_error(inode->i_sb, "Inode %lu want to use refcount tree, but the feature bit is not set in the super block\n",
+				  inode->i_ino);
 		goto out;
 	}

@@ -2672,11 +2664,10 @@ static int ocfs2_refcount_cal_cow_clusters(struct inode *inode,
 		el = &eb->h_list;

 		if (el->l_tree_depth) {
-			ocfs2_error(inode->i_sb,
-				    "Inode %lu has non zero tree depth in "
-				    "leaf block %llu\n", inode->i_ino,
-				    (unsigned long long)eb_bh->b_blocknr);
-			ret = -EROFS;
+			ret = ocfs2_error(inode->i_sb,
+					  "Inode %lu has non zero tree depth in leaf block %llu\n",
+					  inode->i_ino,
+					  (unsigned long long)eb_bh->b_blocknr);
 			goto out;
 		}
 	}
@@ -3106,11 +3097,9 @@ static int ocfs2_clear_ext_refcount(handle_t *handle,

 	index = ocfs2_search_extent_list(el, cpos);
 	if (index == -1) {
-		ocfs2_error(sb,
-			    "Inode %llu has an extent at cpos %u which can no "
-			    "longer be found.\n",
-			    (unsigned long long)ino, cpos);
-		ret = -EROFS;
+		ret = ocfs2_error(sb,
+				  "Inode %llu has an extent at cpos %u which can no longer be found\n",
+				  (unsigned long long)ino, cpos);
 		goto out;
 	}

@@ -3376,10 +3365,8 @@ static int ocfs2_replace_cow(struct ocfs2_cow_context *context)
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

 	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) {
-		ocfs2_error(inode->i_sb, "Inode %lu want to use refcount "
-			    "tree, but the feature bit is not set in the "
-			    "super block.", inode->i_ino);
-		return -EROFS;
+		return ocfs2_error(inode->i_sb, "Inode %lu want to use refcount tree, but the feature bit is not set in the super block\n",
+				   inode->i_ino);
 	}

 	ocfs2_init_dealloc_ctxt(&context->dealloc);
@@ -149,10 +149,8 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
 	brelse(ac->ac_bh);
 	ac->ac_bh = NULL;
 	ac->ac_resv = NULL;
-	if (ac->ac_find_loc_priv) {
-		kfree(ac->ac_find_loc_priv);
-		ac->ac_find_loc_priv = NULL;
-	}
+	kfree(ac->ac_find_loc_priv);
+	ac->ac_find_loc_priv = NULL;
 }

 void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
@@ -167,12 +165,12 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl)
 }

 #define do_error(fmt, ...)						\
-do{									\
-	if (resize)							\
-		mlog(ML_ERROR, fmt "\n", ##__VA_ARGS__);		\
-	else								\
-		ocfs2_error(sb, fmt, ##__VA_ARGS__);			\
-} while (0)
+do {									\
+	if (resize)							\
+		mlog(ML_ERROR, fmt, ##__VA_ARGS__);			\
+	else								\
+		return ocfs2_error(sb, fmt, ##__VA_ARGS__);		\
+} while (0)

 static int ocfs2_validate_gd_self(struct super_block *sb,
 				  struct buffer_head *bh,
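With ocfs2_error() now returning an errno, the reworked do_error() can return straight out of the enclosing validator, which is why the explicit return -EINVAL lines disappear from the hunks that follow. A hedged, runnable userspace analogue of a report-and-return variadic macro (report_error, the message, and -22 are illustrative stand-ins, not ocfs2 code):

#include <stdarg.h>
#include <stdio.h>

static int report_error(const char *fmt, ...) __attribute__((format(printf, 1, 2)));

/* Analogue of the reworked do_error(): report, then return the error
 * code from the *enclosing* function. Only safe inside functions that
 * return int, which is why the real macro lives next to its users. */
#define do_error(fmt, ...)				\
do {							\
	return report_error(fmt, ##__VA_ARGS__);	\
} while (0)

static int report_error(const char *fmt, ...)
{
    va_list args;

    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);
    return -22;    /* stand-in for -EINVAL */
}

static int validate(unsigned int bits, unsigned int max)
{
    if (bits > max)
        do_error("bit count %u exceeds max %u\n", bits, max);
    return 0;
}

int main(void)
{
    printf("%d %d\n", validate(8, 64), validate(128, 64));
    return 0;
}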
@@ -181,44 +179,35 @@ static int ocfs2_validate_gd_self(struct super_block *sb,
 	struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;

 	if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
-		do_error("Group descriptor #%llu has bad signature %.*s",
+		do_error("Group descriptor #%llu has bad signature %.*s\n",
 			 (unsigned long long)bh->b_blocknr, 7,
 			 gd->bg_signature);
-		return -EINVAL;
 	}

 	if (le64_to_cpu(gd->bg_blkno) != bh->b_blocknr) {
-		do_error("Group descriptor #%llu has an invalid bg_blkno "
-			 "of %llu",
+		do_error("Group descriptor #%llu has an invalid bg_blkno of %llu\n",
 			 (unsigned long long)bh->b_blocknr,
 			 (unsigned long long)le64_to_cpu(gd->bg_blkno));
-		return -EINVAL;
 	}

 	if (le32_to_cpu(gd->bg_generation) != OCFS2_SB(sb)->fs_generation) {
-		do_error("Group descriptor #%llu has an invalid "
-			 "fs_generation of #%u",
+		do_error("Group descriptor #%llu has an invalid fs_generation of #%u\n",
 			 (unsigned long long)bh->b_blocknr,
 			 le32_to_cpu(gd->bg_generation));
-		return -EINVAL;
 	}

 	if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) {
-		do_error("Group descriptor #%llu has bit count %u but "
-			 "claims that %u are free",
+		do_error("Group descriptor #%llu has bit count %u but claims that %u are free\n",
 			 (unsigned long long)bh->b_blocknr,
 			 le16_to_cpu(gd->bg_bits),
 			 le16_to_cpu(gd->bg_free_bits_count));
-		return -EINVAL;
 	}

 	if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) {
-		do_error("Group descriptor #%llu has bit count %u but "
-			 "max bitmap bits of %u",
+		do_error("Group descriptor #%llu has bit count %u but max bitmap bits of %u\n",
 			 (unsigned long long)bh->b_blocknr,
 			 le16_to_cpu(gd->bg_bits),
 			 8 * le16_to_cpu(gd->bg_size));
-		return -EINVAL;
 	}

 	return 0;
@@ -233,20 +222,17 @@ static int ocfs2_validate_gd_parent(struct super_block *sb,
 	struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;

 	if (di->i_blkno != gd->bg_parent_dinode) {
-		do_error("Group descriptor #%llu has bad parent "
-			 "pointer (%llu, expected %llu)",
+		do_error("Group descriptor #%llu has bad parent pointer (%llu, expected %llu)\n",
 			 (unsigned long long)bh->b_blocknr,
 			 (unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
 			 (unsigned long long)le64_to_cpu(di->i_blkno));
-		return -EINVAL;
 	}

 	max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc);
 	if (le16_to_cpu(gd->bg_bits) > max_bits) {
-		do_error("Group descriptor #%llu has bit count of %u",
+		do_error("Group descriptor #%llu has bit count of %u\n",
 			 (unsigned long long)bh->b_blocknr,
 			 le16_to_cpu(gd->bg_bits));
-		return -EINVAL;
 	}

 	/* In resize, we may meet the case bg_chain == cl_next_free_rec. */
@@ -254,10 +240,9 @@ static int ocfs2_validate_gd_parent(struct super_block *sb,
 	     le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) ||
 	    ((le16_to_cpu(gd->bg_chain) ==
 	     le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) && !resize)) {
-		do_error("Group descriptor #%llu has bad chain %u",
+		do_error("Group descriptor #%llu has bad chain %u\n",
 			 (unsigned long long)bh->b_blocknr,
 			 le16_to_cpu(gd->bg_chain));
-		return -EINVAL;
 	}

 	return 0;
@@ -384,11 +369,10 @@ static int ocfs2_block_group_fill(handle_t *handle,
 	struct super_block * sb = alloc_inode->i_sb;

 	if (((unsigned long long) bg_bh->b_blocknr) != group_blkno) {
-		ocfs2_error(alloc_inode->i_sb, "group block (%llu) != "
-			    "b_blocknr (%llu)",
-			    (unsigned long long)group_blkno,
-			    (unsigned long long) bg_bh->b_blocknr);
-		status = -EIO;
+		status = ocfs2_error(alloc_inode->i_sb,
+				     "group block (%llu) != b_blocknr (%llu)\n",
+				     (unsigned long long)group_blkno,
+				     (unsigned long long) bg_bh->b_blocknr);
 		goto bail;
 	}

@@ -834,9 +818,9 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
 	BUG_ON(!OCFS2_IS_VALID_DINODE(fe));

 	if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) {
-		ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator %llu",
-			    (unsigned long long)le64_to_cpu(fe->i_blkno));
-		status = -EIO;
+		status = ocfs2_error(alloc_inode->i_sb,
+				     "Invalid chain allocator %llu\n",
+				     (unsigned long long)le64_to_cpu(fe->i_blkno));
 		goto bail;
 	}

@@ -1370,12 +1354,11 @@ int ocfs2_block_group_set_bits(handle_t *handle,

 	le16_add_cpu(&bg->bg_free_bits_count, -num_bits);
 	if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) {
-		ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit"
-			    " count %u but claims %u are freed. num_bits %d",
-			    (unsigned long long)le64_to_cpu(bg->bg_blkno),
-			    le16_to_cpu(bg->bg_bits),
-			    le16_to_cpu(bg->bg_free_bits_count), num_bits);
-		return -EROFS;
+		return ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit count %u but claims %u are freed. num_bits %d\n",
+				   (unsigned long long)le64_to_cpu(bg->bg_blkno),
+				   le16_to_cpu(bg->bg_bits),
+				   le16_to_cpu(bg->bg_free_bits_count),
+				   num_bits);
 	}
 	while(num_bits--)
 		ocfs2_set_bit(bit_off++, bitmap);
@@ -1905,13 +1888,11 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,

 	if (le32_to_cpu(fe->id1.bitmap1.i_used) >=
 	    le32_to_cpu(fe->id1.bitmap1.i_total)) {
-		ocfs2_error(ac->ac_inode->i_sb,
-			    "Chain allocator dinode %llu has %u used "
-			    "bits but only %u total.",
-			    (unsigned long long)le64_to_cpu(fe->i_blkno),
-			    le32_to_cpu(fe->id1.bitmap1.i_used),
-			    le32_to_cpu(fe->id1.bitmap1.i_total));
-		status = -EIO;
+		status = ocfs2_error(ac->ac_inode->i_sb,
+				     "Chain allocator dinode %llu has %u used bits but only %u total\n",
+				     (unsigned long long)le64_to_cpu(fe->i_blkno),
+				     le32_to_cpu(fe->id1.bitmap1.i_used),
+				     le32_to_cpu(fe->id1.bitmap1.i_total));
 		goto bail;
 	}

@@ -2429,12 +2410,11 @@ static int ocfs2_block_group_clear_bits(handle_t *handle,
 	}
 	le16_add_cpu(&bg->bg_free_bits_count, num_bits);
 	if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) {
-		ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit"
-			    " count %u but claims %u are freed. num_bits %d",
-			    (unsigned long long)le64_to_cpu(bg->bg_blkno),
-			    le16_to_cpu(bg->bg_bits),
-			    le16_to_cpu(bg->bg_free_bits_count), num_bits);
-		return -EROFS;
+		return ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit count %u but claims %u are freed. num_bits %d\n",
+				   (unsigned long long)le64_to_cpu(bg->bg_blkno),
+				   le16_to_cpu(bg->bg_bits),
+				   le16_to_cpu(bg->bg_free_bits_count),
+				   num_bits);
 	}

 	if (undo_fn)
@@ -192,6 +192,7 @@ enum {
 	Opt_resv_level,
 	Opt_dir_resv_level,
 	Opt_journal_async_commit,
+	Opt_err_cont,
 	Opt_err,
 };

@@ -224,6 +225,7 @@ static const match_table_t tokens = {
 	{Opt_resv_level, "resv_level=%u"},
 	{Opt_dir_resv_level, "dir_resv_level=%u"},
 	{Opt_journal_async_commit, "journal_async_commit"},
+	{Opt_err_cont, "errors=continue"},
 	{Opt_err, NULL}
 };

@@ -1330,10 +1332,19 @@ static int ocfs2_parse_options(struct super_block *sb,
 			mopt->mount_opt |= OCFS2_MOUNT_NOINTR;
 			break;
 		case Opt_err_panic:
+			mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_CONT;
+			mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_ROFS;
 			mopt->mount_opt |= OCFS2_MOUNT_ERRORS_PANIC;
 			break;
 		case Opt_err_ro:
+			mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_CONT;
 			mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_PANIC;
+			mopt->mount_opt |= OCFS2_MOUNT_ERRORS_ROFS;
+			break;
+		case Opt_err_cont:
+			mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_ROFS;
+			mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_PANIC;
+			mopt->mount_opt |= OCFS2_MOUNT_ERRORS_CONT;
 			break;
 		case Opt_data_ordered:
 			mopt->mount_opt &= ~OCFS2_MOUNT_DATA_WRITEBACK;
@@ -1530,6 +1541,8 @@ static int ocfs2_show_options(struct seq_file *s, struct dentry *root)

 	if (opts & OCFS2_MOUNT_ERRORS_PANIC)
 		seq_printf(s, ",errors=panic");
+	else if (opts & OCFS2_MOUNT_ERRORS_CONT)
+		seq_printf(s, ",errors=continue");
 	else
 		seq_printf(s, ",errors=remount-ro");

@@ -1550,8 +1563,8 @@ static int ocfs2_show_options(struct seq_file *s, struct dentry *root)
 		seq_printf(s, ",localflocks,");

 	if (osb->osb_cluster_stack[0])
-		seq_printf(s, ",cluster_stack=%.*s", OCFS2_STACK_LABEL_LEN,
-			   osb->osb_cluster_stack);
+		seq_show_option_n(s, "cluster_stack", osb->osb_cluster_stack,
+				  OCFS2_STACK_LABEL_LEN);
 	if (opts & OCFS2_MOUNT_USRQUOTA)
 		seq_printf(s, ",usrquota");
 	if (opts & OCFS2_MOUNT_GRPQUOTA)
@@ -1746,8 +1759,6 @@ static void ocfs2_inode_init_once(void *data)
 	ocfs2_lock_res_init_once(&oi->ip_inode_lockres);
 	ocfs2_lock_res_init_once(&oi->ip_open_lockres);

-	init_waitqueue_head(&oi->append_dio_wq);
-
 	ocfs2_metadata_cache_init(INODE_CACHE(&oi->vfs_inode),
 				  &ocfs2_inode_caching_ops);

@@ -2541,31 +2552,43 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb)
 	memset(osb, 0, sizeof(struct ocfs2_super));
 }

-/* Put OCFS2 into a readonly state, or (if the user specifies it),
- * panic(). We do not support continue-on-error operation. */
-static void ocfs2_handle_error(struct super_block *sb)
+/* Depending on the mount option passed, perform one of the following:
+ * Put OCFS2 into a readonly state (default)
+ * Return EIO so that only the process errs
+ * Fix the error as if fsck.ocfs2 -y
+ * panic
+ */
+static int ocfs2_handle_error(struct super_block *sb)
 {
 	struct ocfs2_super *osb = OCFS2_SB(sb);
-
-	if (osb->s_mount_opt & OCFS2_MOUNT_ERRORS_PANIC)
-		panic("OCFS2: (device %s): panic forced after error\n",
-		      sb->s_id);
+	int rv = 0;

 	ocfs2_set_osb_flag(osb, OCFS2_OSB_ERROR_FS);
+	pr_crit("On-disk corruption discovered. "
+		"Please run fsck.ocfs2 once the filesystem is unmounted.\n");

-	if (sb->s_flags & MS_RDONLY &&
-	    (ocfs2_is_soft_readonly(osb) ||
-	     ocfs2_is_hard_readonly(osb)))
-		return;
-
-	printk(KERN_CRIT "File system is now read-only due to the potential "
-	       "of on-disk corruption. Please run fsck.ocfs2 once the file "
-	       "system is unmounted.\n");
-	sb->s_flags |= MS_RDONLY;
-	ocfs2_set_ro_flag(osb, 0);
+	if (osb->s_mount_opt & OCFS2_MOUNT_ERRORS_PANIC) {
+		panic("OCFS2: (device %s): panic forced after error\n",
+		      sb->s_id);
+	} else if (osb->s_mount_opt & OCFS2_MOUNT_ERRORS_CONT) {
+		pr_crit("OCFS2: Returning error to the calling process.\n");
+		rv = -EIO;
+	} else { /* default option */
+		rv = -EROFS;
+		if (sb->s_flags & MS_RDONLY &&
+		    (ocfs2_is_soft_readonly(osb) ||
+		     ocfs2_is_hard_readonly(osb)))
+			return rv;
+
+		pr_crit("OCFS2: File system is now read-only.\n");
+		sb->s_flags |= MS_RDONLY;
+		ocfs2_set_ro_flag(osb, 0);
+	}
+
+	return rv;
 }

-void __ocfs2_error(struct super_block *sb, const char *function,
+int __ocfs2_error(struct super_block *sb, const char *function,
 		   const char *fmt, ...)
 {
 	struct va_format vaf;

@@ -2577,12 +2600,12 @@ void __ocfs2_error(struct super_block *sb, const char *function,

 	/* Not using mlog here because we want to show the actual
 	 * function the error came from. */
-	printk(KERN_CRIT "OCFS2: ERROR (device %s): %s: %pV\n",
+	printk(KERN_CRIT "OCFS2: ERROR (device %s): %s: %pV",
 	       sb->s_id, function, &vaf);

 	va_end(args);

-	ocfs2_handle_error(sb);
+	return ocfs2_handle_error(sb);
 }

 /* Handle critical errors. This is intentionally more drastic than
||||
/* Handle critical errors. This is intentionally more drastic than
|
||||
|
@ -2599,7 +2622,7 @@ void __ocfs2_abort(struct super_block *sb, const char *function,
|
|||
vaf.fmt = fmt;
|
||||
vaf.va = &args;
|
||||
|
||||
printk(KERN_CRIT "OCFS2: abort (device %s): %s: %pV\n",
|
||||
printk(KERN_CRIT "OCFS2: abort (device %s): %s: %pV",
|
||||
sb->s_id, function, &vaf);
|
||||
|
||||
va_end(args);
|
||||
|
|
|
@@ -32,16 +32,18 @@ int ocfs2_publish_get_mount_state(struct ocfs2_super *osb,
 				  int node_num);
 
 __printf(3, 4)
-void __ocfs2_error(struct super_block *sb, const char *function,
-		   const char *fmt, ...);
+int __ocfs2_error(struct super_block *sb, const char *function,
+		  const char *fmt, ...);
 
-#define ocfs2_error(sb, fmt, args...) __ocfs2_error(sb, __PRETTY_FUNCTION__, fmt, ##args)
+#define ocfs2_error(sb, fmt, ...)					\
+	__ocfs2_error(sb, __PRETTY_FUNCTION__, fmt, ##__VA_ARGS__)
 
 __printf(3, 4)
 void __ocfs2_abort(struct super_block *sb, const char *function,
 		   const char *fmt, ...);
 
-#define ocfs2_abort(sb, fmt, args...) __ocfs2_abort(sb, __PRETTY_FUNCTION__, fmt, ##args)
+#define ocfs2_abort(sb, fmt, ...)					\
+	__ocfs2_abort(sb, __PRETTY_FUNCTION__, fmt, ##__VA_ARGS__)
 
 /*
  * Void signal blockers, because in-kernel sigprocmask() only fails
@@ -499,30 +499,24 @@ static int ocfs2_validate_xattr_block(struct super_block *sb,
 	 */
 
 	if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
-		ocfs2_error(sb,
-			    "Extended attribute block #%llu has bad "
-			    "signature %.*s",
-			    (unsigned long long)bh->b_blocknr, 7,
-			    xb->xb_signature);
-		return -EINVAL;
+		return ocfs2_error(sb,
+				   "Extended attribute block #%llu has bad signature %.*s\n",
+				   (unsigned long long)bh->b_blocknr, 7,
+				   xb->xb_signature);
 	}
 
 	if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) {
-		ocfs2_error(sb,
-			    "Extended attribute block #%llu has an "
-			    "invalid xb_blkno of %llu",
-			    (unsigned long long)bh->b_blocknr,
-			    (unsigned long long)le64_to_cpu(xb->xb_blkno));
-		return -EINVAL;
+		return ocfs2_error(sb,
+				   "Extended attribute block #%llu has an invalid xb_blkno of %llu\n",
+				   (unsigned long long)bh->b_blocknr,
+				   (unsigned long long)le64_to_cpu(xb->xb_blkno));
 	}
 
 	if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) {
-		ocfs2_error(sb,
-			    "Extended attribute block #%llu has an invalid "
-			    "xb_fs_generation of #%u",
-			    (unsigned long long)bh->b_blocknr,
-			    le32_to_cpu(xb->xb_fs_generation));
-		return -EINVAL;
+		return ocfs2_error(sb,
+				   "Extended attribute block #%llu has an invalid xb_fs_generation of #%u\n",
+				   (unsigned long long)bh->b_blocknr,
+				   le32_to_cpu(xb->xb_fs_generation));
 	}
 
 	return 0;

@@ -3694,11 +3688,10 @@ static int ocfs2_xattr_get_rec(struct inode *inode,
 		el = &eb->h_list;
 
 		if (el->l_tree_depth) {
-			ocfs2_error(inode->i_sb,
-				    "Inode %lu has non zero tree depth in "
-				    "xattr tree block %llu\n", inode->i_ino,
-				    (unsigned long long)eb_bh->b_blocknr);
-			ret = -EROFS;
+			ret = ocfs2_error(inode->i_sb,
+					  "Inode %lu has non zero tree depth in xattr tree block %llu\n",
+					  inode->i_ino,
+					  (unsigned long long)eb_bh->b_blocknr);
 			goto out;
 		}
 	}

@@ -3713,11 +3706,10 @@ static int ocfs2_xattr_get_rec(struct inode *inode,
 	}
 
 	if (!e_blkno) {
-		ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
-			    "record (%u, %u, 0) in xattr", inode->i_ino,
-			    le32_to_cpu(rec->e_cpos),
-			    ocfs2_rec_clusters(el, rec));
-		ret = -EROFS;
+		ret = ocfs2_error(inode->i_sb, "Inode %lu has bad extent record (%u, %u, 0) in xattr\n",
+				  inode->i_ino,
+				  le32_to_cpu(rec->e_cpos),
+				  ocfs2_rec_clusters(el, rec));
 		goto out;
 	}
 

@@ -7334,6 +7326,9 @@ static size_t ocfs2_xattr_trusted_list(struct dentry *dentry, char *list,
 	const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
 	const size_t total_len = prefix_len + name_len + 1;
 
+	if (!capable(CAP_SYS_ADMIN))
+		return 0;
+
 	if (list && total_len <= list_size) {
 		memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
 		memcpy(list + prefix_len, name, name_len);
@@ -588,10 +588,10 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
 	struct super_block *sb = dentry->d_sb;
 	struct ovl_fs *ufs = sb->s_fs_info;
 
-	seq_printf(m, ",lowerdir=%s", ufs->config.lowerdir);
+	seq_show_option(m, "lowerdir", ufs->config.lowerdir);
 	if (ufs->config.upperdir) {
-		seq_printf(m, ",upperdir=%s", ufs->config.upperdir);
-		seq_printf(m, ",workdir=%s", ufs->config.workdir);
+		seq_show_option(m, "upperdir", ufs->config.upperdir);
+		seq_show_option(m, "workdir", ufs->config.workdir);
 	}
 	return 0;
 }
@@ -308,7 +308,8 @@ static void render_cap_t(struct seq_file *m, const char *header,
 static inline void task_cap(struct seq_file *m, struct task_struct *p)
 {
 	const struct cred *cred;
-	kernel_cap_t cap_inheritable, cap_permitted, cap_effective, cap_bset;
+	kernel_cap_t cap_inheritable, cap_permitted, cap_effective,
+			cap_bset, cap_ambient;
 
 	rcu_read_lock();
 	cred = __task_cred(p);

@@ -316,12 +317,14 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p)
 	cap_permitted	= cred->cap_permitted;
 	cap_effective	= cred->cap_effective;
 	cap_bset	= cred->cap_bset;
+	cap_ambient	= cred->cap_ambient;
 	rcu_read_unlock();
 
 	render_cap_t(m, "CapInh:\t", &cap_inheritable);
 	render_cap_t(m, "CapPrm:\t", &cap_permitted);
 	render_cap_t(m, "CapEff:\t", &cap_effective);
 	render_cap_t(m, "CapBnd:\t", &cap_bset);
+	render_cap_t(m, "CapAmb:\t", &cap_ambient);
 }
 
 static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
@@ -597,6 +597,8 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
 		[ilog2(VM_HUGEPAGE)]	= "hg",
 		[ilog2(VM_NOHUGEPAGE)]	= "nh",
 		[ilog2(VM_MERGEABLE)]	= "mg",
+		[ilog2(VM_UFFD_MISSING)]= "um",
+		[ilog2(VM_UFFD_WP)]	= "uw",
 	};
 	size_t i;
 
@@ -714,18 +714,20 @@ static int reiserfs_show_options(struct seq_file *seq, struct dentry *root)
 		seq_puts(seq, ",acl");
 
 	if (REISERFS_SB(s)->s_jdev)
-		seq_printf(seq, ",jdev=%s", REISERFS_SB(s)->s_jdev);
+		seq_show_option(seq, "jdev", REISERFS_SB(s)->s_jdev);
 
 	if (journal->j_max_commit_age != journal->j_default_max_commit_age)
 		seq_printf(seq, ",commit=%d", journal->j_max_commit_age);
 
 #ifdef CONFIG_QUOTA
 	if (REISERFS_SB(s)->s_qf_names[USRQUOTA])
-		seq_printf(seq, ",usrjquota=%s", REISERFS_SB(s)->s_qf_names[USRQUOTA]);
+		seq_show_option(seq, "usrjquota",
+				REISERFS_SB(s)->s_qf_names[USRQUOTA]);
 	else if (opts & (1 << REISERFS_USRQUOTA))
 		seq_puts(seq, ",usrquota");
 	if (REISERFS_SB(s)->s_qf_names[GRPQUOTA])
-		seq_printf(seq, ",grpjquota=%s", REISERFS_SB(s)->s_qf_names[GRPQUOTA]);
+		seq_show_option(seq, "grpjquota",
+				REISERFS_SB(s)->s_qf_names[GRPQUOTA]);
 	else if (opts & (1 << REISERFS_GRPQUOTA))
 		seq_puts(seq, ",grpquota");
 	if (REISERFS_SB(s)->s_jquota_fmt) {
[The diff for this file is not shown because it is too large.]
@@ -511,9 +511,9 @@ xfs_showargs(
 		seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10);
 
 	if (mp->m_logname)
-		seq_printf(m, "," MNTOPT_LOGDEV "=%s", mp->m_logname);
+		seq_show_option(m, MNTOPT_LOGDEV, mp->m_logname);
 	if (mp->m_rtname)
-		seq_printf(m, "," MNTOPT_RTDEV "=%s", mp->m_rtname);
+		seq_show_option(m, MNTOPT_RTDEV, mp->m_rtname);
 
 	if (mp->m_dalign > 0)
 		seq_printf(m, "," MNTOPT_SUNIT "=%d",
@@ -137,6 +137,7 @@ struct cred {
 	kernel_cap_t	cap_permitted;	/* caps we're permitted */
 	kernel_cap_t	cap_effective;	/* caps we can actually use */
 	kernel_cap_t	cap_bset;	/* capability bounding set */
+	kernel_cap_t	cap_ambient;	/* Ambient capability set */
 #ifdef CONFIG_KEYS
 	unsigned char	jit_keyring;	/* default keyring to attach requested
 					 * keys to */

@@ -212,6 +213,13 @@ static inline void validate_process_creds(void)
 }
 #endif
 
+static inline bool cap_ambient_invariant_ok(const struct cred *cred)
+{
+	return cap_issubset(cred->cap_ambient,
+			    cap_intersect(cred->cap_permitted,
+					  cred->cap_inheritable));
+}
+
 /**
  * get_new_cred - Get a reference on a new set of credentials
  * @cred: The new credentials to reference
@@ -1612,7 +1612,6 @@ struct file_operations {
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
 	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
 	int (*mmap) (struct file *, struct vm_area_struct *);
-	int (*mremap)(struct file *, struct vm_area_struct *);
 	int (*open) (struct inode *, struct file *);
 	int (*flush) (struct file *, fl_owner_t id);
 	int (*release) (struct inode *, struct file *);
@@ -195,40 +195,49 @@ struct fsnotify_group {
 #define FSNOTIFY_EVENT_INODE	2
 
 /*
- * a mark is simply an object attached to an in core inode which allows an
+ * A mark is simply an object attached to an in core inode which allows an
  * fsnotify listener to indicate they are either no longer interested in events
  * of a type matching mask or only interested in those events.
  *
- * these are flushed when an inode is evicted from core and may be flushed
- * when the inode is modified (as seen by fsnotify_access). Some fsnotify users
- * (such as dnotify) will flush these when the open fd is closed and not at
- * inode eviction or modification.
+ * These are flushed when an inode is evicted from core and may be flushed
+ * when the inode is modified (as seen by fsnotify_access). Some fsnotify
+ * users (such as dnotify) will flush these when the open fd is closed and not
+ * at inode eviction or modification.
+ *
+ * Text in brackets is showing the lock(s) protecting modifications of a
+ * particular entry. obj_lock means either inode->i_lock or
+ * mnt->mnt_root->d_lock depending on the mark type.
  */
 struct fsnotify_mark {
-	__u32 mask;			/* mask this mark is for */
-	/* we hold ref for each i_list and g_list. also one ref for each 'thing'
+	/* Mask this mark is for [mark->lock, group->mark_mutex] */
+	__u32 mask;
+	/* We hold one for presence in g_list. Also one ref for each 'thing'
 	 * in kernel that found and may be using this mark. */
-	atomic_t refcnt;		/* active things looking at this mark */
-	struct fsnotify_group *group;	/* group this mark is for */
-	struct list_head g_list;	/* list of marks by group->i_fsnotify_marks
-					 * Also reused for queueing mark into
-					 * destroy_list when it's waiting for
-					 * the end of SRCU period before it can
-					 * be freed */
-	spinlock_t lock;		/* protect group and inode */
-	struct hlist_node obj_list;	/* list of marks for inode / vfsmount */
-	struct list_head free_list;	/* tmp list used when freeing this mark */
-	union {
+	atomic_t refcnt;
+	/* Group this mark is for. Set on mark creation, stable until last ref
+	 * is dropped */
+	struct fsnotify_group *group;
+	/* List of marks by group->i_fsnotify_marks. Also reused for queueing
+	 * mark into destroy_list when it's waiting for the end of SRCU period
+	 * before it can be freed. [group->mark_mutex] */
+	struct list_head g_list;
+	/* Protects inode / mnt pointers, flags, masks */
+	spinlock_t lock;
+	/* List of marks for inode / vfsmount [obj_lock] */
+	struct hlist_node obj_list;
+	union {	/* Object pointer [mark->lock, group->mark_mutex] */
 		struct inode *inode;	/* inode this mark is associated with */
 		struct vfsmount *mnt;	/* vfsmount this mark is associated with */
 	};
-	__u32 ignored_mask;		/* events types to ignore */
+	/* Events types to ignore [mark->lock, group->mark_mutex] */
+	__u32 ignored_mask;
 #define FSNOTIFY_MARK_FLAG_INODE		0x01
 #define FSNOTIFY_MARK_FLAG_VFSMOUNT		0x02
 #define FSNOTIFY_MARK_FLAG_OBJECT_PINNED	0x04
 #define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY	0x08
 #define FSNOTIFY_MARK_FLAG_ALIVE		0x10
-	unsigned int flags;		/* vfsmount or inode mark? */
+#define FSNOTIFY_MARK_FLAG_ATTACHED		0x20
+	unsigned int flags;		/* flags [mark->lock] */
 	void (*free_mark)(struct fsnotify_mark *mark); /* called on final put+free */
 };
 

@@ -345,8 +354,10 @@ extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark, struct fsnotify_
 /* given a group and a mark, flag mark to be freed when all references are dropped */
 extern void fsnotify_destroy_mark(struct fsnotify_mark *mark,
 				  struct fsnotify_group *group);
-extern void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark,
-					 struct fsnotify_group *group);
+/* detach mark from inode / mount list, group list, drop inode reference */
+extern void fsnotify_detach_mark(struct fsnotify_mark *mark);
+/* free mark */
+extern void fsnotify_free_mark(struct fsnotify_mark *mark);
 /* run all the marks in a group, and clear all of the vfsmount marks */
 extern void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group);
 /* run all the marks in a group, and clear all of the inode marks */
@@ -59,6 +59,8 @@ struct gen_pool {
 
 	genpool_algo_t algo;		/* allocation function */
 	void *data;
+
+	const char *name;
 };
 
 /*

@@ -118,8 +120,8 @@ extern unsigned long gen_pool_best_fit(unsigned long *map, unsigned long size,
 		unsigned long start, unsigned int nr, void *data);
 
 extern struct gen_pool *devm_gen_pool_create(struct device *dev,
-		int min_alloc_order, int nid);
-extern struct gen_pool *gen_pool_get(struct device *dev);
+		int min_alloc_order, int nid, const char *name);
+extern struct gen_pool *gen_pool_get(struct device *dev, const char *name);
 
 bool addr_in_gen_pool(struct gen_pool *pool, unsigned long start,
 			size_t size);
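The name argument turns the old one-pool-per-device assumption into a lookup key. A minimal sketch of how a driver might use this, under stated assumptions: example_probe() and the pool names are hypothetical, and the return-value convention of the patched devm_gen_pool_create() is checked defensively with IS_ERR_OR_NULL() rather than asserted:

#include <linux/device.h>
#include <linux/err.h>
#include <linux/genalloc.h>
#include <linux/log2.h>

/* Hypothetical probe: carve two separately named pools out of memory
 * belonging to the same struct device, then look one up by name. */
static int example_probe(struct device *dev)
{
	struct gen_pool *sram;

	/* 32-byte minimum allocation granularity for the "sram" pool. */
	sram = devm_gen_pool_create(dev, ilog2(32), dev_to_node(dev), "sram");
	if (IS_ERR_OR_NULL(sram))
		return sram ? PTR_ERR(sram) : -ENOMEM;

	/* A second pool on the same device now needs a distinct name. */
	if (IS_ERR_OR_NULL(devm_gen_pool_create(dev, ilog2(64),
						dev_to_node(dev), "dma")))
		return -ENOMEM;

	/* Consumers select a pool by name instead of getting "the" pool. */
	return gen_pool_get(dev, "sram") == sram ? 0 : -ENODEV;
}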
@@ -11,7 +11,7 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
 					   const char namefmt[], ...);
 
 #define kthread_create(threadfn, data, namefmt, arg...) \
-	kthread_create_on_node(threadfn, data, -1, namefmt, ##arg)
+	kthread_create_on_node(threadfn, data, NUMA_NO_NODE, namefmt, ##arg)
 
 
 struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
@@ -124,8 +124,10 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_MAYSHARE	0x00000080
 
 #define VM_GROWSDOWN	0x00000100	/* general info on the segment */
+#define VM_UFFD_MISSING	0x00000200	/* missing pages tracking */
 #define VM_PFNMAP	0x00000400	/* Page-ranges managed without "struct page", just pure PFN */
 #define VM_DENYWRITE	0x00000800	/* ETXTBSY on write attempts.. */
+#define VM_UFFD_WP	0x00001000	/* wrprotect pages tracking */
 
 #define VM_LOCKED	0x00002000
 #define VM_IO           0x00004000	/* Memory mapped I/O or similar */

@@ -245,6 +247,7 @@ struct vm_fault {
 struct vm_operations_struct {
 	void (*open)(struct vm_area_struct * area);
 	void (*close)(struct vm_area_struct * area);
+	int (*mremap)(struct vm_area_struct * area);
 	int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
 	void (*map_pages)(struct vm_area_struct *vma, struct vm_fault *vmf);
 

@@ -1833,7 +1836,7 @@ extern int vma_adjust(struct vm_area_struct *vma, unsigned long start,
 extern struct vm_area_struct *vma_merge(struct mm_struct *,
 	struct vm_area_struct *prev, unsigned long addr, unsigned long end,
 	unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,
-	struct mempolicy *);
+	struct mempolicy *, struct vm_userfaultfd_ctx);
 extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
 extern int split_vma(struct mm_struct *,
 	struct vm_area_struct *, unsigned long addr, int new_below);
@@ -256,6 +256,16 @@ struct vm_region {
 					 * this region */
 };
 
+#ifdef CONFIG_USERFAULTFD
+#define NULL_VM_UFFD_CTX ((struct vm_userfaultfd_ctx) { NULL, })
+struct vm_userfaultfd_ctx {
+	struct userfaultfd_ctx *ctx;
+};
+#else /* CONFIG_USERFAULTFD */
+#define NULL_VM_UFFD_CTX ((struct vm_userfaultfd_ctx) {})
+struct vm_userfaultfd_ctx {};
+#endif /* CONFIG_USERFAULTFD */
+
 /*
  * This struct defines a memory VMM memory area. There is one of these
  * per VM-area/task.  A VM area is any part of the process virtual memory

@@ -322,6 +332,7 @@ struct vm_area_struct {
 #ifdef CONFIG_NUMA
 	struct mempolicy *vm_policy;	/* NUMA policy for the VMA */
 #endif
+	struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
 };
 
 struct core_thread {

@@ -543,6 +554,7 @@ enum tlb_flush_reason {
 	TLB_REMOTE_SHOOTDOWN,
 	TLB_LOCAL_SHOOTDOWN,
 	TLB_LOCAL_MM_SHOOTDOWN,
+	TLB_REMOTE_SEND_IPI,
 	NR_TLB_FLUSH_REASONS,
 };
 
@@ -690,14 +690,6 @@ struct zonelist {
 #endif
 };
 
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-struct node_active_region {
-	unsigned long start_pfn;
-	unsigned long end_pfn;
-	int nid;
-};
-#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
-
 #ifndef CONFIG_DISCONTIGMEM
 /* The array of struct pages - for discontigmem use pgdat->lmem_map */
 extern struct page *mem_map;
@@ -27,9 +27,7 @@ static inline void touch_nmi_watchdog(void)
 #if defined(CONFIG_HARDLOCKUP_DETECTOR)
 extern void hardlockup_detector_disable(void);
 #else
-static inline void hardlockup_detector_disable(void)
-{
-}
+static inline void hardlockup_detector_disable(void) {}
 #endif
 
 /*

@@ -80,6 +78,17 @@ extern int proc_watchdog_thresh(struct ctl_table *, int ,
 				void __user *, size_t *, loff_t *);
 extern int proc_watchdog_cpumask(struct ctl_table *, int,
 				 void __user *, size_t *, loff_t *);
+extern int lockup_detector_suspend(void);
+extern void lockup_detector_resume(void);
 #else
+static inline int lockup_detector_suspend(void)
+{
+	return 0;
+}
+
+static inline void lockup_detector_resume(void)
+{
+}
 #endif
 
 #ifdef CONFIG_HAVE_ACPI_APEI_NMI
@@ -89,6 +89,9 @@ enum ttu_flags {
 	TTU_IGNORE_MLOCK = (1 << 8),	/* ignore mlock */
 	TTU_IGNORE_ACCESS = (1 << 9),	/* don't age */
 	TTU_IGNORE_HWPOISON = (1 << 10),/* corrupted page is recoverable */
+	TTU_BATCH_FLUSH = (1 << 11),	/* Batch TLB flushes where possible
+					 * and caller guarantees they will
+					 * do a final flush if necessary */
 };
 
 #ifdef CONFIG_MMU
@@ -1344,6 +1344,25 @@ enum perf_event_task_context {
 	perf_nr_task_contexts,
 };
 
+/* Track pages that require TLB flushes */
+struct tlbflush_unmap_batch {
+	/*
+	 * Each bit set is a CPU that potentially has a TLB entry for one of
+	 * the PFNs being flushed. See set_tlb_ubc_flush_pending().
+	 */
+	struct cpumask cpumask;
+
+	/* True if any bit in cpumask is set */
+	bool flush_required;
+
+	/*
+	 * If true then the PTE was dirty when unmapped. The entry must be
+	 * flushed before IO is initiated or a stale TLB entry potentially
+	 * allows an update without redirtying the page.
+	 */
+	bool writable;
+};
+
 struct task_struct {
 	volatile long state;	/* -1 unrunnable, 0 runnable, >0 stopped */
 	void *stack;

@@ -1700,6 +1719,10 @@ struct task_struct {
 	unsigned long numa_pages_migrated;
 #endif /* CONFIG_NUMA_BALANCING */
 
+#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+	struct tlbflush_unmap_batch tlb_ubc;
+#endif
+
 	struct rcu_head rcu;
 
 	/*
@@ -149,6 +149,41 @@ static inline struct user_namespace *seq_user_ns(struct seq_file *seq)
 #endif
 }
 
+/**
+ * seq_show_option - display mount options with appropriate escapes.
+ * @m: the seq_file handle
+ * @name: the mount option name
+ * @value: the mount option name's value, can be NULL
+ */
+static inline void seq_show_option(struct seq_file *m, const char *name,
+				   const char *value)
+{
+	seq_putc(m, ',');
+	seq_escape(m, name, ",= \t\n\\");
+	if (value) {
+		seq_putc(m, '=');
+		seq_escape(m, value, ", \t\n\\");
+	}
+}
+
+/**
+ * seq_show_option_n - display mount options with appropriate escapes
+ *		       where @value must be a specific length.
+ * @m: the seq_file handle
+ * @name: the mount option name
+ * @value: the mount option name's value, cannot be NULL
+ * @length: the length of @value to display
+ *
+ * This is a macro since this uses "length" to define the size of the
+ * stack buffer.
+ */
+#define seq_show_option_n(m, name, value, length) {	\
+	char val_buf[length + 1];			\
+	strncpy(val_buf, value, length);		\
+	val_buf[length] = '\0';				\
+	seq_show_option(m, name, val_buf);		\
+}
+
 #define SEQ_START_TOKEN ((void *)1)
 /*
  * Helpers for iteration over list_head-s in seq_files
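For context, a sketch of the intended call pattern; the "myfs" filesystem and its private data are made up for illustration. The point of the helper is that option values which may contain commas, spaces, tabs or newlines go through seq_show_option() instead of a raw seq_printf(), so a hostile directory name can no longer forge extra fields in /proc/mounts:

#include <linux/fs.h>
#include <linux/seq_file.h>

struct myfs_info {			/* hypothetical per-sb private data */
	const char *datadir;
};

static int myfs_show_options(struct seq_file *m, struct dentry *root)
{
	struct myfs_info *info = root->d_sb->s_fs_info;

	/* The name is escaped including '='; the value is escaped for
	 * ',', ' ', '\t', '\n' and '\\' (an '=' may legitimately appear
	 * inside a value). */
	seq_show_option(m, "datadir", info->datadir);

	/* A NULL value prints just ",name" with no '=' appended. */
	seq_show_option(m, "someflag", NULL);
	return 0;
}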
@@ -290,6 +290,16 @@ void *__kmalloc(size_t size, gfp_t flags);
 void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags);
 void kmem_cache_free(struct kmem_cache *, void *);
 
+/*
+ * Bulk allocation and freeing operations. These are accelerated in an
+ * allocator specific way to avoid taking locks repeatedly or building
+ * metadata structures unnecessarily.
+ *
+ * Note that interrupts must be enabled when calling these functions.
+ */
+void kmem_cache_free_bulk(struct kmem_cache *, size_t, void **);
+bool kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **);
+
 #ifdef CONFIG_NUMA
 void *__kmalloc_node(size_t size, gfp_t flags, int node);
 void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
@@ -48,7 +48,16 @@ struct smp_hotplug_thread {
 	const char			*thread_comm;
 };
 
-int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread);
+int smpboot_register_percpu_thread_cpumask(struct smp_hotplug_thread *plug_thread,
+					   const struct cpumask *cpumask);
+
+static inline int
+smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread)
+{
+	return smpboot_register_percpu_thread_cpumask(plug_thread,
+						      cpu_possible_mask);
+}
+
 void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread);
 int smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread,
 					 const struct cpumask *);
@@ -810,6 +810,7 @@ asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr);
 asmlinkage long sys_eventfd(unsigned int count);
 asmlinkage long sys_eventfd2(unsigned int count, int flags);
 asmlinkage long sys_memfd_create(const char __user *uname_ptr, unsigned int flags);
+asmlinkage long sys_userfaultfd(int flags);
 asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
 asmlinkage long sys_old_readdir(unsigned int, struct old_linux_dirent __user *, unsigned int);
 asmlinkage long sys_pselect6(int, fd_set __user *, fd_set __user *,
@@ -0,0 +1,85 @@
+/*
+ *  include/linux/userfaultfd_k.h
+ *
+ *  Copyright (C) 2015  Red Hat, Inc.
+ *
+ */
+
+#ifndef _LINUX_USERFAULTFD_K_H
+#define _LINUX_USERFAULTFD_K_H
+
+#ifdef CONFIG_USERFAULTFD
+
+#include <linux/userfaultfd.h> /* linux/include/uapi/linux/userfaultfd.h */
+
+#include <linux/fcntl.h>
+
+/*
+ * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining
+ * new flags, since they might collide with O_* ones. We want
+ * to re-use O_* flags that couldn't possibly have a meaning
+ * from userfaultfd, in order to leave a free define-space for
+ * shared O_* flags.
+ */
+#define UFFD_CLOEXEC O_CLOEXEC
+#define UFFD_NONBLOCK O_NONBLOCK
+
+#define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
+#define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS)
+
+extern int handle_userfault(struct vm_area_struct *vma, unsigned long address,
+			    unsigned int flags, unsigned long reason);
+
+extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
+			    unsigned long src_start, unsigned long len);
+extern ssize_t mfill_zeropage(struct mm_struct *dst_mm,
+			      unsigned long dst_start,
+			      unsigned long len);
+
+/* mm helpers */
+static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
+					struct vm_userfaultfd_ctx vm_ctx)
+{
+	return vma->vm_userfaultfd_ctx.ctx == vm_ctx.ctx;
+}
+
+static inline bool userfaultfd_missing(struct vm_area_struct *vma)
+{
+	return vma->vm_flags & VM_UFFD_MISSING;
+}
+
+static inline bool userfaultfd_armed(struct vm_area_struct *vma)
+{
+	return vma->vm_flags & (VM_UFFD_MISSING | VM_UFFD_WP);
+}
+
+#else /* CONFIG_USERFAULTFD */
+
+/* mm helpers */
+static inline int handle_userfault(struct vm_area_struct *vma,
+				   unsigned long address,
+				   unsigned int flags,
+				   unsigned long reason)
+{
+	return VM_FAULT_SIGBUS;
+}
+
+static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
+					struct vm_userfaultfd_ctx vm_ctx)
+{
+	return true;
+}
+
+static inline bool userfaultfd_missing(struct vm_area_struct *vma)
+{
+	return false;
+}
+
+static inline bool userfaultfd_armed(struct vm_area_struct *vma)
+{
+	return false;
+}
+
+#endif /* CONFIG_USERFAULTFD */
+
+#endif /* _LINUX_USERFAULTFD_K_H */
@@ -147,7 +147,8 @@ __remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old)
 
 typedef int wait_bit_action_f(struct wait_bit_key *);
 void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
-void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key);
+void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, int nr,
+			  void *key);
 void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
 void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr);
 void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr);

@@ -179,7 +180,7 @@ wait_queue_head_t *bit_waitqueue(void *, int);
 #define wake_up_poll(x, m)						\
 	__wake_up(x, TASK_NORMAL, 1, (void *) (m))
 #define wake_up_locked_poll(x, m)					\
-	__wake_up_locked_key((x), TASK_NORMAL, (void *) (m))
+	__wake_up_locked_key((x), TASK_NORMAL, 1, (void *) (m))
 #define wake_up_interruptible_poll(x, m)				\
 	__wake_up(x, TASK_INTERRUPTIBLE, 1, (void *) (m))
 #define wake_up_interruptible_sync_poll(x, m)				\
@@ -140,12 +140,4 @@ extern int watchdog_init_timeout(struct watchdog_device *wdd,
 extern int watchdog_register_device(struct watchdog_device *);
 extern void watchdog_unregister_device(struct watchdog_device *);
 
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-void watchdog_nmi_disable_all(void);
-void watchdog_nmi_enable_all(void);
-#else
-static inline void watchdog_nmi_disable_all(void) {}
-static inline void watchdog_nmi_enable_all(void) {}
-#endif
-
 #endif	/* ifndef _LINUX_WATCHDOG_H */
@@ -11,7 +11,8 @@
 	EM(  TLB_FLUSH_ON_TASK_SWITCH,	"flush on task switch" )	\
 	EM(  TLB_REMOTE_SHOOTDOWN,	"remote shootdown" )		\
 	EM(  TLB_LOCAL_SHOOTDOWN,	"local shootdown" )		\
-	EMe( TLB_LOCAL_MM_SHOOTDOWN,	"local mm shootdown" )
+	EM(  TLB_LOCAL_MM_SHOOTDOWN,	"local mm shootdown" )		\
+	EMe( TLB_REMOTE_SEND_IPI,	"remote ipi send" )
 
 /*
  * First define the enums in TLB_FLUSH_REASON to be exported to userspace
@@ -456,3 +456,4 @@ header-y += xfrm.h
 header-y += xilinx-v4l2-controls.h
 header-y += zorro.h
 header-y += zorro_ids.h
+header-y += userfaultfd.h
@@ -190,4 +190,11 @@ struct prctl_mm_map {
 # define PR_FP_MODE_FR		(1 << 0)	/* 64b FP registers */
 # define PR_FP_MODE_FRE		(1 << 1)	/* 32b compatibility */
 
+/* Control the ambient capability set */
+#define PR_CAP_AMBIENT			47
+# define PR_CAP_AMBIENT_IS_SET		1
+# define PR_CAP_AMBIENT_RAISE		2
+# define PR_CAP_AMBIENT_LOWER		3
+# define PR_CAP_AMBIENT_CLEAR_ALL	4
+
 #endif /* _LINUX_PRCTL_H */
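A minimal userspace sketch of driving the new ambient-set operations; the constants are mirrored locally only in case the installed headers predate this merge. Raising a bit requires it to already be in both the permitted and inheritable sets, and the SECBIT_NO_CAP_AMBIENT_RAISE securebit added below can veto the raise:

#include <stdio.h>
#include <sys/prctl.h>
#include <linux/capability.h>

#ifndef PR_CAP_AMBIENT			/* headers older than this merge */
#define PR_CAP_AMBIENT		47
#define PR_CAP_AMBIENT_IS_SET	1
#define PR_CAP_AMBIENT_RAISE	2
#endif

int main(void)
{
	if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE,
		  CAP_NET_BIND_SERVICE, 0, 0) < 0) {
		perror("PR_CAP_AMBIENT_RAISE");
		return 1;
	}
	/* Ambient caps survive execve() of an unprivileged binary. */
	printf("ambient CAP_NET_BIND_SERVICE: %ld\n",
	       (long)prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET,
			   CAP_NET_BIND_SERVICE, 0, 0));
	return 0;
}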
@@ -43,9 +43,18 @@
 #define SECBIT_KEEP_CAPS	(issecure_mask(SECURE_KEEP_CAPS))
 #define SECBIT_KEEP_CAPS_LOCKED (issecure_mask(SECURE_KEEP_CAPS_LOCKED))
 
+/* When set, a process cannot add new capabilities to its ambient set. */
+#define SECURE_NO_CAP_AMBIENT_RAISE		6
+#define SECURE_NO_CAP_AMBIENT_RAISE_LOCKED	7  /* make bit-6 immutable */
+
+#define SECBIT_NO_CAP_AMBIENT_RAISE (issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE))
+#define SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED \
+			(issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE_LOCKED))
+
 #define SECURE_ALL_BITS		(issecure_mask(SECURE_NOROOT) | \
 				 issecure_mask(SECURE_NO_SETUID_FIXUP) | \
-				 issecure_mask(SECURE_KEEP_CAPS))
+				 issecure_mask(SECURE_KEEP_CAPS) | \
+				 issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE))
 #define SECURE_ALL_LOCKS	(SECURE_ALL_BITS << 1)
 
 #endif /* _UAPI_LINUX_SECUREBITS_H */
@@ -0,0 +1,169 @@
+/*
+ *  include/linux/userfaultfd.h
+ *
+ *  Copyright (C) 2007  Davide Libenzi <davidel@xmailserver.org>
+ *  Copyright (C) 2015  Red Hat, Inc.
+ *
+ */
+
+#ifndef _LINUX_USERFAULTFD_H
+#define _LINUX_USERFAULTFD_H
+
+#include <linux/types.h>
+
+#include <linux/compiler.h>
+
+#define UFFD_API ((__u64)0xAA)
+/*
+ * After implementing the respective features it will become:
+ * #define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \
+ *			      UFFD_FEATURE_EVENT_FORK)
+ */
+#define UFFD_API_FEATURES (0)
+#define UFFD_API_IOCTLS				\
+	((__u64)1 << _UFFDIO_REGISTER |		\
+	 (__u64)1 << _UFFDIO_UNREGISTER |	\
+	 (__u64)1 << _UFFDIO_API)
+#define UFFD_API_RANGE_IOCTLS			\
+	((__u64)1 << _UFFDIO_WAKE |		\
+	 (__u64)1 << _UFFDIO_COPY |		\
+	 (__u64)1 << _UFFDIO_ZEROPAGE)
+
+/*
+ * Valid ioctl command number range with this API is from 0x00 to
+ * 0x3F.  UFFDIO_API is the fixed number, everything else can be
+ * changed by implementing a different UFFD_API. If sticking to the
+ * same UFFD_API more ioctl can be added and userland will be aware of
+ * which ioctl the running kernel implements through the ioctl command
+ * bitmask written by the UFFDIO_API.
+ */
+#define _UFFDIO_REGISTER		(0x00)
+#define _UFFDIO_UNREGISTER		(0x01)
+#define _UFFDIO_WAKE			(0x02)
+#define _UFFDIO_COPY			(0x03)
+#define _UFFDIO_ZEROPAGE		(0x04)
+#define _UFFDIO_API			(0x3F)
+
+/* userfaultfd ioctl ids */
+#define UFFDIO 0xAA
+#define UFFDIO_API		_IOWR(UFFDIO, _UFFDIO_API,	\
+				      struct uffdio_api)
+#define UFFDIO_REGISTER		_IOWR(UFFDIO, _UFFDIO_REGISTER, \
+				      struct uffdio_register)
+#define UFFDIO_UNREGISTER	_IOR(UFFDIO, _UFFDIO_UNREGISTER,	\
+				     struct uffdio_range)
+#define UFFDIO_WAKE		_IOR(UFFDIO, _UFFDIO_WAKE,	\
+				     struct uffdio_range)
+#define UFFDIO_COPY		_IOWR(UFFDIO, _UFFDIO_COPY,	\
+				      struct uffdio_copy)
+#define UFFDIO_ZEROPAGE	_IOWR(UFFDIO, _UFFDIO_ZEROPAGE,	\
+				      struct uffdio_zeropage)
+
+/* read() structure */
+struct uffd_msg {
+	__u8	event;
+
+	__u8	reserved1;
+	__u16	reserved2;
+	__u32	reserved3;
+
+	union {
+		struct {
+			__u64	flags;
+			__u64	address;
+		} pagefault;
+
+		struct {
+			/* unused reserved fields */
+			__u64	reserved1;
+			__u64	reserved2;
+			__u64	reserved3;
+		} reserved;
+	} arg;
+} __packed;
+
+/*
+ * Start at 0x12 and not at 0 to be more strict against bugs.
+ */
+#define UFFD_EVENT_PAGEFAULT	0x12
+#if 0 /* not available yet */
+#define UFFD_EVENT_FORK		0x13
+#endif
+
+/* flags for UFFD_EVENT_PAGEFAULT */
+#define UFFD_PAGEFAULT_FLAG_WRITE	(1<<0)	/* If this was a write fault */
+#define UFFD_PAGEFAULT_FLAG_WP		(1<<1)	/* If reason is VM_UFFD_WP */
+
+struct uffdio_api {
+	/* userland asks for an API number and the features to enable */
+	__u64 api;
+	/*
+	 * Kernel answers below with the all available features for
+	 * the API, this notifies userland of which events and/or
+	 * which flags for each event are enabled in the current
+	 * kernel.
+	 *
+	 * Note: UFFD_EVENT_PAGEFAULT and UFFD_PAGEFAULT_FLAG_WRITE
+	 * are to be considered implicitly always enabled in all kernels as
+	 * long as the uffdio_api.api requested matches UFFD_API.
+	 */
+#if 0 /* not available yet */
+#define UFFD_FEATURE_PAGEFAULT_FLAG_WP	(1<<0)
+#define UFFD_FEATURE_EVENT_FORK		(1<<1)
+#endif
+	__u64 features;
+
+	__u64 ioctls;
+};
+
+struct uffdio_range {
+	__u64 start;
+	__u64 len;
+};
+
+struct uffdio_register {
+	struct uffdio_range range;
+#define UFFDIO_REGISTER_MODE_MISSING	((__u64)1<<0)
+#define UFFDIO_REGISTER_MODE_WP		((__u64)1<<1)
+	__u64 mode;
+
+	/*
+	 * kernel answers which ioctl commands are available for the
+	 * range, keep at the end as the last 8 bytes aren't read.
+	 */
+	__u64 ioctls;
+};
+
+struct uffdio_copy {
+	__u64 dst;
+	__u64 src;
+	__u64 len;
+	/*
+	 * There will be a wrprotection flag later that allows to map
+	 * pages wrprotected on the fly. And such a flag will be
+	 * available if the wrprotection ioctl are implemented for the
+	 * range according to the uffdio_register.ioctls.
+	 */
+#define UFFDIO_COPY_MODE_DONTWAKE		((__u64)1<<0)
+	__u64 mode;
+
+	/*
+	 * "copy" is written by the ioctl and must be at the end: the
+	 * copy_from_user will not read the last 8 bytes.
+	 */
+	__s64 copy;
+};
+
+struct uffdio_zeropage {
+	struct uffdio_range range;
+#define UFFDIO_ZEROPAGE_MODE_DONTWAKE		((__u64)1<<0)
+	__u64 mode;
+
+	/*
+	 * "zeropage" is written by the ioctl and must be at the end:
+	 * the copy_from_user will not read the last 8 bytes.
+	 */
+	__s64 zeropage;
+};
+
+#endif /* _LINUX_USERFAULTFD_H */
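A hedged userspace sketch of the handshake this header defines: open the fd, negotiate UFFD_API, then register an anonymous range for missing-page tracking. There is no glibc wrapper at this point, so the raw syscall number is used; __NR_userfaultfd assumes headers from a kernel that includes this merge. A real monitor would go on to read() struct uffd_msg events from the fd and resolve them with UFFDIO_COPY or UFFDIO_ZEROPAGE:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/userfaultfd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	int uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
	if (uffd < 0) {
		perror("userfaultfd");	/* ENOSYS without CONFIG_USERFAULTFD */
		return 1;
	}

	/* Handshake: request UFFD_API with no extra features; the kernel
	 * writes back the supported features and ioctl bitmask. */
	struct uffdio_api api = { .api = UFFD_API, .features = 0 };
	if (ioctl(uffd, UFFDIO_API, &api)) {
		perror("UFFDIO_API");
		return 1;
	}

	void *area = mmap(NULL, page, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (area == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* Track not-yet-populated pages in [area, area + page). */
	struct uffdio_register reg = {
		.range = { .start = (unsigned long)area, .len = page },
		.mode  = UFFDIO_REGISTER_MODE_MISSING,
	};
	if (ioctl(uffd, UFFDIO_REGISTER, &reg)) {
		perror("UFFDIO_REGISTER");
		return 1;
	}
	printf("ioctls usable on the range: %#llx\n",
	       (unsigned long long)reg.ioctls);
	return 0;
}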
init/Kconfig
@@ -882,6 +882,16 @@ config GENERIC_SCHED_CLOCK
 config ARCH_SUPPORTS_NUMA_BALANCING
 	bool
 
+#
+# For architectures that prefer to flush all TLBs after a number of pages
+# are unmapped instead of sending one IPI per page to flush. The architecture
+# must provide guarantees on what happens if a clean TLB cache entry is
+# written after the unmap. Details are in mm/rmap.c near the check for
+# should_defer_flush. The architecture should also consider if the full flush
+# and the refill costs are offset by the savings of sending fewer IPIs.
+config ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+	bool
+
 #
 # For architectures that know their GCC __int128 support is sound
 #

@@ -1576,6 +1586,14 @@ config ADVISE_SYSCALLS
 	  applications use these syscalls, you can disable this option to save
 	  space.
 
+config USERFAULTFD
+	bool "Enable userfaultfd() system call"
+	select ANON_INODES
+	depends on MMU
+	help
+	  Enable the userfaultfd() system call that allows to intercept and
+	  handle page faults in userland.
+
 config PCI_QUIRKS
 	default y
 	bool "Enable PCI quirk workarounds" if EXPERT
@@ -1342,7 +1342,7 @@ static int cgroup_show_options(struct seq_file *seq,
 	if (root != &cgrp_dfl_root)
 		for_each_subsys(ss, ssid)
 			if (root->subsys_mask & (1 << ssid))
-				seq_printf(seq, ",%s", ss->legacy_name);
+				seq_show_option(seq, ss->name, NULL);
 	if (root->flags & CGRP_ROOT_NOPREFIX)
 		seq_puts(seq, ",noprefix");
 	if (root->flags & CGRP_ROOT_XATTR)

@@ -1350,13 +1350,14 @@ static int cgroup_show_options(struct seq_file *seq,
 
 	spin_lock(&release_agent_path_lock);
 	if (strlen(root->release_agent_path))
-		seq_printf(seq, ",release_agent=%s", root->release_agent_path);
+		seq_show_option(seq, "release_agent",
+				root->release_agent_path);
 	spin_unlock(&release_agent_path_lock);
 
 	if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags))
 		seq_puts(seq, ",clone_children");
 	if (strlen(root->name))
-		seq_printf(seq, ",name=%s", root->name);
+		seq_show_option(seq, "name", root->name);
 	return 0;
 }
 
@@ -454,8 +454,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 		tmp->vm_mm = mm;
 		if (anon_vma_fork(tmp, mpnt))
 			goto fail_nomem_anon_vma_fork;
-		tmp->vm_flags &= ~VM_LOCKED;
+		tmp->vm_flags &= ~(VM_LOCKED|VM_UFFD_MISSING|VM_UFFD_WP);
 		tmp->vm_next = tmp->vm_prev = NULL;
+		tmp->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
 		file = tmp->vm_file;
 		if (file) {
 			struct inode *inode = file_inode(file);
@@ -248,15 +248,16 @@ static void create_kthread(struct kthread_create_info *create)
  * kthread_create_on_node - create a kthread.
  * @threadfn: the function to run until signal_pending(current).
  * @data: data ptr for @threadfn.
- * @node: memory node number.
+ * @node: task and thread structures for the thread are allocated on this node
  * @namefmt: printf-style name for the thread.
  *
  * Description: This helper function creates and names a kernel
  * thread.  The thread will be stopped: use wake_up_process() to start
- * it.  See also kthread_run().
+ * it.  See also kthread_run().  The new thread has SCHED_NORMAL policy and
+ * is affine to all CPUs.
  *
  * If thread is going to be bound on a particular cpu, give its node
- * in @node, to get NUMA affinity for kthread stack, or else give -1.
+ * in @node, to get NUMA affinity for kthread stack, or else give NUMA_NO_NODE.
  * When woken, the thread will run @threadfn() with @data as its
  * argument. @threadfn() can either call do_exit() directly if it is a
  * standalone thread for which no one will call kthread_stop(), or
@@ -106,9 +106,10 @@ void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr)
 }
 EXPORT_SYMBOL_GPL(__wake_up_locked);
 
-void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
+void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, int nr,
+			  void *key)
 {
-	__wake_up_common(q, mode, 1, 0, key);
+	__wake_up_common(q, mode, nr, 0, key);
 }
 EXPORT_SYMBOL_GPL(__wake_up_locked_key);
 

@@ -283,7 +284,7 @@ void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait,
 	if (!list_empty(&wait->task_list))
 		list_del_init(&wait->task_list);
 	else if (waitqueue_active(q))
-		__wake_up_locked_key(q, mode, key);
+		__wake_up_locked_key(q, mode, 1, key);
 	spin_unlock_irqrestore(&q->lock, flags);
 }
 EXPORT_SYMBOL(abort_exclusive_wait);
@@ -113,7 +113,8 @@ static int smpboot_thread_fn(void *data)
 		if (kthread_should_stop()) {
 			__set_current_state(TASK_RUNNING);
 			preempt_enable();
-			if (ht->cleanup)
+			/* cleanup must mirror setup */
+			if (ht->cleanup && td->status != HP_THREAD_NONE)
 				ht->cleanup(td->cpu, cpu_online(td->cpu));
 			kfree(td);
 			return 0;

@@ -259,15 +260,6 @@ static void smpboot_destroy_threads(struct smp_hotplug_thread *ht)
 {
 	unsigned int cpu;
 
-	/* Unpark any threads that were voluntarily parked. */
-	for_each_cpu_not(cpu, ht->cpumask) {
-		if (cpu_online(cpu)) {
-			struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu);
-			if (tsk)
-				kthread_unpark(tsk);
-		}
-	}
-
 	/* We need to destroy also the parked threads of offline cpus */
 	for_each_possible_cpu(cpu) {
 		struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu);

@@ -281,19 +273,22 @@ static void smpboot_destroy_threads(struct smp_hotplug_thread *ht)
 }
 
 /**
- * smpboot_register_percpu_thread - Register a per_cpu thread related to hotplug
+ * smpboot_register_percpu_thread_cpumask - Register a per_cpu thread related
+ *					    to hotplug
  * @plug_thread:	Hotplug thread descriptor
+ * @cpumask:		The cpumask where threads run
  *
  * Creates and starts the threads on all online cpus.
  */
-int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread)
+int smpboot_register_percpu_thread_cpumask(struct smp_hotplug_thread *plug_thread,
+					   const struct cpumask *cpumask)
 {
 	unsigned int cpu;
 	int ret = 0;
 
 	if (!alloc_cpumask_var(&plug_thread->cpumask, GFP_KERNEL))
 		return -ENOMEM;
-	cpumask_copy(plug_thread->cpumask, cpu_possible_mask);
+	cpumask_copy(plug_thread->cpumask, cpumask);
 
 	get_online_cpus();
 	mutex_lock(&smpboot_threads_lock);

@@ -301,9 +296,11 @@ int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread)
 		ret = __smpboot_create_thread(plug_thread, cpu);
 		if (ret) {
 			smpboot_destroy_threads(plug_thread);
+			free_cpumask_var(plug_thread->cpumask);
 			goto out;
 		}
-		smpboot_unpark_thread(plug_thread, cpu);
+		if (cpumask_test_cpu(cpu, cpumask))
+			smpboot_unpark_thread(plug_thread, cpu);
 	}
 	list_add(&plug_thread->list, &hotplug_threads);
 out:

@@ -311,7 +308,7 @@ out:
 	put_online_cpus();
 	return ret;
 }
-EXPORT_SYMBOL_GPL(smpboot_register_percpu_thread);
+EXPORT_SYMBOL_GPL(smpboot_register_percpu_thread_cpumask);
 
 /**
  * smpboot_unregister_percpu_thread - Unregister a per_cpu thread related to hotplug
@@ -219,6 +219,7 @@ cond_syscall(compat_sys_timerfd_gettime);
 cond_syscall(sys_eventfd);
 cond_syscall(sys_eventfd2);
 cond_syscall(sys_memfd_create);
+cond_syscall(sys_userfaultfd);
 
 /* performance counters: */
 cond_syscall(sys_perf_event_open);
@@ -39,6 +39,7 @@ static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
 	cred->cap_inheritable = CAP_EMPTY_SET;
 	cred->cap_permitted = CAP_FULL_SET;
 	cred->cap_effective = CAP_FULL_SET;
+	cred->cap_ambient = CAP_EMPTY_SET;
 	cred->cap_bset = CAP_FULL_SET;
 #ifdef CONFIG_KEYS
 	key_put(cred->request_key_auth);
[Some files were not shown because too many files changed in this diff.]