Merge branch 'drbd-8.4_ed6' into for-3.8-drivers-drbd-8.4_ed6

Philipp Reisner 2012-11-09 14:18:43 +01:00
Parents ccae7868b0 328e0f125b
Commit 986836503e
30 changed files with 12120 additions and 8633 deletions

View file

@ -1,5 +1,7 @@
drbd-y := drbd_bitmap.o drbd_proc.o
drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o
drbd-y += drbd_main.o drbd_strings.o drbd_nl.o
drbd-y += drbd_interval.o drbd_state.o
drbd-y += drbd_nla.o
obj-$(CONFIG_BLK_DEV_DRBD) += drbd.o

File diff suppressed because it is too large

View file

@ -119,13 +119,9 @@ static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func)
if (!__ratelimit(&drbd_ratelimit_state))
return;
dev_err(DEV, "FIXME %s in %s, bitmap locked for '%s' by %s\n",
current == mdev->receiver.task ? "receiver" :
current == mdev->asender.task ? "asender" :
current == mdev->worker.task ? "worker" : current->comm,
func, b->bm_why ?: "?",
b->bm_task == mdev->receiver.task ? "receiver" :
b->bm_task == mdev->asender.task ? "asender" :
b->bm_task == mdev->worker.task ? "worker" : "?");
drbd_task_to_thread_name(mdev->tconn, current),
func, b->bm_why ?: "?",
drbd_task_to_thread_name(mdev->tconn, b->bm_task));
}
void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags)
@ -142,13 +138,9 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags)
if (trylock_failed) {
dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n",
current == mdev->receiver.task ? "receiver" :
current == mdev->asender.task ? "asender" :
current == mdev->worker.task ? "worker" : current->comm,
why, b->bm_why ?: "?",
b->bm_task == mdev->receiver.task ? "receiver" :
b->bm_task == mdev->asender.task ? "asender" :
b->bm_task == mdev->worker.task ? "worker" : "?");
drbd_task_to_thread_name(mdev->tconn, current),
why, b->bm_why ?: "?",
drbd_task_to_thread_name(mdev->tconn, b->bm_task));
mutex_lock(&b->bm_change);
}
if (BM_LOCKED_MASK & b->bm_flags)
@ -196,6 +188,9 @@ void drbd_bm_unlock(struct drbd_conf *mdev)
/* to mark for lazy writeout once syncer cleared all clearable bits,
* we remember if bits have been cleared since last IO. */
#define BM_PAGE_LAZY_WRITEOUT 28
/* pages marked with this "HINT" will be considered for writeout
* on activity log transactions */
#define BM_PAGE_HINT_WRITEOUT 27
/* store_page_idx uses non-atomic assignment. It is only used directly after
* allocating the page. All other bm_set_page_* and bm_clear_page_* need to
@ -227,8 +222,7 @@ static void bm_page_unlock_io(struct drbd_conf *mdev, int page_nr)
{
struct drbd_bitmap *b = mdev->bitmap;
void *addr = &page_private(b->bm_pages[page_nr]);
clear_bit(BM_PAGE_IO_LOCK, addr);
smp_mb__after_clear_bit();
clear_bit_unlock(BM_PAGE_IO_LOCK, addr);
wake_up(&mdev->bitmap->bm_io_wait);
}
@ -246,6 +240,27 @@ static void bm_set_page_need_writeout(struct page *page)
set_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page));
}
/**
* drbd_bm_mark_for_writeout() - mark a page with a "hint" to be considered for writeout
* @mdev: DRBD device.
* @page_nr: the bitmap page to mark with the "hint" flag
*
* From within an activity log transaction, we mark a few pages with these
* hints, then call drbd_bm_write_hinted(), which will only write out changed
* pages which are flagged with this mark.
*/
void drbd_bm_mark_for_writeout(struct drbd_conf *mdev, int page_nr)
{
struct page *page;
if (page_nr >= mdev->bitmap->bm_number_of_pages) {
dev_warn(DEV, "BAD: page_nr: %u, number_of_pages: %u\n",
page_nr, (int)mdev->bitmap->bm_number_of_pages);
return;
}
page = mdev->bitmap->bm_pages[page_nr];
set_bit(BM_PAGE_HINT_WRITEOUT, &page_private(page));
}
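
For clarity, a minimal usage sketch of the hint/flush pair introduced above (illustration only, not part of this diff; the helper name and calling context are assumed):

static void example_flush_hinted_pages(struct drbd_conf *mdev,
				       const int *page_nrs, int n)
{
	int i;

	/* mark the bitmap pages touched by an activity log transaction */
	for (i = 0; i < n; i++)
		drbd_bm_mark_for_writeout(mdev, page_nrs[i]);

	/* write out only the pages flagged above, and only if they changed */
	if (drbd_bm_write_hinted(mdev))
		dev_err(DEV, "hinted bitmap writeout failed\n");
}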
static int bm_test_page_unchanged(struct page *page)
{
volatile const unsigned long *addr = &page_private(page);
@ -376,7 +391,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
* GFP_NOIO, as this is called while drbd IO is "suspended",
* and during resize or attach on diskless Primary,
* we must not block on IO to ourselves.
* Context is receiver thread or cqueue thread/dmsetup. */
* Context is receiver thread or dmsetup. */
bytes = sizeof(struct page *)*want;
new_pages = kzalloc(bytes, GFP_NOIO);
if (!new_pages) {
@ -441,7 +456,8 @@ int drbd_bm_init(struct drbd_conf *mdev)
sector_t drbd_bm_capacity(struct drbd_conf *mdev)
{
ERR_IF(!mdev->bitmap) return 0;
if (!expect(mdev->bitmap))
return 0;
return mdev->bitmap->bm_dev_capacity;
}
@ -449,7 +465,8 @@ sector_t drbd_bm_capacity(struct drbd_conf *mdev)
*/
void drbd_bm_cleanup(struct drbd_conf *mdev)
{
ERR_IF (!mdev->bitmap) return;
if (!expect(mdev->bitmap))
return;
bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages);
bm_vk_free(mdev->bitmap->bm_pages, (BM_P_VMALLOCED & mdev->bitmap->bm_flags));
kfree(mdev->bitmap);
@ -612,7 +629,8 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
int err = 0, growing;
int opages_vmalloced;
ERR_IF(!b) return -ENOMEM;
if (!expect(b))
return -ENOMEM;
drbd_bm_lock(mdev, "resize", BM_LOCKED_MASK);
@ -734,8 +752,10 @@ unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev)
unsigned long s;
unsigned long flags;
ERR_IF(!b) return 0;
ERR_IF(!b->bm_pages) return 0;
if (!expect(b))
return 0;
if (!expect(b->bm_pages))
return 0;
spin_lock_irqsave(&b->bm_lock, flags);
s = b->bm_set;
@ -758,8 +778,10 @@ unsigned long drbd_bm_total_weight(struct drbd_conf *mdev)
size_t drbd_bm_words(struct drbd_conf *mdev)
{
struct drbd_bitmap *b = mdev->bitmap;
ERR_IF(!b) return 0;
ERR_IF(!b->bm_pages) return 0;
if (!expect(b))
return 0;
if (!expect(b->bm_pages))
return 0;
return b->bm_words;
}
@ -767,7 +789,8 @@ size_t drbd_bm_words(struct drbd_conf *mdev)
unsigned long drbd_bm_bits(struct drbd_conf *mdev)
{
struct drbd_bitmap *b = mdev->bitmap;
ERR_IF(!b) return 0;
if (!expect(b))
return 0;
return b->bm_bits;
}
@ -788,8 +811,10 @@ void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number,
end = offset + number;
ERR_IF(!b) return;
ERR_IF(!b->bm_pages) return;
if (!expect(b))
return;
if (!expect(b->bm_pages))
return;
if (number == 0)
return;
WARN_ON(offset >= b->bm_words);
@ -833,8 +858,10 @@ void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number,
end = offset + number;
ERR_IF(!b) return;
ERR_IF(!b->bm_pages) return;
if (!expect(b))
return;
if (!expect(b->bm_pages))
return;
spin_lock_irq(&b->bm_lock);
if ((offset >= b->bm_words) ||
@ -862,8 +889,10 @@ void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number,
void drbd_bm_set_all(struct drbd_conf *mdev)
{
struct drbd_bitmap *b = mdev->bitmap;
ERR_IF(!b) return;
ERR_IF(!b->bm_pages) return;
if (!expect(b))
return;
if (!expect(b->bm_pages))
return;
spin_lock_irq(&b->bm_lock);
bm_memset(b, 0, 0xff, b->bm_words);
@ -876,8 +905,10 @@ void drbd_bm_set_all(struct drbd_conf *mdev)
void drbd_bm_clear_all(struct drbd_conf *mdev)
{
struct drbd_bitmap *b = mdev->bitmap;
ERR_IF(!b) return;
ERR_IF(!b->bm_pages) return;
if (!expect(b))
return;
if (!expect(b->bm_pages))
return;
spin_lock_irq(&b->bm_lock);
bm_memset(b, 0, 0, b->bm_words);
@ -891,7 +922,8 @@ struct bm_aio_ctx {
unsigned int done;
unsigned flags;
#define BM_AIO_COPY_PAGES 1
#define BM_WRITE_ALL_PAGES 2
#define BM_AIO_WRITE_HINTED 2
#define BM_WRITE_ALL_PAGES 4
int error;
struct kref kref;
};
@ -1062,6 +1094,11 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx)
break;
if (rw & WRITE) {
if ((flags & BM_AIO_WRITE_HINTED) &&
!test_and_clear_bit(BM_PAGE_HINT_WRITEOUT,
&page_private(b->bm_pages[i])))
continue;
if (!(flags & BM_WRITE_ALL_PAGES) &&
bm_test_page_unchanged(b->bm_pages[i])) {
dynamic_dev_dbg(DEV, "skipped bm write for idx %u\n", i);
@ -1094,9 +1131,11 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
else
kref_put(&ctx->kref, &bm_aio_ctx_destroy);
dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n",
rw == WRITE ? "WRITE" : "READ",
count, jiffies - now);
/* summary for global bitmap IO */
if (flags == 0)
dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n",
rw == WRITE ? "WRITE" : "READ",
count, jiffies - now);
if (ctx->error) {
dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n");
@ -1117,8 +1156,9 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
}
now = b->bm_set;
dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);
if (flags == 0)
dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);
kref_put(&ctx->kref, &bm_aio_ctx_destroy);
return err;
@ -1181,9 +1221,17 @@ int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local)
return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, 0);
}
/**
* drbd_bm_write_hinted() - Write bitmap pages with "hint" marks, if they have changed.
* @mdev: DRBD device.
*/
int drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local)
{
return bm_rw(mdev, WRITE, BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES, 0);
}
/**
* drbd_bm_write_page: Writes a PAGE_SIZE aligned piece of bitmap
* drbd_bm_write_page() - Writes a PAGE_SIZE aligned piece of bitmap
* @mdev: DRBD device.
* @idx: bitmap page index
*
@ -1291,8 +1339,10 @@ static unsigned long bm_find_next(struct drbd_conf *mdev,
struct drbd_bitmap *b = mdev->bitmap;
unsigned long i = DRBD_END_OF_BITMAP;
ERR_IF(!b) return i;
ERR_IF(!b->bm_pages) return i;
if (!expect(b))
return i;
if (!expect(b->bm_pages))
return i;
spin_lock_irq(&b->bm_lock);
if (BM_DONT_TEST & b->bm_flags)
@ -1393,8 +1443,10 @@ static int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
struct drbd_bitmap *b = mdev->bitmap;
int c = 0;
ERR_IF(!b) return 1;
ERR_IF(!b->bm_pages) return 0;
if (!expect(b))
return 1;
if (!expect(b->bm_pages))
return 0;
spin_lock_irqsave(&b->bm_lock, flags);
if ((val ? BM_DONT_SET : BM_DONT_CLEAR) & b->bm_flags)
@ -1425,13 +1477,21 @@ static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b,
{
int i;
int bits;
int changed = 0;
unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr]);
for (i = first_word; i < last_word; i++) {
bits = hweight_long(paddr[i]);
paddr[i] = ~0UL;
b->bm_set += BITS_PER_LONG - bits;
changed += BITS_PER_LONG - bits;
}
kunmap_atomic(paddr);
if (changed) {
/* We only need lazy writeout, the information is still in the
* remote bitmap as well, and is reconstructed during the next
* bitmap exchange, if lost locally due to a crash. */
bm_set_page_lazy_writeout(b->bm_pages[page_nr]);
b->bm_set += changed;
}
}
/* Same thing as drbd_bm_set_bits,
@ -1526,8 +1586,10 @@ int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr)
unsigned long *p_addr;
int i;
ERR_IF(!b) return 0;
ERR_IF(!b->bm_pages) return 0;
if (!expect(b))
return 0;
if (!expect(b->bm_pages))
return 0;
spin_lock_irqsave(&b->bm_lock, flags);
if (BM_DONT_TEST & b->bm_flags)
@ -1561,8 +1623,10 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
* robust in case we screwed up elsewhere, in that case pretend there
* was one dirty bit in the requested area, so we won't try to do a
* local read there (no bitmap probably implies no disk) */
ERR_IF(!b) return 1;
ERR_IF(!b->bm_pages) return 1;
if (!expect(b))
return 1;
if (!expect(b->bm_pages))
return 1;
spin_lock_irqsave(&b->bm_lock, flags);
if (BM_DONT_TEST & b->bm_flags)
@ -1575,11 +1639,10 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
bm_unmap(p_addr);
p_addr = bm_map_pidx(b, idx);
}
ERR_IF (bitnr >= b->bm_bits) {
dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits);
} else {
if (expect(bitnr < b->bm_bits))
c += (0 != test_bit_le(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr));
}
else
dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits);
}
if (p_addr)
bm_unmap(p_addr);
@ -1609,8 +1672,10 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr)
unsigned long flags;
unsigned long *p_addr, *bm;
ERR_IF(!b) return 0;
ERR_IF(!b->bm_pages) return 0;
if (!expect(b))
return 0;
if (!expect(b->bm_pages))
return 0;
spin_lock_irqsave(&b->bm_lock, flags);
if (BM_DONT_TEST & b->bm_flags)
@ -1632,47 +1697,3 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr)
spin_unlock_irqrestore(&b->bm_lock, flags);
return count;
}
/* Set all bits covered by the AL-extent al_enr.
* Returns number of bits changed. */
unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr)
{
struct drbd_bitmap *b = mdev->bitmap;
unsigned long *p_addr, *bm;
unsigned long weight;
unsigned long s, e;
int count, i, do_now;
ERR_IF(!b) return 0;
ERR_IF(!b->bm_pages) return 0;
spin_lock_irq(&b->bm_lock);
if (BM_DONT_SET & b->bm_flags)
bm_print_lock_info(mdev);
weight = b->bm_set;
s = al_enr * BM_WORDS_PER_AL_EXT;
e = min_t(size_t, s + BM_WORDS_PER_AL_EXT, b->bm_words);
/* assert that s and e are on the same page */
D_ASSERT((e-1) >> (PAGE_SHIFT - LN2_BPL + 3)
== s >> (PAGE_SHIFT - LN2_BPL + 3));
count = 0;
if (s < b->bm_words) {
i = do_now = e-s;
p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s));
bm = p_addr + MLPP(s);
while (i--) {
count += hweight_long(*bm);
*bm = -1UL;
bm++;
}
bm_unmap(p_addr);
b->bm_set += do_now*BITS_PER_LONG - count;
if (e == b->bm_words)
b->bm_set -= bm_clear_surplus(b);
} else {
dev_err(DEV, "start offset (%lu) too large in drbd_bm_ALe_set_all\n", s);
}
weight = b->bm_set - weight;
spin_unlock_irq(&b->bm_lock);
return weight;
}

File diff suppressed because it is too large

View file

@ -0,0 +1,207 @@
#include <asm/bug.h>
#include <linux/rbtree_augmented.h>
#include "drbd_interval.h"
/**
* interval_end - return end of @node
*/
static inline
sector_t interval_end(struct rb_node *node)
{
struct drbd_interval *this = rb_entry(node, struct drbd_interval, rb);
return this->end;
}
/**
* compute_subtree_last - compute end of @node
*
* The end of an interval is the highest (start + (size >> 9)) value of this
* node and of its children. Called for @node and its parents whenever the end
* may have changed.
*/
static inline sector_t
compute_subtree_last(struct drbd_interval *node)
{
sector_t max = node->sector + (node->size >> 9);
if (node->rb.rb_left) {
sector_t left = interval_end(node->rb.rb_left);
if (left > max)
max = left;
}
if (node->rb.rb_right) {
sector_t right = interval_end(node->rb.rb_right);
if (right > max)
max = right;
}
return max;
}
static void augment_propagate(struct rb_node *rb, struct rb_node *stop)
{
while (rb != stop) {
struct drbd_interval *node = rb_entry(rb, struct drbd_interval, rb);
sector_t subtree_last = compute_subtree_last(node);
if (node->end == subtree_last)
break;
node->end = subtree_last;
rb = rb_parent(&node->rb);
}
}
static void augment_copy(struct rb_node *rb_old, struct rb_node *rb_new)
{
struct drbd_interval *old = rb_entry(rb_old, struct drbd_interval, rb);
struct drbd_interval *new = rb_entry(rb_new, struct drbd_interval, rb);
new->end = old->end;
}
static void augment_rotate(struct rb_node *rb_old, struct rb_node *rb_new)
{
struct drbd_interval *old = rb_entry(rb_old, struct drbd_interval, rb);
struct drbd_interval *new = rb_entry(rb_new, struct drbd_interval, rb);
new->end = old->end;
old->end = compute_subtree_last(old);
}
static const struct rb_augment_callbacks augment_callbacks = {
augment_propagate,
augment_copy,
augment_rotate,
};
/**
* drbd_insert_interval - insert a new interval into a tree
*/
bool
drbd_insert_interval(struct rb_root *root, struct drbd_interval *this)
{
struct rb_node **new = &root->rb_node, *parent = NULL;
BUG_ON(!IS_ALIGNED(this->size, 512));
while (*new) {
struct drbd_interval *here =
rb_entry(*new, struct drbd_interval, rb);
parent = *new;
if (this->sector < here->sector)
new = &(*new)->rb_left;
else if (this->sector > here->sector)
new = &(*new)->rb_right;
else if (this < here)
new = &(*new)->rb_left;
else if (this > here)
new = &(*new)->rb_right;
else
return false;
}
rb_link_node(&this->rb, parent, new);
rb_insert_augmented(&this->rb, root, &augment_callbacks);
return true;
}
/**
* drbd_contains_interval - check if a tree contains a given interval
* @sector: start sector of @interval
* @interval: may not be a valid pointer
*
* Returns true if the tree contains the node @interval with start sector @sector.
* Does not dereference @interval until @interval is known to be a valid object
* in @tree. Returns %false if @interval is in the tree but with a different
* sector number.
*/
bool
drbd_contains_interval(struct rb_root *root, sector_t sector,
struct drbd_interval *interval)
{
struct rb_node *node = root->rb_node;
while (node) {
struct drbd_interval *here =
rb_entry(node, struct drbd_interval, rb);
if (sector < here->sector)
node = node->rb_left;
else if (sector > here->sector)
node = node->rb_right;
else if (interval < here)
node = node->rb_left;
else if (interval > here)
node = node->rb_right;
else
return true;
}
return false;
}
/**
* drbd_remove_interval - remove an interval from a tree
*/
void
drbd_remove_interval(struct rb_root *root, struct drbd_interval *this)
{
rb_erase_augmented(&this->rb, root, &augment_callbacks);
}
/**
* drbd_find_overlap - search for an interval overlapping with [sector, sector + size)
* @sector: start sector
* @size: size, aligned to 512 bytes
*
* Returns an interval overlapping with [sector, sector + size), or NULL if
* there is none. When there is more than one overlapping interval in the
* tree, the interval with the lowest start sector is returned, and all other
* overlapping intervals will be on the right side of the tree, reachable with
* rb_next().
*/
struct drbd_interval *
drbd_find_overlap(struct rb_root *root, sector_t sector, unsigned int size)
{
struct rb_node *node = root->rb_node;
struct drbd_interval *overlap = NULL;
sector_t end = sector + (size >> 9);
BUG_ON(!IS_ALIGNED(size, 512));
while (node) {
struct drbd_interval *here =
rb_entry(node, struct drbd_interval, rb);
if (node->rb_left &&
sector < interval_end(node->rb_left)) {
/* Overlap if any must be on left side */
node = node->rb_left;
} else if (here->sector < end &&
sector < here->sector + (here->size >> 9)) {
overlap = here;
break;
} else if (sector >= here->sector) {
/* Overlap if any must be on right side */
node = node->rb_right;
} else
break;
}
return overlap;
}
struct drbd_interval *
drbd_next_overlap(struct drbd_interval *i, sector_t sector, unsigned int size)
{
sector_t end = sector + (size >> 9);
struct rb_node *node;
for (;;) {
node = rb_next(&i->rb);
if (!node)
return NULL;
i = rb_entry(node, struct drbd_interval, rb);
if (i->sector >= end)
return NULL;
if (sector < i->sector + (i->size >> 9))
return i;
}
}

View file

@ -0,0 +1,40 @@
#ifndef __DRBD_INTERVAL_H
#define __DRBD_INTERVAL_H
#include <linux/types.h>
#include <linux/rbtree.h>
struct drbd_interval {
struct rb_node rb;
sector_t sector; /* start sector of the interval */
unsigned int size; /* size in bytes */
sector_t end; /* highest interval end in subtree */
int local:1 /* local or remote request? */;
int waiting:1;
};
static inline void drbd_clear_interval(struct drbd_interval *i)
{
RB_CLEAR_NODE(&i->rb);
}
static inline bool drbd_interval_empty(struct drbd_interval *i)
{
return RB_EMPTY_NODE(&i->rb);
}
extern bool drbd_insert_interval(struct rb_root *, struct drbd_interval *);
extern bool drbd_contains_interval(struct rb_root *, sector_t,
struct drbd_interval *);
extern void drbd_remove_interval(struct rb_root *, struct drbd_interval *);
extern struct drbd_interval *drbd_find_overlap(struct rb_root *, sector_t,
unsigned int);
extern struct drbd_interval *drbd_next_overlap(struct drbd_interval *, sector_t,
unsigned int);
#define drbd_for_each_overlap(i, root, sector, size) \
for (i = drbd_find_overlap(root, sector, size); \
i; \
i = drbd_next_overlap(i, sector, size))
#endif /* __DRBD_INTERVAL_H */
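
As a usage illustration (not part of this patch), the lookup helpers above combine like this; the function name and log message are invented for the example:

static void example_log_conflicts(struct rb_root *root,
				  sector_t sector, unsigned int size)
{
	struct drbd_interval *i;

	/* visit every interval overlapping [sector, sector + size) */
	drbd_for_each_overlap(i, root, sector, size)
		pr_info("conflict at sector %llu, %u bytes\n",
			(unsigned long long)i->sector, i->size);
}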

File diff suppressed because it is too large

File diff suppressed because it is too large

View file

@ -0,0 +1,55 @@
#include "drbd_wrappers.h"
#include <linux/kernel.h>
#include <net/netlink.h>
#include <linux/drbd_genl_api.h>
#include "drbd_nla.h"
static int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla)
{
struct nlattr *head = nla_data(nla);
int len = nla_len(nla);
int rem;
/*
* validate_nla (called from nla_parse_nested) ignores attributes
* beyond maxtype, and does not understand the DRBD_GENLA_F_MANDATORY flag.
* In order to have it validate attributes with the DRBD_GENLA_F_MANDATORY
* flag set also, check and remove that flag before calling
* nla_parse_nested.
*/
nla_for_each_attr(nla, head, len, rem) {
if (nla->nla_type & DRBD_GENLA_F_MANDATORY) {
nla->nla_type &= ~DRBD_GENLA_F_MANDATORY;
if (nla_type(nla) > maxtype)
return -EOPNOTSUPP;
}
}
return 0;
}
int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla,
const struct nla_policy *policy)
{
int err;
err = drbd_nla_check_mandatory(maxtype, nla);
if (!err)
err = nla_parse_nested(tb, maxtype, nla, policy);
return err;
}
struct nlattr *drbd_nla_find_nested(int maxtype, struct nlattr *nla, int attrtype)
{
int err;
/*
* If any nested attribute has the DRBD_GENLA_F_MANDATORY flag set and
* we don't know about that attribute, reject all the nested
* attributes.
*/
err = drbd_nla_check_mandatory(maxtype, nla);
if (err)
return ERR_PTR(err);
return nla_find_nested(nla, attrtype);
}
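
A hedged sketch of how drbd_nla_parse_nested() is meant to be called (not part of this diff); the attribute tags and policy below are made up solely for the example:

/* example-only attribute numbering and policy */
enum { EX_A_NAME = 1, __EX_A_MAX };
static const struct nla_policy example_policy[__EX_A_MAX] = {
	[EX_A_NAME] = { .type = NLA_NUL_STRING, .len = 31 },
};

static int example_parse(struct nlattr *nested_attr)
{
	struct nlattr *tb[__EX_A_MAX];
	int err;

	/* strips DRBD_GENLA_F_MANDATORY, then hands off to nla_parse_nested() */
	err = drbd_nla_parse_nested(tb, __EX_A_MAX - 1, nested_attr, example_policy);
	if (err)
		return err;
	if (tb[EX_A_NAME])
		pr_info("name attribute: %s\n", (char *)nla_data(tb[EX_A_NAME]));
	return 0;
}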

View file

@ -0,0 +1,8 @@
#ifndef __DRBD_NLA_H
#define __DRBD_NLA_H
extern int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla,
const struct nla_policy *policy);
extern struct nlattr *drbd_nla_find_nested(int maxtype, struct nlattr *nla, int attrtype);
#endif /* __DRBD_NLA_H */

View file

@ -171,7 +171,7 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
if (mdev->state.conn == C_VERIFY_S ||
mdev->state.conn == C_VERIFY_T) {
bit_pos = bm_bits - mdev->ov_left;
if (mdev->agreed_pro_version >= 97)
if (verify_can_do_stop_sector(mdev))
stop_sector = mdev->ov_stop_sector;
} else
bit_pos = mdev->bm_resync_fo;
@ -200,9 +200,11 @@ static void resync_dump_detail(struct seq_file *seq, struct lc_element *e)
static int drbd_seq_show(struct seq_file *seq, void *v)
{
int i, hole = 0;
int i, prev_i = -1;
const char *sn;
struct drbd_conf *mdev;
struct net_conf *nc;
char wp;
static char write_ordering_chars[] = {
[WO_none] = 'n',
@ -233,16 +235,11 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
oos .. known out-of-sync kB
*/
for (i = 0; i < minor_count; i++) {
mdev = minor_to_mdev(i);
if (!mdev) {
hole = 1;
continue;
}
if (hole) {
hole = 0;
rcu_read_lock();
idr_for_each_entry(&minors, mdev, i) {
if (prev_i != i - 1)
seq_printf(seq, "\n");
}
prev_i = i;
sn = drbd_conn_str(mdev->state.conn);
@ -254,6 +251,8 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
/* reset mdev->congestion_reason */
bdi_rw_congested(&mdev->rq_queue->backing_dev_info);
nc = rcu_dereference(mdev->tconn->net_conf);
wp = nc ? nc->wire_protocol - DRBD_PROT_A + 'A' : ' ';
seq_printf(seq,
"%2d: cs:%s ro:%s/%s ds:%s/%s %c %c%c%c%c%c%c\n"
" ns:%u nr:%u dw:%u dr:%u al:%u bm:%u "
@ -263,14 +262,13 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
drbd_role_str(mdev->state.peer),
drbd_disk_str(mdev->state.disk),
drbd_disk_str(mdev->state.pdsk),
(mdev->net_conf == NULL ? ' ' :
(mdev->net_conf->wire_protocol - DRBD_PROT_A+'A')),
is_susp(mdev->state) ? 's' : 'r',
wp,
drbd_suspended(mdev) ? 's' : 'r',
mdev->state.aftr_isp ? 'a' : '-',
mdev->state.peer_isp ? 'p' : '-',
mdev->state.user_isp ? 'u' : '-',
mdev->congestion_reason ?: '-',
drbd_test_flag(mdev, AL_SUSPENDED) ? 's' : '-',
test_bit(AL_SUSPENDED, &mdev->flags) ? 's' : '-',
mdev->send_cnt/2,
mdev->recv_cnt/2,
mdev->writ_cnt/2,
@ -282,8 +280,8 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
atomic_read(&mdev->rs_pending_cnt),
atomic_read(&mdev->unacked_cnt),
atomic_read(&mdev->ap_bio_cnt),
mdev->epochs,
write_ordering_chars[mdev->write_ordering]
mdev->tconn->epochs,
write_ordering_chars[mdev->tconn->write_ordering]
);
seq_printf(seq, " oos:%llu\n",
Bit2KB((unsigned long long)
@ -308,6 +306,7 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
}
}
}
rcu_read_unlock();
return 0;
}

File diff suppressed because it is too large

File diff suppressed because it is too large

View file

@ -77,40 +77,41 @@
*/
enum drbd_req_event {
created,
to_be_send,
to_be_submitted,
CREATED,
TO_BE_SENT,
TO_BE_SUBMITTED,
/* XXX yes, now I am inconsistent...
* these are not "events" but "actions"
* oh, well... */
queue_for_net_write,
queue_for_net_read,
queue_for_send_oos,
QUEUE_FOR_NET_WRITE,
QUEUE_FOR_NET_READ,
QUEUE_FOR_SEND_OOS,
send_canceled,
send_failed,
handed_over_to_network,
oos_handed_to_network,
connection_lost_while_pending,
read_retry_remote_canceled,
recv_acked_by_peer,
write_acked_by_peer,
write_acked_by_peer_and_sis, /* and set_in_sync */
conflict_discarded_by_peer,
neg_acked,
barrier_acked, /* in protocol A and B */
data_received, /* (remote read) */
SEND_CANCELED,
SEND_FAILED,
HANDED_OVER_TO_NETWORK,
OOS_HANDED_TO_NETWORK,
CONNECTION_LOST_WHILE_PENDING,
READ_RETRY_REMOTE_CANCELED,
RECV_ACKED_BY_PEER,
WRITE_ACKED_BY_PEER,
WRITE_ACKED_BY_PEER_AND_SIS, /* and set_in_sync */
CONFLICT_RESOLVED,
POSTPONE_WRITE,
NEG_ACKED,
BARRIER_ACKED, /* in protocol A and B */
DATA_RECEIVED, /* (remote read) */
read_completed_with_error,
read_ahead_completed_with_error,
write_completed_with_error,
abort_disk_io,
completed_ok,
resend,
fail_frozen_disk_io,
restart_frozen_disk_io,
nothing, /* for tracing only */
READ_COMPLETED_WITH_ERROR,
READ_AHEAD_COMPLETED_WITH_ERROR,
WRITE_COMPLETED_WITH_ERROR,
ABORT_DISK_IO,
COMPLETED_OK,
RESEND,
FAIL_FROZEN_DISK_IO,
RESTART_FROZEN_DISK_IO,
NOTHING,
};
/* encoding of request states for now. we don't actually need that many bits.
@ -142,8 +143,8 @@ enum drbd_req_state_bits {
* recv_ack (B) or implicit "ack" (A),
* still waiting for the barrier ack.
* master_bio may already be completed and invalidated.
* 11100: write_acked (C),
* data_received (for remote read, any protocol)
* 11100: write acked (C),
* data received (for remote read, any protocol)
* or finally the barrier ack has arrived (B,A)...
* request can be freed
* 01100: neg-acked (write, protocol C)
@ -198,6 +199,22 @@ enum drbd_req_state_bits {
/* Should call drbd_al_complete_io() for this request... */
__RQ_IN_ACT_LOG,
/* The peer has sent a retry ACK */
__RQ_POSTPONED,
/* would have been completed,
* but was not, because of drbd_suspended() */
__RQ_COMPLETION_SUSP,
/* We expect a receive ACK (wire proto B) */
__RQ_EXP_RECEIVE_ACK,
/* We expect a write ACK (wire proto C) */
__RQ_EXP_WRITE_ACK,
/* waiting for a barrier ack, did an extra kref_get */
__RQ_EXP_BARR_ACK,
};
#define RQ_LOCAL_PENDING (1UL << __RQ_LOCAL_PENDING)
@ -219,56 +236,16 @@ enum drbd_req_state_bits {
#define RQ_WRITE (1UL << __RQ_WRITE)
#define RQ_IN_ACT_LOG (1UL << __RQ_IN_ACT_LOG)
#define RQ_POSTPONED (1UL << __RQ_POSTPONED)
#define RQ_COMPLETION_SUSP (1UL << __RQ_COMPLETION_SUSP)
#define RQ_EXP_RECEIVE_ACK (1UL << __RQ_EXP_RECEIVE_ACK)
#define RQ_EXP_WRITE_ACK (1UL << __RQ_EXP_WRITE_ACK)
#define RQ_EXP_BARR_ACK (1UL << __RQ_EXP_BARR_ACK)
/* For waking up the frozen transfer log, mod_req() has to return whether the
request should be counted in the epoch object */
#define MR_WRITE_SHIFT 0
#define MR_WRITE (1 << MR_WRITE_SHIFT)
#define MR_READ_SHIFT 1
#define MR_READ (1 << MR_READ_SHIFT)
/* epoch entries */
static inline
struct hlist_head *ee_hash_slot(struct drbd_conf *mdev, sector_t sector)
{
BUG_ON(mdev->ee_hash_s == 0);
return mdev->ee_hash +
((unsigned int)(sector>>HT_SHIFT) % mdev->ee_hash_s);
}
/* transfer log (drbd_request objects) */
static inline
struct hlist_head *tl_hash_slot(struct drbd_conf *mdev, sector_t sector)
{
BUG_ON(mdev->tl_hash_s == 0);
return mdev->tl_hash +
((unsigned int)(sector>>HT_SHIFT) % mdev->tl_hash_s);
}
/* application reads (drbd_request objects) */
static struct hlist_head *ar_hash_slot(struct drbd_conf *mdev, sector_t sector)
{
return mdev->app_reads_hash
+ ((unsigned int)(sector) % APP_R_HSIZE);
}
/* when we receive the answer for a read request,
* verify that we actually know about it */
static inline struct drbd_request *_ar_id_to_req(struct drbd_conf *mdev,
u64 id, sector_t sector)
{
struct hlist_head *slot = ar_hash_slot(mdev, sector);
struct hlist_node *n;
struct drbd_request *req;
hlist_for_each_entry(req, n, slot, collision) {
if ((unsigned long)req == (unsigned long)id) {
D_ASSERT(req->sector == sector);
return req;
}
}
return NULL;
}
#define MR_WRITE 1
#define MR_READ 2
static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bio *bio_src)
{
@ -278,41 +255,10 @@ static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bi
req->private_bio = bio;
bio->bi_private = req;
bio->bi_end_io = drbd_endio_pri;
bio->bi_end_io = drbd_request_endio;
bio->bi_next = NULL;
}
static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev,
struct bio *bio_src)
{
struct drbd_request *req =
mempool_alloc(drbd_request_mempool, GFP_NOIO);
if (likely(req)) {
drbd_req_make_private_bio(req, bio_src);
req->rq_state = bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0;
req->mdev = mdev;
req->master_bio = bio_src;
req->epoch = 0;
req->sector = bio_src->bi_sector;
req->size = bio_src->bi_size;
INIT_HLIST_NODE(&req->collision);
INIT_LIST_HEAD(&req->tl_requests);
INIT_LIST_HEAD(&req->w.list);
}
return req;
}
static inline void drbd_req_free(struct drbd_request *req)
{
mempool_free(req, drbd_request_mempool);
}
static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
{
return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
}
/* Short lived temporary struct on the stack.
* We could squirrel the error to be returned into
* bio->bi_size, or similar. But that would be too ugly. */
@ -321,6 +267,7 @@ struct bio_and_error {
int error;
};
extern void drbd_req_destroy(struct kref *kref);
extern void _req_may_be_done(struct drbd_request *req,
struct bio_and_error *m);
extern int __req_mod(struct drbd_request *req, enum drbd_req_event what,
@ -328,13 +275,17 @@ extern int __req_mod(struct drbd_request *req, enum drbd_req_event what,
extern void complete_master_bio(struct drbd_conf *mdev,
struct bio_and_error *m);
extern void request_timer_fn(unsigned long data);
extern void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what);
extern void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what);
extern void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what);
/* this is in drbd_main.c */
extern void drbd_restart_request(struct drbd_request *req);
/* use this if you don't want to deal with calling complete_master_bio()
* outside the spinlock, e.g. when walking some list on cleanup. */
static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what)
{
struct drbd_conf *mdev = req->mdev;
struct drbd_conf *mdev = req->w.mdev;
struct bio_and_error m;
int rv;
@ -354,13 +305,13 @@ static inline int req_mod(struct drbd_request *req,
enum drbd_req_event what)
{
unsigned long flags;
struct drbd_conf *mdev = req->mdev;
struct drbd_conf *mdev = req->w.mdev;
struct bio_and_error m;
int rv;
spin_lock_irqsave(&mdev->req_lock, flags);
spin_lock_irqsave(&mdev->tconn->req_lock, flags);
rv = __req_mod(req, what, &m);
spin_unlock_irqrestore(&mdev->req_lock, flags);
spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
if (m.bio)
complete_master_bio(mdev, &m);
@ -368,7 +319,7 @@ static inline int req_mod(struct drbd_request *req,
return rv;
}
static inline bool drbd_should_do_remote(union drbd_state s)
static inline bool drbd_should_do_remote(union drbd_dev_state s)
{
return s.pdsk == D_UP_TO_DATE ||
(s.pdsk >= D_INCONSISTENT &&
@ -378,7 +329,7 @@ static inline bool drbd_should_do_remote(union drbd_state s)
That is equivalent since before 96 IO was frozen in the C_WF_BITMAP*
states. */
}
static inline bool drbd_should_send_oos(union drbd_state s)
static inline bool drbd_should_send_out_of_sync(union drbd_dev_state s)
{
return s.conn == C_AHEAD || s.conn == C_WF_BITMAP_S;
/* pdsk = D_INCONSISTENT as a consequence. Protocol 96 check not necessary

File diff suppressed because it is too large

View file

@ -0,0 +1,161 @@
#ifndef DRBD_STATE_H
#define DRBD_STATE_H
struct drbd_conf;
struct drbd_tconn;
/**
* DOC: DRBD State macros
*
* These macros are used to express state changes in easily readable form.
*
* The NS macros expand to a mask and a value, that can be bit ored onto the
* current state as soon as the spinlock (req_lock) was taken.
*
* The _NS macros are used for state functions that get called with the
* spinlock. These macros expand directly to the new state value.
*
* Besides the basic forms NS() and _NS() additional _?NS[23] are defined
* to express state changes that affect more than one aspect of the state.
*
* E.g. NS2(conn, C_CONNECTED, peer, R_SECONDARY)
* Means that the network connection was established and that the peer
* is in secondary role.
*/
#define role_MASK R_MASK
#define peer_MASK R_MASK
#define disk_MASK D_MASK
#define pdsk_MASK D_MASK
#define conn_MASK C_MASK
#define susp_MASK 1
#define user_isp_MASK 1
#define aftr_isp_MASK 1
#define susp_nod_MASK 1
#define susp_fen_MASK 1
#define NS(T, S) \
({ union drbd_state mask; mask.i = 0; mask.T = T##_MASK; mask; }), \
({ union drbd_state val; val.i = 0; val.T = (S); val; })
#define NS2(T1, S1, T2, S2) \
({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \
mask.T2 = T2##_MASK; mask; }), \
({ union drbd_state val; val.i = 0; val.T1 = (S1); \
val.T2 = (S2); val; })
#define NS3(T1, S1, T2, S2, T3, S3) \
({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \
mask.T2 = T2##_MASK; mask.T3 = T3##_MASK; mask; }), \
({ union drbd_state val; val.i = 0; val.T1 = (S1); \
val.T2 = (S2); val.T3 = (S3); val; })
#define _NS(D, T, S) \
D, ({ union drbd_state __ns; __ns = drbd_read_state(D); __ns.T = (S); __ns; })
#define _NS2(D, T1, S1, T2, S2) \
D, ({ union drbd_state __ns; __ns = drbd_read_state(D); __ns.T1 = (S1); \
__ns.T2 = (S2); __ns; })
#define _NS3(D, T1, S1, T2, S2, T3, S3) \
D, ({ union drbd_state __ns; __ns = drbd_read_state(D); __ns.T1 = (S1); \
__ns.T2 = (S2); __ns.T3 = (S3); __ns; })
enum chg_state_flags {
CS_HARD = 1 << 0,
CS_VERBOSE = 1 << 1,
CS_WAIT_COMPLETE = 1 << 2,
CS_SERIALIZE = 1 << 3,
CS_ORDERED = CS_WAIT_COMPLETE + CS_SERIALIZE,
CS_LOCAL_ONLY = 1 << 4, /* Do not consider a device pair wide state change */
CS_DC_ROLE = 1 << 5, /* DC = display as connection state change */
CS_DC_PEER = 1 << 6,
CS_DC_CONN = 1 << 7,
CS_DC_DISK = 1 << 8,
CS_DC_PDSK = 1 << 9,
CS_DC_SUSP = 1 << 10,
CS_DC_MASK = CS_DC_ROLE + CS_DC_PEER + CS_DC_CONN + CS_DC_DISK + CS_DC_PDSK,
CS_IGN_OUTD_FAIL = 1 << 11,
};
/* drbd_dev_state and drbd_state are different types. This is to stress the
small difference. There is no suspended flag (.susp), and no suspended
while fence handler runs flag (susp_fen). */
union drbd_dev_state {
struct {
#if defined(__LITTLE_ENDIAN_BITFIELD)
unsigned role:2 ; /* 3/4 primary/secondary/unknown */
unsigned peer:2 ; /* 3/4 primary/secondary/unknown */
unsigned conn:5 ; /* 17/32 cstates */
unsigned disk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */
unsigned pdsk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */
unsigned _unused:1 ;
unsigned aftr_isp:1 ; /* isp .. imposed sync pause */
unsigned peer_isp:1 ;
unsigned user_isp:1 ;
unsigned _pad:11; /* 0 unused */
#elif defined(__BIG_ENDIAN_BITFIELD)
unsigned _pad:11;
unsigned user_isp:1 ;
unsigned peer_isp:1 ;
unsigned aftr_isp:1 ; /* isp .. imposed sync pause */
unsigned _unused:1 ;
unsigned pdsk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */
unsigned disk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */
unsigned conn:5 ; /* 17/32 cstates */
unsigned peer:2 ; /* 3/4 primary/secondary/unknown */
unsigned role:2 ; /* 3/4 primary/secondary/unknown */
#else
# error "this endianness is not supported"
#endif
};
unsigned int i;
};
extern enum drbd_state_rv drbd_change_state(struct drbd_conf *mdev,
enum chg_state_flags f,
union drbd_state mask,
union drbd_state val);
extern void drbd_force_state(struct drbd_conf *, union drbd_state,
union drbd_state);
extern enum drbd_state_rv _drbd_request_state(struct drbd_conf *,
union drbd_state,
union drbd_state,
enum chg_state_flags);
extern enum drbd_state_rv __drbd_set_state(struct drbd_conf *, union drbd_state,
enum chg_state_flags,
struct completion *done);
extern void print_st_err(struct drbd_conf *, union drbd_state,
union drbd_state, int);
enum drbd_state_rv
_conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val,
enum chg_state_flags flags);
enum drbd_state_rv
conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val,
enum chg_state_flags flags);
extern void drbd_resume_al(struct drbd_conf *mdev);
extern bool conn_all_vols_unconf(struct drbd_tconn *tconn);
/**
* drbd_request_state() - Request a state change
* @mdev: DRBD device.
* @mask: mask of state bits to change.
* @val: value of new state bits.
*
* This is the most graceful way of requesting a state change. It is quite
* verbose in case the state change is not possible, and all those
* state changes are globally serialized.
*/
static inline int drbd_request_state(struct drbd_conf *mdev,
union drbd_state mask,
union drbd_state val)
{
return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED);
}
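
For illustration only (not part of this header), this is how the NS()/NS2() mask/value helpers from the top of the file pair with drbd_request_state(); the wrapper function is invented for the example:

static int example_state_changes(struct drbd_conf *mdev)
{
	int rv;

	/* change a single state field: the connection state */
	rv = drbd_request_state(mdev, NS(conn, C_DISCONNECTING));
	if (rv < SS_SUCCESS)
		return rv;

	/* change two fields at once, as in the NS2() example above */
	return drbd_request_state(mdev, NS2(conn, C_CONNECTED, peer, R_SECONDARY));
}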
enum drbd_role conn_highest_role(struct drbd_tconn *tconn);
enum drbd_role conn_highest_peer(struct drbd_tconn *tconn);
enum drbd_disk_state conn_highest_disk(struct drbd_tconn *tconn);
enum drbd_disk_state conn_lowest_disk(struct drbd_tconn *tconn);
enum drbd_disk_state conn_highest_pdsk(struct drbd_tconn *tconn);
enum drbd_conns conn_lowest_conn(struct drbd_tconn *tconn);
#endif

View file

@ -89,6 +89,7 @@ static const char *drbd_state_sw_errors[] = {
[-SS_LOWER_THAN_OUTDATED] = "Disk state is lower than outdated",
[-SS_IN_TRANSIENT_STATE] = "In transient state, retry after next state change",
[-SS_CONCURRENT_ST_CHG] = "Concurrent state changes detected and aborted",
[-SS_O_VOL_PEER_PRI] = "Other vol primary on peer not allowed by config",
};
const char *drbd_conn_str(enum drbd_conns s)

File diff suppressed because it is too large

View file

@ -3,6 +3,7 @@
#include <linux/ctype.h>
#include <linux/mm.h>
#include "drbd_int.h"
/* see get_sb_bdev and bd_claim */
extern char *drbd_sec_holder;
@ -20,8 +21,8 @@ static inline void drbd_set_my_capacity(struct drbd_conf *mdev,
/* bi_end_io handlers */
extern void drbd_md_io_complete(struct bio *bio, int error);
extern void drbd_endio_sec(struct bio *bio, int error);
extern void drbd_endio_pri(struct bio *bio, int error);
extern void drbd_peer_request_endio(struct bio *bio, int error);
extern void drbd_request_endio(struct bio *bio, int error);
/*
* used to submit our private bio
@ -45,12 +46,6 @@ static inline void drbd_generic_make_request(struct drbd_conf *mdev,
generic_make_request(bio);
}
static inline int drbd_crypto_is_hash(struct crypto_tfm *tfm)
{
return (crypto_tfm_alg_type(tfm) & CRYPTO_ALG_TYPE_HASH_MASK)
== CRYPTO_ALG_TYPE_HASH;
}
#ifndef __CHECKER__
# undef __cond_lock
# define __cond_lock(x,c) (c)

View file

@ -51,12 +51,11 @@
#endif
extern const char *drbd_buildtag(void);
#define REL_VERSION "8.3.14"
#define API_VERSION 88
#define REL_VERSION "8.4.2"
#define API_VERSION 1
#define PRO_VERSION_MIN 86
#define PRO_VERSION_MAX 97
#define PRO_VERSION_MAX 101
enum drbd_io_error_p {
@ -66,7 +65,8 @@ enum drbd_io_error_p {
};
enum drbd_fencing_p {
FP_DONT_CARE,
FP_NOT_AVAIL = -1, /* Not a policy */
FP_DONT_CARE = 0,
FP_RESOURCE,
FP_STONITH
};
@ -102,6 +102,20 @@ enum drbd_on_congestion {
OC_DISCONNECT,
};
enum drbd_read_balancing {
RB_PREFER_LOCAL,
RB_PREFER_REMOTE,
RB_ROUND_ROBIN,
RB_LEAST_PENDING,
RB_CONGESTED_REMOTE,
RB_32K_STRIPING,
RB_64K_STRIPING,
RB_128K_STRIPING,
RB_256K_STRIPING,
RB_512K_STRIPING,
RB_1M_STRIPING,
};
/* KEEP the order, do not delete or insert. Only append. */
enum drbd_ret_code {
ERR_CODE_BASE = 100,
@ -122,7 +136,7 @@ enum drbd_ret_code {
ERR_AUTH_ALG = 120,
ERR_AUTH_ALG_ND = 121,
ERR_NOMEM = 122,
ERR_DISCARD = 123,
ERR_DISCARD_IMPOSSIBLE = 123,
ERR_DISK_CONFIGURED = 124,
ERR_NET_CONFIGURED = 125,
ERR_MANDATORY_TAG = 126,
@ -130,8 +144,8 @@ enum drbd_ret_code {
ERR_INTR = 129, /* EINTR */
ERR_RESIZE_RESYNC = 130,
ERR_NO_PRIMARY = 131,
ERR_SYNC_AFTER = 132,
ERR_SYNC_AFTER_CYCLE = 133,
ERR_RESYNC_AFTER = 132,
ERR_RESYNC_AFTER_CYCLE = 133,
ERR_PAUSE_IS_SET = 134,
ERR_PAUSE_IS_CLEAR = 135,
ERR_PACKET_NR = 137,
@ -155,6 +169,14 @@ enum drbd_ret_code {
ERR_CONG_NOT_PROTO_A = 155,
ERR_PIC_AFTER_DEP = 156,
ERR_PIC_PEER_DEP = 157,
ERR_RES_NOT_KNOWN = 158,
ERR_RES_IN_USE = 159,
ERR_MINOR_CONFIGURED = 160,
ERR_MINOR_EXISTS = 161,
ERR_INVALID_REQUEST = 162,
ERR_NEED_APV_100 = 163,
ERR_NEED_ALLOW_TWO_PRI = 164,
ERR_MD_UNCLEAN = 165,
/* insert new ones above this line */
AFTER_LAST_ERR_CODE
@ -296,7 +318,8 @@ enum drbd_state_rv {
SS_NOT_SUPPORTED = -17, /* drbd-8.2 only */
SS_IN_TRANSIENT_STATE = -18, /* Retry after the next state change */
SS_CONCURRENT_ST_CHG = -19, /* Concurrent cluster side state change! */
SS_AFTER_LAST_ERROR = -20, /* Keep this at bottom */
SS_O_VOL_PEER_PRI = -20,
SS_AFTER_LAST_ERROR = -21, /* Keep this at bottom */
};
/* from drbd_strings.c */
@ -313,7 +336,9 @@ extern const char *drbd_set_st_err_str(enum drbd_state_rv);
#define MDF_FULL_SYNC (1 << 3)
#define MDF_WAS_UP_TO_DATE (1 << 4)
#define MDF_PEER_OUT_DATED (1 << 5)
#define MDF_CRASHED_PRIMARY (1 << 6)
#define MDF_CRASHED_PRIMARY (1 << 6)
#define MDF_AL_CLEAN (1 << 7)
#define MDF_AL_DISABLED (1 << 8)
enum drbd_uuid_index {
UI_CURRENT,
@ -333,37 +358,23 @@ enum drbd_timeout_flag {
#define UUID_JUST_CREATED ((__u64)4)
/* magic numbers used in meta data and network packets */
#define DRBD_MAGIC 0x83740267
#define BE_DRBD_MAGIC __constant_cpu_to_be32(DRBD_MAGIC)
#define DRBD_MAGIC_BIG 0x835a
#define BE_DRBD_MAGIC_BIG __constant_cpu_to_be16(DRBD_MAGIC_BIG)
#define DRBD_MAGIC_100 0x8620ec20
#define DRBD_MD_MAGIC_07 (DRBD_MAGIC+3)
#define DRBD_MD_MAGIC_08 (DRBD_MAGIC+4)
#define DRBD_MD_MAGIC_84_UNCLEAN (DRBD_MAGIC+5)
/* how I came up with this magic?
* base64 decode "actlog==" ;) */
#define DRBD_AL_MAGIC 0x69cb65a2
/* these are of type "int" */
#define DRBD_MD_INDEX_INTERNAL -1
#define DRBD_MD_INDEX_FLEX_EXT -2
#define DRBD_MD_INDEX_FLEX_INT -3
/* Start of the new netlink/connector stuff */
#define DRBD_NL_CREATE_DEVICE 0x01
#define DRBD_NL_SET_DEFAULTS 0x02
/* For searching a vacant cn_idx value */
#define CN_IDX_STEP 6977
struct drbd_nl_cfg_req {
int packet_type;
unsigned int drbd_minor;
int flags;
unsigned short tag_list[];
};
struct drbd_nl_cfg_reply {
int packet_type;
unsigned int minor;
int ret_code; /* enum ret_code or set_st_err_t */
unsigned short tag_list[]; /* only used with get_* calls */
};
#endif

include/linux/drbd_genl.h (new file, 378 lines)
View file

@ -0,0 +1,378 @@
/*
* General overview:
* full generic netlink message:
* |nlmsghdr|genlmsghdr|<payload>
*
* payload:
* |optional fixed size family header|<sequence of netlink attributes>
*
* sequence of netlink attributes:
* I chose to have all "top level" attributes NLA_NESTED,
* corresponding to some real struct.
* So we have a sequence of |tla, len|<nested nla sequence>
*
* nested nla sequence:
* may be empty, or contain a sequence of netlink attributes
* representing the struct fields.
*
* The tag number of any field (regardless of containing struct)
* will be available as T_ ## field_name,
* so you cannot have the same field name in two different structs.
*
* The tag numbers themselves are per struct, though,
* so should always begin at 1 (not 0, that is the special "NLA_UNSPEC" type,
* which we won't use here).
* The tag numbers are used as index in the respective nla_policy array.
*
* GENL_struct(tag_name, tag_number, struct name, struct fields) - struct and policy
* genl_magic_struct.h
* generates the struct declaration,
* generates an entry in the tla enum,
* genl_magic_func.h
* generates an entry in the static tla policy
* with .type = NLA_NESTED
* generates the static <struct_name>_nl_policy definition,
* and static conversion functions
*
* genl_magic_func.h
*
* GENL_mc_group(group)
* genl_magic_struct.h
* does nothing
* genl_magic_func.h
* defines and registers the mcast group,
* and provides a send helper
*
* GENL_notification(op_name, op_num, mcast_group, tla list)
* These are notifications to userspace.
*
* genl_magic_struct.h
* generates an entry in the genl_ops enum,
* genl_magic_func.h
* does nothing
*
* mcast group: the name of the mcast group this notification should be
* expected on
* tla list: the list of expected top level attributes,
* for documentation and sanity checking.
*
* GENL_op(op_name, op_num, flags and handler, tla list) - "genl operations"
* These are requests from userspace.
*
* _op and _notification share the same "number space",
* op_nr will be assigned to "genlmsghdr->cmd"
*
* genl_magic_struct.h
* generates an entry in the genl_ops enum,
* genl_magic_func.h
* generates an entry in the static genl_ops array,
* and static register/unregister functions to
* genl_register_family_with_ops().
*
* flags and handler:
* GENL_op_init( .doit = x, .dumpit = y, .flags = something)
* GENL_doit(x) => .dumpit = NULL, .flags = GENL_ADMIN_PERM
* tla list: the list of expected top level attributes,
* for documentation and sanity checking.
*/
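To make the macro machinery described above concrete, a heavily simplified sketch of what one GENL_struct() use roughly expands to (illustration only; the exact generated code lives in genl_magic_struct.h and genl_magic_func.h, and the member and policy types shown are assumptions):
/*
 *   GENL_struct(DRBD_NLA_DETACH_PARMS, 13, detach_parms,
 *           __flg_field(1, DRBD_GENLA_F_MANDATORY, force_detach))
 *
 * roughly yields, per the description above:
 *
 *   enum { T_force_detach = 1, };                    // field tag number
 *   struct detach_parms { char force_detach; };      // assumed member type
 *   static struct nla_policy detach_parms_nl_policy[] = {
 *           [T_force_detach] = { .type = NLA_U8 },   // assumed policy type
 *   };
 */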
/*
* STRUCTS
*/
/* this is sent kernel -> userland on various error conditions, and contains
* informational textual info, which is supposedly human readable.
* The computer relevant return code is in the drbd_genlmsghdr.
*/
GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply,
/* "arbitrary" size strings, nla_policy.len = 0 */
__str_field(1, DRBD_GENLA_F_MANDATORY, info_text, 0)
)
/* Configuration requests typically need a context to operate on.
* Possible keys are device minor (fits in the drbd_genlmsghdr),
* the replication link (aka connection) name,
* and/or the replication group (aka resource) name,
* and the volume id within the resource. */
GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context,
__u32_field(1, DRBD_GENLA_F_MANDATORY, ctx_volume)
__str_field(2, DRBD_GENLA_F_MANDATORY, ctx_resource_name, 128)
__bin_field(3, DRBD_GENLA_F_MANDATORY, ctx_my_addr, 128)
__bin_field(4, DRBD_GENLA_F_MANDATORY, ctx_peer_addr, 128)
)
GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
__str_field(1, DRBD_F_REQUIRED | DRBD_F_INVARIANT, backing_dev, 128)
__str_field(2, DRBD_F_REQUIRED | DRBD_F_INVARIANT, meta_dev, 128)
__s32_field(3, DRBD_F_REQUIRED | DRBD_F_INVARIANT, meta_dev_idx)
/* use the resize command to try and change the disk_size */
__u64_field(4, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, disk_size)
/* we could change the max_bio_bvecs,
* but it won't propagate through the stack */
__u32_field(5, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, max_bio_bvecs)
__u32_field_def(6, DRBD_GENLA_F_MANDATORY, on_io_error, DRBD_ON_IO_ERROR_DEF)
__u32_field_def(7, DRBD_GENLA_F_MANDATORY, fencing, DRBD_FENCING_DEF)
__u32_field_def(8, DRBD_GENLA_F_MANDATORY, resync_rate, DRBD_RESYNC_RATE_DEF)
__s32_field_def(9, DRBD_GENLA_F_MANDATORY, resync_after, DRBD_MINOR_NUMBER_DEF)
__u32_field_def(10, DRBD_GENLA_F_MANDATORY, al_extents, DRBD_AL_EXTENTS_DEF)
__u32_field_def(11, DRBD_GENLA_F_MANDATORY, c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF)
__u32_field_def(12, DRBD_GENLA_F_MANDATORY, c_delay_target, DRBD_C_DELAY_TARGET_DEF)
__u32_field_def(13, DRBD_GENLA_F_MANDATORY, c_fill_target, DRBD_C_FILL_TARGET_DEF)
__u32_field_def(14, DRBD_GENLA_F_MANDATORY, c_max_rate, DRBD_C_MAX_RATE_DEF)
__u32_field_def(15, DRBD_GENLA_F_MANDATORY, c_min_rate, DRBD_C_MIN_RATE_DEF)
__flg_field_def(16, DRBD_GENLA_F_MANDATORY, disk_barrier, DRBD_DISK_BARRIER_DEF)
__flg_field_def(17, DRBD_GENLA_F_MANDATORY, disk_flushes, DRBD_DISK_FLUSHES_DEF)
__flg_field_def(18, DRBD_GENLA_F_MANDATORY, disk_drain, DRBD_DISK_DRAIN_DEF)
__flg_field_def(19, DRBD_GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF)
__u32_field_def(20, DRBD_GENLA_F_MANDATORY, disk_timeout, DRBD_DISK_TIMEOUT_DEF)
__u32_field_def(21, 0 /* OPTIONAL */, read_balancing, DRBD_READ_BALANCING_DEF)
/* 9: __u32_field_def(22, DRBD_GENLA_F_MANDATORY, unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF) */
__flg_field_def(23, 0 /* OPTIONAL */, al_updates, DRBD_AL_UPDATES_DEF)
)
GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts,
__str_field_def(1, DRBD_GENLA_F_MANDATORY, cpu_mask, 32)
__u32_field_def(2, DRBD_GENLA_F_MANDATORY, on_no_data, DRBD_ON_NO_DATA_DEF)
)
GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf,
__str_field_def(1, DRBD_GENLA_F_MANDATORY | DRBD_F_SENSITIVE,
shared_secret, SHARED_SECRET_MAX)
__str_field_def(2, DRBD_GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX)
__str_field_def(3, DRBD_GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX)
__str_field_def(4, DRBD_GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX)
__str_field_def(5, DRBD_GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX)
__u32_field_def(6, DRBD_GENLA_F_MANDATORY, wire_protocol, DRBD_PROTOCOL_DEF)
__u32_field_def(7, DRBD_GENLA_F_MANDATORY, connect_int, DRBD_CONNECT_INT_DEF)
__u32_field_def(8, DRBD_GENLA_F_MANDATORY, timeout, DRBD_TIMEOUT_DEF)
__u32_field_def(9, DRBD_GENLA_F_MANDATORY, ping_int, DRBD_PING_INT_DEF)
__u32_field_def(10, DRBD_GENLA_F_MANDATORY, ping_timeo, DRBD_PING_TIMEO_DEF)
__u32_field_def(11, DRBD_GENLA_F_MANDATORY, sndbuf_size, DRBD_SNDBUF_SIZE_DEF)
__u32_field_def(12, DRBD_GENLA_F_MANDATORY, rcvbuf_size, DRBD_RCVBUF_SIZE_DEF)
__u32_field_def(13, DRBD_GENLA_F_MANDATORY, ko_count, DRBD_KO_COUNT_DEF)
__u32_field_def(14, DRBD_GENLA_F_MANDATORY, max_buffers, DRBD_MAX_BUFFERS_DEF)
__u32_field_def(15, DRBD_GENLA_F_MANDATORY, max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF)
__u32_field_def(16, DRBD_GENLA_F_MANDATORY, unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF)
__u32_field_def(17, DRBD_GENLA_F_MANDATORY, after_sb_0p, DRBD_AFTER_SB_0P_DEF)
__u32_field_def(18, DRBD_GENLA_F_MANDATORY, after_sb_1p, DRBD_AFTER_SB_1P_DEF)
__u32_field_def(19, DRBD_GENLA_F_MANDATORY, after_sb_2p, DRBD_AFTER_SB_2P_DEF)
__u32_field_def(20, DRBD_GENLA_F_MANDATORY, rr_conflict, DRBD_RR_CONFLICT_DEF)
__u32_field_def(21, DRBD_GENLA_F_MANDATORY, on_congestion, DRBD_ON_CONGESTION_DEF)
__u32_field_def(22, DRBD_GENLA_F_MANDATORY, cong_fill, DRBD_CONG_FILL_DEF)
__u32_field_def(23, DRBD_GENLA_F_MANDATORY, cong_extents, DRBD_CONG_EXTENTS_DEF)
__flg_field_def(24, DRBD_GENLA_F_MANDATORY, two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF)
__flg_field(25, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, discard_my_data)
__flg_field_def(26, DRBD_GENLA_F_MANDATORY, tcp_cork, DRBD_TCP_CORK_DEF)
__flg_field_def(27, DRBD_GENLA_F_MANDATORY, always_asbp, DRBD_ALWAYS_ASBP_DEF)
__flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, tentative)
__flg_field_def(29, DRBD_GENLA_F_MANDATORY, use_rle, DRBD_USE_RLE_DEF)
/* 9: __u32_field_def(30, DRBD_GENLA_F_MANDATORY, fencing_policy, DRBD_FENCING_DEF) */
)
GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms,
__flg_field(1, DRBD_GENLA_F_MANDATORY, assume_uptodate)
)
GENL_struct(DRBD_NLA_RESIZE_PARMS, 7, resize_parms,
__u64_field(1, DRBD_GENLA_F_MANDATORY, resize_size)
__flg_field(2, DRBD_GENLA_F_MANDATORY, resize_force)
__flg_field(3, DRBD_GENLA_F_MANDATORY, no_resync)
)
GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info,
/* the reason of the broadcast,
* if this is an event triggered broadcast. */
__u32_field(1, DRBD_GENLA_F_MANDATORY, sib_reason)
__u32_field(2, DRBD_F_REQUIRED, current_state)
__u64_field(3, DRBD_GENLA_F_MANDATORY, capacity)
__u64_field(4, DRBD_GENLA_F_MANDATORY, ed_uuid)
/* These are for broadcast from after state change work.
* prev_state and new_state are from the moment the state change took
* place, new_state is not necessarily the same as current_state,
* there may have been more state changes since. Which will be
* broadcasted soon, in their respective after state change work. */
__u32_field(5, DRBD_GENLA_F_MANDATORY, prev_state)
__u32_field(6, DRBD_GENLA_F_MANDATORY, new_state)
/* if we have a local disk: */
__bin_field(7, DRBD_GENLA_F_MANDATORY, uuids, (UI_SIZE*sizeof(__u64)))
__u32_field(8, DRBD_GENLA_F_MANDATORY, disk_flags)
__u64_field(9, DRBD_GENLA_F_MANDATORY, bits_total)
__u64_field(10, DRBD_GENLA_F_MANDATORY, bits_oos)
/* and in case resync or online verify is active */
__u64_field(11, DRBD_GENLA_F_MANDATORY, bits_rs_total)
__u64_field(12, DRBD_GENLA_F_MANDATORY, bits_rs_failed)
/* for pre and post notifications of helper execution */
__str_field(13, DRBD_GENLA_F_MANDATORY, helper, 32)
__u32_field(14, DRBD_GENLA_F_MANDATORY, helper_exit_code)
__u64_field(15, 0, send_cnt)
__u64_field(16, 0, recv_cnt)
__u64_field(17, 0, read_cnt)
__u64_field(18, 0, writ_cnt)
__u64_field(19, 0, al_writ_cnt)
__u64_field(20, 0, bm_writ_cnt)
__u32_field(21, 0, ap_bio_cnt)
__u32_field(22, 0, ap_pending_cnt)
__u32_field(23, 0, rs_pending_cnt)
)
GENL_struct(DRBD_NLA_START_OV_PARMS, 9, start_ov_parms,
__u64_field(1, DRBD_GENLA_F_MANDATORY, ov_start_sector)
__u64_field(2, DRBD_GENLA_F_MANDATORY, ov_stop_sector)
)
GENL_struct(DRBD_NLA_NEW_C_UUID_PARMS, 10, new_c_uuid_parms,
__flg_field(1, DRBD_GENLA_F_MANDATORY, clear_bm)
)
GENL_struct(DRBD_NLA_TIMEOUT_PARMS, 11, timeout_parms,
__u32_field(1, DRBD_F_REQUIRED, timeout_type)
)
GENL_struct(DRBD_NLA_DISCONNECT_PARMS, 12, disconnect_parms,
__flg_field(1, DRBD_GENLA_F_MANDATORY, force_disconnect)
)
GENL_struct(DRBD_NLA_DETACH_PARMS, 13, detach_parms,
__flg_field(1, DRBD_GENLA_F_MANDATORY, force_detach)
)
/*
* Notifications and commands (genlmsghdr->cmd)
*/
GENL_mc_group(events)
/* kernel -> userspace announcement of changes */
GENL_notification(
DRBD_EVENT, 1, events,
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
GENL_tla_expected(DRBD_NLA_STATE_INFO, DRBD_F_REQUIRED)
GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_GENLA_F_MANDATORY)
GENL_tla_expected(DRBD_NLA_DISK_CONF, DRBD_GENLA_F_MANDATORY)
GENL_tla_expected(DRBD_NLA_SYNCER_CONF, DRBD_GENLA_F_MANDATORY)
)
/* query kernel for specific or all info */
GENL_op(
DRBD_ADM_GET_STATUS, 2,
GENL_op_init(
.doit = drbd_adm_get_status,
.dumpit = drbd_adm_get_status_all,
/* anyone may ask for the status,
* it is broadcasted anyways */
),
/* To select the object .doit.
* Or a subset of objects in .dumpit. */
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY)
)
/* add DRBD minor devices as volumes to resources */
GENL_op(DRBD_ADM_NEW_MINOR, 5, GENL_doit(drbd_adm_add_minor),
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
GENL_op(DRBD_ADM_DEL_MINOR, 6, GENL_doit(drbd_adm_delete_minor),
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
/* add or delete resources */
GENL_op(DRBD_ADM_NEW_RESOURCE, 7, GENL_doit(drbd_adm_new_resource),
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
GENL_op(DRBD_ADM_DEL_RESOURCE, 8, GENL_doit(drbd_adm_del_resource),
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
GENL_op(DRBD_ADM_RESOURCE_OPTS, 9,
GENL_doit(drbd_adm_resource_opts),
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
GENL_tla_expected(DRBD_NLA_RESOURCE_OPTS, DRBD_GENLA_F_MANDATORY)
)
GENL_op(
DRBD_ADM_CONNECT, 10,
GENL_doit(drbd_adm_connect),
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_F_REQUIRED)
)
GENL_op(
DRBD_ADM_CHG_NET_OPTS, 29,
GENL_doit(drbd_adm_net_opts),
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_F_REQUIRED)
)
GENL_op(DRBD_ADM_DISCONNECT, 11, GENL_doit(drbd_adm_disconnect),
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
GENL_op(DRBD_ADM_ATTACH, 12,
GENL_doit(drbd_adm_attach),
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
GENL_tla_expected(DRBD_NLA_DISK_CONF, DRBD_F_REQUIRED)
)
GENL_op(DRBD_ADM_CHG_DISK_OPTS, 28,
GENL_doit(drbd_adm_disk_opts),
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
GENL_tla_expected(DRBD_NLA_DISK_OPTS, DRBD_F_REQUIRED)
)
GENL_op(
DRBD_ADM_RESIZE, 13,
GENL_doit(drbd_adm_resize),
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
GENL_tla_expected(DRBD_NLA_RESIZE_PARMS, DRBD_GENLA_F_MANDATORY)
)
GENL_op(
DRBD_ADM_PRIMARY, 14,
GENL_doit(drbd_adm_set_role),
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, DRBD_F_REQUIRED)
)
GENL_op(
DRBD_ADM_SECONDARY, 15,
GENL_doit(drbd_adm_set_role),
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, DRBD_F_REQUIRED)
)
GENL_op(
DRBD_ADM_NEW_C_UUID, 16,
GENL_doit(drbd_adm_new_c_uuid),
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
GENL_tla_expected(DRBD_NLA_NEW_C_UUID_PARMS, DRBD_GENLA_F_MANDATORY)
)
GENL_op(
DRBD_ADM_START_OV, 17,
GENL_doit(drbd_adm_start_ov),
GENL_tla_expected(DRBD_NLA_START_OV_PARMS, DRBD_GENLA_F_MANDATORY)
)
GENL_op(DRBD_ADM_DETACH, 18, GENL_doit(drbd_adm_detach),
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
GENL_tla_expected(DRBD_NLA_DETACH_PARMS, DRBD_GENLA_F_MANDATORY))
GENL_op(DRBD_ADM_INVALIDATE, 19, GENL_doit(drbd_adm_invalidate),
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
GENL_op(DRBD_ADM_INVAL_PEER, 20, GENL_doit(drbd_adm_invalidate_peer),
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
GENL_op(DRBD_ADM_PAUSE_SYNC, 21, GENL_doit(drbd_adm_pause_sync),
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
GENL_op(DRBD_ADM_RESUME_SYNC, 22, GENL_doit(drbd_adm_resume_sync),
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
GENL_op(DRBD_ADM_SUSPEND_IO, 23, GENL_doit(drbd_adm_suspend_io),
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
GENL_op(DRBD_ADM_RESUME_IO, 24, GENL_doit(drbd_adm_resume_io),
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
GENL_op(DRBD_ADM_OUTDATE, 25, GENL_doit(drbd_adm_outdate),
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
GENL_op(DRBD_ADM_GET_TIMEOUT_TYPE, 26, GENL_doit(drbd_adm_get_timeout_type),
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
GENL_op(DRBD_ADM_DOWN, 27, GENL_doit(drbd_adm_down),
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))


@@ -0,0 +1,55 @@
#ifndef DRBD_GENL_STRUCT_H
#define DRBD_GENL_STRUCT_H
/**
* struct drbd_genlmsghdr - DRBD specific header used in NETLINK_GENERIC requests
* @minor:
* For admin requests (user -> kernel): which minor device to operate on.
* For (unicast) replies or informational (broadcast) messages
* (kernel -> user): which minor device the information is about.
* If we do not operate on minors, but on connections or resources,
* the minor value shall be (~0), and the attribute DRBD_NLA_CFG_CONTEXT
* is used instead.
* @flags: possible operation modifiers (relevant only for user->kernel):
* DRBD_GENL_F_SET_DEFAULTS
* @volume:
* When creating a new minor (adding it to a resource), the resource needs
* to know which volume number within the resource this is supposed to be.
* The volume number corresponds to the same volume number on the remote side,
* whereas the minor number on the remote side may be different
* (union with flags).
* @ret_code: kernel->userland unicast cfg reply return code (union with flags);
*/
struct drbd_genlmsghdr {
__u32 minor;
union {
__u32 flags;
__s32 ret_code;
};
};
/* To be used in drbd_genlmsghdr.flags */
enum {
DRBD_GENL_F_SET_DEFAULTS = 1,
};
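As a rough orientation only, the sketch below shows how a kernel-side reply might carry this header, using the standard genetlink helpers; drbd_genl_family is the family object generated further down by the genl_magic machinery, and minor/NO_ERROR are assumed from the surrounding drbd code rather than defined here.

/* hedged sketch: filling a drbd_genlmsghdr in a kernel->user reply */
struct sk_buff *skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
struct drbd_genlmsghdr *dh;

if (!skb)
	return -ENOMEM;
dh = genlmsg_put(skb, 0 /* portid */, 0 /* seq */,
		 &drbd_genl_family, 0, DRBD_ADM_GET_STATUS);
if (!dh) {
	nlmsg_free(skb);
	return -ENOMEM;
}
dh->minor = minor;		/* which minor this reply is about */
dh->ret_code = NO_ERROR;	/* union with flags on the way back */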
enum drbd_state_info_bcast_reason {
SIB_GET_STATUS_REPLY = 1,
SIB_STATE_CHANGE = 2,
SIB_HELPER_PRE = 3,
SIB_HELPER_POST = 4,
SIB_SYNC_PROGRESS = 5,
};
/* hack around predefined gcc/cpp "linux=1",
* we cannot possibly include <1/drbd_genl.h> */
#undef linux
#include <linux/drbd.h>
#define GENL_MAGIC_VERSION API_VERSION
#define GENL_MAGIC_FAMILY drbd
#define GENL_MAGIC_FAMILY_HDRSZ sizeof(struct drbd_genlmsghdr)
#define GENL_MAGIC_INCLUDE_FILE <linux/drbd_genl.h>
#include <linux/genl_magic_struct.h>
#endif


@@ -16,29 +16,37 @@
#define DEBUG_RANGE_CHECK 0
#define DRBD_MINOR_COUNT_MIN 1
#define DRBD_MINOR_COUNT_MAX 256
#define DRBD_MINOR_COUNT_MAX 255
#define DRBD_MINOR_COUNT_DEF 32
#define DRBD_MINOR_COUNT_SCALE '1'
#define DRBD_VOLUME_MAX 65535
#define DRBD_DIALOG_REFRESH_MIN 0
#define DRBD_DIALOG_REFRESH_MAX 600
#define DRBD_DIALOG_REFRESH_SCALE '1'
/* valid port number */
#define DRBD_PORT_MIN 1
#define DRBD_PORT_MAX 0xffff
#define DRBD_PORT_SCALE '1'
/* startup { */
/* if you want more than 3.4 days, disable */
#define DRBD_WFC_TIMEOUT_MIN 0
#define DRBD_WFC_TIMEOUT_MAX 300000
#define DRBD_WFC_TIMEOUT_DEF 0
#define DRBD_WFC_TIMEOUT_SCALE '1'
#define DRBD_DEGR_WFC_TIMEOUT_MIN 0
#define DRBD_DEGR_WFC_TIMEOUT_MAX 300000
#define DRBD_DEGR_WFC_TIMEOUT_DEF 0
#define DRBD_DEGR_WFC_TIMEOUT_SCALE '1'
#define DRBD_OUTDATED_WFC_TIMEOUT_MIN 0
#define DRBD_OUTDATED_WFC_TIMEOUT_MAX 300000
#define DRBD_OUTDATED_WFC_TIMEOUT_DEF 0
#define DRBD_OUTDATED_WFC_TIMEOUT_SCALE '1'
/* }*/
/* net { */
@@ -47,75 +55,91 @@
#define DRBD_TIMEOUT_MIN 1
#define DRBD_TIMEOUT_MAX 600
#define DRBD_TIMEOUT_DEF 60 /* 6 seconds */
#define DRBD_TIMEOUT_SCALE '1'
/* If backing disk takes longer than disk_timeout, mark the disk as failed */
#define DRBD_DISK_TIMEOUT_MIN 0 /* 0 = disabled */
#define DRBD_DISK_TIMEOUT_MAX 6000 /* 10 Minutes */
#define DRBD_DISK_TIMEOUT_DEF 0 /* disabled */
#define DRBD_DISK_TIMEOUT_SCALE '1'
/* active connection retries when C_WF_CONNECTION */
#define DRBD_CONNECT_INT_MIN 1
#define DRBD_CONNECT_INT_MAX 120
#define DRBD_CONNECT_INT_DEF 10 /* seconds */
#define DRBD_CONNECT_INT_SCALE '1'
/* keep-alive probes when idle */
#define DRBD_PING_INT_MIN 1
#define DRBD_PING_INT_MAX 120
#define DRBD_PING_INT_DEF 10
#define DRBD_PING_INT_SCALE '1'
/* timeout for the ping packets.*/
#define DRBD_PING_TIMEO_MIN 1
#define DRBD_PING_TIMEO_MAX 300
#define DRBD_PING_TIMEO_DEF 5
#define DRBD_PING_TIMEO_SCALE '1'
/* max number of write requests between write barriers */
#define DRBD_MAX_EPOCH_SIZE_MIN 1
#define DRBD_MAX_EPOCH_SIZE_MAX 20000
#define DRBD_MAX_EPOCH_SIZE_DEF 2048
#define DRBD_MAX_EPOCH_SIZE_SCALE '1'
/* I don't think that a tcp send buffer of more than 10M is useful */
#define DRBD_SNDBUF_SIZE_MIN 0
#define DRBD_SNDBUF_SIZE_MAX (10<<20)
#define DRBD_SNDBUF_SIZE_DEF 0
#define DRBD_SNDBUF_SIZE_SCALE '1'
#define DRBD_RCVBUF_SIZE_MIN 0
#define DRBD_RCVBUF_SIZE_MAX (10<<20)
#define DRBD_RCVBUF_SIZE_DEF 0
#define DRBD_RCVBUF_SIZE_SCALE '1'
/* @4k PageSize -> 128kB - 512MB */
#define DRBD_MAX_BUFFERS_MIN 32
#define DRBD_MAX_BUFFERS_MAX 131072
#define DRBD_MAX_BUFFERS_DEF 2048
#define DRBD_MAX_BUFFERS_SCALE '1'
/* @4k PageSize -> 4kB - 512MB */
#define DRBD_UNPLUG_WATERMARK_MIN 1
#define DRBD_UNPLUG_WATERMARK_MAX 131072
#define DRBD_UNPLUG_WATERMARK_DEF (DRBD_MAX_BUFFERS_DEF/16)
#define DRBD_UNPLUG_WATERMARK_SCALE '1'
/* 0 is disabled.
* 200 should be more than enough even for very short timeouts */
#define DRBD_KO_COUNT_MIN 0
#define DRBD_KO_COUNT_MAX 200
#define DRBD_KO_COUNT_DEF 0
#define DRBD_KO_COUNT_DEF 7
#define DRBD_KO_COUNT_SCALE '1'
/* } */
/* syncer { */
/* FIXME allow rate to be zero? */
#define DRBD_RATE_MIN 1
#define DRBD_RESYNC_RATE_MIN 1
/* channel bonding 10 GbE, or other hardware */
#define DRBD_RATE_MAX (4 << 20)
#define DRBD_RATE_DEF 250 /* kb/second */
#define DRBD_RESYNC_RATE_MAX (4 << 20)
#define DRBD_RESYNC_RATE_DEF 250
#define DRBD_RESYNC_RATE_SCALE 'k' /* kilobytes */
/* less than 7 would hit performance unnecessarily.
* 3833 is the largest prime that still does fit
* into 64 sectors of activity log */
* 919 slots of context information per transaction,
* 32k activity log, 4k transaction size,
* one transaction in flight:
* 919 * 7 = 6433 */
#define DRBD_AL_EXTENTS_MIN 7
#define DRBD_AL_EXTENTS_MAX 3833
#define DRBD_AL_EXTENTS_DEF 127
#define DRBD_AL_EXTENTS_MAX 6433
#define DRBD_AL_EXTENTS_DEF 1237
#define DRBD_AL_EXTENTS_SCALE '1'
#define DRBD_AFTER_MIN -1
#define DRBD_AFTER_MAX 255
#define DRBD_AFTER_DEF -1
#define DRBD_MINOR_NUMBER_MIN -1
#define DRBD_MINOR_NUMBER_MAX ((1 << 20) - 1)
#define DRBD_MINOR_NUMBER_DEF -1
#define DRBD_MINOR_NUMBER_SCALE '1'
/* } */
@@ -124,11 +148,12 @@
* the upper limit with 64bit kernel, enough ram and flexible meta data
* is 1 PiB, currently. */
/* DRBD_MAX_SECTORS */
#define DRBD_DISK_SIZE_SECT_MIN 0
#define DRBD_DISK_SIZE_SECT_MAX (1 * (2LLU << 40))
#define DRBD_DISK_SIZE_SECT_DEF 0 /* = disabled = no user size... */
#define DRBD_DISK_SIZE_MIN 0
#define DRBD_DISK_SIZE_MAX (1 * (2LLU << 40))
#define DRBD_DISK_SIZE_DEF 0 /* = disabled = no user size... */
#define DRBD_DISK_SIZE_SCALE 's' /* sectors */
#define DRBD_ON_IO_ERROR_DEF EP_PASS_ON
#define DRBD_ON_IO_ERROR_DEF EP_DETACH
#define DRBD_FENCING_DEF FP_DONT_CARE
#define DRBD_AFTER_SB_0P_DEF ASB_DISCONNECT
#define DRBD_AFTER_SB_1P_DEF ASB_DISCONNECT
@@ -136,38 +161,59 @@
#define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT
#define DRBD_ON_NO_DATA_DEF OND_IO_ERROR
#define DRBD_ON_CONGESTION_DEF OC_BLOCK
#define DRBD_READ_BALANCING_DEF RB_PREFER_LOCAL
#define DRBD_MAX_BIO_BVECS_MIN 0
#define DRBD_MAX_BIO_BVECS_MAX 128
#define DRBD_MAX_BIO_BVECS_DEF 0
#define DRBD_MAX_BIO_BVECS_SCALE '1'
#define DRBD_C_PLAN_AHEAD_MIN 0
#define DRBD_C_PLAN_AHEAD_MAX 300
#define DRBD_C_PLAN_AHEAD_DEF 0 /* RS rate controller disabled by default */
#define DRBD_C_PLAN_AHEAD_DEF 20
#define DRBD_C_PLAN_AHEAD_SCALE '1'
#define DRBD_C_DELAY_TARGET_MIN 1
#define DRBD_C_DELAY_TARGET_MAX 100
#define DRBD_C_DELAY_TARGET_DEF 10
#define DRBD_C_DELAY_TARGET_SCALE '1'
#define DRBD_C_FILL_TARGET_MIN 0
#define DRBD_C_FILL_TARGET_MAX (1<<20) /* 500MByte in sec */
#define DRBD_C_FILL_TARGET_DEF 0 /* By default disabled -> controlled by delay_target */
#define DRBD_C_FILL_TARGET_DEF 100 /* Try to place 50KiB in socket send buffer during resync */
#define DRBD_C_FILL_TARGET_SCALE 's' /* sectors */
#define DRBD_C_MAX_RATE_MIN 250 /* kByte/sec */
#define DRBD_C_MAX_RATE_MIN 250
#define DRBD_C_MAX_RATE_MAX (4 << 20)
#define DRBD_C_MAX_RATE_DEF 102400
#define DRBD_C_MAX_RATE_SCALE 'k' /* kilobytes */
#define DRBD_C_MIN_RATE_MIN 0 /* kByte/sec */
#define DRBD_C_MIN_RATE_MIN 0
#define DRBD_C_MIN_RATE_MAX (4 << 20)
#define DRBD_C_MIN_RATE_DEF 4096
#define DRBD_C_MIN_RATE_DEF 250
#define DRBD_C_MIN_RATE_SCALE 'k' /* kilobytes */
#define DRBD_CONG_FILL_MIN 0
#define DRBD_CONG_FILL_MAX (10<<21) /* 10GByte in sectors */
#define DRBD_CONG_FILL_DEF 0
#define DRBD_CONG_FILL_SCALE 's' /* sectors */
#define DRBD_CONG_EXTENTS_MIN DRBD_AL_EXTENTS_MIN
#define DRBD_CONG_EXTENTS_MAX DRBD_AL_EXTENTS_MAX
#define DRBD_CONG_EXTENTS_DEF DRBD_AL_EXTENTS_DEF
#define DRBD_CONG_EXTENTS_SCALE DRBD_AL_EXTENTS_SCALE
#define DRBD_PROTOCOL_DEF DRBD_PROT_C
#define DRBD_DISK_BARRIER_DEF 0
#define DRBD_DISK_FLUSHES_DEF 1
#define DRBD_DISK_DRAIN_DEF 1
#define DRBD_MD_FLUSHES_DEF 1
#define DRBD_TCP_CORK_DEF 1
#define DRBD_AL_UPDATES_DEF 1
#define DRBD_ALLOW_TWO_PRIMARIES_DEF 0
#define DRBD_ALWAYS_ASBP_DEF 0
#define DRBD_USE_RLE_DEF 1
#undef RANGE
#endif


@@ -1,164 +0,0 @@
/*
PAKET( name,
TYPE ( pn, pr, member )
...
)
You may never reissue one of the pn arguments
*/
#if !defined(NL_PACKET) || !defined(NL_STRING) || !defined(NL_INTEGER) || !defined(NL_BIT) || !defined(NL_INT64)
#error "The macros NL_PACKET, NL_STRING, NL_INTEGER, NL_INT64 and NL_BIT needs to be defined"
#endif
NL_PACKET(primary, 1,
NL_BIT( 1, T_MAY_IGNORE, primary_force)
)
NL_PACKET(secondary, 2, )
NL_PACKET(disk_conf, 3,
NL_INT64( 2, T_MAY_IGNORE, disk_size)
NL_STRING( 3, T_MANDATORY, backing_dev, 128)
NL_STRING( 4, T_MANDATORY, meta_dev, 128)
NL_INTEGER( 5, T_MANDATORY, meta_dev_idx)
NL_INTEGER( 6, T_MAY_IGNORE, on_io_error)
NL_INTEGER( 7, T_MAY_IGNORE, fencing)
NL_BIT( 37, T_MAY_IGNORE, use_bmbv)
NL_BIT( 53, T_MAY_IGNORE, no_disk_flush)
NL_BIT( 54, T_MAY_IGNORE, no_md_flush)
/* 55 max_bio_size was available in 8.2.6rc2 */
NL_INTEGER( 56, T_MAY_IGNORE, max_bio_bvecs)
NL_BIT( 57, T_MAY_IGNORE, no_disk_barrier)
NL_BIT( 58, T_MAY_IGNORE, no_disk_drain)
NL_INTEGER( 89, T_MAY_IGNORE, disk_timeout)
)
NL_PACKET(detach, 4,
NL_BIT( 88, T_MANDATORY, detach_force)
)
NL_PACKET(net_conf, 5,
NL_STRING( 8, T_MANDATORY, my_addr, 128)
NL_STRING( 9, T_MANDATORY, peer_addr, 128)
NL_STRING( 10, T_MAY_IGNORE, shared_secret, SHARED_SECRET_MAX)
NL_STRING( 11, T_MAY_IGNORE, cram_hmac_alg, SHARED_SECRET_MAX)
NL_STRING( 44, T_MAY_IGNORE, integrity_alg, SHARED_SECRET_MAX)
NL_INTEGER( 14, T_MAY_IGNORE, timeout)
NL_INTEGER( 15, T_MANDATORY, wire_protocol)
NL_INTEGER( 16, T_MAY_IGNORE, try_connect_int)
NL_INTEGER( 17, T_MAY_IGNORE, ping_int)
NL_INTEGER( 18, T_MAY_IGNORE, max_epoch_size)
NL_INTEGER( 19, T_MAY_IGNORE, max_buffers)
NL_INTEGER( 20, T_MAY_IGNORE, unplug_watermark)
NL_INTEGER( 21, T_MAY_IGNORE, sndbuf_size)
NL_INTEGER( 22, T_MAY_IGNORE, ko_count)
NL_INTEGER( 24, T_MAY_IGNORE, after_sb_0p)
NL_INTEGER( 25, T_MAY_IGNORE, after_sb_1p)
NL_INTEGER( 26, T_MAY_IGNORE, after_sb_2p)
NL_INTEGER( 39, T_MAY_IGNORE, rr_conflict)
NL_INTEGER( 40, T_MAY_IGNORE, ping_timeo)
NL_INTEGER( 67, T_MAY_IGNORE, rcvbuf_size)
NL_INTEGER( 81, T_MAY_IGNORE, on_congestion)
NL_INTEGER( 82, T_MAY_IGNORE, cong_fill)
NL_INTEGER( 83, T_MAY_IGNORE, cong_extents)
/* 59 addr_family was available in GIT, never released */
NL_BIT( 60, T_MANDATORY, mind_af)
NL_BIT( 27, T_MAY_IGNORE, want_lose)
NL_BIT( 28, T_MAY_IGNORE, two_primaries)
NL_BIT( 41, T_MAY_IGNORE, always_asbp)
NL_BIT( 61, T_MAY_IGNORE, no_cork)
NL_BIT( 62, T_MANDATORY, auto_sndbuf_size)
NL_BIT( 70, T_MANDATORY, dry_run)
)
NL_PACKET(disconnect, 6,
NL_BIT( 84, T_MAY_IGNORE, force)
)
NL_PACKET(resize, 7,
NL_INT64( 29, T_MAY_IGNORE, resize_size)
NL_BIT( 68, T_MAY_IGNORE, resize_force)
NL_BIT( 69, T_MANDATORY, no_resync)
)
NL_PACKET(syncer_conf, 8,
NL_INTEGER( 30, T_MAY_IGNORE, rate)
NL_INTEGER( 31, T_MAY_IGNORE, after)
NL_INTEGER( 32, T_MAY_IGNORE, al_extents)
/* NL_INTEGER( 71, T_MAY_IGNORE, dp_volume)
* NL_INTEGER( 72, T_MAY_IGNORE, dp_interval)
* NL_INTEGER( 73, T_MAY_IGNORE, throttle_th)
* NL_INTEGER( 74, T_MAY_IGNORE, hold_off_th)
* feature will be reimplemented differently with 8.3.9 */
NL_STRING( 52, T_MAY_IGNORE, verify_alg, SHARED_SECRET_MAX)
NL_STRING( 51, T_MAY_IGNORE, cpu_mask, 32)
NL_STRING( 64, T_MAY_IGNORE, csums_alg, SHARED_SECRET_MAX)
NL_BIT( 65, T_MAY_IGNORE, use_rle)
NL_INTEGER( 75, T_MAY_IGNORE, on_no_data)
NL_INTEGER( 76, T_MAY_IGNORE, c_plan_ahead)
NL_INTEGER( 77, T_MAY_IGNORE, c_delay_target)
NL_INTEGER( 78, T_MAY_IGNORE, c_fill_target)
NL_INTEGER( 79, T_MAY_IGNORE, c_max_rate)
NL_INTEGER( 80, T_MAY_IGNORE, c_min_rate)
)
NL_PACKET(invalidate, 9, )
NL_PACKET(invalidate_peer, 10, )
NL_PACKET(pause_sync, 11, )
NL_PACKET(resume_sync, 12, )
NL_PACKET(suspend_io, 13, )
NL_PACKET(resume_io, 14, )
NL_PACKET(outdate, 15, )
NL_PACKET(get_config, 16, )
NL_PACKET(get_state, 17,
NL_INTEGER( 33, T_MAY_IGNORE, state_i)
)
NL_PACKET(get_uuids, 18,
NL_STRING( 34, T_MAY_IGNORE, uuids, (UI_SIZE*sizeof(__u64)))
NL_INTEGER( 35, T_MAY_IGNORE, uuids_flags)
)
NL_PACKET(get_timeout_flag, 19,
NL_BIT( 36, T_MAY_IGNORE, use_degraded)
)
NL_PACKET(call_helper, 20,
NL_STRING( 38, T_MAY_IGNORE, helper, 32)
)
/* Tag nr 42 already allocated in drbd-8.1 development. */
NL_PACKET(sync_progress, 23,
NL_INTEGER( 43, T_MAY_IGNORE, sync_progress)
)
NL_PACKET(dump_ee, 24,
NL_STRING( 45, T_MAY_IGNORE, dump_ee_reason, 32)
NL_STRING( 46, T_MAY_IGNORE, seen_digest, SHARED_SECRET_MAX)
NL_STRING( 47, T_MAY_IGNORE, calc_digest, SHARED_SECRET_MAX)
NL_INT64( 48, T_MAY_IGNORE, ee_sector)
NL_INT64( 49, T_MAY_IGNORE, ee_block_id)
NL_STRING( 50, T_MAY_IGNORE, ee_data, 32 << 10)
)
NL_PACKET(start_ov, 25,
NL_INT64( 66, T_MAY_IGNORE, start_sector)
NL_INT64( 90, T_MANDATORY, stop_sector)
)
NL_PACKET(new_c_uuid, 26,
NL_BIT( 63, T_MANDATORY, clear_bm)
)
#ifdef NL_RESPONSE
NL_RESPONSE(return_code_only, 27)
#endif
#undef NL_PACKET
#undef NL_INTEGER
#undef NL_INT64
#undef NL_BIT
#undef NL_STRING
#undef NL_RESPONSE


@@ -1,84 +0,0 @@
#ifndef DRBD_TAG_MAGIC_H
#define DRBD_TAG_MAGIC_H
#define TT_END 0
#define TT_REMOVED 0xE000
/* declare packet_type enums */
enum packet_types {
#define NL_PACKET(name, number, fields) P_ ## name = number,
#define NL_RESPONSE(name, number) P_ ## name = number,
#define NL_INTEGER(pn, pr, member)
#define NL_INT64(pn, pr, member)
#define NL_BIT(pn, pr, member)
#define NL_STRING(pn, pr, member, len)
#include <linux/drbd_nl.h>
P_nl_after_last_packet,
};
/* These struct are used to deduce the size of the tag lists: */
#define NL_PACKET(name, number, fields) \
struct name ## _tag_len_struct { fields };
#define NL_INTEGER(pn, pr, member) \
int member; int tag_and_len ## member;
#define NL_INT64(pn, pr, member) \
__u64 member; int tag_and_len ## member;
#define NL_BIT(pn, pr, member) \
unsigned char member:1; int tag_and_len ## member;
#define NL_STRING(pn, pr, member, len) \
unsigned char member[len]; int member ## _len; \
int tag_and_len ## member;
#include <linux/drbd_nl.h>
/* declare tag-list-sizes */
static const int tag_list_sizes[] = {
#define NL_PACKET(name, number, fields) 2 fields ,
#define NL_INTEGER(pn, pr, member) + 4 + 4
#define NL_INT64(pn, pr, member) + 4 + 8
#define NL_BIT(pn, pr, member) + 4 + 1
#define NL_STRING(pn, pr, member, len) + 4 + (len)
#include <linux/drbd_nl.h>
};
/* The two highest bits are used for the tag type */
#define TT_MASK 0xC000
#define TT_INTEGER 0x0000
#define TT_INT64 0x4000
#define TT_BIT 0x8000
#define TT_STRING 0xC000
/* The next bit indicates if processing of the tag is mandatory */
#define T_MANDATORY 0x2000
#define T_MAY_IGNORE 0x0000
#define TN_MASK 0x1fff
/* The remaining 13 bits are used to enumerate the tags */
#define tag_type(T) ((T) & TT_MASK)
#define tag_number(T) ((T) & TN_MASK)
/* declare tag enums */
#define NL_PACKET(name, number, fields) fields
enum drbd_tags {
#define NL_INTEGER(pn, pr, member) T_ ## member = pn | TT_INTEGER | pr ,
#define NL_INT64(pn, pr, member) T_ ## member = pn | TT_INT64 | pr ,
#define NL_BIT(pn, pr, member) T_ ## member = pn | TT_BIT | pr ,
#define NL_STRING(pn, pr, member, len) T_ ## member = pn | TT_STRING | pr ,
#include <linux/drbd_nl.h>
};
struct tag {
const char *name;
int type_n_flags;
int max_len;
};
/* declare tag names */
#define NL_PACKET(name, number, fields) fields
static const struct tag tag_descriptions[] = {
#define NL_INTEGER(pn, pr, member) [ pn ] = { #member, TT_INTEGER | pr, sizeof(int) },
#define NL_INT64(pn, pr, member) [ pn ] = { #member, TT_INT64 | pr, sizeof(__u64) },
#define NL_BIT(pn, pr, member) [ pn ] = { #member, TT_BIT | pr, sizeof(int) },
#define NL_STRING(pn, pr, member, len) [ pn ] = { #member, TT_STRING | pr, (len) },
#include <linux/drbd_nl.h>
};
#endif


@@ -0,0 +1,422 @@
#ifndef GENL_MAGIC_FUNC_H
#define GENL_MAGIC_FUNC_H
#include <linux/genl_magic_struct.h>
/*
* Magic: declare tla policy {{{1
* Magic: declare nested policies
* {{{2
*/
#undef GENL_mc_group
#define GENL_mc_group(group)
#undef GENL_notification
#define GENL_notification(op_name, op_num, mcast_group, tla_list)
#undef GENL_op
#define GENL_op(op_name, op_num, handler, tla_list)
#undef GENL_struct
#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
[tag_name] = { .type = NLA_NESTED },
static struct nla_policy CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy)[] = {
#include GENL_MAGIC_INCLUDE_FILE
};
#undef GENL_struct
#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
static struct nla_policy s_name ## _nl_policy[] __read_mostly = \
{ s_fields };
#undef __field
#define __field(attr_nr, attr_flag, name, nla_type, _type, __get, \
__put, __is_signed) \
[attr_nr] = { .type = nla_type },
#undef __array
#define __array(attr_nr, attr_flag, name, nla_type, _type, maxlen, \
__get, __put, __is_signed) \
[attr_nr] = { .type = nla_type, \
.len = maxlen - (nla_type == NLA_NUL_STRING) },
#include GENL_MAGIC_INCLUDE_FILE
#ifndef __KERNEL__
#ifndef pr_info
#define pr_info(args...) fprintf(stderr, args);
#endif
#endif
#ifdef GENL_MAGIC_DEBUG
static void dprint_field(const char *dir, int nla_type,
const char *name, void *valp)
{
__u64 val = valp ? *(__u32 *)valp : 1;
switch (nla_type) {
case NLA_U8: val = (__u8)val;
case NLA_U16: val = (__u16)val;
case NLA_U32: val = (__u32)val;
pr_info("%s attr %s: %d 0x%08x\n", dir,
name, (int)val, (unsigned)val);
break;
case NLA_U64:
val = *(__u64*)valp;
pr_info("%s attr %s: %lld 0x%08llx\n", dir,
name, (long long)val, (unsigned long long)val);
break;
case NLA_FLAG:
if (val)
pr_info("%s attr %s: set\n", dir, name);
break;
}
}
static void dprint_array(const char *dir, int nla_type,
const char *name, const char *val, unsigned len)
{
switch (nla_type) {
case NLA_NUL_STRING:
if (len && val[len-1] == '\0')
len--;
pr_info("%s attr %s: [len:%u] '%s'\n", dir, name, len, val);
break;
default:
/* we can always show 4 bytes,
* that's what nlattrs are aligned to. */
pr_info("%s attr %s: [len:%u] %02x%02x%02x%02x ...\n",
dir, name, len, val[0], val[1], val[2], val[3]);
}
}
#define DPRINT_TLA(a, op, b) pr_info("%s %s %s\n", a, op, b);
/* Name is a member field name of the struct s.
* If s is NULL (only parsing, no copy requested in *_from_attrs()),
* nla is supposed to point to the attribute containing the information
* corresponding to that struct member. */
#define DPRINT_FIELD(dir, nla_type, name, s, nla) \
do { \
if (s) \
dprint_field(dir, nla_type, #name, &s->name); \
else if (nla) \
dprint_field(dir, nla_type, #name, \
(nla_type == NLA_FLAG) ? NULL \
: nla_data(nla)); \
} while (0)
#define DPRINT_ARRAY(dir, nla_type, name, s, nla) \
do { \
if (s) \
dprint_array(dir, nla_type, #name, \
s->name, s->name ## _len); \
else if (nla) \
dprint_array(dir, nla_type, #name, \
nla_data(nla), nla_len(nla)); \
} while (0)
#else
#define DPRINT_TLA(a, op, b) do {} while (0)
#define DPRINT_FIELD(dir, nla_type, name, s, nla) do {} while (0)
#define DPRINT_ARRAY(dir, nla_type, name, s, nla) do {} while (0)
#endif
/*
* Magic: provide conversion functions {{{1
* populate struct from attribute table:
* {{{2
*/
/* processing of generic netlink messages is serialized.
* use one static buffer for parsing of nested attributes */
static struct nlattr *nested_attr_tb[128];
#ifndef BUILD_BUG_ON
/* Force a compilation error if condition is true */
#define BUILD_BUG_ON(condition) ((void)BUILD_BUG_ON_ZERO(condition))
/* Force a compilation error if condition is true, but also produce a
result (of value 0 and type size_t), so the expression can be used
e.g. in a structure initializer (or wherever else comma expressions
aren't permitted). */
#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); }))
#endif
#undef GENL_struct
#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
/* *_from_attrs functions are static, but potentially unused */ \
static int __ ## s_name ## _from_attrs(struct s_name *s, \
struct genl_info *info, bool exclude_invariants) \
{ \
const int maxtype = ARRAY_SIZE(s_name ## _nl_policy)-1; \
struct nlattr *tla = info->attrs[tag_number]; \
struct nlattr **ntb = nested_attr_tb; \
struct nlattr *nla; \
int err; \
BUILD_BUG_ON(ARRAY_SIZE(s_name ## _nl_policy) > ARRAY_SIZE(nested_attr_tb)); \
if (!tla) \
return -ENOMSG; \
DPRINT_TLA(#s_name, "<=-", #tag_name); \
err = drbd_nla_parse_nested(ntb, maxtype, tla, s_name ## _nl_policy); \
if (err) \
return err; \
\
s_fields \
return 0; \
} __attribute__((unused)) \
static int s_name ## _from_attrs(struct s_name *s, \
struct genl_info *info) \
{ \
return __ ## s_name ## _from_attrs(s, info, false); \
} __attribute__((unused)) \
static int s_name ## _from_attrs_for_change(struct s_name *s, \
struct genl_info *info) \
{ \
return __ ## s_name ## _from_attrs(s, info, true); \
} __attribute__((unused)) \
#define __assign(attr_nr, attr_flag, name, nla_type, type, assignment...) \
nla = ntb[attr_nr]; \
if (nla) { \
if (exclude_invariants && ((attr_flag) & DRBD_F_INVARIANT)) { \
pr_info("<< must not change invariant attr: %s\n", #name); \
return -EEXIST; \
} \
assignment; \
} else if (exclude_invariants && ((attr_flag) & DRBD_F_INVARIANT)) { \
/* attribute missing from payload, */ \
/* which was expected */ \
} else if ((attr_flag) & DRBD_F_REQUIRED) { \
pr_info("<< missing attr: %s\n", #name); \
return -ENOMSG; \
}
#undef __field
#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \
__is_signed) \
__assign(attr_nr, attr_flag, name, nla_type, type, \
if (s) \
s->name = __get(nla); \
DPRINT_FIELD("<<", nla_type, name, s, nla))
/* validate_nla() already checked nla_len <= maxlen appropriately. */
#undef __array
#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \
__get, __put, __is_signed) \
__assign(attr_nr, attr_flag, name, nla_type, type, \
if (s) \
s->name ## _len = \
__get(s->name, nla, maxlen); \
DPRINT_ARRAY("<<", nla_type, name, s, nla))
#include GENL_MAGIC_INCLUDE_FILE
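As a usage sketch, a .doit handler can fill one of the structs declared via GENL_struct() straight from the incoming message with the generated helper; disconnect_parms and its force_disconnect member come from the DRBD_NLA_DISCONNECT_PARMS declaration above, while the handler name itself is only illustrative.

/* hedged sketch of a generated *_from_attrs helper in use */
static int drbd_adm_disconnect_sketch(struct sk_buff *skb, struct genl_info *info)
{
	struct disconnect_parms parms = { };

	if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) {
		int err = disconnect_parms_from_attrs(&parms, info);
		if (err)
			return err;
	}
	/* ... act on parms.force_disconnect ... */
	return 0;
}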
#undef GENL_struct
#define GENL_struct(tag_name, tag_number, s_name, s_fields)
/*
* Magic: define op number to op name mapping {{{1
* {{{2
*/
const char *CONCAT_(GENL_MAGIC_FAMILY, _genl_cmd_to_str)(__u8 cmd)
{
switch (cmd) {
#undef GENL_op
#define GENL_op(op_name, op_num, handler, tla_list) \
case op_num: return #op_name;
#include GENL_MAGIC_INCLUDE_FILE
default:
return "unknown";
}
}
#ifdef __KERNEL__
#include <linux/stringify.h>
/*
* Magic: define genl_ops {{{1
* {{{2
*/
#undef GENL_op
#define GENL_op(op_name, op_num, handler, tla_list) \
{ \
handler \
.cmd = op_name, \
.policy = CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy), \
},
#define ZZZ_genl_ops CONCAT_(GENL_MAGIC_FAMILY, _genl_ops)
static struct genl_ops ZZZ_genl_ops[] __read_mostly = {
#include GENL_MAGIC_INCLUDE_FILE
};
#undef GENL_op
#define GENL_op(op_name, op_num, handler, tla_list)
/*
* Define the genl_family, multicast groups, {{{1
* and provide register/unregister functions.
* {{{2
*/
#define ZZZ_genl_family CONCAT_(GENL_MAGIC_FAMILY, _genl_family)
static struct genl_family ZZZ_genl_family __read_mostly = {
.id = GENL_ID_GENERATE,
.name = __stringify(GENL_MAGIC_FAMILY),
.version = GENL_MAGIC_VERSION,
#ifdef GENL_MAGIC_FAMILY_HDRSZ
.hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ),
#endif
.maxattr = ARRAY_SIZE(drbd_tla_nl_policy)-1,
};
/*
* Magic: define multicast groups
* Magic: define multicast group registration helper
*/
#undef GENL_mc_group
#define GENL_mc_group(group) \
static struct genl_multicast_group \
CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group) __read_mostly = { \
.name = #group, \
}; \
static int CONCAT_(GENL_MAGIC_FAMILY, _genl_multicast_ ## group)( \
struct sk_buff *skb, gfp_t flags) \
{ \
unsigned int group_id = \
CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group).id; \
if (!group_id) \
return -EINVAL; \
return genlmsg_multicast(skb, 0, group_id, flags); \
}
#include GENL_MAGIC_INCLUDE_FILE
int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void)
{
int err = genl_register_family_with_ops(&ZZZ_genl_family,
ZZZ_genl_ops, ARRAY_SIZE(ZZZ_genl_ops));
if (err)
return err;
#undef GENL_mc_group
#define GENL_mc_group(group) \
err = genl_register_mc_group(&ZZZ_genl_family, \
&CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group)); \
if (err) \
goto fail; \
else \
pr_info("%s: mcg %s: %u\n", #group, \
__stringify(GENL_MAGIC_FAMILY), \
CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group).id);
#include GENL_MAGIC_INCLUDE_FILE
#undef GENL_mc_group
#define GENL_mc_group(group)
return 0;
fail:
genl_unregister_family(&ZZZ_genl_family);
return err;
}
void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void)
{
genl_unregister_family(&ZZZ_genl_family);
}
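With GENL_MAGIC_FAMILY defined as drbd (see drbd_genl_api.h above), these expand to drbd_genl_register() and drbd_genl_unregister(); module init/exit code would wire them up roughly as sketched below (the function names here are illustrative).

/* hedged sketch: registering the generated family at module load */
static int __init drbd_init_sketch(void)
{
	int err = drbd_genl_register();

	if (err) {
		pr_err("unable to register generic netlink family\n");
		return err;
	}
	return 0;
}

static void __exit drbd_cleanup_sketch(void)
{
	drbd_genl_unregister();
}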
/*
* Magic: provide conversion functions {{{1
* populate skb from struct.
* {{{2
*/
#undef GENL_op
#define GENL_op(op_name, op_num, handler, tla_list)
#undef GENL_struct
#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
static int s_name ## _to_skb(struct sk_buff *skb, struct s_name *s, \
const bool exclude_sensitive) \
{ \
struct nlattr *tla = nla_nest_start(skb, tag_number); \
if (!tla) \
goto nla_put_failure; \
DPRINT_TLA(#s_name, "-=>", #tag_name); \
s_fields \
nla_nest_end(skb, tla); \
return 0; \
\
nla_put_failure: \
if (tla) \
nla_nest_cancel(skb, tla); \
return -EMSGSIZE; \
} \
static inline int s_name ## _to_priv_skb(struct sk_buff *skb, \
struct s_name *s) \
{ \
return s_name ## _to_skb(skb, s, 0); \
} \
static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb, \
struct s_name *s) \
{ \
return s_name ## _to_skb(skb, s, 1); \
}
#undef __field
#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \
__is_signed) \
if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) { \
DPRINT_FIELD(">>", nla_type, name, s, NULL); \
if (__put(skb, attr_nr, s->name)) \
goto nla_put_failure; \
}
#undef __array
#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \
__get, __put, __is_signed) \
if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) { \
DPRINT_ARRAY(">>",nla_type, name, s, NULL); \
if (__put(skb, attr_nr, min_t(int, maxlen, \
s->name ## _len + (nla_type == NLA_NUL_STRING)),\
s->name)) \
goto nla_put_failure; \
}
#include GENL_MAGIC_INCLUDE_FILE
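The *_to_skb helpers are the mirror image: they nest a struct back into an outgoing message. For the timeout_parms struct declared above, a reply path might look like this sketch (reply_skb and the UT_DEFAULT value are assumed from the surrounding drbd code, not defined here).

/* hedged sketch of a generated *_to_skb helper in use */
struct timeout_parms tp = { .timeout_type = UT_DEFAULT };

if (timeout_parms_to_priv_skb(reply_skb, &tp))
	return -EMSGSIZE;	/* reply buffer too small */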
/* Functions for initializing structs to default values. */
#undef __field
#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \
__is_signed)
#undef __array
#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \
__get, __put, __is_signed)
#undef __u32_field_def
#define __u32_field_def(attr_nr, attr_flag, name, default) \
x->name = default;
#undef __s32_field_def
#define __s32_field_def(attr_nr, attr_flag, name, default) \
x->name = default;
#undef __flg_field_def
#define __flg_field_def(attr_nr, attr_flag, name, default) \
x->name = default;
#undef __str_field_def
#define __str_field_def(attr_nr, attr_flag, name, maxlen) \
memset(x->name, 0, sizeof(x->name)); \
x->name ## _len = 0;
#undef GENL_struct
#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
static void set_ ## s_name ## _defaults(struct s_name *x) __attribute__((unused)); \
static void set_ ## s_name ## _defaults(struct s_name *x) { \
s_fields \
}
#include GENL_MAGIC_INCLUDE_FILE
#endif /* __KERNEL__ */
/* }}}1 */
#endif /* GENL_MAGIC_FUNC_H */
/* vim: set foldmethod=marker foldlevel=1 nofoldenable : */


@@ -0,0 +1,277 @@
#ifndef GENL_MAGIC_STRUCT_H
#define GENL_MAGIC_STRUCT_H
#ifndef GENL_MAGIC_FAMILY
# error "you need to define GENL_MAGIC_FAMILY before inclusion"
#endif
#ifndef GENL_MAGIC_VERSION
# error "you need to define GENL_MAGIC_VERSION before inclusion"
#endif
#ifndef GENL_MAGIC_INCLUDE_FILE
# error "you need to define GENL_MAGIC_INCLUDE_FILE before inclusion"
#endif
#include <linux/genetlink.h>
#include <linux/types.h>
#define CONCAT__(a,b) a ## b
#define CONCAT_(a,b) CONCAT__(a,b)
extern int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void);
extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void);
/*
* Extension of genl attribute validation policies {{{2
*/
/*
* @DRBD_GENLA_F_MANDATORY: By default, netlink ignores attributes it does not
* know about. This flag can be set in nlattr->nla_type to indicate that this
* attribute must not be ignored.
*
* We check and remove this flag in drbd_nla_check_mandatory() before
* validating the attribute types and lengths via nla_parse_nested().
*/
#define DRBD_GENLA_F_MANDATORY (1 << 14)
/*
* Flags specific to drbd and not visible at the netlink layer, used in
* <struct>_from_attrs and <struct>_to_skb:
*
* @DRBD_F_REQUIRED: Attribute is required; a request without this attribute is
* invalid.
*
* @DRBD_F_SENSITIVE: Attribute includes sensitive information and must not be
* included in unprivileged get requests or broadcasts.
*
* @DRBD_F_INVARIANT: Attribute is set when an object is initially created, but
* cannot subsequently be changed.
*/
#define DRBD_F_REQUIRED (1 << 0)
#define DRBD_F_SENSITIVE (1 << 1)
#define DRBD_F_INVARIANT (1 << 2)
#define __nla_type(x) ((__u16)((x) & NLA_TYPE_MASK & ~DRBD_GENLA_F_MANDATORY))
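The interplay of these flags is easiest to read off the field declarations in drbd_genl.h; the first two lines below appear there verbatim, the third is an illustrative example of a required, sensitive string field (its attribute number is made up here).

__u32_field(1, DRBD_F_REQUIRED, timeout_type)		/* request invalid without it */
__flg_field(1, DRBD_GENLA_F_MANDATORY, clear_bm)	/* receiver must understand it if present */
__str_field(6, DRBD_F_REQUIRED | DRBD_F_SENSITIVE, shared_secret, SHARED_SECRET_MAX)
							/* never copied into unprivileged replies */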
/* }}}1
* MAGIC
* multi-include macro expansion magic starts here
*/
/* MAGIC helpers {{{2 */
/* possible field types */
#define __flg_field(attr_nr, attr_flag, name) \
__field(attr_nr, attr_flag, name, NLA_U8, char, \
nla_get_u8, nla_put_u8, false)
#define __u8_field(attr_nr, attr_flag, name) \
__field(attr_nr, attr_flag, name, NLA_U8, unsigned char, \
nla_get_u8, nla_put_u8, false)
#define __u16_field(attr_nr, attr_flag, name) \
__field(attr_nr, attr_flag, name, NLA_U16, __u16, \
nla_get_u16, nla_put_u16, false)
#define __u32_field(attr_nr, attr_flag, name) \
__field(attr_nr, attr_flag, name, NLA_U32, __u32, \
nla_get_u32, nla_put_u32, false)
#define __s32_field(attr_nr, attr_flag, name) \
__field(attr_nr, attr_flag, name, NLA_U32, __s32, \
nla_get_u32, nla_put_u32, true)
#define __u64_field(attr_nr, attr_flag, name) \
__field(attr_nr, attr_flag, name, NLA_U64, __u64, \
nla_get_u64, nla_put_u64, false)
#define __str_field(attr_nr, attr_flag, name, maxlen) \
__array(attr_nr, attr_flag, name, NLA_NUL_STRING, char, maxlen, \
nla_strlcpy, nla_put, false)
#define __bin_field(attr_nr, attr_flag, name, maxlen) \
__array(attr_nr, attr_flag, name, NLA_BINARY, char, maxlen, \
nla_memcpy, nla_put, false)
/* fields with default values */
#define __flg_field_def(attr_nr, attr_flag, name, default) \
__flg_field(attr_nr, attr_flag, name)
#define __u32_field_def(attr_nr, attr_flag, name, default) \
__u32_field(attr_nr, attr_flag, name)
#define __s32_field_def(attr_nr, attr_flag, name, default) \
__s32_field(attr_nr, attr_flag, name)
#define __str_field_def(attr_nr, attr_flag, name, maxlen) \
__str_field(attr_nr, attr_flag, name, maxlen)
#define GENL_op_init(args...) args
#define GENL_doit(handler) \
.doit = handler, \
.flags = GENL_ADMIN_PERM,
#define GENL_dumpit(handler) \
.dumpit = handler, \
.flags = GENL_ADMIN_PERM,
/* }}}1
* Magic: define the enum symbols for genl_ops
* Magic: define the enum symbols for top level attributes
* Magic: define the enum symbols for nested attributes
* {{{2
*/
#undef GENL_struct
#define GENL_struct(tag_name, tag_number, s_name, s_fields)
#undef GENL_mc_group
#define GENL_mc_group(group)
#undef GENL_notification
#define GENL_notification(op_name, op_num, mcast_group, tla_list) \
op_name = op_num,
#undef GENL_op
#define GENL_op(op_name, op_num, handler, tla_list) \
op_name = op_num,
enum {
#include GENL_MAGIC_INCLUDE_FILE
};
#undef GENL_notification
#define GENL_notification(op_name, op_num, mcast_group, tla_list)
#undef GENL_op
#define GENL_op(op_name, op_num, handler, attr_list)
#undef GENL_struct
#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
tag_name = tag_number,
enum {
#include GENL_MAGIC_INCLUDE_FILE
};
#undef GENL_struct
#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
enum { \
s_fields \
};
#undef __field
#define __field(attr_nr, attr_flag, name, nla_type, type, \
__get, __put, __is_signed) \
T_ ## name = (__u16)(attr_nr | ((attr_flag) & DRBD_GENLA_F_MANDATORY)),
#undef __array
#define __array(attr_nr, attr_flag, name, nla_type, type, \
maxlen, __get, __put, __is_signed) \
T_ ## name = (__u16)(attr_nr | ((attr_flag) & DRBD_GENLA_F_MANDATORY)),
#include GENL_MAGIC_INCLUDE_FILE
/* }}}1
* Magic: compile time assert unique numbers for operations
* Magic: -"- unique numbers for top level attributes
* Magic: -"- unique numbers for nested attributes
* {{{2
*/
#undef GENL_struct
#define GENL_struct(tag_name, tag_number, s_name, s_fields)
#undef GENL_op
#define GENL_op(op_name, op_num, handler, attr_list) \
case op_name:
#undef GENL_notification
#define GENL_notification(op_name, op_num, mcast_group, tla_list) \
case op_name:
static inline void ct_assert_unique_operations(void)
{
switch (0) {
#include GENL_MAGIC_INCLUDE_FILE
;
}
}
#undef GENL_op
#define GENL_op(op_name, op_num, handler, attr_list)
#undef GENL_notification
#define GENL_notification(op_name, op_num, mcast_group, tla_list)
#undef GENL_struct
#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
case tag_number:
static inline void ct_assert_unique_top_level_attributes(void)
{
switch (0) {
#include GENL_MAGIC_INCLUDE_FILE
;
}
}
#undef GENL_struct
#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
static inline void ct_assert_unique_ ## s_name ## _attributes(void) \
{ \
switch (0) { \
s_fields \
; \
} \
}
#undef __field
#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \
__is_signed) \
case attr_nr:
#undef __array
#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \
__get, __put, __is_signed) \
case attr_nr:
#include GENL_MAGIC_INCLUDE_FILE
/* }}}1
* Magic: declare structs
* struct <name> {
* fields
* };
* {{{2
*/
#undef GENL_struct
#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
struct s_name { s_fields };
#undef __field
#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \
__is_signed) \
type name;
#undef __array
#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \
__get, __put, __is_signed) \
type name[maxlen]; \
__u32 name ## _len;
#include GENL_MAGIC_INCLUDE_FILE
#undef GENL_struct
#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
enum { \
s_fields \
};
#undef __field
#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \
is_signed) \
F_ ## name ## _IS_SIGNED = is_signed,
#undef __array
#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \
__get, __put, is_signed) \
F_ ## name ## _IS_SIGNED = is_signed,
#include GENL_MAGIC_INCLUDE_FILE
/* }}}1 */
#endif /* GENL_MAGIC_STRUCT_H */
/* vim: set foldmethod=marker nofoldenable : */


@@ -152,4 +152,15 @@ void ida_simple_remove(struct ida *ida, unsigned int id);
void __init idr_init_cache(void);
/**
* idr_for_each_entry - iterate over an idr's elements of a given type
* @idp: idr handle
* @entry: the type * to use as cursor
* @id: id entry's key
*/
#define idr_for_each_entry(idp, entry, id) \
for (id = 0, entry = (typeof(entry))idr_get_next((idp), &(id)); \
entry != NULL; \
++id, entry = (typeof(entry))idr_get_next((idp), &(id)))
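A caller walks a populated idr by element type; the sketch below assumes DRBD's per-connection idr of volumes (tconn->volumes holding struct drbd_conf pointers), which is how this iterator is used by the drbd code in this series.

/* hedged sketch: visit every volume of a connection */
struct drbd_conf *mdev;
int vnr;

idr_for_each_entry(&tconn->volumes, mdev, vnr)
	drbd_md_sync(mdev);	/* any per-volume operation */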
#endif /* __IDR_H__ */


@@ -166,9 +166,11 @@ struct lc_element {
/* if we want to track a larger set of objects,
* it needs to become an arch-independent u64 */
unsigned lc_number;
/* special label when on free list */
#define LC_FREE (~0U)
/* for pending changes */
unsigned lc_new_number;
};
struct lru_cache {
@@ -176,6 +178,7 @@ struct lru_cache {
struct list_head lru;
struct list_head free;
struct list_head in_use;
struct list_head to_be_changed;
/* the pre-created kmem cache to allocate the objects from */
struct kmem_cache *lc_cache;
@@ -186,7 +189,7 @@ struct lru_cache {
size_t element_off;
/* number of elements (indices) */
unsigned int nr_elements;
unsigned int nr_elements;
/* Arbitrary limit on maximum tracked objects. Practical limit is much
* lower due to allocation failures, probably. For typical use cases,
* nr_elements should be a few thousand at most.
@@ -194,18 +197,19 @@
* 8 high bits of .lc_index to be overloaded with flags in the future. */
#define LC_MAX_ACTIVE (1<<24)
/* allow to accumulate a few (index:label) changes,
* but no more than max_pending_changes */
unsigned int max_pending_changes;
/* number of elements currently on to_be_changed list */
unsigned int pending_changes;
/* statistics */
unsigned used; /* number of lelements currently on in_use list */
unsigned long hits, misses, starving, dirty, changed;
unsigned used; /* number of elements currently on in_use list */
unsigned long hits, misses, starving, locked, changed;
/* see below: flag-bits for lru_cache */
unsigned long flags;
/* when changing the label of an index element */
unsigned int new_number;
/* for paranoia when changing the label of an index element */
struct lc_element *changing_element;
void *lc_private;
const char *name;
@@ -221,10 +225,15 @@ enum {
/* debugging aid, to catch concurrent access early.
* user needs to guarantee exclusive access by proper locking! */
__LC_PARANOIA,
/* if we need to change the set, but currently there is a changing
* transaction pending, we are "dirty", and must deferr further
* changing requests */
/* annotate that the set is "dirty", possibly accumulating further
* changes, until a transaction is finally triggered */
__LC_DIRTY,
/* Locked, no further changes allowed.
* Also used to serialize changing transactions. */
__LC_LOCKED,
/* if we need to change the set, but currently there is no free nor
* unused element available, we are "starving", and must not give out
* further references, to guarantee that eventually some refcnt will
@@ -236,9 +245,11 @@
};
#define LC_PARANOIA (1<<__LC_PARANOIA)
#define LC_DIRTY (1<<__LC_DIRTY)
#define LC_LOCKED (1<<__LC_LOCKED)
#define LC_STARVING (1<<__LC_STARVING)
extern struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
unsigned max_pending_changes,
unsigned e_count, size_t e_size, size_t e_off);
extern void lc_reset(struct lru_cache *lc);
extern void lc_destroy(struct lru_cache *lc);
@@ -249,7 +260,7 @@ extern struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr);
extern struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr);
extern struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr);
extern unsigned int lc_put(struct lru_cache *lc, struct lc_element *e);
extern void lc_changed(struct lru_cache *lc, struct lc_element *e);
extern void lc_committed(struct lru_cache *lc);
struct seq_file;
extern size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc);
@@ -258,16 +269,28 @@ extern void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char
void (*detail) (struct seq_file *, struct lc_element *));
/**
* lc_try_lock - can be used to stop lc_get() from changing the tracked set
* lc_try_lock_for_transaction - can be used to stop lc_get() from changing the tracked set
* @lc: the lru cache to operate on
*
* Allows (expects) the set to be "dirty". Note that the reference counts and
* order on the active and lru lists may still change. Used to serialize
changing transactions. Returns true if we acquired the lock.
*/
static inline int lc_try_lock_for_transaction(struct lru_cache *lc)
{
return !test_and_set_bit(__LC_LOCKED, &lc->flags);
}
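The intended pattern, as a short sketch: once lc_get() has queued elements on the to_be_changed list, the writer serializes on this lock, records the accumulated changes, then commits and unlocks.

/* hedged sketch of the transaction side */
if (lc_try_lock_for_transaction(lc)) {
	/* write an on-disk transaction describing lc->to_be_changed ... */
	lc_committed(lc);
	lc_unlock(lc);
}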
/**
* lc_try_lock - variant to stop lc_get() from changing the tracked set
* @lc: the lru cache to operate on
*
* Note that the reference counts and order on the active and lru lists may
* still change. Returns true if we acquired the lock.
still change. Only works on a "clean" set. Returns true if we acquired the
* lock, which means there are no pending changes, and any further attempt to
* change the set will not succeed until the next lc_unlock().
*/
static inline int lc_try_lock(struct lru_cache *lc)
{
return !test_and_set_bit(__LC_DIRTY, &lc->flags);
}
extern int lc_try_lock(struct lru_cache *lc);
/**
* lc_unlock - unlock @lc, allow lc_get() to change the set again
@@ -276,14 +299,10 @@ static inline int lc_try_lock(struct lru_cache *lc)
static inline void lc_unlock(struct lru_cache *lc)
{
clear_bit(__LC_DIRTY, &lc->flags);
smp_mb__after_clear_bit();
clear_bit_unlock(__LC_LOCKED, &lc->flags);
}
static inline int lc_is_used(struct lru_cache *lc, unsigned int enr)
{
struct lc_element *e = lc_find(lc, enr);
return e && e->refcnt;
}
extern bool lc_is_used(struct lru_cache *lc, unsigned int enr);
#define lc_entry(ptr, type, member) \
container_of(ptr, type, member)


@@ -44,8 +44,8 @@ MODULE_LICENSE("GPL");
} while (0)
#define RETURN(x...) do { \
clear_bit(__LC_PARANOIA, &lc->flags); \
smp_mb__after_clear_bit(); return x ; } while (0)
clear_bit_unlock(__LC_PARANOIA, &lc->flags); \
return x ; } while (0)
/* BUG() if e is not one of the elements tracked by lc */
#define PARANOIA_LC_ELEMENT(lc, e) do { \
@@ -55,9 +55,40 @@ MODULE_LICENSE("GPL");
BUG_ON(i >= lc_->nr_elements); \
BUG_ON(lc_->lc_element[i] != e_); } while (0)
/* We need to atomically
* - try to grab the lock (set LC_LOCKED)
* - only if there is no pending transaction
* (neither LC_DIRTY nor LC_STARVING is set)
* Because of PARANOIA_ENTRY() above abusing lc->flags as well,
* it is not sufficient to just say
* return 0 == cmpxchg(&lc->flags, 0, LC_LOCKED);
*/
int lc_try_lock(struct lru_cache *lc)
{
unsigned long val;
do {
val = cmpxchg(&lc->flags, 0, LC_LOCKED);
} while (unlikely (val == LC_PARANOIA));
/* Spin until no-one is inside a PARANOIA_ENTRY()/RETURN() section. */
return 0 == val;
#if 0
/* Alternative approach, spin in case someone enters or leaves a
* PARANOIA_ENTRY()/RETURN() section. */
unsigned long old, new, val;
do {
old = lc->flags & LC_PARANOIA;
new = old | LC_LOCKED;
val = cmpxchg(&lc->flags, old, new);
} while (unlikely (val == (old ^ LC_PARANOIA)));
return old == val;
#endif
}
/**
* lc_create - prepares to track objects in an active set
* @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details
* @max_pending_changes: maximum changes to accumulate until a transaction is required
* @e_count: number of elements allowed to be active simultaneously
* @e_size: size of the tracked objects
* @e_off: offset to the &struct lc_element member in a tracked object
@@ -66,6 +97,7 @@ MODULE_LICENSE("GPL");
* or NULL on (allocation) failure.
*/
struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
unsigned max_pending_changes,
unsigned e_count, size_t e_size, size_t e_off)
{
struct hlist_head *slot = NULL;
@@ -98,12 +130,13 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
INIT_LIST_HEAD(&lc->in_use);
INIT_LIST_HEAD(&lc->lru);
INIT_LIST_HEAD(&lc->free);
INIT_LIST_HEAD(&lc->to_be_changed);
lc->name = name;
lc->element_size = e_size;
lc->element_off = e_off;
lc->nr_elements = e_count;
lc->new_number = LC_FREE;
lc->max_pending_changes = max_pending_changes;
lc->lc_cache = cache;
lc->lc_element = element;
lc->lc_slot = slot;
@@ -117,6 +150,7 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
e = p + e_off;
e->lc_index = i;
e->lc_number = LC_FREE;
e->lc_new_number = LC_FREE;
list_add(&e->list, &lc->free);
element[i] = e;
}
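For illustration, a caller sets up such a cache as sketched below; struct my_extent, my_cache and the numbers are made up, only the argument order matches the lc_create() signature above.

/* hedged sketch: an object type embedding the lc_element member */
struct my_extent {
	struct lc_element lce;
	/* payload ... */
};

struct lru_cache *lc =
	lc_create("act_log", my_cache,
		  64,	/* max_pending_changes */
		  127,	/* e_count */
		  sizeof(struct my_extent),
		  offsetof(struct my_extent, lce));
if (!lc)
	return -ENOMEM;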
@@ -175,15 +209,15 @@ void lc_reset(struct lru_cache *lc)
INIT_LIST_HEAD(&lc->in_use);
INIT_LIST_HEAD(&lc->lru);
INIT_LIST_HEAD(&lc->free);
INIT_LIST_HEAD(&lc->to_be_changed);
lc->used = 0;
lc->hits = 0;
lc->misses = 0;
lc->starving = 0;
lc->dirty = 0;
lc->locked = 0;
lc->changed = 0;
lc->pending_changes = 0;
lc->flags = 0;
lc->changing_element = NULL;
lc->new_number = LC_FREE;
memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements);
for (i = 0; i < lc->nr_elements; i++) {
@@ -194,6 +228,7 @@ void lc_reset(struct lru_cache *lc)
/* re-init it */
e->lc_index = i;
e->lc_number = LC_FREE;
e->lc_new_number = LC_FREE;
list_add(&e->list, &lc->free);
}
}
@@ -208,14 +243,14 @@ size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc)
/* NOTE:
* total calls to lc_get are
* (starving + hits + misses)
* misses include "dirty" count (update from an other thread in
* misses include "locked" count (update from an other thread in
* progress) and "changed", when this in fact lead to an successful
* update of the cache.
*/
return seq_printf(seq, "\t%s: used:%u/%u "
"hits:%lu misses:%lu starving:%lu dirty:%lu changed:%lu\n",
"hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n",
lc->name, lc->used, lc->nr_elements,
lc->hits, lc->misses, lc->starving, lc->dirty, lc->changed);
lc->hits, lc->misses, lc->starving, lc->locked, lc->changed);
}
static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
@@ -224,6 +259,27 @@ static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
}
static struct lc_element *__lc_find(struct lru_cache *lc, unsigned int enr,
bool include_changing)
{
struct hlist_node *n;
struct lc_element *e;
BUG_ON(!lc);
BUG_ON(!lc->nr_elements);
hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) {
/* "about to be changed" elements, pending transaction commit,
* are hashed by their "new number". "Normal" elements have
* lc_number == lc_new_number. */
if (e->lc_new_number != enr)
continue;
if (e->lc_new_number == e->lc_number || include_changing)
return e;
break;
}
return NULL;
}
/**
* lc_find - find element by label, if present in the hash table
* @lc: The lru_cache object
@@ -232,38 +288,28 @@ static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
* Returns the pointer to an element, if the element with the requested
* "label" or element number is present in the hash table,
* or NULL if not found. Does not change the refcnt.
* Ignores elements that are "about to be used", i.e. not yet in the active
* set, but still pending transaction commit.
*/
struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr)
{
struct hlist_node *n;
struct lc_element *e;
BUG_ON(!lc);
BUG_ON(!lc->nr_elements);
hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) {
if (e->lc_number == enr)
return e;
}
return NULL;
return __lc_find(lc, enr, 0);
}
/* returned element will be "recycled" immediately */
static struct lc_element *lc_evict(struct lru_cache *lc)
/**
* lc_is_used - find element by label
* @lc: The lru_cache object
* @enr: element number
*
* Returns true, if the element with the requested "label" or element number is
* present in the hash table, and is used (refcnt > 0).
* Also finds elements that are not _currently_ used but only "about to be
* used", i.e. on the "to_be_changed" list, pending transaction commit.
*/
bool lc_is_used(struct lru_cache *lc, unsigned int enr)
{
struct list_head *n;
struct lc_element *e;
if (list_empty(&lc->lru))
return NULL;
n = lc->lru.prev;
e = list_entry(n, struct lc_element, list);
PARANOIA_LC_ELEMENT(lc, e);
list_del(&e->list);
hlist_del(&e->colision);
return e;
struct lc_element *e = __lc_find(lc, enr, 1);
return e && e->refcnt;
}
/**
@@ -280,22 +326,34 @@ void lc_del(struct lru_cache *lc, struct lc_element *e)
PARANOIA_LC_ELEMENT(lc, e);
BUG_ON(e->refcnt);
e->lc_number = LC_FREE;
e->lc_number = e->lc_new_number = LC_FREE;
hlist_del_init(&e->colision);
list_move(&e->list, &lc->free);
RETURN();
}
static struct lc_element *lc_get_unused_element(struct lru_cache *lc)
static struct lc_element *lc_prepare_for_change(struct lru_cache *lc, unsigned new_number)
{
struct list_head *n;
struct lc_element *e;
if (list_empty(&lc->free))
return lc_evict(lc);
if (!list_empty(&lc->free))
n = lc->free.next;
else if (!list_empty(&lc->lru))
n = lc->lru.prev;
else
return NULL;
n = lc->free.next;
list_del(n);
return list_entry(n, struct lc_element, list);
e = list_entry(n, struct lc_element, list);
PARANOIA_LC_ELEMENT(lc, e);
e->lc_new_number = new_number;
if (!hlist_unhashed(&e->colision))
__hlist_del(&e->colision);
hlist_add_head(&e->colision, lc_hash_slot(lc, new_number));
list_move(&e->list, &lc->to_be_changed);
return e;
}
static int lc_unused_element_available(struct lru_cache *lc)
@@ -308,6 +366,75 @@ static int lc_unused_element_available(struct lru_cache *lc)
return 0;
}
static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool may_change)
{
struct lc_element *e;
PARANOIA_ENTRY();
if (lc->flags & LC_STARVING) {
++lc->starving;
RETURN(NULL);
}
e = __lc_find(lc, enr, 1);
/* if lc_new_number != lc_number,
* this enr is currently being pulled in already,
* and will be available once the pending transaction
* has been committed. */
if (e && e->lc_new_number == e->lc_number) {
++lc->hits;
if (e->refcnt++ == 0)
lc->used++;
list_move(&e->list, &lc->in_use); /* Not evictable... */
RETURN(e);
}
++lc->misses;
if (!may_change)
RETURN(NULL);
/* It has been found above, but on the "to_be_changed" list, not yet
* committed. Don't pull it in twice, wait for the transaction, then
* try again */
if (e)
RETURN(NULL);
/* To avoid races with lc_try_lock(), first, mark us dirty
* (using test_and_set_bit, as it implies memory barriers), ... */
test_and_set_bit(__LC_DIRTY, &lc->flags);
/* ... only then check if it is locked anyways. If lc_unlock clears
* the dirty bit again, that's not a problem, we will come here again.
*/
if (test_bit(__LC_LOCKED, &lc->flags)) {
++lc->locked;
RETURN(NULL);
}
/* In case there is nothing available and we can not kick out
* the LRU element, we have to wait ...
*/
if (!lc_unused_element_available(lc)) {
__set_bit(__LC_STARVING, &lc->flags);
RETURN(NULL);
}
/* It was not present in the active set. We are going to recycle an
* unused (or even "free") element, but we won't accumulate more than
* max_pending_changes changes. */
if (lc->pending_changes >= lc->max_pending_changes)
RETURN(NULL);
e = lc_prepare_for_change(lc, enr);
BUG_ON(!e);
clear_bit(__LC_STARVING, &lc->flags);
BUG_ON(++e->refcnt != 1);
lc->used++;
lc->pending_changes++;
RETURN(e);
}
/**
* lc_get - get element by label, maybe change the active set
@@ -336,110 +463,65 @@ static int lc_unused_element_available(struct lru_cache *lc)
* pointer to an UNUSED element with some different element number,
* where that different number may also be %LC_FREE.
*
* In this case, the cache is marked %LC_DIRTY (blocking further changes),
* and the returned element pointer is removed from the lru list and
* hash collision chains. The user now should do whatever housekeeping
* is necessary.
* Then he must call lc_changed(lc,element_pointer), to finish
* the change.
* In this case, the cache is marked %LC_DIRTY,
* so lc_try_lock() will no longer succeed.
* The returned element pointer is moved to the "to_be_changed" list,
* and registered with the new element number on the hash collision chains,
* so it is possible to pick it up from lc_is_used().
* Up to "max_pending_changes" (see lc_create()) can be accumulated.
* The user now should do whatever housekeeping is necessary,
* typically serialize on lc_try_lock_for_transaction(), then call
* lc_committed(lc) and lc_unlock(), to finish the change.
*
* NOTE: The user needs to check the lc_number on EACH use, so he recognizes
* any cache set change.
*/
struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr)
{
struct lc_element *e;
PARANOIA_ENTRY();
if (lc->flags & LC_STARVING) {
++lc->starving;
RETURN(NULL);
}
e = lc_find(lc, enr);
if (e) {
++lc->hits;
if (e->refcnt++ == 0)
lc->used++;
list_move(&e->list, &lc->in_use); /* Not evictable... */
RETURN(e);
}
++lc->misses;
/* In case there is nothing available and we can not kick out
* the LRU element, we have to wait ...
*/
if (!lc_unused_element_available(lc)) {
__set_bit(__LC_STARVING, &lc->flags);
RETURN(NULL);
}
/* it was not present in the active set.
* we are going to recycle an unused (or even "free") element.
* user may need to commit a transaction to record that change.
* we serialize on flags & TF_DIRTY */
if (test_and_set_bit(__LC_DIRTY, &lc->flags)) {
++lc->dirty;
RETURN(NULL);
}
e = lc_get_unused_element(lc);
BUG_ON(!e);
clear_bit(__LC_STARVING, &lc->flags);
BUG_ON(++e->refcnt != 1);
lc->used++;
lc->changing_element = e;
lc->new_number = enr;
RETURN(e);
}
/* similar to lc_get,
* but only gets a new reference on an existing element.
* you either get the requested element, or NULL.
* will be consolidated into one function.
*/
struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr)
{
struct lc_element *e;
PARANOIA_ENTRY();
if (lc->flags & LC_STARVING) {
++lc->starving;
RETURN(NULL);
}
e = lc_find(lc, enr);
if (e) {
++lc->hits;
if (e->refcnt++ == 0)
lc->used++;
list_move(&e->list, &lc->in_use); /* Not evictable... */
}
RETURN(e);
return __lc_get(lc, enr, 1);
}
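Putting the pieces together, the caller side of a label change looks roughly like the sketch below; lc, enr and the transaction write-out are assumed from context, and real code would sleep instead of spinning on the transaction lock.

/* hedged sketch of the lc_get() side of a change */
struct lc_element *e = lc_get(lc, enr);

if (!e)
	return -EBUSY;	/* starving, locked, or too many pending changes */
if (e->lc_number != enr) {
	/* label change pending: serialize, record it, commit */
	while (!lc_try_lock_for_transaction(lc))
		cpu_relax();	/* real code sleeps until the lock is free */
	/* ... write out lc->to_be_changed here ... */
	lc_committed(lc);
	lc_unlock(lc);
}
/* ... use the element, then drop the reference again */
lc_put(lc, e);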
/**
* lc_changed - tell @lc that the change has been recorded
* lc_try_get - get element by label, if present; do not change the active set
* @lc: the lru cache to operate on
* @e: the element pending label change
* @enr: the label to look up
*
* Finds an element in the cache, increases its usage count,
* "touches" and returns it.
*
* Return values:
* NULL
* The cache was marked %LC_STARVING,
* or the requested label was not in the active set
*
* pointer to the element with the REQUESTED element number.
* In this case, it can be used right away
*/
void lc_changed(struct lru_cache *lc, struct lc_element *e)
struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr)
{
return __lc_get(lc, enr, 0);
}
/**
* lc_committed - tell @lc that pending changes have been recorded
* @lc: the lru cache to operate on
*
* User is expected to serialize on explicit lc_try_lock_for_transaction()
* before the transaction is started, and later needs to lc_unlock() explicitly
* as well.
*/
void lc_committed(struct lru_cache *lc)
{
struct lc_element *e, *tmp;
PARANOIA_ENTRY();
BUG_ON(e != lc->changing_element);
PARANOIA_LC_ELEMENT(lc, e);
++lc->changed;
e->lc_number = lc->new_number;
list_add(&e->list, &lc->in_use);
hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number));
lc->changing_element = NULL;
lc->new_number = LC_FREE;
clear_bit(__LC_DIRTY, &lc->flags);
smp_mb__after_clear_bit();
list_for_each_entry_safe(e, tmp, &lc->to_be_changed, list) {
/* count number of changes, not number of transactions */
++lc->changed;
e->lc_number = e->lc_new_number;
list_move(&e->list, &lc->in_use);
}
lc->pending_changes = 0;
RETURN();
}
@@ -458,13 +540,12 @@ unsigned int lc_put(struct lru_cache *lc, struct lc_element *e)
PARANOIA_ENTRY();
PARANOIA_LC_ELEMENT(lc, e);
BUG_ON(e->refcnt == 0);
BUG_ON(e == lc->changing_element);
BUG_ON(e->lc_number != e->lc_new_number);
if (--e->refcnt == 0) {
/* move it to the front of LRU. */
list_move(&e->list, &lc->lru);
lc->used--;
clear_bit(__LC_STARVING, &lc->flags);
smp_mb__after_clear_bit();
clear_bit_unlock(__LC_STARVING, &lc->flags);
}
RETURN(e->refcnt);
}
@@ -504,16 +585,24 @@ unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e)
void lc_set(struct lru_cache *lc, unsigned int enr, int index)
{
struct lc_element *e;
struct list_head *lh;
if (index < 0 || index >= lc->nr_elements)
return;
e = lc_element_by_index(lc, index);
e->lc_number = enr;
BUG_ON(e->lc_number != e->lc_new_number);
BUG_ON(e->refcnt != 0);
e->lc_number = e->lc_new_number = enr;
hlist_del_init(&e->colision);
hlist_add_head(&e->colision, lc_hash_slot(lc, enr));
list_move(&e->list, e->refcnt ? &lc->in_use : &lc->lru);
if (enr == LC_FREE)
lh = &lc->free;
else {
hlist_add_head(&e->colision, lc_hash_slot(lc, enr));
lh = &lc->lru;
}
list_move(&e->list, lh);
}
/**
@@ -553,8 +642,10 @@ EXPORT_SYMBOL(lc_try_get);
EXPORT_SYMBOL(lc_find);
EXPORT_SYMBOL(lc_get);
EXPORT_SYMBOL(lc_put);
EXPORT_SYMBOL(lc_changed);
EXPORT_SYMBOL(lc_committed);
EXPORT_SYMBOL(lc_element_by_index);
EXPORT_SYMBOL(lc_index_of);
EXPORT_SYMBOL(lc_seq_printf_stats);
EXPORT_SYMBOL(lc_seq_dump_details);
EXPORT_SYMBOL(lc_try_lock);
EXPORT_SYMBOL(lc_is_used);