direct-io: inline the complete submission path
Add inline to all the submission path functions. While this increases code size, it also gives gcc a lot of optimization opportunities in this critical hot path. In particular -- together with some other changes -- this allows gcc to get rid of the unnecessary clearing of sdio at the beginning and to optimize the messy parameter passing. Failing to inline any function that takes an sdio parameter would break these optimizations, because they cannot be done once the address of the structure is taken. Note that the benefits are only seen with CONFIG_OPTIMIZE_INLINING and CONFIG_CC_OPTIMIZE_FOR_SIZE both set to off. This gives about a 2.2% improvement on a large database benchmark with a high IOPS rate. Signed-off-by: Andi Kleen <ak@linux.intel.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
This commit is contained in:
Родитель
18772641db
Коммит
ba253fbf6d
|
@ -199,7 +199,7 @@ static inline unsigned dio_pages_present(struct dio_submit *sdio)
|
||||||
/*
|
/*
|
||||||
* Go grab and pin some userspace pages. Typically we'll get 64 at a time.
|
* Go grab and pin some userspace pages. Typically we'll get 64 at a time.
|
||||||
*/
|
*/
|
||||||
static int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
|
static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
int nr_pages;
|
int nr_pages;
|
||||||
|
@ -245,7 +245,8 @@ out:
|
||||||
* decent number of pages, less frequently. To provide nicer use of the
|
* decent number of pages, less frequently. To provide nicer use of the
|
||||||
* L1 cache.
|
* L1 cache.
|
||||||
*/
|
*/
|
||||||
static struct page *dio_get_page(struct dio *dio, struct dio_submit *sdio)
|
static inline struct page *dio_get_page(struct dio *dio,
|
||||||
|
struct dio_submit *sdio)
|
||||||
{
|
{
|
||||||
if (dio_pages_present(sdio) == 0) {
|
if (dio_pages_present(sdio) == 0) {
|
||||||
int ret;
|
int ret;
|
||||||
|
@ -376,7 +377,7 @@ void dio_end_io(struct bio *bio, int error)
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(dio_end_io);
|
EXPORT_SYMBOL_GPL(dio_end_io);
|
||||||
|
|
||||||
static void
|
static inline void
|
||||||
dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
|
dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
|
||||||
struct block_device *bdev,
|
struct block_device *bdev,
|
||||||
sector_t first_sector, int nr_vecs)
|
sector_t first_sector, int nr_vecs)
|
||||||
|
@ -407,7 +408,7 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
|
||||||
*
|
*
|
||||||
* bios hold a dio reference between submit_bio and ->end_io.
|
* bios hold a dio reference between submit_bio and ->end_io.
|
||||||
*/
|
*/
|
||||||
static void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
|
static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
|
||||||
{
|
{
|
||||||
struct bio *bio = sdio->bio;
|
struct bio *bio = sdio->bio;
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
|
@ -435,7 +436,7 @@ static void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
|
||||||
/*
|
/*
|
||||||
* Release any resources in case of a failure
|
* Release any resources in case of a failure
|
||||||
*/
|
*/
|
||||||
static void dio_cleanup(struct dio *dio, struct dio_submit *sdio)
|
static inline void dio_cleanup(struct dio *dio, struct dio_submit *sdio)
|
||||||
{
|
{
|
||||||
while (dio_pages_present(sdio))
|
while (dio_pages_present(sdio))
|
||||||
page_cache_release(dio_get_page(dio, sdio));
|
page_cache_release(dio_get_page(dio, sdio));
|
||||||
|
@ -528,7 +529,7 @@ static void dio_await_completion(struct dio *dio)
|
||||||
*
|
*
|
||||||
* This also helps to limit the peak amount of pinned userspace memory.
|
* This also helps to limit the peak amount of pinned userspace memory.
|
||||||
*/
|
*/
|
||||||
static int dio_bio_reap(struct dio *dio, struct dio_submit *sdio)
|
static inline int dio_bio_reap(struct dio *dio, struct dio_submit *sdio)
|
||||||
{
|
{
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
|
@ -631,8 +632,8 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
|
||||||
/*
|
/*
|
||||||
* There is no bio. Make one now.
|
* There is no bio. Make one now.
|
||||||
*/
|
*/
|
||||||
static int dio_new_bio(struct dio *dio, struct dio_submit *sdio,
|
static inline int dio_new_bio(struct dio *dio, struct dio_submit *sdio,
|
||||||
sector_t start_sector, struct buffer_head *map_bh)
|
sector_t start_sector, struct buffer_head *map_bh)
|
||||||
{
|
{
|
||||||
sector_t sector;
|
sector_t sector;
|
||||||
int ret, nr_pages;
|
int ret, nr_pages;
|
||||||
|
@ -657,7 +658,7 @@ out:
|
||||||
*
|
*
|
||||||
* Return zero on success. Non-zero means the caller needs to start a new BIO.
|
* Return zero on success. Non-zero means the caller needs to start a new BIO.
|
||||||
*/
|
*/
|
||||||
static int dio_bio_add_page(struct dio_submit *sdio)
|
static inline int dio_bio_add_page(struct dio_submit *sdio)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
|
@ -689,8 +690,8 @@ static int dio_bio_add_page(struct dio_submit *sdio)
|
||||||
* The caller of this function is responsible for removing cur_page from the
|
* The caller of this function is responsible for removing cur_page from the
|
||||||
* dio, and for dropping the refcount which came from that presence.
|
* dio, and for dropping the refcount which came from that presence.
|
||||||
*/
|
*/
|
||||||
static int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio,
|
static inline int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio,
|
||||||
struct buffer_head *map_bh)
|
struct buffer_head *map_bh)
|
||||||
{
|
{
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
|
@ -759,7 +760,7 @@ out:
|
||||||
* If that doesn't work out then we put the old page into the bio and add this
|
* If that doesn't work out then we put the old page into the bio and add this
|
||||||
* page to the dio instead.
|
* page to the dio instead.
|
||||||
*/
|
*/
|
||||||
static int
|
static inline int
|
||||||
submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
|
submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
|
||||||
unsigned offset, unsigned len, sector_t blocknr,
|
unsigned offset, unsigned len, sector_t blocknr,
|
||||||
struct buffer_head *map_bh)
|
struct buffer_head *map_bh)
|
||||||
|
@ -842,8 +843,8 @@ static void clean_blockdev_aliases(struct dio *dio, struct buffer_head *map_bh)
|
||||||
* `end' is zero if we're doing the start of the IO, 1 at the end of the
|
* `end' is zero if we're doing the start of the IO, 1 at the end of the
|
||||||
* IO.
|
* IO.
|
||||||
*/
|
*/
|
||||||
static void dio_zero_block(struct dio *dio, struct dio_submit *sdio, int end,
|
static inline void dio_zero_block(struct dio *dio, struct dio_submit *sdio,
|
||||||
struct buffer_head *map_bh)
|
int end, struct buffer_head *map_bh)
|
||||||
{
|
{
|
||||||
unsigned dio_blocks_per_fs_block;
|
unsigned dio_blocks_per_fs_block;
|
||||||
unsigned this_chunk_blocks; /* In dio_blocks */
|
unsigned this_chunk_blocks; /* In dio_blocks */
|
||||||
|
@ -1042,7 +1043,7 @@ out:
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static ssize_t
|
static inline ssize_t
|
||||||
direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
|
direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
|
||||||
const struct iovec *iov, loff_t offset, unsigned long nr_segs,
|
const struct iovec *iov, loff_t offset, unsigned long nr_segs,
|
||||||
unsigned blkbits, get_block_t get_block, dio_iodone_t end_io,
|
unsigned blkbits, get_block_t get_block, dio_iodone_t end_io,
|
||||||
|
@ -1216,6 +1217,11 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
|
||||||
* expected that filesystem provide exclusion between new direct I/O
|
* expected that filesystem provide exclusion between new direct I/O
|
||||||
* and truncates. For DIO_LOCKING filesystems this is done by i_mutex,
|
* and truncates. For DIO_LOCKING filesystems this is done by i_mutex,
|
||||||
* but other filesystems need to take care of this on their own.
|
* but other filesystems need to take care of this on their own.
|
||||||
|
*
|
||||||
|
* NOTE: if you pass "sdio" to anything by pointer make sure that function
|
||||||
|
* is always inlined. Otherwise gcc is unable to split the structure into
|
||||||
|
* individual fields and will generate much worse code. This is important
|
||||||
|
* for the whole file.
|
||||||
*/
|
*/
|
||||||
ssize_t
|
ssize_t
|
||||||
__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
|
__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
|
||||||
|
|
Загрузка…
Ссылка в новой задаче