From e65f30e0cb29694c4241bd9c96ea9413938fcec5 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Thu, 4 Feb 2016 10:24:52 +0100 Subject: [PATCH 01/52] s390: hypfs: Move diag implementation and data definitions Diag 204 data and function definitions currently live in the hypfs files. As KVM will be a consumer of this data, we need to make it publicly available and move it to the appropriate diag.{c,h} files. __attribute__ ((packed)) occurrences were replaced with __packed for all moved structs. Signed-off-by: Janosch Frank Reviewed-by: David Hildenbrand Acked-by: Michael Holzheu Signed-off-by: Christian Borntraeger --- arch/s390/hypfs/hypfs_diag.c | 361 +++++++++++------------------------ arch/s390/include/asm/diag.h | 127 ++++++++++++ arch/s390/kernel/diag.c | 22 +++ 3 files changed, 256 insertions(+), 254 deletions(-) diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index 045035796ca7..1e28414d7275 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -19,29 +19,10 @@ #include #include "hypfs.h" -#define LPAR_NAME_LEN 8 /* lpar name len in diag 204 data */ -#define CPU_NAME_LEN 16 /* type name len of cpus in diag224 name table */ #define TMP_SIZE 64 /* size of temporary buffers */ #define DBFS_D204_HDR_VERSION 0 -/* diag 204 subcodes */ -enum diag204_sc { - SUBC_STIB4 = 4, - SUBC_RSI = 5, - SUBC_STIB6 = 6, - SUBC_STIB7 = 7 -}; - -/* The two available diag 204 data formats */ -enum diag204_format { - INFO_SIMPLE = 0, - INFO_EXT = 0x00010000 -}; - -/* bit is set in flags, when physical cpu info is included in diag 204 data */ -#define LPAR_PHYS_FLG 0x80 - static char *diag224_cpu_names; /* diag 224 name table */ static enum diag204_sc diag204_store_sc; /* used subcode for store */ static enum diag204_format diag204_info_type; /* used diag 204 data format */ @@ -53,7 +34,7 @@ static int diag204_buf_pages; /* number of pages for diag204 data */ static struct dentry *dbfs_d204_file; /* - * DIAG 204 data structures and 
member access functions. + * DIAG 204 member access functions. * * Since we have two different diag 204 data formats for old and new s390 * machines, we do not access the structs directly, but use getter functions for @@ -62,302 +43,173 @@ static struct dentry *dbfs_d204_file; /* Time information block */ -struct info_blk_hdr { - __u8 npar; - __u8 flags; - __u16 tslice; - __u16 phys_cpus; - __u16 this_part; - __u64 curtod; -} __attribute__ ((packed)); - -struct x_info_blk_hdr { - __u8 npar; - __u8 flags; - __u16 tslice; - __u16 phys_cpus; - __u16 this_part; - __u64 curtod1; - __u64 curtod2; - char reserved[40]; -} __attribute__ ((packed)); - static inline int info_blk_hdr__size(enum diag204_format type) { - if (type == INFO_SIMPLE) - return sizeof(struct info_blk_hdr); - else /* INFO_EXT */ - return sizeof(struct x_info_blk_hdr); + if (type == DIAG204_INFO_SIMPLE) + return sizeof(struct diag204_info_blk_hdr); + else /* DIAG204_INFO_EXT */ + return sizeof(struct diag204_x_info_blk_hdr); } static inline __u8 info_blk_hdr__npar(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) - return ((struct info_blk_hdr *)hdr)->npar; - else /* INFO_EXT */ - return ((struct x_info_blk_hdr *)hdr)->npar; + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_info_blk_hdr *)hdr)->npar; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_info_blk_hdr *)hdr)->npar; } static inline __u8 info_blk_hdr__flags(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) - return ((struct info_blk_hdr *)hdr)->flags; - else /* INFO_EXT */ - return ((struct x_info_blk_hdr *)hdr)->flags; + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_info_blk_hdr *)hdr)->flags; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_info_blk_hdr *)hdr)->flags; } static inline __u16 info_blk_hdr__pcpus(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) - return ((struct info_blk_hdr *)hdr)->phys_cpus; - else /* INFO_EXT */ - return ((struct 
x_info_blk_hdr *)hdr)->phys_cpus; + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_info_blk_hdr *)hdr)->phys_cpus; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_info_blk_hdr *)hdr)->phys_cpus; } /* Partition header */ -struct part_hdr { - __u8 pn; - __u8 cpus; - char reserved[6]; - char part_name[LPAR_NAME_LEN]; -} __attribute__ ((packed)); - -struct x_part_hdr { - __u8 pn; - __u8 cpus; - __u8 rcpus; - __u8 pflag; - __u32 mlu; - char part_name[LPAR_NAME_LEN]; - char lpc_name[8]; - char os_name[8]; - __u64 online_cs; - __u64 online_es; - __u8 upid; - char reserved1[3]; - __u32 group_mlu; - char group_name[8]; - char reserved2[32]; -} __attribute__ ((packed)); - static inline int part_hdr__size(enum diag204_format type) { - if (type == INFO_SIMPLE) - return sizeof(struct part_hdr); - else /* INFO_EXT */ - return sizeof(struct x_part_hdr); + if (type == DIAG204_INFO_SIMPLE) + return sizeof(struct diag204_part_hdr); + else /* DIAG204_INFO_EXT */ + return sizeof(struct diag204_x_part_hdr); } static inline __u8 part_hdr__rcpus(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) - return ((struct part_hdr *)hdr)->cpus; - else /* INFO_EXT */ - return ((struct x_part_hdr *)hdr)->rcpus; + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_part_hdr *)hdr)->cpus; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_part_hdr *)hdr)->rcpus; } static inline void part_hdr__part_name(enum diag204_format type, void *hdr, char *name) { - if (type == INFO_SIMPLE) - memcpy(name, ((struct part_hdr *)hdr)->part_name, - LPAR_NAME_LEN); - else /* INFO_EXT */ - memcpy(name, ((struct x_part_hdr *)hdr)->part_name, - LPAR_NAME_LEN); - EBCASC(name, LPAR_NAME_LEN); - name[LPAR_NAME_LEN] = 0; + if (type == DIAG204_INFO_SIMPLE) + memcpy(name, ((struct diag204_part_hdr *)hdr)->part_name, + DIAG204_LPAR_NAME_LEN); + else /* DIAG204_INFO_EXT */ + memcpy(name, ((struct diag204_x_part_hdr *)hdr)->part_name, + DIAG204_LPAR_NAME_LEN); + EBCASC(name, 
DIAG204_LPAR_NAME_LEN); + name[DIAG204_LPAR_NAME_LEN] = 0; strim(name); } -struct cpu_info { - __u16 cpu_addr; - char reserved1[2]; - __u8 ctidx; - __u8 cflag; - __u16 weight; - __u64 acc_time; - __u64 lp_time; -} __attribute__ ((packed)); - -struct x_cpu_info { - __u16 cpu_addr; - char reserved1[2]; - __u8 ctidx; - __u8 cflag; - __u16 weight; - __u64 acc_time; - __u64 lp_time; - __u16 min_weight; - __u16 cur_weight; - __u16 max_weight; - char reseved2[2]; - __u64 online_time; - __u64 wait_time; - __u32 pma_weight; - __u32 polar_weight; - char reserved3[40]; -} __attribute__ ((packed)); - /* CPU info block */ static inline int cpu_info__size(enum diag204_format type) { - if (type == INFO_SIMPLE) - return sizeof(struct cpu_info); - else /* INFO_EXT */ - return sizeof(struct x_cpu_info); + if (type == DIAG204_INFO_SIMPLE) + return sizeof(struct diag204_cpu_info); + else /* DIAG204_INFO_EXT */ + return sizeof(struct diag204_x_cpu_info); } static inline __u8 cpu_info__ctidx(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) - return ((struct cpu_info *)hdr)->ctidx; - else /* INFO_EXT */ - return ((struct x_cpu_info *)hdr)->ctidx; + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_cpu_info *)hdr)->ctidx; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_cpu_info *)hdr)->ctidx; } static inline __u16 cpu_info__cpu_addr(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) - return ((struct cpu_info *)hdr)->cpu_addr; - else /* INFO_EXT */ - return ((struct x_cpu_info *)hdr)->cpu_addr; + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_cpu_info *)hdr)->cpu_addr; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_cpu_info *)hdr)->cpu_addr; } static inline __u64 cpu_info__acc_time(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) - return ((struct cpu_info *)hdr)->acc_time; - else /* INFO_EXT */ - return ((struct x_cpu_info *)hdr)->acc_time; + if (type == DIAG204_INFO_SIMPLE) + return ((struct 
diag204_cpu_info *)hdr)->acc_time; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_cpu_info *)hdr)->acc_time; } static inline __u64 cpu_info__lp_time(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) - return ((struct cpu_info *)hdr)->lp_time; - else /* INFO_EXT */ - return ((struct x_cpu_info *)hdr)->lp_time; + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_cpu_info *)hdr)->lp_time; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_cpu_info *)hdr)->lp_time; } static inline __u64 cpu_info__online_time(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) + if (type == DIAG204_INFO_SIMPLE) return 0; /* online_time not available in simple info */ - else /* INFO_EXT */ - return ((struct x_cpu_info *)hdr)->online_time; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_cpu_info *)hdr)->online_time; } /* Physical header */ -struct phys_hdr { - char reserved1[1]; - __u8 cpus; - char reserved2[6]; - char mgm_name[8]; -} __attribute__ ((packed)); - -struct x_phys_hdr { - char reserved1[1]; - __u8 cpus; - char reserved2[6]; - char mgm_name[8]; - char reserved3[80]; -} __attribute__ ((packed)); - static inline int phys_hdr__size(enum diag204_format type) { - if (type == INFO_SIMPLE) - return sizeof(struct phys_hdr); - else /* INFO_EXT */ - return sizeof(struct x_phys_hdr); + if (type == DIAG204_INFO_SIMPLE) + return sizeof(struct diag204_phys_hdr); + else /* DIAG204_INFO_EXT */ + return sizeof(struct diag204_x_phys_hdr); } static inline __u8 phys_hdr__cpus(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) - return ((struct phys_hdr *)hdr)->cpus; - else /* INFO_EXT */ - return ((struct x_phys_hdr *)hdr)->cpus; + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_phys_hdr *)hdr)->cpus; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_phys_hdr *)hdr)->cpus; } /* Physical CPU info block */ -struct phys_cpu { - __u16 cpu_addr; - char reserved1[2]; - __u8 ctidx; - char reserved2[3]; - 
__u64 mgm_time; - char reserved3[8]; -} __attribute__ ((packed)); - -struct x_phys_cpu { - __u16 cpu_addr; - char reserved1[2]; - __u8 ctidx; - char reserved2[3]; - __u64 mgm_time; - char reserved3[80]; -} __attribute__ ((packed)); - static inline int phys_cpu__size(enum diag204_format type) { - if (type == INFO_SIMPLE) - return sizeof(struct phys_cpu); - else /* INFO_EXT */ - return sizeof(struct x_phys_cpu); + if (type == DIAG204_INFO_SIMPLE) + return sizeof(struct diag204_phys_cpu); + else /* DIAG204_INFO_EXT */ + return sizeof(struct diag204_x_phys_cpu); } static inline __u16 phys_cpu__cpu_addr(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) - return ((struct phys_cpu *)hdr)->cpu_addr; - else /* INFO_EXT */ - return ((struct x_phys_cpu *)hdr)->cpu_addr; + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_phys_cpu *)hdr)->cpu_addr; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_phys_cpu *)hdr)->cpu_addr; } static inline __u64 phys_cpu__mgm_time(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) - return ((struct phys_cpu *)hdr)->mgm_time; - else /* INFO_EXT */ - return ((struct x_phys_cpu *)hdr)->mgm_time; + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_phys_cpu *)hdr)->mgm_time; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_phys_cpu *)hdr)->mgm_time; } static inline __u64 phys_cpu__ctidx(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) - return ((struct phys_cpu *)hdr)->ctidx; - else /* INFO_EXT */ - return ((struct x_phys_cpu *)hdr)->ctidx; + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_phys_cpu *)hdr)->ctidx; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_phys_cpu *)hdr)->ctidx; } /* Diagnose 204 functions */ - -static inline int __diag204(unsigned long subcode, unsigned long size, void *addr) -{ - register unsigned long _subcode asm("0") = subcode; - register unsigned long _size asm("1") = size; - - asm volatile( - " diag %2,%0,0x204\n" - 
"0:\n" - EX_TABLE(0b,0b) - : "+d" (_subcode), "+d" (_size) : "d" (addr) : "memory"); - if (_subcode) - return -1; - return _size; -} - -static int diag204(unsigned long subcode, unsigned long size, void *addr) -{ - diag_stat_inc(DIAG_STAT_X204); - return __diag204(subcode, size, addr); -} - /* * For the old diag subcode 4 with simple data format we have to use real * memory. If we use subcode 6 or 7 with extended data format, we can (and @@ -409,12 +261,12 @@ static void *diag204_get_buffer(enum diag204_format fmt, int *pages) *pages = diag204_buf_pages; return diag204_buf; } - if (fmt == INFO_SIMPLE) { + if (fmt == DIAG204_INFO_SIMPLE) { *pages = 1; return diag204_alloc_rbuf(); - } else {/* INFO_EXT */ - *pages = diag204((unsigned long)SUBC_RSI | - (unsigned long)INFO_EXT, 0, NULL); + } else {/* DIAG204_INFO_EXT */ + *pages = diag204((unsigned long)DIAG204_SUBC_RSI | + (unsigned long)DIAG204_INFO_EXT, 0, NULL); if (*pages <= 0) return ERR_PTR(-ENOSYS); else @@ -441,18 +293,18 @@ static int diag204_probe(void) void *buf; int pages, rc; - buf = diag204_get_buffer(INFO_EXT, &pages); + buf = diag204_get_buffer(DIAG204_INFO_EXT, &pages); if (!IS_ERR(buf)) { - if (diag204((unsigned long)SUBC_STIB7 | - (unsigned long)INFO_EXT, pages, buf) >= 0) { - diag204_store_sc = SUBC_STIB7; - diag204_info_type = INFO_EXT; + if (diag204((unsigned long)DIAG204_SUBC_STIB7 | + (unsigned long)DIAG204_INFO_EXT, pages, buf) >= 0) { + diag204_store_sc = DIAG204_SUBC_STIB7; + diag204_info_type = DIAG204_INFO_EXT; goto out; } - if (diag204((unsigned long)SUBC_STIB6 | - (unsigned long)INFO_EXT, pages, buf) >= 0) { - diag204_store_sc = SUBC_STIB6; - diag204_info_type = INFO_EXT; + if (diag204((unsigned long)DIAG204_SUBC_STIB6 | + (unsigned long)DIAG204_INFO_EXT, pages, buf) >= 0) { + diag204_store_sc = DIAG204_SUBC_STIB6; + diag204_info_type = DIAG204_INFO_EXT; goto out; } diag204_free_buffer(); @@ -460,15 +312,15 @@ static int diag204_probe(void) /* subcodes 6 and 7 failed, now try subcode 4 
*/ - buf = diag204_get_buffer(INFO_SIMPLE, &pages); + buf = diag204_get_buffer(DIAG204_INFO_SIMPLE, &pages); if (IS_ERR(buf)) { rc = PTR_ERR(buf); goto fail_alloc; } - if (diag204((unsigned long)SUBC_STIB4 | - (unsigned long)INFO_SIMPLE, pages, buf) >= 0) { - diag204_store_sc = SUBC_STIB4; - diag204_info_type = INFO_SIMPLE; + if (diag204((unsigned long)DIAG204_SUBC_STIB4 | + (unsigned long)DIAG204_INFO_SIMPLE, pages, buf) >= 0) { + diag204_store_sc = DIAG204_SUBC_STIB4; + diag204_info_type = DIAG204_INFO_SIMPLE; goto out; } else { rc = -ENOSYS; @@ -543,9 +395,9 @@ static void diag224_delete_name_table(void) static int diag224_idx2name(int index, char *name) { - memcpy(name, diag224_cpu_names + ((index + 1) * CPU_NAME_LEN), - CPU_NAME_LEN); - name[CPU_NAME_LEN] = 0; + memcpy(name, diag224_cpu_names + ((index + 1) * DIAG204_CPU_NAME_LEN), + DIAG204_CPU_NAME_LEN); + name[DIAG204_CPU_NAME_LEN] = 0; strim(name); return 0; } @@ -601,7 +453,7 @@ __init int hypfs_diag_init(void) pr_err("The hardware system does not support hypfs\n"); return -ENODATA; } - if (diag204_info_type == INFO_EXT) { + if (diag204_info_type == DIAG204_INFO_EXT) { rc = hypfs_dbfs_create_file(&dbfs_file_d204); if (rc) return rc; @@ -649,7 +501,7 @@ static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info) cpu_info__lp_time(diag204_info_type, cpu_info)); if (IS_ERR(rc)) return PTR_ERR(rc); - if (diag204_info_type == INFO_EXT) { + if (diag204_info_type == DIAG204_INFO_EXT) { rc = hypfs_create_u64(cpu_dir, "onlinetime", cpu_info__online_time(diag204_info_type, cpu_info)); @@ -665,12 +517,12 @@ static void *hypfs_create_lpar_files(struct dentry *systems_dir, void *part_hdr) { struct dentry *cpus_dir; struct dentry *lpar_dir; - char lpar_name[LPAR_NAME_LEN + 1]; + char lpar_name[DIAG204_LPAR_NAME_LEN + 1]; void *cpu_info; int i; part_hdr__part_name(diag204_info_type, part_hdr, lpar_name); - lpar_name[LPAR_NAME_LEN] = 0; + lpar_name[DIAG204_LPAR_NAME_LEN] = 0; lpar_dir = 
hypfs_mkdir(systems_dir, lpar_name); if (IS_ERR(lpar_dir)) return lpar_dir; @@ -753,7 +605,8 @@ int hypfs_diag_create_files(struct dentry *root) goto err_out; } } - if (info_blk_hdr__flags(diag204_info_type, time_hdr) & LPAR_PHYS_FLG) { + if (info_blk_hdr__flags(diag204_info_type, time_hdr) & + DIAG204_LPAR_PHYS_FLG) { ptr = hypfs_create_phys_files(root, part_hdr); if (IS_ERR(ptr)) { rc = PTR_ERR(ptr); diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index 5fac921c1c42..f72744f14e31 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -78,4 +78,131 @@ struct diag210 { extern int diag210(struct diag210 *addr); +/* bit is set in flags, when physical cpu info is included in diag 204 data */ +#define DIAG204_LPAR_PHYS_FLG 0x80 +#define DIAG204_LPAR_NAME_LEN 8 /* lpar name len in diag 204 data */ +#define DIAG204_CPU_NAME_LEN 16 /* type name len of cpus in diag224 name table */ + +/* diag 204 subcodes */ +enum diag204_sc { + DIAG204_SUBC_STIB4 = 4, + DIAG204_SUBC_RSI = 5, + DIAG204_SUBC_STIB6 = 6, + DIAG204_SUBC_STIB7 = 7 +}; + +/* The two available diag 204 data formats */ +enum diag204_format { + DIAG204_INFO_SIMPLE = 0, + DIAG204_INFO_EXT = 0x00010000 +}; + +struct diag204_info_blk_hdr { + __u8 npar; + __u8 flags; + __u16 tslice; + __u16 phys_cpus; + __u16 this_part; + __u64 curtod; +} __packed; + +struct diag204_x_info_blk_hdr { + __u8 npar; + __u8 flags; + __u16 tslice; + __u16 phys_cpus; + __u16 this_part; + __u64 curtod1; + __u64 curtod2; + char reserved[40]; +} __packed; + +struct diag204_part_hdr { + __u8 pn; + __u8 cpus; + char reserved[6]; + char part_name[DIAG204_LPAR_NAME_LEN]; +} __packed; + +struct diag204_x_part_hdr { + __u8 pn; + __u8 cpus; + __u8 rcpus; + __u8 pflag; + __u32 mlu; + char part_name[DIAG204_LPAR_NAME_LEN]; + char lpc_name[8]; + char os_name[8]; + __u64 online_cs; + __u64 online_es; + __u8 upid; + char reserved1[3]; + __u32 group_mlu; + char group_name[8]; + char reserved2[32]; +} 
__packed; + +struct diag204_cpu_info { + __u16 cpu_addr; + char reserved1[2]; + __u8 ctidx; + __u8 cflag; + __u16 weight; + __u64 acc_time; + __u64 lp_time; +} __packed; + +struct diag204_x_cpu_info { + __u16 cpu_addr; + char reserved1[2]; + __u8 ctidx; + __u8 cflag; + __u16 weight; + __u64 acc_time; + __u64 lp_time; + __u16 min_weight; + __u16 cur_weight; + __u16 max_weight; + char reseved2[2]; + __u64 online_time; + __u64 wait_time; + __u32 pma_weight; + __u32 polar_weight; + char reserved3[40]; +} __packed; + +struct diag204_phys_hdr { + char reserved1[1]; + __u8 cpus; + char reserved2[6]; + char mgm_name[8]; +} __packed; + +struct diag204_x_phys_hdr { + char reserved1[1]; + __u8 cpus; + char reserved2[6]; + char mgm_name[8]; + char reserved3[80]; +} __packed; + +struct diag204_phys_cpu { + __u16 cpu_addr; + char reserved1[2]; + __u8 ctidx; + char reserved2[3]; + __u64 mgm_time; + char reserved3[8]; +} __packed; + +struct diag204_x_phys_cpu { + __u16 cpu_addr; + char reserved1[2]; + __u8 ctidx; + char reserved2[3]; + __u64 mgm_time; + char reserved3[80]; +} __packed; + +int diag204(unsigned long subcode, unsigned long size, void *addr); #endif /* _ASM_S390_DIAG_H */ diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index 48b37b8357e6..f4ce4a248811 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -162,6 +162,28 @@ int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode) } EXPORT_SYMBOL(diag14); +static inline int __diag204(unsigned long subcode, unsigned long size, void *addr) +{ + register unsigned long _subcode asm("0") = subcode; + register unsigned long _size asm("1") = size; + + asm volatile( + " diag %2,%0,0x204\n" + "0:\n" + EX_TABLE(0b,0b) + : "+d" (_subcode), "+d" (_size) : "d" (addr) : "memory"); + if (_subcode) + return -1; + return _size; +} + +int diag204(unsigned long subcode, unsigned long size, void *addr) +{ + diag_stat_inc(DIAG_STAT_X204); + return __diag204(subcode, size, addr); +} 
+EXPORT_SYMBOL(diag204); + /* * Diagnose 210: Get information about a virtual device */ From e435dc31398e63b992639cf62024d959219db191 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 8 Feb 2016 13:36:22 +0100 Subject: [PATCH 02/52] s390: Make cpc_name accessible sclp_ocf.c is the only way to get the cpc name, as it registers the sole event handler for the ocf event. By creating a new global function that copies that name, we make it accessible to the world which longs to retrieve it. Additionally we now also store the cpc name as EBCDIC, so we don't have to convert it to and from ASCII if it is requested in native encoding. Signed-off-by: Janosch Frank Reviewed-by: David Hildenbrand Acked-by: Heiko Carstens Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/sclp.h | 1 + drivers/s390/char/sclp_ocf.c | 23 +++++++++++++++-------- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index e4f6f73afe2f..49736a0d4e0e 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -101,5 +101,6 @@ int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count); int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count); void sclp_early_detect(void); void _sclp_print_early(const char *); +void sclp_ocf_cpc_name_copy(char *dst); #endif /* _ASM_S390_SCLP_H */ diff --git a/drivers/s390/char/sclp_ocf.c b/drivers/s390/char/sclp_ocf.c index 2553db0fdb52..f59b71776bbd 100644 --- a/drivers/s390/char/sclp_ocf.c +++ b/drivers/s390/char/sclp_ocf.c @@ -26,7 +26,7 @@ #define OCF_LENGTH_CPC_NAME 8UL static char hmc_network[OCF_LENGTH_HMC_NETWORK + 1]; -static char cpc_name[OCF_LENGTH_CPC_NAME + 1]; +static char cpc_name[OCF_LENGTH_CPC_NAME]; /* in EBCDIC */ static DEFINE_SPINLOCK(sclp_ocf_lock); static struct work_struct sclp_ocf_change_work; @@ -72,9 +72,8 @@ static void sclp_ocf_handler(struct evbuf_header *evbuf) } if (cpc) { size = min(OCF_LENGTH_CPC_NAME, 
(size_t) cpc->length); + memset(cpc_name, 0, OCF_LENGTH_CPC_NAME); memcpy(cpc_name, cpc + 1, size); - EBCASC(cpc_name, size); - cpc_name[size] = 0; } spin_unlock(&sclp_ocf_lock); schedule_work(&sclp_ocf_change_work); @@ -85,15 +84,23 @@ static struct sclp_register sclp_ocf_event = { .receiver_fn = sclp_ocf_handler, }; +void sclp_ocf_cpc_name_copy(char *dst) +{ + spin_lock_irq(&sclp_ocf_lock); + memcpy(dst, cpc_name, OCF_LENGTH_CPC_NAME); + spin_unlock_irq(&sclp_ocf_lock); +} +EXPORT_SYMBOL(sclp_ocf_cpc_name_copy); + static ssize_t cpc_name_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - int rc; + char name[OCF_LENGTH_CPC_NAME + 1]; - spin_lock_irq(&sclp_ocf_lock); - rc = snprintf(page, PAGE_SIZE, "%s\n", cpc_name); - spin_unlock_irq(&sclp_ocf_lock); - return rc; + sclp_ocf_cpc_name_copy(name); + name[OCF_LENGTH_CPC_NAME] = 0; + EBCASC(name, OCF_LENGTH_CPC_NAME); + return snprintf(page, PAGE_SIZE, "%s\n", name); } static struct kobj_attribute cpc_name_attr = From 022bd2d11cc51f62e873a09bcae8016b10950194 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Fri, 12 Feb 2016 12:52:49 +0100 Subject: [PATCH 03/52] s390: Make diag224 public Diag204's cpu structures only contain the cpu type by means of an index in the diag224 name table. Hence, to be able to use diag204 in any meaningful way, we also need a usable diag224 interface. 
Signed-off-by: Janosch Frank Reviewed-by: Christian Borntraeger Reviewed-by: David Hildenbrand Acked-by: Heiko Carstens Signed-off-by: Christian Borntraeger --- arch/s390/hypfs/hypfs_diag.c | 14 -------------- arch/s390/include/asm/diag.h | 1 + arch/s390/kernel/diag.c | 15 +++++++++++++++ 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index 1e28414d7275..28f03ca60100 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -360,20 +360,6 @@ out: /* Diagnose 224 functions */ -static int diag224(void *ptr) -{ - int rc = -EOPNOTSUPP; - - diag_stat_inc(DIAG_STAT_X224); - asm volatile( - " diag %1,%2,0x224\n" - "0: lhi %0,0x0\n" - "1:\n" - EX_TABLE(0b,1b) - : "+d" (rc) :"d" (0), "d" (ptr) : "memory"); - return rc; -} - static int diag224_get_name_table(void) { /* memory must be below 2GB */ diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index f72744f14e31..197e303a76e9 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -205,4 +205,5 @@ struct diag204_x_phys_cpu { } __packed; int diag204(unsigned long subcode, unsigned long size, void *addr); +int diag224(void *ptr); #endif /* _ASM_S390_DIAG_H */ diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index f4ce4a248811..a44faf4a0454 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -218,3 +218,18 @@ int diag210(struct diag210 *addr) return ccode; } EXPORT_SYMBOL(diag210); + +int diag224(void *ptr) +{ + int rc = -EOPNOTSUPP; + + diag_stat_inc(DIAG_STAT_X224); + asm volatile( + " diag %1,%2,0x224\n" + "0: lhi %0,0x0\n" + "1:\n" + EX_TABLE(0b,1b) + : "+d" (rc) :"d" (0), "d" (ptr) : "memory"); + return rc; +} +EXPORT_SYMBOL(diag224); From a011eeb2a3d6cd778eb63bea0bf149ebbe658ab5 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 9 May 2016 14:14:01 +0200 Subject: [PATCH 04/52] KVM: s390: Add operation exception interception handler This 
commit introduces code that handles operation exception interceptions. With this handler we can emulate instructions by using illegal opcodes. Signed-off-by: Janosch Frank Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/kvm_host.h | 1 + arch/s390/kvm/intercept.c | 11 +++++++++++ arch/s390/kvm/kvm-s390.c | 1 + arch/s390/kvm/trace.h | 21 +++++++++++++++++++++ 4 files changed, 34 insertions(+) diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 37b9017c6a96..093ea14109e2 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -255,6 +255,7 @@ struct kvm_vcpu_stat { u32 instruction_stctg; u32 exit_program_interruption; u32 exit_instr_and_program; + u32 exit_operation_exception; u32 deliver_external_call; u32 deliver_emergency_signal; u32 deliver_service_signal; diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 2e6b54e4d3f9..09c13db1416f 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -349,6 +349,15 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; } +static int handle_operexc(struct kvm_vcpu *vcpu) +{ + vcpu->stat.exit_operation_exception++; + trace_kvm_s390_handle_operexc(vcpu, vcpu->arch.sie_block->ipa, + vcpu->arch.sie_block->ipb); + + return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); +} + int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) { if (kvm_is_ucontrol(vcpu->kvm)) @@ -370,6 +379,8 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) return handle_validity(vcpu); case 0x28: return handle_stop(vcpu); + case 0x2c: + return handle_operexc(vcpu); case 0x38: return handle_partial_execution(vcpu); default: diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6d8ec3ac9dd8..f0addece729e 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -63,6 +63,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "exit_instruction", 
VCPU_STAT(exit_instruction) }, { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, + { "exit_operation_exception", VCPU_STAT(exit_operation_exception) }, { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) }, diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h index 916834d7a73a..90d26a6aa52c 100644 --- a/arch/s390/kvm/trace.h +++ b/arch/s390/kvm/trace.h @@ -412,6 +412,27 @@ TRACE_EVENT(kvm_s390_handle_stsi, __entry->addr) ); +TRACE_EVENT(kvm_s390_handle_operexc, + TP_PROTO(VCPU_PROTO_COMMON, __u16 ipa, __u32 ipb), + TP_ARGS(VCPU_ARGS_COMMON, ipa, ipb), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(__u64, instruction) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->instruction = ((__u64)ipa << 48) | + ((__u64)ipb << 16); + ), + + VCPU_TP_PRINTK("operation exception on instruction %016llx (%s)", + __entry->instruction, + __print_symbolic(icpt_insn_decoder(__entry->instruction), + icpt_insn_codes)) + ); + #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ From a2d57b35c0226102b1f2ffdc2f719fcc30c99bf5 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 23 May 2016 15:09:19 +0200 Subject: [PATCH 05/52] KVM: s390: Extend diag 204 fields The new store hypervisor information instruction, which we are going to introduce, needs previously unused fields in diag 204 structures. 
Signed-off-by: Janosch Frank Acked-by: Heiko Carstens Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/diag.h | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index 197e303a76e9..f4000cdb6921 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -97,6 +97,11 @@ enum diag204_format { DIAG204_INFO_EXT = 0x00010000 }; +enum diag204_cpu_flags { + DIAG204_CPU_ONLINE = 0x20, + DIAG204_CPU_CAPPED = 0x40, +}; + struct diag204_info_blk_hdr { __u8 npar; __u8 flags; @@ -136,10 +141,13 @@ struct diag204_x_part_hdr { __u64 online_cs; __u64 online_es; __u8 upid; - char reserved1[3]; + __u8 reserved:3; + __u8 mtid:5; + char reserved1[2]; __u32 group_mlu; char group_name[8]; - char reserved2[32]; + char hardware_group_name[8]; + char reserved2[24]; } __packed; struct diag204_cpu_info { @@ -168,7 +176,9 @@ struct diag204_x_cpu_info { __u64 wait_time; __u32 pma_weight; __u32 polar_weight; - char reserved3[40]; + __u32 cpu_type_cap; + __u32 group_cpu_type_cap; + char reserved3[32]; } __packed; struct diag204_phys_hdr { @@ -199,7 +209,8 @@ struct diag204_x_phys_cpu { __u16 cpu_addr; char reserved1[2]; __u8 ctidx; - char reserved2[3]; + char reserved2[1]; + __u16 weight; __u64 mgm_time; char reserved3[80]; } __packed; From 95ca2cb57985b07f5b136405f80a5106f5b06641 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 23 May 2016 15:11:58 +0200 Subject: [PATCH 06/52] KVM: s390: Add sthyi emulation Store Hypervisor Information is an emulated z/VM instruction that provides a guest with basic information about the layers it is running on. This includes information about the cpu configuration of both the machine and the lpar, as well as their names, machine model and machine type. This information enables an application to determine the maximum capacity of CPs and IFLs available to software. 
The instruction is available whenever the facility bit 74 is set, otherwise executing it results in an operation exception. It is important to check the validity flags in the sections before using data from any structure member. It is not guaranteed that all members will be valid on all machines / machine configurations. Signed-off-by: Janosch Frank Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/diag.h | 10 + arch/s390/include/asm/kvm_host.h | 2 + arch/s390/include/uapi/asm/sie.h | 1 + arch/s390/kvm/Makefile | 2 +- arch/s390/kvm/intercept.c | 4 + arch/s390/kvm/kvm-s390.c | 6 + arch/s390/kvm/kvm-s390.h | 3 + arch/s390/kvm/sthyi.c | 460 +++++++++++++++++++++++++++++++ arch/s390/kvm/trace.h | 20 ++ 9 files changed, 507 insertions(+), 1 deletion(-) create mode 100644 arch/s390/kvm/sthyi.c diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index f4000cdb6921..82211998ccf7 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -215,6 +215,16 @@ struct diag204_x_phys_cpu { char reserved3[80]; } __packed; +struct diag204_x_part_block { + struct diag204_x_part_hdr hdr; + struct diag204_x_cpu_info cpus[]; +} __packed; + +struct diag204_x_phys_block { + struct diag204_x_phys_hdr hdr; + struct diag204_x_phys_cpu cpus[]; +} __packed; + int diag204(unsigned long subcode, unsigned long size, void *addr); int diag224(void *ptr); #endif /* _ASM_S390_DIAG_H */ diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 093ea14109e2..7233b1c49964 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -154,6 +154,7 @@ struct kvm_s390_sie_block { #define LCTL_CR14 0x0002 __u16 lctl; /* 0x0044 */ __s16 icpua; /* 0x0046 */ +#define ICTL_OPEREXC 0x80000000 #define ICTL_PINT 0x20000000 #define ICTL_LPSW 0x00400000 #define ICTL_STCTL 0x00040000 @@ -279,6 +280,7 @@ struct kvm_vcpu_stat { u32 instruction_stfl; u32 instruction_tprot; u32 
instruction_essa; + u32 instruction_sthyi; u32 instruction_sigp_sense; u32 instruction_sigp_sense_running; u32 instruction_sigp_external_call; diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h index 8fb5d4a6dd25..3ac634368939 100644 --- a/arch/s390/include/uapi/asm/sie.h +++ b/arch/s390/include/uapi/asm/sie.h @@ -140,6 +140,7 @@ exit_code_ipa0(0xB2, 0x4c, "TAR"), \ exit_code_ipa0(0xB2, 0x50, "CSP"), \ exit_code_ipa0(0xB2, 0x54, "MVPG"), \ + exit_code_ipa0(0xB2, 0x56, "STHYI"), \ exit_code_ipa0(0xB2, 0x58, "BSG"), \ exit_code_ipa0(0xB2, 0x5a, "BSA"), \ exit_code_ipa0(0xB2, 0x5f, "CHSC"), \ diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index d42fa38c2429..82e73e2b953d 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -12,6 +12,6 @@ common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqch ccflags-y := -Ivirt/kvm -Iarch/s390/kvm kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o -kvm-objs += diag.o gaccess.o guestdbg.o +kvm-objs += diag.o gaccess.o guestdbg.o sthyi.o obj-$(CONFIG_KVM) += kvm.o diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 09c13db1416f..9359f65c8634 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -355,6 +355,10 @@ static int handle_operexc(struct kvm_vcpu *vcpu) trace_kvm_s390_handle_operexc(vcpu, vcpu->arch.sie_block->ipa, vcpu->arch.sie_block->ipb); + if (vcpu->arch.sie_block->ipa == 0xb256 && + test_kvm_facility(vcpu->kvm, 74)) + return handle_sthyi(vcpu); + return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f0addece729e..1c10254119b3 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -94,6 +94,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "instruction_stsi", VCPU_STAT(instruction_stsi) }, { "instruction_stfl", VCPU_STAT(instruction_stfl) }, { "instruction_tprot", 
VCPU_STAT(instruction_tprot) }, + { "instruction_sthyi", VCPU_STAT(instruction_sthyi) }, { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) }, { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) }, { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) }, @@ -1189,6 +1190,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask, S390_ARCH_FAC_LIST_SIZE_BYTE); + set_kvm_facility(kvm->arch.model.fac_mask, 74); + set_kvm_facility(kvm->arch.model.fac_list, 74); + kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid(); kvm->arch.model.ibc = sclp.ibc & 0x0fff; @@ -1679,6 +1683,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) } vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; + if (test_kvm_facility(vcpu->kvm, 74)) + vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; if (vcpu->kvm->arch.use_cmma) { rc = kvm_s390_vcpu_setup_cmma(vcpu); diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 8621ab00ec8e..c5ec4d31e5e3 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -250,6 +250,9 @@ int kvm_s390_handle_eb(struct kvm_vcpu *vcpu); int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu); +/* implemented in sthyi.c */ +int handle_sthyi(struct kvm_vcpu *vcpu); + /* implemented in kvm-s390.c */ void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod); long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable); diff --git a/arch/s390/kvm/sthyi.c b/arch/s390/kvm/sthyi.c new file mode 100644 index 000000000000..894d5626f18d --- /dev/null +++ b/arch/s390/kvm/sthyi.c @@ -0,0 +1,460 @@ +/* + * store hypervisor information instruction emulation functions. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Copyright IBM Corp. 2016 + * Author(s): Janosch Frank + */ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "kvm-s390.h" +#include "gaccess.h" +#include "trace.h" + +#define DED_WEIGHT 0xffff +/* + * CP and IFL as EBCDIC strings, SP/0x40 determines the end of string + * as they are justified with spaces. + */ +#define CP 0xc3d7404040404040UL +#define IFL 0xc9c6d34040404040UL + +enum hdr_flags { + HDR_NOT_LPAR = 0x10, + HDR_STACK_INCM = 0x20, + HDR_STSI_UNAV = 0x40, + HDR_PERF_UNAV = 0x80, +}; + +enum mac_validity { + MAC_NAME_VLD = 0x20, + MAC_ID_VLD = 0x40, + MAC_CNT_VLD = 0x80, +}; + +enum par_flag { + PAR_MT_EN = 0x80, +}; + +enum par_validity { + PAR_GRP_VLD = 0x08, + PAR_ID_VLD = 0x10, + PAR_ABS_VLD = 0x20, + PAR_WGHT_VLD = 0x40, + PAR_PCNT_VLD = 0x80, +}; + +struct hdr_sctn { + u8 infhflg1; + u8 infhflg2; /* reserved */ + u8 infhval1; /* reserved */ + u8 infhval2; /* reserved */ + u8 reserved[3]; + u8 infhygct; + u16 infhtotl; + u16 infhdln; + u16 infmoff; + u16 infmlen; + u16 infpoff; + u16 infplen; + u16 infhoff1; + u16 infhlen1; + u16 infgoff1; + u16 infglen1; + u16 infhoff2; + u16 infhlen2; + u16 infgoff2; + u16 infglen2; + u16 infhoff3; + u16 infhlen3; + u16 infgoff3; + u16 infglen3; + u8 reserved2[4]; +} __packed; + +struct mac_sctn { + u8 infmflg1; /* reserved */ + u8 infmflg2; /* reserved */ + u8 infmval1; + u8 infmval2; /* reserved */ + u16 infmscps; + u16 infmdcps; + u16 infmsifl; + u16 infmdifl; + char infmname[8]; + char infmtype[4]; + char infmmanu[16]; + char infmseq[16]; + char infmpman[4]; + u8 reserved[4]; +} __packed; + +struct par_sctn { + u8 infpflg1; + u8 infpflg2; /* reserved */ + u8 infpval1; + u8 infpval2; /* reserved */ + u16 infppnum; + u16 infpscps; + u16 
infpdcps; + u16 infpsifl; + u16 infpdifl; + u16 reserved; + char infppnam[8]; + u32 infpwbcp; + u32 infpabcp; + u32 infpwbif; + u32 infpabif; + char infplgnm[8]; + u32 infplgcp; + u32 infplgif; +} __packed; + +struct sthyi_sctns { + struct hdr_sctn hdr; + struct mac_sctn mac; + struct par_sctn par; +} __packed; + +struct cpu_inf { + u64 lpar_cap; + u64 lpar_grp_cap; + u64 lpar_weight; + u64 all_weight; + int cpu_num_ded; + int cpu_num_shd; +}; + +struct lpar_cpu_inf { + struct cpu_inf cp; + struct cpu_inf ifl; +}; + +static inline u64 cpu_id(u8 ctidx, void *diag224_buf) +{ + return *((u64 *)(diag224_buf + (ctidx + 1) * DIAG204_CPU_NAME_LEN)); +} + +/* + * Scales the cpu capping from the lpar range to the one expected in + * sthyi data. + * + * diag204 reports a cap in hundredths of processor units. + * z/VM's range for one core is 0 - 0x10000. + */ +static u32 scale_cap(u32 in) +{ + return (0x10000 * in) / 100; +} + +static void fill_hdr(struct sthyi_sctns *sctns) +{ + sctns->hdr.infhdln = sizeof(sctns->hdr); + sctns->hdr.infmoff = sizeof(sctns->hdr); + sctns->hdr.infmlen = sizeof(sctns->mac); + sctns->hdr.infplen = sizeof(sctns->par); + sctns->hdr.infpoff = sctns->hdr.infhdln + sctns->hdr.infmlen; + sctns->hdr.infhtotl = sctns->hdr.infpoff + sctns->hdr.infplen; +} + +static void fill_stsi_mac(struct sthyi_sctns *sctns, + struct sysinfo_1_1_1 *sysinfo) +{ + if (stsi(sysinfo, 1, 1, 1)) + return; + + sclp_ocf_cpc_name_copy(sctns->mac.infmname); + + memcpy(sctns->mac.infmtype, sysinfo->type, sizeof(sctns->mac.infmtype)); + memcpy(sctns->mac.infmmanu, sysinfo->manufacturer, sizeof(sctns->mac.infmmanu)); + memcpy(sctns->mac.infmpman, sysinfo->plant, sizeof(sctns->mac.infmpman)); + memcpy(sctns->mac.infmseq, sysinfo->sequence, sizeof(sctns->mac.infmseq)); + + sctns->mac.infmval1 |= MAC_ID_VLD | MAC_NAME_VLD; +} + +static void fill_stsi_par(struct sthyi_sctns *sctns, + struct sysinfo_2_2_2 *sysinfo) +{ + if (stsi(sysinfo, 2, 2, 2)) + return; + + sctns->par.infppnum = 
sysinfo->lpar_number; + memcpy(sctns->par.infppnam, sysinfo->name, sizeof(sctns->par.infppnam)); + + sctns->par.infpval1 |= PAR_ID_VLD; +} + +static void fill_stsi(struct sthyi_sctns *sctns) +{ + void *sysinfo; + + /* Errors are handled through the validity bits in the response. */ + sysinfo = (void *)__get_free_page(GFP_KERNEL); + if (!sysinfo) + return; + + fill_stsi_mac(sctns, sysinfo); + fill_stsi_par(sctns, sysinfo); + + free_pages((unsigned long)sysinfo, 0); +} + +static void fill_diag_mac(struct sthyi_sctns *sctns, + struct diag204_x_phys_block *block, + void *diag224_buf) +{ + int i; + + for (i = 0; i < block->hdr.cpus; i++) { + switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) { + case CP: + if (block->cpus[i].weight == DED_WEIGHT) + sctns->mac.infmdcps++; + else + sctns->mac.infmscps++; + break; + case IFL: + if (block->cpus[i].weight == DED_WEIGHT) + sctns->mac.infmdifl++; + else + sctns->mac.infmsifl++; + break; + } + } + sctns->mac.infmval1 |= MAC_CNT_VLD; +} + +/* Returns a pointer to the next partition block. 
*/ +static struct diag204_x_part_block *lpar_cpu_inf(struct lpar_cpu_inf *part_inf, + bool this_lpar, + void *diag224_buf, + struct diag204_x_part_block *block) +{ + int i, capped = 0, weight_cp = 0, weight_ifl = 0; + struct cpu_inf *cpu_inf; + + for (i = 0; i < block->hdr.rcpus; i++) { + if (!(block->cpus[i].cflag & DIAG204_CPU_ONLINE)) + continue; + + switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) { + case CP: + cpu_inf = &part_inf->cp; + if (block->cpus[i].cur_weight < DED_WEIGHT) + weight_cp |= block->cpus[i].cur_weight; + break; + case IFL: + cpu_inf = &part_inf->ifl; + if (block->cpus[i].cur_weight < DED_WEIGHT) + weight_ifl |= block->cpus[i].cur_weight; + break; + default: + continue; + } + + if (!this_lpar) + continue; + + capped |= block->cpus[i].cflag & DIAG204_CPU_CAPPED; + cpu_inf->lpar_cap |= block->cpus[i].cpu_type_cap; + cpu_inf->lpar_grp_cap |= block->cpus[i].group_cpu_type_cap; + + if (block->cpus[i].weight == DED_WEIGHT) + cpu_inf->cpu_num_ded += 1; + else + cpu_inf->cpu_num_shd += 1; + } + + if (this_lpar && capped) { + part_inf->cp.lpar_weight = weight_cp; + part_inf->ifl.lpar_weight = weight_ifl; + } + part_inf->cp.all_weight += weight_cp; + part_inf->ifl.all_weight += weight_ifl; + return (struct diag204_x_part_block *)&block->cpus[i]; +} + +static void fill_diag(struct sthyi_sctns *sctns) +{ + int i, r, pages; + bool this_lpar; + void *diag204_buf; + void *diag224_buf = NULL; + struct diag204_x_info_blk_hdr *ti_hdr; + struct diag204_x_part_block *part_block; + struct diag204_x_phys_block *phys_block; + struct lpar_cpu_inf lpar_inf = {}; + + /* Errors are handled through the validity bits in the response. 
*/ + pages = diag204((unsigned long)DIAG204_SUBC_RSI | + (unsigned long)DIAG204_INFO_EXT, 0, NULL); + if (pages <= 0) + return; + + diag204_buf = vmalloc(PAGE_SIZE * pages); + if (!diag204_buf) + return; + + r = diag204((unsigned long)DIAG204_SUBC_STIB7 | + (unsigned long)DIAG204_INFO_EXT, pages, diag204_buf); + if (r < 0) + goto out; + + diag224_buf = kmalloc(PAGE_SIZE, GFP_KERNEL | GFP_DMA); + if (!diag224_buf || diag224(diag224_buf)) + goto out; + + ti_hdr = diag204_buf; + part_block = diag204_buf + sizeof(*ti_hdr); + + for (i = 0; i < ti_hdr->npar; i++) { + /* + * For the calling lpar we also need to get the cpu + * caps and weights. The time information block header + * specifies the offset to the partition block of the + * caller lpar, so we know when we process its data. + */ + this_lpar = (void *)part_block - diag204_buf == ti_hdr->this_part; + part_block = lpar_cpu_inf(&lpar_inf, this_lpar, diag224_buf, + part_block); + } + + phys_block = (struct diag204_x_phys_block *)part_block; + part_block = diag204_buf + ti_hdr->this_part; + if (part_block->hdr.mtid) + sctns->par.infpflg1 = PAR_MT_EN; + + sctns->par.infpval1 |= PAR_GRP_VLD; + sctns->par.infplgcp = scale_cap(lpar_inf.cp.lpar_grp_cap); + sctns->par.infplgif = scale_cap(lpar_inf.ifl.lpar_grp_cap); + memcpy(sctns->par.infplgnm, part_block->hdr.hardware_group_name, + sizeof(sctns->par.infplgnm)); + + sctns->par.infpscps = lpar_inf.cp.cpu_num_shd; + sctns->par.infpdcps = lpar_inf.cp.cpu_num_ded; + sctns->par.infpsifl = lpar_inf.ifl.cpu_num_shd; + sctns->par.infpdifl = lpar_inf.ifl.cpu_num_ded; + sctns->par.infpval1 |= PAR_PCNT_VLD; + + sctns->par.infpabcp = scale_cap(lpar_inf.cp.lpar_cap); + sctns->par.infpabif = scale_cap(lpar_inf.ifl.lpar_cap); + sctns->par.infpval1 |= PAR_ABS_VLD; + + /* + * Everything below needs global performance data to be + * meaningful. 
+ */ + if (!(ti_hdr->flags & DIAG204_LPAR_PHYS_FLG)) { + sctns->hdr.infhflg1 |= HDR_PERF_UNAV; + goto out; + } + + fill_diag_mac(sctns, phys_block, diag224_buf); + + if (lpar_inf.cp.lpar_weight) { + sctns->par.infpwbcp = sctns->mac.infmscps * 0x10000 * + lpar_inf.cp.lpar_weight / lpar_inf.cp.all_weight; + } + + if (lpar_inf.ifl.lpar_weight) { + sctns->par.infpwbif = sctns->mac.infmsifl * 0x10000 * + lpar_inf.ifl.lpar_weight / lpar_inf.ifl.all_weight; + } + sctns->par.infpval1 |= PAR_WGHT_VLD; + +out: + kfree(diag224_buf); + vfree(diag204_buf); +} + +static int sthyi(u64 vaddr) +{ + register u64 code asm("0") = 0; + register u64 addr asm("2") = vaddr; + int cc; + + asm volatile( + ".insn rre,0xB2560000,%[code],%[addr]\n" + "ipm %[cc]\n" + "srl %[cc],28\n" + : [cc] "=d" (cc) + : [code] "d" (code), [addr] "a" (addr) + : "memory", "cc"); + return cc; +} + +int handle_sthyi(struct kvm_vcpu *vcpu) +{ + int reg1, reg2, r = 0; + u64 code, addr, cc = 0; + struct sthyi_sctns *sctns = NULL; + + kvm_s390_get_regs_rre(vcpu, &reg1, &reg2); + code = vcpu->run->s.regs.gprs[reg1]; + addr = vcpu->run->s.regs.gprs[reg2]; + + vcpu->stat.instruction_sthyi++; + VCPU_EVENT(vcpu, 3, "STHYI: fc: %llu addr: 0x%016llx", code, addr); + trace_kvm_s390_handle_sthyi(vcpu, code, addr); + + if (reg1 == reg2 || reg1 & 1 || reg2 & 1 || addr & ~PAGE_MASK) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + if (code & 0xffff) { + cc = 3; + goto out; + } + + /* + * If the page has not yet been faulted in, we want to do that + * now and not after all the expensive calculations. + */ + r = write_guest(vcpu, addr, reg2, &cc, 1); + if (r) + return kvm_s390_inject_prog_cond(vcpu, r); + + sctns = (void *)get_zeroed_page(GFP_KERNEL); + if (!sctns) + return -ENOMEM; + + /* + * If we are a guest, we don't want to emulate an emulated + * instruction. We ask the hypervisor to provide the data. 
+ */ + if (test_facility(74)) { + cc = sthyi((u64)sctns); + goto out; + } + + fill_hdr(sctns); + fill_stsi(sctns); + fill_diag(sctns); + +out: + if (!cc) { + r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE); + if (r) { + free_page((unsigned long)sctns); + return kvm_s390_inject_prog_cond(vcpu, r); + } + } + + free_page((unsigned long)sctns); + vcpu->run->s.regs.gprs[reg2 + 1] = cc ? 4 : 0; + kvm_s390_set_psw_cc(vcpu, cc); + return r; +} diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h index 90d26a6aa52c..a429ef9b0d30 100644 --- a/arch/s390/kvm/trace.h +++ b/arch/s390/kvm/trace.h @@ -433,6 +433,26 @@ TRACE_EVENT(kvm_s390_handle_operexc, icpt_insn_codes)) ); +TRACE_EVENT(kvm_s390_handle_sthyi, + TP_PROTO(VCPU_PROTO_COMMON, u64 code, u64 addr), + TP_ARGS(VCPU_ARGS_COMMON, code, addr), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(u64, code) + __field(u64, addr) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->code = code; + __entry->addr = addr; + ), + + VCPU_TP_PRINTK("STHYI fc: %llu addr: %016llx", + __entry->code, __entry->addr) + ); + #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ From 7d0a5e62411a9223512c6af2e4c08a2d7c00fa2e Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Tue, 10 May 2016 15:03:42 +0200 Subject: [PATCH 07/52] KVM: s390: Limit sthyi execution Store hypervisor information is a valid instruction not only in supervisor state but also in problem state, i.e. the guest's userspace. Its execution is not only computational and memory intensive, but also has to get hold of the ipte lock to write to the guest's memory. This lock is not intended to be held often and long, especially not from the untrusted guest userspace. Therefore we apply rate limiting of sthyi executions per VM. 
Signed-off-by: Janosch Frank Acked-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/kvm_host.h | 1 + arch/s390/kvm/kvm-s390.c | 2 ++ arch/s390/kvm/sthyi.c | 11 +++++++++++ 3 files changed, 14 insertions(+) diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 7233b1c49964..bcc20dc91ea8 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -652,6 +652,7 @@ struct kvm_arch{ wait_queue_head_t ipte_wq; int ipte_lock_count; struct mutex ipte_mutex; + struct ratelimit_state sthyi_limit; spinlock_t start_stop_lock; struct sie_page2 *sie_page2; struct kvm_s390_cpu_model model; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 1c10254119b3..44297ff53b44 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1151,6 +1151,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) rc = -ENOMEM; + ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500); + kvm->arch.use_esca = 0; /* start with basic SCA */ rwlock_init(&kvm->arch.sca_lock); kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL); diff --git a/arch/s390/kvm/sthyi.c b/arch/s390/kvm/sthyi.c index 894d5626f18d..bd98b7d25200 100644 --- a/arch/s390/kvm/sthyi.c +++ b/arch/s390/kvm/sthyi.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -403,6 +404,16 @@ int handle_sthyi(struct kvm_vcpu *vcpu) u64 code, addr, cc = 0; struct sthyi_sctns *sctns = NULL; + /* + * STHYI requires extensive locking in the higher hypervisors + * and is very computational/memory expensive. Therefore we + * ratelimit the executions per VM. 
+ */ + if (!__ratelimit(&vcpu->kvm->arch.sthyi_limit)) { + kvm_s390_retry_instr(vcpu); + return 0; + } + kvm_s390_get_regs_rre(vcpu, &reg1, &reg2); code = vcpu->run->s.regs.gprs[reg1]; addr = vcpu->run->s.regs.gprs[reg2]; From c1778e515712dae0575657fe6c9511ffcb28a7e0 Mon Sep 17 00:00:00 2001 From: Alexander Yarygin Date: Fri, 6 May 2016 15:47:19 +0300 Subject: [PATCH 08/52] KVM: s390: Add mnemonic print to kvm_s390_intercept_prog We have a table of mnemonic names for intercepted program interruptions, let's print readable name of the interruption in the kvm_s390_intercept_prog trace event. Signed-off-by: Alexander Yarygin Acked-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/kvm/trace.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h index a429ef9b0d30..1c4586b367a4 100644 --- a/arch/s390/kvm/trace.h +++ b/arch/s390/kvm/trace.h @@ -185,8 +185,10 @@ TRACE_EVENT(kvm_s390_intercept_prog, __entry->code = code; ), - VCPU_TP_PRINTK("intercepted program interruption %04x", - __entry->code) + VCPU_TP_PRINTK("intercepted program interruption %04x (%s)", + __entry->code, + __print_symbolic(__entry->code, + icpt_prog_codes)) ); /* From 15c9705f0c8af2d19dede9866aec364746b269ef Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 19 Mar 2015 17:36:43 +0100 Subject: [PATCH 09/52] KVM: s390: interface to query and configure cpu features For now, we only have an interface to query and configure facilities indicated via STFL(E). However, we also have features indicated via SCLP, that have to be indicated to the guest by user space and usually require KVM support. This patch allows user space to query and configure available cpu features for the guest. Please note that disabling a feature doesn't necessarily mean that it is completely disabled (e.g. ESOP is mostly handled by the SIE). We will try our best to disable it. Most features (e.g. 
SCLP) can't directly be forwarded, as most of them need in addition to hardware support, support in KVM. As we later on want to turn these features in KVM explicitly on/off (to simulate different behavior), we have to filter all features provided by the hardware and make them configurable. Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- Documentation/virtual/kvm/devices/vm.txt | 27 ++++++++++ arch/s390/include/asm/kvm_host.h | 2 + arch/s390/include/uapi/asm/kvm.h | 8 +++ arch/s390/kvm/kvm-s390.c | 63 ++++++++++++++++++++++++ arch/s390/kvm/kvm-s390.h | 6 +++ 5 files changed, 106 insertions(+) diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt index a9ea8774a45f..0ed6808b9965 100644 --- a/Documentation/virtual/kvm/devices/vm.txt +++ b/Documentation/virtual/kvm/devices/vm.txt @@ -85,6 +85,33 @@ Returns: -EBUSY in case 1 or more vcpus are already activated (only in write -ENOMEM if not enough memory is available to process the ioctl 0 in case of success +2.3. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE_FEAT (r/o) + +Allows user space to retrieve available cpu features. A feature is available if +provided by the hardware and supported by kvm. In theory, cpu features could +even be completely emulated by kvm. + +struct kvm_s390_vm_cpu_feat { + __u64 feat[16]; # Bitmap (1 = feature available), MSB 0 bit numbering +}; + +Parameters: address of a buffer to load the feature list from. +Returns: -EFAULT if the given address is not accessible from kernel space. + 0 in case of success. + +2.4. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR_FEAT (r/w) + +Allows user space to retrieve or change enabled cpu features for all VCPUs of a +VM. Features that are not available cannot be enabled. + +See 2.3. for a description of the parameter struct. + +Parameters: address of a buffer to store/load the feature list from. +Returns: -EFAULT if the given address is not accessible from kernel space. 
+ -EINVAL if a cpu feature that is not available is to be enabled. + -EBUSY if at least one VCPU has already been defined. + 0 in case of success. + 3. GROUP: KVM_S390_VM_TOD Architectures: s390 diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index bcc20dc91ea8..b2a83a0ce42c 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -658,6 +658,8 @@ struct kvm_arch{ struct kvm_s390_cpu_model model; struct kvm_s390_crypto crypto; u64 epoch; + /* subset of available cpu features enabled by user space */ + DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); }; #define KVM_HVA_ERR_BAD (-1UL) diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index 3b8e99ef9d58..a8559f265e26 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -93,6 +93,14 @@ struct kvm_s390_vm_cpu_machine { __u64 fac_list[256]; }; +#define KVM_S390_VM_CPU_PROCESSOR_FEAT 2 +#define KVM_S390_VM_CPU_MACHINE_FEAT 3 + +#define KVM_S390_VM_CPU_FEAT_NR_BITS 1024 +struct kvm_s390_vm_cpu_feat { + __u64 feat[16]; +}; + /* kvm attributes for crypto */ #define KVM_S390_VM_CRYPTO_ENABLE_AES_KW 0 #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW 1 diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 44297ff53b44..6960468f28ad 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -132,6 +133,9 @@ unsigned long kvm_s390_fac_list_mask_size(void) return ARRAY_SIZE(kvm_s390_fac_list_mask); } +/* available cpu features supported by kvm */ +static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); + static struct gmap_notifier gmap_notifier; debug_info_t *kvm_s390_dbf; @@ -677,6 +681,29 @@ out: return ret; } +static int kvm_s390_set_processor_feat(struct kvm *kvm, + struct kvm_device_attr *attr) +{ + struct kvm_s390_vm_cpu_feat data; + int ret = -EBUSY; + + if 
(copy_from_user(&data, (void __user *)attr->addr, sizeof(data))) + return -EFAULT; + if (!bitmap_subset((unsigned long *) data.feat, + kvm_s390_available_cpu_feat, + KVM_S390_VM_CPU_FEAT_NR_BITS)) + return -EINVAL; + + mutex_lock(&kvm->lock); + if (!atomic_read(&kvm->online_vcpus)) { + bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat, + KVM_S390_VM_CPU_FEAT_NR_BITS); + ret = 0; + } + mutex_unlock(&kvm->lock); + return ret; +} + static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) { int ret = -ENXIO; @@ -685,6 +712,9 @@ static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) case KVM_S390_VM_CPU_PROCESSOR: ret = kvm_s390_set_processor(kvm, attr); break; + case KVM_S390_VM_CPU_PROCESSOR_FEAT: + ret = kvm_s390_set_processor_feat(kvm, attr); + break; } return ret; } @@ -733,6 +763,31 @@ out: return ret; } +static int kvm_s390_get_processor_feat(struct kvm *kvm, + struct kvm_device_attr *attr) +{ + struct kvm_s390_vm_cpu_feat data; + + bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat, + KVM_S390_VM_CPU_FEAT_NR_BITS); + if (copy_to_user((void __user *)attr->addr, &data, sizeof(data))) + return -EFAULT; + return 0; +} + +static int kvm_s390_get_machine_feat(struct kvm *kvm, + struct kvm_device_attr *attr) +{ + struct kvm_s390_vm_cpu_feat data; + + bitmap_copy((unsigned long *) data.feat, + kvm_s390_available_cpu_feat, + KVM_S390_VM_CPU_FEAT_NR_BITS); + if (copy_to_user((void __user *)attr->addr, &data, sizeof(data))) + return -EFAULT; + return 0; +} + static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) { int ret = -ENXIO; @@ -744,6 +799,12 @@ static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) case KVM_S390_VM_CPU_MACHINE: ret = kvm_s390_get_machine(kvm, attr); break; + case KVM_S390_VM_CPU_PROCESSOR_FEAT: + ret = kvm_s390_get_processor_feat(kvm, attr); + break; + case KVM_S390_VM_CPU_MACHINE_FEAT: + ret = kvm_s390_get_machine_feat(kvm, 
attr); + break; } return ret; } @@ -827,6 +888,8 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) switch (attr->attr) { case KVM_S390_VM_CPU_PROCESSOR: case KVM_S390_VM_CPU_MACHINE: + case KVM_S390_VM_CPU_PROCESSOR_FEAT: + case KVM_S390_VM_CPU_MACHINE_FEAT: ret = 0; break; default: diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index c5ec4d31e5e3..52aa47e112d8 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -175,6 +175,12 @@ static inline int set_kvm_facility(u64 *fac_list, unsigned long nr) return 0; } +static inline int test_kvm_cpu_feat(struct kvm *kvm, unsigned long nr) +{ + WARN_ON_ONCE(nr >= KVM_S390_VM_CPU_FEAT_NR_BITS); + return test_bit_inv(nr, kvm->arch.cpu_feat); +} + /* are cpu states controlled by user space */ static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm) { From 22be5a133169e855097936438417ab1b672ad43f Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 21 Jan 2016 13:22:54 +0100 Subject: [PATCH 10/52] KVM: s390: forward ESOP if available ESOP guarantees that during a protection exception, bit 61 of real location 168-175 will only be set to 1 if it was because of ALCP or DATP. If the exception is due to LAP or KCP, the bit will always be set to 0. The old SOP definition allowed bit 61 to be unpredictable in case of LAP or KCP in some conditions. So ESOP replaces this unpredictability by a guarantee. Therefore, we can directly forward ESOP if it is available on our machine. We don't have to do anything when ESOP is disabled - the guest will simply expect unpredictable values. Our guest access functions are already handling ESOP properly. Please note that future functionality in KVM will require knowledge about ESOP being enabled for a guest or not. 
Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/include/uapi/asm/kvm.h | 1 + arch/s390/kvm/kvm-s390.c | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index a8559f265e26..789c4e27e294 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -97,6 +97,7 @@ struct kvm_s390_vm_cpu_machine { #define KVM_S390_VM_CPU_MACHINE_FEAT 3 #define KVM_S390_VM_CPU_FEAT_NR_BITS 1024 +#define KVM_S390_VM_CPU_FEAT_ESOP 0 struct kvm_s390_vm_cpu_feat { __u64 feat[16]; }; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6960468f28ad..2b5c14da3227 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -193,6 +193,17 @@ void kvm_arch_hardware_unsetup(void) &kvm_clock_notifier); } +static void allow_cpu_feat(unsigned long nr) +{ + set_bit_inv(nr, kvm_s390_available_cpu_feat); +} + +static void kvm_s390_cpu_feat_init(void) +{ + if (MACHINE_HAS_ESOP) + allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP); +} + int kvm_arch_init(void *opaque) { kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long)); @@ -204,6 +215,8 @@ int kvm_arch_init(void *opaque) return -ENOMEM; } + kvm_s390_cpu_feat_init(); + /* Register floating interrupt controller interface. */ return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC); } From 6167375b558196fdedd38e9867f7bb30ff4dda50 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 31 May 2016 19:44:10 +0200 Subject: [PATCH 11/52] KVM: s390: gaccess: store guest address on ALC prot exceptions Let's pass the effective guest address to get_vcpu_asce(), so we can properly set the guest address in case we inject an ALC protection exception. 
Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/gaccess.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 66938d283b77..c0da9e9d4490 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -477,7 +477,7 @@ enum { }; static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, - ar_t ar, enum gacc_mode mode) + unsigned long ga, ar_t ar, enum gacc_mode mode) { int rc; struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw); @@ -519,6 +519,7 @@ static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, vcpu->arch.pgm.exc_access_id = ar; break; case PGM_PROTECTION: + tec_bits->addr = ga >> PAGE_SHIFT; tec_bits->b60 = 1; tec_bits->b61 = 1; break; @@ -783,7 +784,8 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, if (!len) return 0; - rc = get_vcpu_asce(vcpu, &asce, ar, mode); + ga = kvm_s390_logical_to_effective(vcpu, ga); + rc = get_vcpu_asce(vcpu, &asce, ga, ar, mode); if (rc) return rc; nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1; @@ -854,7 +856,7 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, gva = kvm_s390_logical_to_effective(vcpu, gva); tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code; - rc = get_vcpu_asce(vcpu, &asce, ar, mode); + rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode); tec->addr = gva >> PAGE_SHIFT; if (rc) return rc; From d03193de30e6d99770930c6fbf14f0d5dd5cb2f0 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 31 May 2016 19:56:46 +0200 Subject: [PATCH 12/52] KVM: s390: gaccess: function for preparing translation exceptions Let's provide a function trans_exc() that can be used for handling preparation of translation exceptions on a central basis. We will use that function to replace existing code in gaccess. 
Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/gaccess.c | 62 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index c0da9e9d4490..b6ccb26bc3c1 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -476,6 +476,68 @@ enum { FSI_FETCH = 2 /* Exception was due to fetch operation */ }; +enum prot_type { + PROT_TYPE_LA = 0, + PROT_TYPE_KEYC = 1, + PROT_TYPE_ALC = 2, + PROT_TYPE_DAT = 3, +}; + +static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, + ar_t ar, enum gacc_mode mode, enum prot_type prot) +{ + struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; + struct trans_exc_code_bits *tec; + + memset(pgm, 0, sizeof(*pgm)); + pgm->code = code; + tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code; + + switch (code) { + case PGM_ASCE_TYPE: + case PGM_PAGE_TRANSLATION: + case PGM_REGION_FIRST_TRANS: + case PGM_REGION_SECOND_TRANS: + case PGM_REGION_THIRD_TRANS: + case PGM_SEGMENT_TRANSLATION: + /* + * op_access_id only applies to MOVE_PAGE -> set bit 61 + * exc_access_id has to be set to 0 for some instructions. Both + * cases have to be handled by the caller. We can always store + * exc_access_id, as it is undefined for non-ar cases. + */ + tec->addr = gva >> PAGE_SHIFT; + tec->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH; + tec->as = psw_bits(vcpu->arch.sie_block->gpsw).as; + /* FALL THROUGH */ + case PGM_ALEN_TRANSLATION: + case PGM_ALE_SEQUENCE: + case PGM_ASTE_VALIDITY: + case PGM_ASTE_SEQUENCE: + case PGM_EXTENDED_AUTHORITY: + pgm->exc_access_id = ar; + break; + case PGM_PROTECTION: + switch (prot) { + case PROT_TYPE_ALC: + tec->b60 = 1; + /* FALL THROUGH */ + case PROT_TYPE_DAT: + tec->b61 = 1; + tec->addr = gva >> PAGE_SHIFT; + tec->fsi = mode == GACC_STORE ? 
FSI_STORE : FSI_FETCH; + tec->as = psw_bits(vcpu->arch.sie_block->gpsw).as; + /* exc_access_id is undefined for most cases */ + pgm->exc_access_id = ar; + break; + default: /* LA and KEYC set b61 to 0, other params undefined */ + break; + } + break; + } + return code; +} + static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, unsigned long ga, ar_t ar, enum gacc_mode mode) { From 3e3c67f6a327852375247c98b0d153c44e460216 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 31 May 2016 20:00:33 +0200 Subject: [PATCH 13/52] KVM: s390: gaccess: convert kvm_s390_check_low_addr_prot_real() Let's use our new function for preparing translation exceptions. Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/gaccess.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index b6ccb26bc3c1..61dc45ef50b9 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -979,20 +979,9 @@ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, */ int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra) { - struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; - psw_t *psw = &vcpu->arch.sie_block->gpsw; - struct trans_exc_code_bits *tec_bits; union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]}; if (!ctlreg0.lap || !is_low_address(gra)) return 0; - - memset(pgm, 0, sizeof(*pgm)); - tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code; - tec_bits->fsi = FSI_STORE; - tec_bits->as = psw_bits(*psw).as; - tec_bits->addr = gra >> PAGE_SHIFT; - pgm->code = PGM_PROTECTION; - - return pgm->code; + return trans_exc(vcpu, PGM_PROTECTION, gra, 0, GACC_STORE, PROT_TYPE_LA); } From fbcb7d5157718645cc198c6be6b435ab326c1892 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 31 May 2016 20:06:55 +0200 Subject: [PATCH 14/52] KVM: s390: gaccess: convert guest_translate_address() Let's use our new function 
for preparing translation exceptions. Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/gaccess.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 61dc45ef50b9..ae9f9e8e063c 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -910,37 +910,28 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, unsigned long *gpa, enum gacc_mode mode) { - struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; psw_t *psw = &vcpu->arch.sie_block->gpsw; - struct trans_exc_code_bits *tec; union asce asce; int rc; gva = kvm_s390_logical_to_effective(vcpu, gva); - tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code; rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode); - tec->addr = gva >> PAGE_SHIFT; if (rc) return rc; if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) { - if (mode == GACC_STORE) { - rc = pgm->code = PGM_PROTECTION; - return rc; - } + if (mode == GACC_STORE) + return trans_exc(vcpu, PGM_PROTECTION, gva, 0, + mode, PROT_TYPE_LA); } if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */ rc = guest_translate(vcpu, gva, gpa, asce, mode); - if (rc > 0) { - if (rc == PGM_PROTECTION) - tec->b61 = 1; - pgm->code = rc; - } + if (rc > 0) + return trans_exc(vcpu, rc, gva, 0, mode, PROT_TYPE_DAT); } else { - rc = 0; *gpa = kvm_s390_real_to_abs(vcpu, gva); if (kvm_is_error_gpa(vcpu->kvm, *gpa)) - rc = pgm->code = PGM_ADDRESSING; + return trans_exc(vcpu, rc, gva, PGM_ADDRESSING, mode, 0); } return rc; From cde0dcfb5df1dbcd90a8e73130a6b7091bdb493a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 31 May 2016 20:13:35 +0200 Subject: [PATCH 15/52] KVM: s390: gaccess: convert guest_page_range() Let's use our new function for preparing translation exceptions. As we will need the correct ar, let's pass that to guest_page_range(). 
This will also make sure that the guest address is stored in the tec for applicable exceptions. Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/gaccess.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index ae9f9e8e063c..ec6c91e85dbe 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -792,40 +792,31 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu, return 1; } -static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, +static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, unsigned long *pages, unsigned long nr_pages, const union asce asce, enum gacc_mode mode) { - struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; psw_t *psw = &vcpu->arch.sie_block->gpsw; - struct trans_exc_code_bits *tec_bits; - int lap_enabled, rc; + int lap_enabled, rc = 0; - tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code; lap_enabled = low_address_protection_enabled(vcpu, asce); while (nr_pages) { ga = kvm_s390_logical_to_effective(vcpu, ga); - tec_bits->addr = ga >> PAGE_SHIFT; - if (mode == GACC_STORE && lap_enabled && is_low_address(ga)) { - pgm->code = PGM_PROTECTION; - return pgm->code; - } + if (mode == GACC_STORE && lap_enabled && is_low_address(ga)) + return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode, + PROT_TYPE_LA); ga &= PAGE_MASK; if (psw_bits(*psw).t) { rc = guest_translate(vcpu, ga, pages, asce, mode); if (rc < 0) return rc; - if (rc == PGM_PROTECTION) - tec_bits->b61 = 1; - if (rc) - pgm->code = rc; } else { *pages = kvm_s390_real_to_abs(vcpu, ga); if (kvm_is_error_gpa(vcpu->kvm, *pages)) - pgm->code = PGM_ADDRESSING; + rc = PGM_ADDRESSING; } - if (pgm->code) - return pgm->code; + if (rc) + return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_DAT); ga += PAGE_SIZE; pages++; nr_pages--; @@ -859,7 +850,7 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga,
ar_t ar, void *data, need_ipte_lock = psw_bits(*psw).t && !asce.r; if (need_ipte_lock) ipte_lock(vcpu); - rc = guest_page_range(vcpu, ga, pages, nr_pages, asce, mode); + rc = guest_page_range(vcpu, ga, ar, pages, nr_pages, asce, mode); for (idx = 0; idx < nr_pages && !rc; idx++) { gpa = *(pages + idx) + (ga & ~PAGE_MASK); _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len); From bcfa01d787278476f3e79530d03df9b3f52e6e59 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 31 May 2016 20:21:03 +0200 Subject: [PATCH 16/52] KVM: s390: gaccess: convert get_vcpu_asce() Let's use our new function for preparing translation exceptions. Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/gaccess.c | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index ec6c91e85dbe..8e245e764c21 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -543,13 +543,6 @@ static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, { int rc; struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw); - struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; - struct trans_exc_code_bits *tec_bits; - - memset(pgm, 0, sizeof(*pgm)); - tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code; - tec_bits->fsi = mode == GACC_STORE ? 
FSI_STORE : FSI_FETCH; - tec_bits->as = psw.as; if (!psw.t) { asce->val = 0; @@ -572,22 +565,8 @@ static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, return 0; case PSW_AS_ACCREG: rc = ar_translation(vcpu, asce, ar, mode); - switch (rc) { - case PGM_ALEN_TRANSLATION: - case PGM_ALE_SEQUENCE: - case PGM_ASTE_VALIDITY: - case PGM_ASTE_SEQUENCE: - case PGM_EXTENDED_AUTHORITY: - vcpu->arch.pgm.exc_access_id = ar; - break; - case PGM_PROTECTION: - tec_bits->addr = ga >> PAGE_SHIFT; - tec_bits->b60 = 1; - tec_bits->b61 = 1; - break; - } if (rc > 0) - pgm->code = rc; + return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_ALC); return rc; } return 0; From 1afd43e0fbba4a92effc22977e3a7e64213ee860 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 18 May 2016 15:59:06 +0200 Subject: [PATCH 17/52] s390/crypto: allow to query all known cpacf functions KVM will have to query these functions, let's add at least the query capabilities. PCKMO has RRE format, as bit 16-31 are ignored, we can still use the existing function. As PCKMO won't touch the cc, let's force it to 0 upfront. 
Signed-off-by: David Hildenbrand Acked-by: Ingo Tuchscherer Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/cpacf.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h index 1a82cf26ee11..d28621de8e0b 100644 --- a/arch/s390/include/asm/cpacf.h +++ b/arch/s390/include/asm/cpacf.h @@ -20,6 +20,9 @@ #define CPACF_KMC 0xb92f /* MSA */ #define CPACF_KIMD 0xb93e /* MSA */ #define CPACF_KLMD 0xb93f /* MSA */ +#define CPACF_PCKMO 0xb928 /* MSA3 */ +#define CPACF_KMF 0xb92a /* MSA4 */ +#define CPACF_KMO 0xb92b /* MSA4 */ #define CPACF_PCC 0xb92c /* MSA4 */ #define CPACF_KMCTR 0xb92d /* MSA4 */ #define CPACF_PPNO 0xb93c /* MSA5 */ @@ -136,6 +139,7 @@ static inline void __cpacf_query(unsigned int opcode, unsigned char *status) register unsigned long r1 asm("1") = (unsigned long) status; asm volatile( + " spm 0\n" /* pckmo doesn't change the cc */ /* Parameter registers are ignored, but may not be 0 */ "0: .insn rrf,%[opc] << 16,2,2,2,0\n" " brc 1,0b\n" /* handle partial completion */ @@ -157,6 +161,12 @@ static inline int cpacf_query(unsigned int opcode, unsigned int func) if (!test_facility(17)) /* check for MSA */ return 0; break; + case CPACF_PCKMO: + if (!test_facility(76)) /* check for MSA3 */ + return 0; + break; + case CPACF_KMF: + case CPACF_KMO: case CPACF_PCC: case CPACF_KMCTR: if (!test_facility(77)) /* check for MSA4 */ From 0a763c780b7cb830c250d00ead975778ab948f40 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 18 May 2016 16:03:47 +0200 Subject: [PATCH 18/52] KVM: s390: interface to query and configure cpu subfunctions We have certain instructions that indicate available subfunctions via a query subfunction (crypto functions and ptff), or via a test bit function (plo). By exposing these "subfunction blocks" to user space, we allow user space to 1) query available subfunctions and make sure subfunctions won't get lost during migration - e.g. 
properly indicate them via a CPU model 2) change the subfunctions to be reported to the guest (even adding unavailable ones) This mechanism works just like the way we indicate the stfl(e) list to user space. This way, user space could even emulate some subfunctions in QEMU in the future. If this is ever applicable, we have to make sure later on, that unsupported subfunctions result in an intercept to QEMU. Please note that support to indicate them to the guest is still missing and requires hardware support. Usually, the IBC takes already care of these subfunctions for migration safety. QEMU should make sure to always set these bits properly according to the machine generation to be emulated. Available subfunctions are only valid in combination with STFLE bits retrieved via KVM_S390_VM_CPU_MACHINE and enabled via KVM_S390_VM_CPU_PROCESSOR. If the applicable bits are available, the indicated subfunctions are guaranteed to be correct. Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- Documentation/virtual/kvm/devices/vm.txt | 57 +++++++++++++++ arch/s390/include/uapi/asm/kvm.h | 20 ++++++ arch/s390/kvm/kvm-s390.c | 89 ++++++++++++++++++++++++ 3 files changed, 166 insertions(+) diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt index 0ed6808b9965..8a458f42ded2 100644 --- a/Documentation/virtual/kvm/devices/vm.txt +++ b/Documentation/virtual/kvm/devices/vm.txt @@ -112,6 +112,63 @@ Returns: -EFAULT if the given address is not accessible from kernel space. -EBUSY if at least one VCPU has already been defined. 0 in case of success. +2.5. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE_SUBFUNC (r/o) + +Allows user space to retrieve available cpu subfunctions without any filtering +done by a set IBC. These subfunctions are indicated to the guest VCPU via +query or "test bit" subfunctions and used e.g. by cpacf functions, plo and ptff. 
+ +A subfunction block is only valid if KVM_S390_VM_CPU_MACHINE contains the +STFL(E) bit introducing the affected instruction. If the affected instruction +indicates subfunctions via a "query subfunction", the response block is +contained in the returned struct. If the affected instruction +indicates subfunctions via a "test bit" mechanism, the subfunction codes are +contained in the returned struct in MSB 0 bit numbering. + +struct kvm_s390_vm_cpu_subfunc { + u8 plo[32]; # always valid (ESA/390 feature) + u8 ptff[16]; # valid with TOD-clock steering + u8 kmac[16]; # valid with Message-Security-Assist + u8 kmc[16]; # valid with Message-Security-Assist + u8 km[16]; # valid with Message-Security-Assist + u8 kimd[16]; # valid with Message-Security-Assist + u8 klmd[16]; # valid with Message-Security-Assist + u8 pckmo[16]; # valid with Message-Security-Assist-Extension 3 + u8 kmctr[16]; # valid with Message-Security-Assist-Extension 4 + u8 kmf[16]; # valid with Message-Security-Assist-Extension 4 + u8 kmo[16]; # valid with Message-Security-Assist-Extension 4 + u8 pcc[16]; # valid with Message-Security-Assist-Extension 4 + u8 ppno[16]; # valid with Message-Security-Assist-Extension 5 + u8 reserved[1824]; # reserved for future instructions +}; + +Parameters: address of a buffer to load the subfunction blocks from. +Returns: -EFAULT if the given address is not accessible from kernel space. + 0 in case of success. + +2.6. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR_SUBFUNC (r/w) + +Allows user space to retrieve or change cpu subfunctions to be indicated for +all VCPUs of a VM. This attribute will only be available if kernel and +hardware support are in place. + +The kernel uses the configured subfunction blocks for indication to +the guest. A subfunction block will only be used if the associated STFL(E) bit +has not been disabled by user space (so the instruction to be queried is +actually available for the guest). + +As long as no data has been written, a read will fail. 
The IBC will be used +to determine available subfunctions in this case, this will guarantee backward +compatibility. + +See 2.5. for a description of the parameter struct. + +Parameters: address of a buffer to store/load the subfunction blocks from. +Returns: -EFAULT if the given address is not accessible from kernel space. + -EINVAL when reading, if there was no write yet. + -EBUSY if at least one VCPU has already been defined. + 0 in case of success. + 3. GROUP: KVM_S390_VM_TOD Architectures: s390 diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index 789c4e27e294..f0818d70d73d 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -102,6 +102,26 @@ struct kvm_s390_vm_cpu_feat { __u64 feat[16]; }; +#define KVM_S390_VM_CPU_PROCESSOR_SUBFUNC 4 +#define KVM_S390_VM_CPU_MACHINE_SUBFUNC 5 +/* for "test bit" instructions MSB 0 bit ordering, for "query" raw blocks */ +struct kvm_s390_vm_cpu_subfunc { + __u8 plo[32]; /* always */ + __u8 ptff[16]; /* with TOD-clock steering */ + __u8 kmac[16]; /* with MSA */ + __u8 kmc[16]; /* with MSA */ + __u8 km[16]; /* with MSA */ + __u8 kimd[16]; /* with MSA */ + __u8 klmd[16]; /* with MSA */ + __u8 pckmo[16]; /* with MSA3 */ + __u8 kmctr[16]; /* with MSA4 */ + __u8 kmf[16]; /* with MSA4 */ + __u8 kmo[16]; /* with MSA4 */ + __u8 pcc[16]; /* with MSA4 */ + __u8 ppno[16]; /* with MSA5 */ + __u8 reserved[1824]; +}; + /* kvm attributes for crypto */ #define KVM_S390_VM_CRYPTO_ENABLE_AES_KW 0 #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW 1 diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 2b5c14da3227..f746a35e3950 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -36,6 +36,8 @@ #include #include #include +#include +#include #include "kvm-s390.h" #include "gaccess.h" @@ -135,6 +137,8 @@ unsigned long kvm_s390_fac_list_mask_size(void) /* available cpu features supported by kvm */ static DECLARE_BITMAP(kvm_s390_available_cpu_feat, 
KVM_S390_VM_CPU_FEAT_NR_BITS); +/* available subfunctions indicated via query / "test bit" */ +static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc; static struct gmap_notifier gmap_notifier; debug_info_t *kvm_s390_dbf; @@ -198,8 +202,52 @@ static void allow_cpu_feat(unsigned long nr) set_bit_inv(nr, kvm_s390_available_cpu_feat); } +static inline int plo_test_bit(unsigned char nr) +{ + register unsigned long r0 asm("0") = (unsigned long) nr | 0x100; + int cc = 3; /* subfunction not available */ + + asm volatile( + /* Parameter registers are ignored for "test bit" */ + " plo 0,0,0,0(0)\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (cc) + : "d" (r0) + : "cc"); + return cc == 0; +} + static void kvm_s390_cpu_feat_init(void) { + int i; + + for (i = 0; i < 256; ++i) { + if (plo_test_bit(i)) + kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7); + } + + if (test_facility(28)) /* TOD-clock steering */ + etr_ptff(kvm_s390_available_subfunc.ptff, ETR_PTFF_QAF); + + if (test_facility(17)) { /* MSA */ + __cpacf_query(CPACF_KMAC, kvm_s390_available_subfunc.kmac); + __cpacf_query(CPACF_KMC, kvm_s390_available_subfunc.kmc); + __cpacf_query(CPACF_KM, kvm_s390_available_subfunc.km); + __cpacf_query(CPACF_KIMD, kvm_s390_available_subfunc.kimd); + __cpacf_query(CPACF_KLMD, kvm_s390_available_subfunc.klmd); + } + if (test_facility(76)) /* MSA3 */ + __cpacf_query(CPACF_PCKMO, kvm_s390_available_subfunc.pckmo); + if (test_facility(77)) { /* MSA4 */ + __cpacf_query(CPACF_KMCTR, kvm_s390_available_subfunc.kmctr); + __cpacf_query(CPACF_KMF, kvm_s390_available_subfunc.kmf); + __cpacf_query(CPACF_KMO, kvm_s390_available_subfunc.kmo); + __cpacf_query(CPACF_PCC, kvm_s390_available_subfunc.pcc); + } + if (test_facility(57)) /* MSA5 */ + __cpacf_query(CPACF_PPNO, kvm_s390_available_subfunc.ppno); + if (MACHINE_HAS_ESOP) allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP); } @@ -717,6 +765,16 @@ static int kvm_s390_set_processor_feat(struct kvm *kvm, return ret; } +static int 
kvm_s390_set_processor_subfunc(struct kvm *kvm, + struct kvm_device_attr *attr) +{ + /* + * Once supported by kernel + hw, we have to store the subfunctions + * in kvm->arch and remember that user space configured them. + */ + return -ENXIO; +} + static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) { int ret = -ENXIO; @@ -728,6 +786,9 @@ static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) case KVM_S390_VM_CPU_PROCESSOR_FEAT: ret = kvm_s390_set_processor_feat(kvm, attr); break; + case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: + ret = kvm_s390_set_processor_subfunc(kvm, attr); + break; } return ret; } @@ -801,6 +862,25 @@ static int kvm_s390_get_machine_feat(struct kvm *kvm, return 0; } +static int kvm_s390_get_processor_subfunc(struct kvm *kvm, + struct kvm_device_attr *attr) +{ + /* + * Once we can actually configure subfunctions (kernel + hw support), + * we have to check if they were already set by user space, if so copy + * them from kvm->arch. 
+ */ + return -ENXIO; +} + +static int kvm_s390_get_machine_subfunc(struct kvm *kvm, + struct kvm_device_attr *attr) +{ + if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc, + sizeof(struct kvm_s390_vm_cpu_subfunc))) + return -EFAULT; + return 0; +} static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) { int ret = -ENXIO; @@ -818,6 +898,12 @@ static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) case KVM_S390_VM_CPU_MACHINE_FEAT: ret = kvm_s390_get_machine_feat(kvm, attr); break; + case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: + ret = kvm_s390_get_processor_subfunc(kvm, attr); + break; + case KVM_S390_VM_CPU_MACHINE_SUBFUNC: + ret = kvm_s390_get_machine_subfunc(kvm, attr); + break; } return ret; } @@ -903,8 +989,11 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) case KVM_S390_VM_CPU_MACHINE: case KVM_S390_VM_CPU_PROCESSOR_FEAT: case KVM_S390_VM_CPU_MACHINE_FEAT: + case KVM_S390_VM_CPU_MACHINE_SUBFUNC: ret = 0; break; + /* configuring subfunctions is not supported yet */ + case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: default: ret = -ENXIO; break; From 4013ade3fb2fefa021827d675d8bc1d51f4aef93 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 24 Nov 2015 12:49:43 +0100 Subject: [PATCH 19/52] s390/sclp: detect 64-bit-SCAO facility Let's correctly detect that facility, so we can correctly handle its absence later on.
Reviewed-by: Christian Borntraeger Acked-by: Martin Schwidefsky Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/sclp.h | 1 + drivers/s390/char/sclp_early.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 49736a0d4e0e..521400086e65 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -59,6 +59,7 @@ struct sclp_info { unsigned char has_hvs : 1; unsigned char has_esca : 1; unsigned char has_sief2 : 1; + unsigned char has_64bscao : 1; unsigned int ibc; unsigned int mtid; unsigned int mtid_cp; diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index 0ac520dd1b21..211eb86ae62d 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -114,6 +114,7 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb) sclp.facilities = sccb->facilities; sclp.has_sprp = !!(sccb->fac84 & 0x02); sclp.has_core_type = !!(sccb->fac84 & 0x01); + sclp.has_64bscao = !!(sccb->fac116 & 0x80); sclp.has_esca = !!(sccb->fac116 & 0x08); sclp.has_hvs = !!(sccb->fac119 & 0x80); if (sccb->fac85 & 0x02) From 76a6dd7241ae03c47f44a9605dcd525f31b2124a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 24 Nov 2015 13:33:49 +0100 Subject: [PATCH 20/52] KVM: s390: handle missing 64-bit-SCAO facility Without that facility, we may only use scaol. So fallback to DMA allocation in that case, so we won't overwrite random memory via the SIE. Also disallow ESCA, so we don't have to handle that allocation case. 
Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f746a35e3950..efb902cdd1d2 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -317,8 +317,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; case KVM_CAP_NR_VCPUS: case KVM_CAP_MAX_VCPUS: - r = sclp.has_esca ? KVM_S390_ESCA_CPU_SLOTS - : KVM_S390_BSCA_CPU_SLOTS; + r = KVM_S390_BSCA_CPU_SLOTS; + if (sclp.has_esca && sclp.has_64bscao) + r = KVM_S390_ESCA_CPU_SLOTS; break; case KVM_CAP_NR_MEMSLOTS: r = KVM_USER_MEM_SLOTS; @@ -1295,6 +1296,7 @@ static void sca_dispose(struct kvm *kvm) int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { + gfp_t alloc_flags = GFP_KERNEL; int i, rc; char debug_name[16]; static unsigned long sca_offset; @@ -1319,8 +1321,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500); kvm->arch.use_esca = 0; /* start with basic SCA */ + if (!sclp.has_64bscao) + alloc_flags |= GFP_DMA; rwlock_init(&kvm->arch.sca_lock); - kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL); + kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags); if (!kvm->arch.sca) goto out_err; spin_lock(&kvm_lock); @@ -1567,7 +1571,7 @@ static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) if (id < KVM_S390_BSCA_CPU_SLOTS) return true; - if (!sclp.has_esca) + if (!sclp.has_esca || !sclp.has_64bscao) return false; mutex_lock(&kvm->lock); From b9e28897e6e9f82585ecf6ea45942866ece7d167 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 24 Nov 2015 12:51:52 +0100 Subject: [PATCH 21/52] s390/sclp: detect guest-PER enhancement Let's detect that facility, so we can correctly handle its absence.
Reviewed-by: Christian Borntraeger Acked-by: Martin Schwidefsky Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/sclp.h | 4 +++- drivers/s390/char/sclp_early.c | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 521400086e65..076f6318b6fa 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -33,7 +33,8 @@ struct sclp_core_entry { u8 : 4; u8 sief2 : 1; u8 : 3; - u8 : 3; + u8 : 2; + u8 gpere : 1; u8 siif : 1; u8 sigpif : 1; u8 : 3; @@ -60,6 +61,7 @@ struct sclp_info { unsigned char has_esca : 1; unsigned char has_sief2 : 1; unsigned char has_64bscao : 1; + unsigned char has_gpere : 1; unsigned int ibc; unsigned int mtid; unsigned int mtid_cp; diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index 211eb86ae62d..a05f2d07ea02 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -146,6 +146,7 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb) sclp.has_siif = cpue->siif; sclp.has_sigpif = cpue->sigpif; sclp.has_sief2 = cpue->sief2; + sclp.has_gpere = cpue->gpere; break; } From 89b5b4de33902a57cb9c8f2d06de4ffbc921de15 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 24 Nov 2015 13:47:13 +0100 Subject: [PATCH 22/52] KVM: s390: guestdbg: signal missing hardware support Without guest-PER enhancement, we can't provide any debugging support. Therefore act like kernel support is missing. 
Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index efb902cdd1d2..e477c8e5b5c1 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2179,6 +2179,8 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, if (dbg->control & ~VALID_GUESTDBG_FLAGS) return -EINVAL; + if (!sclp.has_gpere) + return -EINVAL; if (dbg->control & KVM_GUESTDBG_ENABLE) { vcpu->guest_debug = dbg->control; From 09be9cb92bb9e799bdbfd3834595bd6b4703b40b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 24 Nov 2015 12:55:35 +0100 Subject: [PATCH 23/52] s390/sclp: detect cmma Let's detect the Collaborative-memory-management-interpretation facility, aka CMM assist, so we can correctly enable cmma later. Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/sclp.h | 1 + drivers/s390/char/sclp_early.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 076f6318b6fa..fa40ac8056f5 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -62,6 +62,7 @@ struct sclp_info { unsigned char has_sief2 : 1; unsigned char has_64bscao : 1; unsigned char has_gpere : 1; + unsigned char has_cmma : 1; unsigned int ibc; unsigned int mtid; unsigned int mtid_cp; diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index a05f2d07ea02..366e1a46e96d 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -115,6 +115,7 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb) sclp.has_sprp = !!(sccb->fac84 & 0x02); sclp.has_core_type = !!(sccb->fac84 & 0x01); sclp.has_64bscao = !!(sccb->fac116 & 0x80); + sclp.has_cmma = !!(sccb->fac116 & 0x40); sclp.has_esca = !!(sccb->fac116 & 0x08); sclp.has_hvs = 
!!(sccb->fac119 & 0x80); if (sccb->fac85 & 0x02) From c24cc9c8a6ca798427d3ff46b55df8403361df3e Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 24 Nov 2015 13:53:04 +0100 Subject: [PATCH 24/52] KVM: s390: enable CMMA if the interpretation is available Now that we can detect if collaborative-memory-management interpretation is available, replace the heuristic by a real hardware detection. Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index e477c8e5b5c1..005e664f6360 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -485,9 +485,8 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att unsigned int idx; switch (attr->attr) { case KVM_S390_VM_MEM_ENABLE_CMMA: - /* enable CMMA only for z10 and later (EDAT_1) */ ret = -EINVAL; - if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1) + if (!sclp.has_cmma) break; ret = -EBUSY; From f9cbd9b02539330ddd349df583fcfc2db8a23b90 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 3 Mar 2016 09:48:47 +0100 Subject: [PATCH 25/52] KVM: s390: provide CMMA attributes only if available Let's not provide the device attribute for cmma enabling and clearing if the hardware doesn't support it. This also helps getting rid of the undocumented return value "-EINVAL" in case CMMA is not available when trying to enable it. Also properly document the meaning of -EINVAL for CMMA clearing.
Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- Documentation/virtual/kvm/api.txt | 2 ++ Documentation/virtual/kvm/devices/vm.txt | 3 ++- arch/s390/kvm/kvm-s390.c | 7 ++++++- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index a4482cce4bae..4aac3e51bf9f 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2520,6 +2520,7 @@ Parameters: struct kvm_device_attr Returns: 0 on success, -1 on error Errors: ENXIO: The group or attribute is unknown/unsupported for this device + or hardware support is missing. EPERM: The attribute cannot (currently) be accessed this way (e.g. read-only attribute, or attribute that only makes sense when the device is in a different state) @@ -2547,6 +2548,7 @@ Parameters: struct kvm_device_attr Returns: 0 on success, -1 on error Errors: ENXIO: The group or attribute is unknown/unsupported for this device + or hardware support is missing. Tests whether a device supports a particular attribute. A successful return indicates the attribute is implemented. It does not necessarily diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt index 8a458f42ded2..b6cda49f2ba4 100644 --- a/Documentation/virtual/kvm/devices/vm.txt +++ b/Documentation/virtual/kvm/devices/vm.txt @@ -20,7 +20,8 @@ Enables Collaborative Memory Management Assist (CMMA) for the virtual machine. 1.2. ATTRIBUTE: KVM_S390_VM_MEM_CLR_CMMA Parameters: none -Returns: 0 +Returns: -EINVAL if CMMA was not enabled + 0 otherwise Clear the CMMA status for all guest pages, so any pages the guest marked as unused are again used any may not be reclaimed by the host. 
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 005e664f6360..f695c6e08337 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -485,7 +485,7 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att unsigned int idx; switch (attr->attr) { case KVM_S390_VM_MEM_ENABLE_CMMA: - ret = -EINVAL; + ret = -ENXIO; if (!sclp.has_cmma) break; @@ -499,6 +499,9 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att mutex_unlock(&kvm->lock); break; case KVM_S390_VM_MEM_CLR_CMMA: + ret = -ENXIO; + if (!sclp.has_cmma) + break; ret = -EINVAL; if (!kvm->arch.use_cmma) break; @@ -964,6 +967,8 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) switch (attr->attr) { case KVM_S390_VM_MEM_ENABLE_CMMA: case KVM_S390_VM_MEM_CLR_CMMA: + ret = sclp.has_cmma ? 0 : -ENXIO; + break; case KVM_S390_VM_MEM_LIMIT_SIZE: ret = 0; break; From 5236c751da5e6ccfda4e5d53690a37dfb456997b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 24 Nov 2015 12:53:46 +0100 Subject: [PATCH 26/52] s390/sclp: detect guest-storage-limit-suppression Let's detect that facility. 
Reviewed-by: Christian Borntraeger Acked-by: Martin Schwidefsky Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/sclp.h | 1 + drivers/s390/char/sclp_early.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index fa40ac8056f5..e1450dd9d932 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -63,6 +63,7 @@ struct sclp_info { unsigned char has_64bscao : 1; unsigned char has_gpere : 1; unsigned char has_cmma : 1; + unsigned char has_gsls : 1; unsigned int ibc; unsigned int mtid; unsigned int mtid_cp; diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index 366e1a46e96d..99fce6b784bf 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -114,6 +114,7 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb) sclp.facilities = sccb->facilities; sclp.has_sprp = !!(sccb->fac84 & 0x02); sclp.has_core_type = !!(sccb->fac84 & 0x01); + sclp.has_gsls = !!(sccb->fac85 & 0x80); sclp.has_64bscao = !!(sccb->fac116 & 0x80); sclp.has_cmma = !!(sccb->fac116 & 0x40); sclp.has_esca = !!(sccb->fac116 & 0x08); From efed110446226c725268a1f980806d799990a979 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 16 Apr 2015 12:32:41 +0200 Subject: [PATCH 27/52] KVM: s390: handle missing guest-storage-limit-suppression If guest-storage-limit-suppression is not available, we would for now have a valid guest address space with size 0. So let's simply set the origin to 0 and the limit to hamax. 
Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/kvm_host.h | 4 +++- arch/s390/kvm/kvm-s390.c | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index b2a83a0ce42c..9eed5c18a61c 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -186,7 +186,9 @@ struct kvm_s390_sie_block { __u32 scaol; /* 0x0064 */ __u8 reserved68[4]; /* 0x0068 */ __u32 todpr; /* 0x006c */ - __u8 reserved70[32]; /* 0x0070 */ + __u8 reserved70[16]; /* 0x0070 */ + __u64 mso; /* 0x0080 */ + __u64 msl; /* 0x0088 */ psw_t gpsw; /* 0x0090 */ __u64 gg14; /* 0x00a0 */ __u64 gg15; /* 0x00a8 */ diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f695c6e08337..2a239554eb89 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1897,6 +1897,10 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu->arch.sie_block = &sie_page->sie_block; vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; + /* the real guest size will always be smaller than msl */ + vcpu->arch.sie_block->mso = 0; + vcpu->arch.sie_block->msl = sclp.hamax; + vcpu->arch.sie_block->icpua = id; spin_lock_init(&vcpu->arch.local_int.lock); vcpu->arch.local_int.float_int = &kvm->arch.float_int; From 72cd82b9e9d075713367ad840c2a9b52b4cd447d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 24 Nov 2015 12:59:03 +0100 Subject: [PATCH 28/52] s390/sclp: detect intervention bypass facility Let's detect if we have the intervention bypass facility installed. 
Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/sclp.h | 7 ++++++- drivers/s390/char/sclp_early.c | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index e1450dd9d932..ef1f427ad4d1 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -38,7 +38,11 @@ struct sclp_core_entry { u8 siif : 1; u8 sigpif : 1; u8 : 3; - u8 reserved2[10]; + u8 reserved2[3]; + u8 : 2; + u8 ib : 1; + u8 : 5; + u8 reserved3[6]; u8 type; u8 reserved1; } __attribute__((packed)); @@ -64,6 +68,7 @@ struct sclp_info { unsigned char has_gpere : 1; unsigned char has_cmma : 1; unsigned char has_gsls : 1; + unsigned char has_ib : 1; unsigned int ibc; unsigned int mtid; unsigned int mtid_cp; diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index 99fce6b784bf..2240b615131e 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -149,6 +149,7 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb) sclp.has_sigpif = cpue->sigpif; sclp.has_sief2 = cpue->sief2; sclp.has_gpere = cpue->gpere; + sclp.has_ib = cpue->ib; break; } From 11ad65b79e8c27cdafe404e33938da270a55858a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 4 Apr 2016 15:46:26 +0200 Subject: [PATCH 29/52] KVM: s390: enable ib only if available Let's enable intervention bypass only if the facility is actually available. 
Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 2a239554eb89..340fb405bc23 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1845,7 +1845,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) if (test_kvm_facility(vcpu->kvm, 8)) vcpu->arch.sie_block->ecb2 |= 0x08; - vcpu->arch.sie_block->eca = 0xC1002000U; + vcpu->arch.sie_block->eca = 0x81002000U; + if (sclp.has_ib) + vcpu->arch.sie_block->eca |= 0x40000000U; if (sclp.has_siif) vcpu->arch.sie_block->eca |= 1; if (sclp.has_sigpif) From 4a5c3e08271216891ce1b5315cec3dcadbd01cd4 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 24 Nov 2015 13:00:23 +0100 Subject: [PATCH 30/52] s390/sclp: detect conditional-external-interception facility Let's detect if we have that facility. Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/sclp.h | 4 +++- drivers/s390/char/sclp_early.c | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index ef1f427ad4d1..c91ad198a59c 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -41,7 +41,8 @@ struct sclp_core_entry { u8 reserved2[3]; u8 : 2; u8 ib : 1; - u8 : 5; + u8 cei : 1; + u8 : 4; u8 reserved3[6]; u8 type; u8 reserved1; @@ -69,6 +70,7 @@ struct sclp_info { unsigned char has_cmma : 1; unsigned char has_gsls : 1; unsigned char has_ib : 1; + unsigned char has_cei : 1; unsigned int ibc; unsigned int mtid; unsigned int mtid_cp; diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index 2240b615131e..4b330fbd4f08 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -150,6 +150,7 @@ static void __init 
sclp_facilities_detect(struct read_info_sccb *sccb) sclp.has_sief2 = cpue->sief2; sclp.has_gpere = cpue->gpere; sclp.has_ib = cpue->ib; + sclp.has_cei = cpue->cei; break; } From 48ee7d3a7f8f3ca90dfc5e1103e68c0044051acc Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 4 Apr 2016 15:49:34 +0200 Subject: [PATCH 31/52] KVM: s390: enable cei only if available Let's only enable conditional-external-interception if the facility is actually available. Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 340fb405bc23..1a239a6748fe 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1845,7 +1845,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) if (test_kvm_facility(vcpu->kvm, 8)) vcpu->arch.sie_block->ecb2 |= 0x08; - vcpu->arch.sie_block->eca = 0x81002000U; + vcpu->arch.sie_block->eca = 0x1002000U; + if (sclp.has_cei) + vcpu->arch.sie_block->eca |= 0x80000000U; if (sclp.has_ib) vcpu->arch.sie_block->eca |= 0x40000000U; if (sclp.has_siif) From a0eb55e6318f1bcfe93b01f0944622f14a6b2977 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 24 Nov 2015 13:02:25 +0100 Subject: [PATCH 32/52] s390/sclp: detect PFMF interpretation facility Let's detect that facility. 
Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/sclp.h | 1 + drivers/s390/char/sclp_early.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index c91ad198a59c..bdb7f22d9ad4 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -71,6 +71,7 @@ struct sclp_info { unsigned char has_gsls : 1; unsigned char has_ib : 1; unsigned char has_cei : 1; + unsigned char has_pfmfi : 1; unsigned int ibc; unsigned int mtid; unsigned int mtid_cp; diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index 4b330fbd4f08..500cbfd83541 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -46,7 +46,8 @@ struct read_info_sccb { u64 rnmax2; /* 104-111 */ u8 _pad_112[116 - 112]; /* 112-115 */ u8 fac116; /* 116 */ - u8 _pad_117[119 - 117]; /* 117-118 */ + u8 fac117; /* 117 */ + u8 _pad_118; /* 118 */ u8 fac119; /* 119 */ u16 hcpua; /* 120-121 */ u8 _pad_122[124 - 122]; /* 122-123 */ @@ -118,6 +119,7 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb) sclp.has_64bscao = !!(sccb->fac116 & 0x80); sclp.has_cmma = !!(sccb->fac116 & 0x40); sclp.has_esca = !!(sccb->fac116 & 0x08); + sclp.has_pfmfi = !!(sccb->fac117 & 0x40); sclp.has_hvs = !!(sccb->fac119 & 0x80); if (sccb->fac85 & 0x02) S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP; From 873b425e4c2fd0ba6617d67a45fbf119b65575b4 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 4 Apr 2016 15:53:47 +0200 Subject: [PATCH 33/52] KVM: s390: enable PFMFI only if available Let's enable interpretation of PFMFI only if the facility is actually available. Emulation code still works in case the guest is offered EDAT-1. 
Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 1a239a6748fe..d987eb8af059 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1843,7 +1843,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73)) vcpu->arch.sie_block->ecb |= 0x10; - if (test_kvm_facility(vcpu->kvm, 8)) + if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi) vcpu->arch.sie_block->ecb2 |= 0x08; vcpu->arch.sie_block->eca = 0x1002000U; if (sclp.has_cei) From 9c375490fc812ebdf3259ea2566c271d00544fc2 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 24 Nov 2015 13:02:52 +0100 Subject: [PATCH 34/52] s390/sclp: detect interlock-and-broadcast-suppression facility Let's detect that facility. Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/sclp.h | 1 + drivers/s390/char/sclp_early.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index bdb7f22d9ad4..99a0150d07b9 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -72,6 +72,7 @@ struct sclp_info { unsigned char has_ib : 1; unsigned char has_cei : 1; unsigned char has_pfmfi : 1; + unsigned char has_ibs : 1; unsigned int ibc; unsigned int mtid; unsigned int mtid_cp; diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index 500cbfd83541..d5b873c92ffc 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -120,6 +120,7 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb) sclp.has_cmma = !!(sccb->fac116 & 0x40); sclp.has_esca = !!(sccb->fac116 & 0x08); sclp.has_pfmfi = !!(sccb->fac117 & 0x40); + sclp.has_ibs = 
!!(sccb->fac117 & 0x20); sclp.has_hvs = !!(sccb->fac119 & 0x80); if (sccb->fac85 & 0x02) S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP; From 09a400e78eaf02d8ab8e836edf864e1025c8e2d7 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 4 Apr 2016 15:57:08 +0200 Subject: [PATCH 35/52] KVM: s390: enable ibs only if available Let's enable interlock-and-broadcast suppression only if the facility is actually available. Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d987eb8af059..ad93b40bfdc0 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2789,6 +2789,8 @@ static void __disable_ibs_on_all_vcpus(struct kvm *kvm) static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu) { + if (!sclp.has_ibs) + return; kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu); kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu); } From bdab09f3d81c3fac6314012ca0eff1206ea067ab Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 12 Apr 2016 11:07:49 +0200 Subject: [PATCH 36/52] KVM: s390: enable host-protection-interruption only with ESOP host-protection-interruption control was introduced with ESOP. So let's enable it only if we have ESOP and add an explanatory comment why we can live without it. 
Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ad93b40bfdc0..4e764faed524 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1837,7 +1837,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) kvm_s390_vcpu_setup_model(vcpu); - vcpu->arch.sie_block->ecb = 0x02; + /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */ + if (MACHINE_HAS_ESOP) + vcpu->arch.sie_block->ecb |= 0x02; if (test_kvm_facility(vcpu->kvm, 9)) vcpu->arch.sie_block->ecb |= 0x04; if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73)) From f597d24eee2dd9486edaac7a1821f35bc4d349c2 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 22 Apr 2016 16:26:49 +0200 Subject: [PATCH 37/52] KVM: s390: turn on tx even without ctx Constrained transactional execution is an addon of transactional execution. Let's enable the assist also if only TX is enabled for the guest. 
Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4e764faed524..9d0e4d0487f4 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1842,7 +1842,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->ecb |= 0x02; if (test_kvm_facility(vcpu->kvm, 9)) vcpu->arch.sie_block->ecb |= 0x04; - if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73)) + if (test_kvm_facility(vcpu->kvm, 73)) vcpu->arch.sie_block->ecb |= 0x10; if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi) From 1bb78d161feae5b613c80eb822059eec60d2a538 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 7 Jun 2016 09:57:08 +0200 Subject: [PATCH 38/52] KVM: s390: provide logging for diagnose 0x500 We might need to debug some virtio things, so better have diagnose 500 logged. Signed-off-by: Christian Borntraeger Acked-by: Cornelia Huck --- arch/s390/kvm/diag.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 1ea4095b67d7..ce865bd4f81d 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -212,6 +212,11 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY)) return -EOPNOTSUPP; + VCPU_EVENT(vcpu, 4, "diag 0x500 schid 0x%8.8x queue 0x%x cookie 0x%llx", + (u32) vcpu->run->s.regs.gprs[2], + (u32) vcpu->run->s.regs.gprs[3], + vcpu->run->s.regs.gprs[4]); + /* * The layout is as follows: * - gpr 2 contains the subchannel id (passed as addr) From dcc98ea6146e4da27eee2f3e9983500e9618cc23 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 7 Jun 2016 09:37:17 +0200 Subject: [PATCH 39/52] KVM: s390: fixup I/O interrupt traces We currently have two issues with the I/O interrupt injection logging: 1. 
All QEMU versions up to 2.6 have a wrong encoding of device numbers etc for the I/O interrupt type, so the inject VM_EVENT will have wrong data. Let's fix this by using the interrupt parameters and not the interrupt type number. 2. We only log in kvm_s390_inject_vm, but not when coming from kvm_s390_reinject_io_int or from flic. Let's move the logging to the common __inject_io function. We also enhance the logging for delivery to match the data. Signed-off-by: Christian Borntraeger Acked-by: Cornelia Huck --- arch/s390/kvm/interrupt.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 5a80af740d3e..d72c4a877622 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -28,9 +28,6 @@ #include "gaccess.h" #include "trace-s390.h" -#define IOINT_SCHID_MASK 0x0000ffff -#define IOINT_SSID_MASK 0x00030000 -#define IOINT_CSSID_MASK 0x03fc0000 #define PFAULT_INIT 0x0600 #define PFAULT_DONE 0x0680 #define VIRTIO_PARAM 0x0d00 @@ -821,7 +818,14 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu, struct kvm_s390_interrupt_info, list); if (inti) { - VCPU_EVENT(vcpu, 4, "deliver: I/O 0x%llx", inti->type); + if (inti->type & KVM_S390_INT_IO_AI_MASK) + VCPU_EVENT(vcpu, 4, "%s", "deliver: I/O (AI)"); + else + VCPU_EVENT(vcpu, 4, "deliver: I/O %x ss %x schid %04x", + inti->io.subchannel_id >> 8, + inti->io.subchannel_id >> 1 & 0x3, + inti->io.subchannel_nr); + vcpu->stat.deliver_io_int++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, @@ -1415,6 +1419,13 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) } fi->counters[FIRQ_CNTR_IO] += 1; + if (inti->type & KVM_S390_INT_IO_AI_MASK) + VM_EVENT(kvm, 4, "%s", "inject: I/O (AI)"); + else + VM_EVENT(kvm, 4, "inject: I/O %x ss %x schid %04x", + inti->io.subchannel_id >> 8, + inti->io.subchannel_id >> 1 & 0x3, + inti->io.subchannel_nr); isc = 
int_word_to_isc(inti->io.io_int_word); list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc]; list_add_tail(&inti->list, list); @@ -1531,13 +1542,6 @@ int kvm_s390_inject_vm(struct kvm *kvm, inti->mchk.mcic = s390int->parm64; break; case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: - if (inti->type & KVM_S390_INT_IO_AI_MASK) - VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)"); - else - VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x", - s390int->type & IOINT_CSSID_MASK, - s390int->type & IOINT_SSID_MASK, - s390int->type & IOINT_SCHID_MASK); inti->io.subchannel_id = s390int->parm >> 16; inti->io.subchannel_nr = s390int->parm & 0x0000ffffu; inti->io.io_int_parm = s390int->parm64 >> 32; From c427c42cd612719e8fb8b5891cc9761e7770024e Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 10 May 2016 13:51:54 +0200 Subject: [PATCH 40/52] s390/mm: don't drop errors in get_guest_storage_key Commit 1e133ab296f3 ("s390/mm: split arch/s390/mm/pgtable.c") changed the return value of get_guest_storage_key to an unsigned char, resulting in -EFAULT getting interpreted as a valid storage key. 
Cc: stable@vger.kernel.org # 4.6+ Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/pgtable.h | 2 +- arch/s390/mm/pgtable.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 18d2beb89340..42b968a85863 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -893,7 +893,7 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep); bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long address); int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned char key, bool nq); -unsigned char get_guest_storage_key(struct mm_struct *mm, unsigned long addr); +unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr); /* * Certain architectures need to do special things when PTEs diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 4324b87f9398..2a23ca96f9c2 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -543,7 +543,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, } EXPORT_SYMBOL(set_guest_storage_key); -unsigned char get_guest_storage_key(struct mm_struct *mm, unsigned long addr) +unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr) { unsigned char key; spinlock_t *ptl; From d3ed1ceeace311af9973d17a07a114bfaf0ca1b1 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 8 Mar 2016 11:53:35 +0100 Subject: [PATCH 41/52] s390/mm: set and get guest storage key mmap locking Move the mmap semaphore locking out of set_guest_storage_key and get_guest_storage_key. This makes the two functions more like the other ptep_xxx operations and allows to avoid repeated semaphore operations if multiple keys are read or written. 
Reviewed-by: David Hildenbrand Reviewed-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 26 ++++++++++++++++---------- arch/s390/kvm/priv.c | 7 +++++-- arch/s390/mm/pgtable.c | 15 +++------------ 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 9d0e4d0487f4..d0156d7969e0 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1050,26 +1050,30 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) if (!keys) return -ENOMEM; + down_read(&current->mm->mmap_sem); for (i = 0; i < args->count; i++) { hva = gfn_to_hva(kvm, args->start_gfn + i); if (kvm_is_error_hva(hva)) { r = -EFAULT; - goto out; + break; } curkey = get_guest_storage_key(current->mm, hva); if (IS_ERR_VALUE(curkey)) { r = curkey; - goto out; + break; } keys[i] = curkey; } + up_read(&current->mm->mmap_sem); + + if (!r) { + r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys, + sizeof(uint8_t) * args->count); + if (r) + r = -EFAULT; + } - r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys, - sizeof(uint8_t) * args->count); - if (r) - r = -EFAULT; -out: kvfree(keys); return r; } @@ -1106,24 +1110,26 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) if (r) goto out; + down_read(&current->mm->mmap_sem); for (i = 0; i < args->count; i++) { hva = gfn_to_hva(kvm, args->start_gfn + i); if (kvm_is_error_hva(hva)) { r = -EFAULT; - goto out; + break; } /* Lowest order bit is reserved */ if (keys[i] & 0x01) { r = -EINVAL; - goto out; + break; } r = set_guest_storage_key(current->mm, hva, (unsigned long)keys[i], 0); if (r) - goto out; + break; } + up_read(&current->mm->mmap_sem); out: kvfree(keys); return r; diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 95916fa7c670..c6deed782c61 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -728,9 
+728,12 @@ static int 
handle_pfmf(struct kvm_vcpu *vcpu) if (rc) return rc; - if (set_guest_storage_key(current->mm, useraddr, + down_read(&current->mm->mmap_sem); + rc = set_guest_storage_key(current->mm, useraddr, vcpu->run->s.regs.gprs[reg1] & PFMF_KEY, - vcpu->run->s.regs.gprs[reg1] & PFMF_NQ)) + vcpu->run->s.regs.gprs[reg1] & PFMF_NQ); + up_read(&current->mm->mmap_sem); + if (rc) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 2a23ca96f9c2..7612a7c3a3a8 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -506,12 +506,9 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, pgste_t old, new; pte_t *ptep; - down_read(&mm->mmap_sem); ptep = get_locked_pte(mm, addr, &ptl); - if (unlikely(!ptep)) { - up_read(&mm->mmap_sem); + if (unlikely(!ptep)) return -EFAULT; - } new = old = pgste_get_lock(ptep); pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | @@ -538,7 +535,6 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, pgste_set_unlock(ptep, new); pte_unmap_unlock(ptep, ptl); - up_read(&mm->mmap_sem); return 0; } EXPORT_SYMBOL(set_guest_storage_key); @@ -550,14 +546,11 @@ unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr) pgste_t pgste; pte_t *ptep; - down_read(&mm->mmap_sem); ptep = get_locked_pte(mm, addr, &ptl); - if (unlikely(!ptep)) { - up_read(&mm->mmap_sem); + if (unlikely(!ptep)) return -EFAULT; - } - pgste = pgste_get_lock(ptep); + pgste = pgste_get_lock(ptep); if (pte_val(*ptep) & _PAGE_INVALID) { key = (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56; key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56; @@ -572,10 +565,8 @@ unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr) if (pgste_val(pgste) & PGSTE_GC_BIT) key |= _PAGE_CHANGED; } - pgste_set_unlock(ptep, pgste); pte_unmap_unlock(ptep, ptl); - up_read(&mm->mmap_sem); return key; } EXPORT_SYMBOL(get_guest_storage_key); From 
8d6037a7b4f21708451d4aec14828f9ebe77b37a Mon 
Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 9 May 2016 11:15:32 +0200 Subject: [PATCH 42/52] s390/mm: simplify get_guest_storage_key We can save a few LOC and make that function easier to understand by rewriting existing code. Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/mm/pgtable.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 7612a7c3a3a8..4c8d572d59cc 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -551,20 +551,11 @@ unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr) return -EFAULT; pgste = pgste_get_lock(ptep); - if (pte_val(*ptep) & _PAGE_INVALID) { - key = (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56; - key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56; - key |= (pgste_val(pgste) & PGSTE_GR_BIT) >> 48; - key |= (pgste_val(pgste) & PGSTE_GC_BIT) >> 48; - } else { + key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; + if (!(pte_val(*ptep) & _PAGE_INVALID)) key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK); - - /* Reflect guest's logical view, not physical */ - if (pgste_val(pgste) & PGSTE_GR_BIT) - key |= _PAGE_REFERENCED; - if (pgste_val(pgste) & PGSTE_GC_BIT) - key |= _PAGE_CHANGED; - } + /* Reflect guest's logical view, not physical */ + key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48; pgste_set_unlock(ptep, pgste); pte_unmap_unlock(ptep, ptl); return key; From 154c8c19c35b6da94a623cb793458e203572083d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 9 May 2016 11:22:34 +0200 Subject: [PATCH 43/52] s390/mm: return key via pointer in get_guest_storage_key Let's just split returning the key and reporting errors. This makes calling code easier and avoids bugs as happened already. 
Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/pgtable.h | 3 ++- arch/s390/kvm/kvm-s390.c | 8 ++------ arch/s390/mm/pgtable.c | 12 ++++++------ 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 42b968a85863..91f0e7b79821 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -893,7 +893,8 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep); bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long address); int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned char key, bool nq); -unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr); +int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, + unsigned char *key); /* * Certain architectures need to do special things when PTEs diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d0156d7969e0..ad166c6698e0 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1029,7 +1029,6 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) { uint8_t *keys; uint64_t hva; - unsigned long curkey; int i, r = 0; if (args->flags != 0) @@ -1058,12 +1057,9 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) break; } - curkey = get_guest_storage_key(current->mm, hva); - if (IS_ERR_VALUE(curkey)) { - r = curkey; + r = get_guest_storage_key(current->mm, hva, &keys[i]); + if (r) break; - } - keys[i] = curkey; } up_read(&current->mm->mmap_sem); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 4c8d572d59cc..3e35298758d6 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -539,9 +539,9 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, } EXPORT_SYMBOL(set_guest_storage_key); -unsigned long 
get_guest_storage_key(struct 
mm_struct *mm, unsigned long addr) +int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, + unsigned char *key) { - unsigned char key; spinlock_t *ptl; pgste_t pgste; pte_t *ptep; @@ -551,14 +551,14 @@ unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr) return -EFAULT; pgste = pgste_get_lock(ptep); - key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; + *key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; if (!(pte_val(*ptep) & _PAGE_INVALID)) - key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK); + *key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK); /* Reflect guest's logical view, not physical */ - key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48; + *key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48; pgste_set_unlock(ptep, pgste); pte_unmap_unlock(ptep, ptl); - return key; + return 0; } EXPORT_SYMBOL(get_guest_storage_key); #endif From fe69eabf8deb85ae8b2958830ea3b2911e332820 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 9 May 2016 13:08:07 +0200 Subject: [PATCH 44/52] KVM: s390: storage keys fit into a char No need to convert the storage key into an unsigned long, the target function expects a char as argument. 
Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ad166c6698e0..49c60393a15c 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1120,8 +1120,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) break; } - r = set_guest_storage_key(current->mm, hva, - (unsigned long)keys[i], 0); + r = set_guest_storage_key(current->mm, hva, keys[i], 0); if (r) break; } From 6164a2e90a5b6c5c32ccfe7a1baff80d603d702d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 13 Apr 2016 10:09:47 +0200 Subject: [PATCH 45/52] KVM: s390: pfmf: fix end address calculation The current calculation is wrong if absolute != real address. Let's just calculate the start address for 4k frames upfront. Otherwise, the calculated end address will be wrong, resulting in wrong memory location/storage keys getting touched. To keep low-address protection working (using the effective address), we have to move the check. 
Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/priv.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index c6deed782c61..bfba98302ca0 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -682,8 +682,15 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; start = kvm_s390_logical_to_effective(vcpu, start); + if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) { + if (kvm_s390_check_low_addr_prot_real(vcpu, start)) + return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); + } + switch (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) { case 0x00000000: + /* only 4k frames specify a real address */ + start = kvm_s390_real_to_abs(vcpu, start); end = (start + (1UL << 12)) & ~((1UL << 12) - 1); break; case 0x00001000: @@ -701,20 +708,11 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); } - if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) { - if (kvm_s390_check_low_addr_prot_real(vcpu, start)) - return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); - } - while (start < end) { - unsigned long useraddr, abs_addr; + unsigned long useraddr; /* Translate guest address to host address */ - if ((vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) == 0) - abs_addr = kvm_s390_real_to_abs(vcpu, start); - else - abs_addr = start; - useraddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(abs_addr)); + useraddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start)); if (kvm_is_error_hva(useraddr)) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); From 9a68f0af8cd907452fa6c33343d38cdacff96294 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 13 Apr 2016 12:09:58 +0200 Subject: [PATCH 46/52] KVM: s390: pfmf: MR and MC are ignored without CSSKE These two bits are simply ignored when the conditional-SSKE facility is not installed. 
Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/priv.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index bfba98302ca0..5c926b74d7ca 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -675,10 +675,6 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) !test_kvm_facility(vcpu->kvm, 14)) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - /* No support for conditional-SSKE */ - if (vcpu->run->s.regs.gprs[reg1] & (PFMF_MR | PFMF_MC)) - return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; start = kvm_s390_logical_to_effective(vcpu, start); From 2c26d1d23abd9a67d056c95a0823132a71edc477 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 13 Apr 2016 15:47:21 +0200 Subject: [PATCH 47/52] KVM: s390: pfmf: take care of amode when setting reg2 Depending on the addressing mode, we must not overwrite bit 0-31 of the register. In addition, 24 bit and 31 bit have to set certain bits to 0, which is guaranteed by converting the end address to an effective address. 
Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/priv.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 5c926b74d7ca..71fa603034d0 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -733,8 +733,15 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) start += PAGE_SIZE; } - if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) - vcpu->run->s.regs.gprs[reg2] = end; + if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) { + if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_AMODE_64BIT) { + vcpu->run->s.regs.gprs[reg2] = end; + } else { + vcpu->run->s.regs.gprs[reg2] &= ~0xffffffffUL; + end = kvm_s390_logical_to_effective(vcpu, end); + vcpu->run->s.regs.gprs[reg2] |= end; + } + } return 0; } From 1824c723ac90f9870ebafae4b3b3e5f4b82ffeef Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 10 May 2016 09:43:11 +0200 Subject: [PATCH 48/52] KVM: s390: pfmf: support conditional-sske facility We already indicate that facility but don't implement it in our pfmf interception handler. Let's add a new storage key handling function for conditionally setting the guest storage key. As we will reuse this function later on, let's directly implement returning the old key via parameter and indicating if any change happened via rc. 
Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/pgtable.h | 3 +++ arch/s390/kvm/priv.c | 18 ++++++++++++++---- arch/s390/mm/pgtable.c | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 4 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 91f0e7b79821..2f6702e27db9 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -893,6 +893,9 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep); bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long address); int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned char key, bool nq); +int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr, + unsigned char key, unsigned char *oldkey, + bool nq, bool mr, bool mc); int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned char *key); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 71fa603034d0..752a1ac1aab6 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -654,8 +654,10 @@ static int handle_epsw(struct kvm_vcpu *vcpu) static int handle_pfmf(struct kvm_vcpu *vcpu) { + bool mr = false, mc = false, nq; int reg1, reg2; unsigned long start, end; + unsigned char key; vcpu->stat.instruction_pfmf++; @@ -675,6 +677,15 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) !test_kvm_facility(vcpu->kvm, 14)) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + /* Only provide conditional-SSKE support if enabled for the guest */ + if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK && + test_kvm_facility(vcpu->kvm, 10)) { + mr = vcpu->run->s.regs.gprs[reg1] & PFMF_MR; + mc = vcpu->run->s.regs.gprs[reg1] & PFMF_MC; + } + + nq = vcpu->run->s.regs.gprs[reg1] & PFMF_NQ; + key = vcpu->run->s.regs.gprs[reg1] & PFMF_KEY; start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; start = kvm_s390_logical_to_effective(vcpu, start); @@ -723,11 
+734,10 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) if (rc) return rc; down_read(&current->mm->mmap_sem); - rc = set_guest_storage_key(current->mm, useraddr, - vcpu->run->s.regs.gprs[reg1] & PFMF_KEY, - vcpu->run->s.regs.gprs[reg1] & PFMF_NQ); + rc = cond_set_guest_storage_key(current->mm, useraddr, + key, NULL, nq, mr, mc); up_read(&current->mm->mmap_sem); - if (rc) + if (rc < 0) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 3e35298758d6..e791e8b27fd2 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -539,6 +539,39 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, } EXPORT_SYMBOL(set_guest_storage_key); +/** + * Conditionally set a guest storage key (handling csske). + * oldkey will be updated when either mr or mc is set and a pointer is given. + * + * Returns 0 if a guest's storage key update wasn't necessary, 1 if the guest + * storage key was updated and -EFAULT on access errors. + */ +int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr, + unsigned char key, unsigned char *oldkey, + bool nq, bool mr, bool mc) +{ + unsigned char tmp, mask = _PAGE_ACC_BITS | _PAGE_FP_BIT; + int rc; + + /* we can drop the pgste lock between getting and setting the key */ + if (mr | mc) { + rc = get_guest_storage_key(current->mm, addr, &tmp); + if (rc) + return rc; + if (oldkey) + *oldkey = tmp; + if (!mr) + mask |= _PAGE_REFERENCED; + if (!mc) + mask |= _PAGE_CHANGED; + if (!((tmp ^ key) & mask)) + return 0; + } + rc = set_guest_storage_key(current->mm, addr, key, nq); + return rc < 0 ?
rc : 1; +} +EXPORT_SYMBOL(cond_set_guest_storage_key); + int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned char *key) { From 695be0e7a24a8875c347437566f2c44ba673580b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 May 2016 14:07:05 +0200 Subject: [PATCH 49/52] KVM: s390: pfmf: handle address overflows In theory, end could always end up being < start, if overflowing to 0. Although very unlikely for now, let's just fix it. Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/priv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 752a1ac1aab6..b8327b8fdb8f 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -715,7 +715,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); } - while (start < end) { + while (start != end) { unsigned long useraddr; /* Translate guest address to host address */ From 238614515287c9400727e4cd7aa958649dcbf05f Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 24 Nov 2015 12:56:43 +0100 Subject: [PATCH 50/52] s390/sclp: detect storage-key facility Let's correctly detect that facility. 
Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/sclp.h | 4 +++- drivers/s390/char/sclp_early.c | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 99a0150d07b9..2ad9c204b1a2 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -32,7 +32,8 @@ struct sclp_core_entry { u8 reserved0; u8 : 4; u8 sief2 : 1; - u8 : 3; + u8 skey : 1; + u8 : 2; u8 : 2; u8 gpere : 1; u8 siif : 1; @@ -73,6 +74,7 @@ struct sclp_info { unsigned char has_cei : 1; unsigned char has_pfmfi : 1; unsigned char has_ibs : 1; + unsigned char has_skey : 1; unsigned int ibc; unsigned int mtid; unsigned int mtid_cp; diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index d5b873c92ffc..c71df0c7dedc 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -154,6 +154,7 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb) sclp.has_gpere = cpue->gpere; sclp.has_ib = cpue->ib; sclp.has_cei = cpue->cei; + sclp.has_skey = cpue->skey; break; } From 11ddcd41bce5c2394b0390584236afdd13656998 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 10 May 2016 09:40:09 +0200 Subject: [PATCH 51/52] KVM: s390: trace and count all skey intercepts Let's trace and count all skey handling operations, even if lazy skey handling was already activated. Also, don't enable lazy skey handling if anything went wrong while enabling skey handling for the SIE. 
Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/priv.c | 13 ++++++++----- arch/s390/kvm/trace.h | 2 +- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index b8327b8fdb8f..6745c2a602c3 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -152,24 +152,27 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu) static int __skey_check_enable(struct kvm_vcpu *vcpu) { int rc = 0; + + trace_kvm_s390_skey_related_inst(vcpu); if (!(vcpu->arch.sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE))) return rc; rc = s390_enable_skey(); - VCPU_EVENT(vcpu, 3, "%s", "enabling storage keys for guest"); - trace_kvm_s390_skey_related_inst(vcpu); - vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE); + VCPU_EVENT(vcpu, 3, "enabling storage keys for guest: %d", rc); + if (!rc) + vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE); return rc; } static int handle_skey(struct kvm_vcpu *vcpu) { - int rc = __skey_check_enable(vcpu); + int rc; + vcpu->stat.instruction_storage_key++; + rc = __skey_check_enable(vcpu); if (rc) return rc; - vcpu->stat.instruction_storage_key++; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h index 1c4586b367a4..4fc9d4e5be89 100644 --- a/arch/s390/kvm/trace.h +++ b/arch/s390/kvm/trace.h @@ -41,7 +41,7 @@ TRACE_EVENT(kvm_s390_skey_related_inst, TP_fast_assign( VCPU_ASSIGN_COMMON ), - VCPU_TP_PRINTK("%s", "first instruction related to skeys on vcpu") + VCPU_TP_PRINTK("%s", "storage key related instruction") ); TRACE_EVENT(kvm_s390_major_guest_pfault, From a7e19ab55ffdd82f1a8d12694b9a0c0beeef534c Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 10 May 2016 09:50:21 +0200 Subject: [PATCH 52/52] KVM: s390: handle missing storage-key facility Without the storage-key facility, SIE won't 
interpret SSKE, ISKE and RRBE for us. So let's add proper interception handlers that will be called if lazy sske cannot be enabled. Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/page.h | 7 +- arch/s390/include/asm/pgtable.h | 1 + arch/s390/kvm/priv.c | 150 ++++++++++++++++++++++++++++++-- arch/s390/mm/pgtable.c | 37 ++++++++ 4 files changed, 184 insertions(+), 11 deletions(-) diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 53eacbd4f09b..f874e7d51c19 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -109,13 +109,14 @@ static inline unsigned char page_get_storage_key(unsigned long addr) static inline int page_reset_referenced(unsigned long addr) { - unsigned int ipm; + int cc; asm volatile( " rrbe 0,%1\n" " ipm %0\n" - : "=d" (ipm) : "a" (addr) : "cc"); - return !!(ipm & 0x20000000); + " srl %0,28\n" + : "=d" (cc) : "a" (addr) : "cc"); + return cc; } /* Bits int the storage key */ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 2f6702e27db9..9951e7e59756 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -896,6 +896,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned char key, unsigned char *oldkey, bool nq, bool mr, bool mc); +int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr); int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned char *key); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 6745c2a602c3..3db3be139992 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "gaccess.h" #include "kvm-s390.h" #include "trace.h" @@ -164,8 +165,7 @@ static int __skey_check_enable(struct kvm_vcpu *vcpu) return rc; } - -static int 
handle_skey(struct kvm_vcpu *vcpu) +static int try_handle_skey(struct kvm_vcpu *vcpu) { int rc; @@ -173,12 +173,146 @@ static int handle_skey(struct kvm_vcpu *vcpu) rc = __skey_check_enable(vcpu); if (rc) return rc; - + if (sclp.has_skey) { + /* with storage-key facility, SIE interprets it for us */ + kvm_s390_retry_instr(vcpu); + VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); + return -EAGAIN; + } if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + return 0; +} - kvm_s390_retry_instr(vcpu); - VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); +static int handle_iske(struct kvm_vcpu *vcpu) +{ + unsigned long addr; + unsigned char key; + int reg1, reg2; + int rc; + + rc = try_handle_skey(vcpu); + if (rc) + return rc != -EAGAIN ? rc : 0; + + kvm_s390_get_regs_rre(vcpu, &reg1, &reg2); + + addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; + addr = kvm_s390_logical_to_effective(vcpu, addr); + addr = kvm_s390_real_to_abs(vcpu, addr); + addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr)); + if (kvm_is_error_hva(addr)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + + down_read(&current->mm->mmap_sem); + rc = get_guest_storage_key(current->mm, addr, &key); + up_read(&current->mm->mmap_sem); + if (rc) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + vcpu->run->s.regs.gprs[reg1] &= ~0xff; + vcpu->run->s.regs.gprs[reg1] |= key; + return 0; +} + +static int handle_rrbe(struct kvm_vcpu *vcpu) +{ + unsigned long addr; + int reg1, reg2; + int rc; + + rc = try_handle_skey(vcpu); + if (rc) + return rc != -EAGAIN ?
rc : 0; + + kvm_s390_get_regs_rre(vcpu, &reg1, &reg2); + + addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; + addr = kvm_s390_logical_to_effective(vcpu, addr); + addr = kvm_s390_real_to_abs(vcpu, addr); + addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr)); + if (kvm_is_error_hva(addr)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + + down_read(&current->mm->mmap_sem); + rc = reset_guest_reference_bit(current->mm, addr); + up_read(&current->mm->mmap_sem); + if (rc < 0) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + + kvm_s390_set_psw_cc(vcpu, rc); + return 0; +} + +#define SSKE_NQ 0x8 +#define SSKE_MR 0x4 +#define SSKE_MC 0x2 +#define SSKE_MB 0x1 +static int handle_sske(struct kvm_vcpu *vcpu) +{ + unsigned char m3 = vcpu->arch.sie_block->ipb >> 28; + unsigned long start, end; + unsigned char key, oldkey; + int reg1, reg2; + int rc; + + rc = try_handle_skey(vcpu); + if (rc) + return rc != -EAGAIN ? rc : 0; + + if (!test_kvm_facility(vcpu->kvm, 8)) + m3 &= ~SSKE_MB; + if (!test_kvm_facility(vcpu->kvm, 10)) + m3 &= ~(SSKE_MC | SSKE_MR); + if (!test_kvm_facility(vcpu->kvm, 14)) + m3 &= ~SSKE_NQ; + + kvm_s390_get_regs_rre(vcpu, &reg1, &reg2); + + key = vcpu->run->s.regs.gprs[reg1] & 0xfe; + start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; + start = kvm_s390_logical_to_effective(vcpu, start); + if (m3 & SSKE_MB) { + /* start already designates an absolute address */ + end = (start + (1UL << 20)) & ~((1UL << 20) - 1); + } else { + start = kvm_s390_real_to_abs(vcpu, start); + end = start + PAGE_SIZE; + } + + while (start != end) { + unsigned long addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start)); + + if (kvm_is_error_hva(addr)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + + down_read(&current->mm->mmap_sem); + rc = cond_set_guest_storage_key(current->mm, addr, key, &oldkey, + m3 & SSKE_NQ, m3 & SSKE_MR, + m3 & SSKE_MC); + up_read(&current->mm->mmap_sem); + if (rc < 0) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + start += PAGE_SIZE; + }; + + if (m3 &
(SSKE_MC | SSKE_MR)) { + if (m3 & SSKE_MB) { + /* skey in reg1 is unpredictable */ + kvm_s390_set_psw_cc(vcpu, 3); + } else { + kvm_s390_set_psw_cc(vcpu, rc); + vcpu->run->s.regs.gprs[reg1] &= ~0xff00UL; + vcpu->run->s.regs.gprs[reg1] |= (u64) oldkey << 8; + } + } + if (m3 & SSKE_MB) { + if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_AMODE_64BIT) + vcpu->run->s.regs.gprs[reg2] &= ~PAGE_MASK; + else + vcpu->run->s.regs.gprs[reg2] &= ~0xfffff000UL; + end = kvm_s390_logical_to_effective(vcpu, end); + vcpu->run->s.regs.gprs[reg2] |= end; + } return 0; } @@ -586,9 +720,9 @@ static const intercept_handler_t b2_handlers[256] = { [0x11] = handle_store_prefix, [0x12] = handle_store_cpu_address, [0x21] = handle_ipte_interlock, - [0x29] = handle_skey, - [0x2a] = handle_skey, - [0x2b] = handle_skey, + [0x29] = handle_iske, + [0x2a] = handle_rrbe, + [0x2b] = handle_sske, [0x2c] = handle_test_block, [0x30] = handle_io_inst, [0x31] = handle_io_inst, diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index e791e8b27fd2..fa286d0c0f2d 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -572,6 +572,43 @@ int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr, } EXPORT_SYMBOL(cond_set_guest_storage_key); +/** + * Reset a guest reference bit (rrbe), returning the reference and changed bit. + * + * Returns < 0 in case of error, otherwise the cc to be reported to the guest. 
+ */ +int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr) +{ + spinlock_t *ptl; + pgste_t old, new; + pte_t *ptep; + int cc = 0; + + ptep = get_locked_pte(mm, addr, &ptl); + if (unlikely(!ptep)) + return -EFAULT; + + new = old = pgste_get_lock(ptep); + /* Reset guest reference bit only */ + pgste_val(new) &= ~PGSTE_GR_BIT; + + if (!(pte_val(*ptep) & _PAGE_INVALID)) { + cc = page_reset_referenced(pte_val(*ptep) & PAGE_MASK); + /* Merge real referenced bit into host-set */ + pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT; + } + /* Reflect guest's logical view, not physical */ + cc |= (pgste_val(old) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 49; + /* Changing the guest storage key is considered a change of the page */ + if ((pgste_val(new) ^ pgste_val(old)) & PGSTE_GR_BIT) + pgste_val(new) |= PGSTE_UC_BIT; + + pgste_set_unlock(ptep, new); + pte_unmap_unlock(ptep, ptl); + return cc; +} +EXPORT_SYMBOL(reset_guest_reference_bit); + int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned char *key) {