From 9c3da0991754d480328eeaa2b90cb231a1cea9b6 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Sat, 17 Jan 2009 17:11:57 -0800 Subject: [PATCH 01/30] IB: Remove __constant_{endian} uses The base versions handle constant folding just fine, use them directly. The replacements are OK in the include/ files as they are not exported to userspace so we don't need the __ prefixed versions. This patch does not affect code generation at all. Signed-off-by: Harvey Harrison Signed-off-by: Roland Dreier --- drivers/infiniband/core/cm.c | 15 ++- drivers/infiniband/core/cm_msgs.h | 22 ++--- drivers/infiniband/core/mad_rmpp.c | 2 +- drivers/infiniband/hw/cxgb3/iwch_qp.c | 4 +- drivers/infiniband/hw/ehca/ehca_sqp.c | 8 +- drivers/infiniband/hw/ipath/ipath_eeprom.c | 4 +- drivers/infiniband/hw/ipath/ipath_mad.c | 95 +++++++++---------- drivers/infiniband/hw/ipath/ipath_rc.c | 2 +- drivers/infiniband/hw/ipath/ipath_sdma.c | 4 +- drivers/infiniband/hw/ipath/ipath_uc.c | 2 +- drivers/infiniband/hw/ipath/ipath_ud.c | 4 +- drivers/infiniband/hw/ipath/ipath_user_sdma.c | 6 +- drivers/infiniband/hw/ipath/ipath_verbs.c | 2 +- drivers/infiniband/hw/ipath/ipath_verbs.h | 10 +- drivers/infiniband/hw/mlx4/qp.c | 22 ++--- include/rdma/ib_cm.h | 12 +-- include/rdma/ib_mad.h | 2 +- include/rdma/ib_smi.h | 34 +++---- 18 files changed, 124 insertions(+), 126 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index f1e82a92e61e..5130fc55b8e2 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -927,8 +927,7 @@ int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask, unsigned long flags; int ret = 0; - service_mask = service_mask ? service_mask : - __constant_cpu_to_be64(~0ULL); + service_mask = service_mask ? service_mask : ~cpu_to_be64(0); service_id &= service_mask; if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID && (service_id != IB_CM_ASSIGN_SERVICE_ID)) @@ -954,7 +953,7 @@ int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask, spin_lock_irqsave(&cm.lock, flags); if (service_id == IB_CM_ASSIGN_SERVICE_ID) { cm_id->service_id = cpu_to_be64(cm.listen_service_id++); - cm_id->service_mask = __constant_cpu_to_be64(~0ULL); + cm_id->service_mask = ~cpu_to_be64(0); } else { cm_id->service_id = service_id; cm_id->service_mask = service_mask; @@ -1134,7 +1133,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, goto error1; } cm_id->service_id = param->service_id; - cm_id->service_mask = __constant_cpu_to_be64(~0ULL); + cm_id->service_mask = ~cpu_to_be64(0); cm_id_priv->timeout_ms = cm_convert_to_ms( param->primary_path->packet_life_time) * 2 + cm_convert_to_ms( @@ -1545,7 +1544,7 @@ static int cm_req_handler(struct cm_work *work) cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler; cm_id_priv->id.context = listen_cm_id_priv->id.context; cm_id_priv->id.service_id = req_msg->service_id; - cm_id_priv->id.service_mask = __constant_cpu_to_be64(~0ULL); + cm_id_priv->id.service_mask = ~cpu_to_be64(0); cm_process_routed_req(req_msg, work->mad_recv_wc->wc); cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]); @@ -2898,7 +2897,7 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, goto out; cm_id->service_id = param->service_id; - cm_id->service_mask = __constant_cpu_to_be64(~0ULL); + cm_id->service_mask = ~cpu_to_be64(0); cm_id_priv->timeout_ms = param->timeout_ms; cm_id_priv->max_cm_retries = param->max_cm_retries; ret = cm_alloc_msg(cm_id_priv, &msg); @@ -2992,7 +2991,7 @@ static int cm_sidr_req_handler(struct cm_work *work) cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler; cm_id_priv->id.context = cur_cm_id_priv->id.context; cm_id_priv->id.service_id = sidr_req_msg->service_id; - cm_id_priv->id.service_mask = __constant_cpu_to_be64(~0ULL); + cm_id_priv->id.service_mask = ~cpu_to_be64(0); cm_format_sidr_req_event(work, &cur_cm_id_priv->id); cm_process_work(cm_id_priv, work); @@ -3789,7 +3788,7 @@ static int __init ib_cm_init(void) rwlock_init(&cm.device_lock); spin_lock_init(&cm.lock); cm.listen_service_table = RB_ROOT; - cm.listen_service_id = __constant_be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID); + cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID); cm.remote_id_table = RB_ROOT; cm.remote_qp_table = RB_ROOT; cm.remote_sidr_table = RB_ROOT; diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h index aec9c7af825d..7e63c08f697c 100644 --- a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -44,17 +44,17 @@ #define IB_CM_CLASS_VERSION 2 /* IB specification 1.2 */ -#define CM_REQ_ATTR_ID __constant_htons(0x0010) -#define CM_MRA_ATTR_ID __constant_htons(0x0011) -#define CM_REJ_ATTR_ID __constant_htons(0x0012) -#define CM_REP_ATTR_ID __constant_htons(0x0013) -#define CM_RTU_ATTR_ID __constant_htons(0x0014) -#define CM_DREQ_ATTR_ID __constant_htons(0x0015) -#define CM_DREP_ATTR_ID __constant_htons(0x0016) -#define CM_SIDR_REQ_ATTR_ID __constant_htons(0x0017) -#define CM_SIDR_REP_ATTR_ID __constant_htons(0x0018) -#define CM_LAP_ATTR_ID __constant_htons(0x0019) -#define CM_APR_ATTR_ID __constant_htons(0x001A) +#define CM_REQ_ATTR_ID cpu_to_be16(0x0010) +#define CM_MRA_ATTR_ID cpu_to_be16(0x0011) +#define CM_REJ_ATTR_ID cpu_to_be16(0x0012) +#define CM_REP_ATTR_ID cpu_to_be16(0x0013) +#define CM_RTU_ATTR_ID cpu_to_be16(0x0014) +#define CM_DREQ_ATTR_ID cpu_to_be16(0x0015) +#define CM_DREP_ATTR_ID cpu_to_be16(0x0016) +#define CM_SIDR_REQ_ATTR_ID cpu_to_be16(0x0017) +#define CM_SIDR_REP_ATTR_ID cpu_to_be16(0x0018) +#define CM_LAP_ATTR_ID cpu_to_be16(0x0019) +#define CM_APR_ATTR_ID cpu_to_be16(0x001A) enum cm_msg_sequence { CM_MSG_SEQUENCE_REQ, diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index 3af2b84cd838..57a3c6f947b2 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -735,7 +735,7 @@ process_rmpp_data(struct ib_mad_agent_private *agent, goto bad; } - if (rmpp_hdr->seg_num == __constant_htonl(1)) { + if (rmpp_hdr->seg_num == cpu_to_be32(1)) { if (!(ib_get_rmpp_flags(rmpp_hdr) & IB_MGMT_RMPP_FLAG_FIRST)) { rmpp_status = IB_MGMT_RMPP_STATUS_BAD_SEG; goto bad; diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index 19661b2f0406..48e2b0bcabd0 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -99,8 +99,8 @@ static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr, if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { plen = 4; wqe->write.sgl[0].stag = wr->ex.imm_data; - wqe->write.sgl[0].len = __constant_cpu_to_be32(0); - wqe->write.num_sgle = __constant_cpu_to_be32(0); + wqe->write.sgl[0].len = cpu_to_be32(0); + wqe->write.num_sgle = cpu_to_be32(0); *flit_cnt = 6; } else { plen = 0; diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c b/drivers/infiniband/hw/ehca/ehca_sqp.c index 44447aaa5501..c568b28f4e20 100644 --- a/drivers/infiniband/hw/ehca/ehca_sqp.c +++ b/drivers/infiniband/hw/ehca/ehca_sqp.c @@ -46,11 +46,11 @@ #include "ehca_iverbs.h" #include "hcp_if.h" -#define IB_MAD_STATUS_REDIRECT __constant_htons(0x0002) -#define IB_MAD_STATUS_UNSUP_VERSION __constant_htons(0x0004) -#define IB_MAD_STATUS_UNSUP_METHOD __constant_htons(0x0008) +#define IB_MAD_STATUS_REDIRECT cpu_to_be16(0x0002) +#define IB_MAD_STATUS_UNSUP_VERSION cpu_to_be16(0x0004) +#define IB_MAD_STATUS_UNSUP_METHOD cpu_to_be16(0x0008) -#define IB_PMA_CLASS_PORT_INFO __constant_htons(0x0001) +#define IB_PMA_CLASS_PORT_INFO cpu_to_be16(0x0001) /** * ehca_define_sqp - Defines special queue pair 1 (GSI QP). When special queue diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c index dc37277f1c80..fc7181985e8e 100644 --- a/drivers/infiniband/hw/ipath/ipath_eeprom.c +++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c @@ -772,8 +772,8 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd) "0x%x, not 0x%x\n", csum, ifp->if_csum); goto done; } - if (*(__be64 *) ifp->if_guid == 0ULL || - *(__be64 *) ifp->if_guid == __constant_cpu_to_be64(-1LL)) { + if (*(__be64 *) ifp->if_guid == cpu_to_be64(0) || + *(__be64 *) ifp->if_guid == ~cpu_to_be64(0)) { ipath_dev_err(dd, "Invalid GUID %llx from flash; " "ignoring\n", *(unsigned long long *) ifp->if_guid); diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c index 17a123197477..16a702d46018 100644 --- a/drivers/infiniband/hw/ipath/ipath_mad.c +++ b/drivers/infiniband/hw/ipath/ipath_mad.c @@ -37,10 +37,10 @@ #include "ipath_verbs.h" #include "ipath_common.h" -#define IB_SMP_UNSUP_VERSION __constant_htons(0x0004) -#define IB_SMP_UNSUP_METHOD __constant_htons(0x0008) -#define IB_SMP_UNSUP_METH_ATTR __constant_htons(0x000C) -#define IB_SMP_INVALID_FIELD __constant_htons(0x001C) +#define IB_SMP_UNSUP_VERSION cpu_to_be16(0x0004) +#define IB_SMP_UNSUP_METHOD cpu_to_be16(0x0008) +#define IB_SMP_UNSUP_METH_ATTR cpu_to_be16(0x000C) +#define IB_SMP_INVALID_FIELD cpu_to_be16(0x001C) static int reply(struct ib_smp *smp) { @@ -789,12 +789,12 @@ static int recv_subn_set_pkeytable(struct ib_smp *smp, return recv_subn_get_pkeytable(smp, ibdev); } -#define IB_PMA_CLASS_PORT_INFO __constant_htons(0x0001) -#define IB_PMA_PORT_SAMPLES_CONTROL __constant_htons(0x0010) -#define IB_PMA_PORT_SAMPLES_RESULT __constant_htons(0x0011) -#define IB_PMA_PORT_COUNTERS __constant_htons(0x0012) -#define IB_PMA_PORT_COUNTERS_EXT __constant_htons(0x001D) -#define IB_PMA_PORT_SAMPLES_RESULT_EXT __constant_htons(0x001E) +#define IB_PMA_CLASS_PORT_INFO cpu_to_be16(0x0001) +#define IB_PMA_PORT_SAMPLES_CONTROL cpu_to_be16(0x0010) +#define IB_PMA_PORT_SAMPLES_RESULT cpu_to_be16(0x0011) +#define IB_PMA_PORT_COUNTERS cpu_to_be16(0x0012) +#define IB_PMA_PORT_COUNTERS_EXT cpu_to_be16(0x001D) +#define IB_PMA_PORT_SAMPLES_RESULT_EXT cpu_to_be16(0x001E) struct ib_perf { u8 base_version; @@ -884,19 +884,19 @@ struct ib_pma_portcounters { __be32 port_rcv_packets; } __attribute__ ((packed)); -#define IB_PMA_SEL_SYMBOL_ERROR __constant_htons(0x0001) -#define IB_PMA_SEL_LINK_ERROR_RECOVERY __constant_htons(0x0002) -#define IB_PMA_SEL_LINK_DOWNED __constant_htons(0x0004) -#define IB_PMA_SEL_PORT_RCV_ERRORS __constant_htons(0x0008) -#define IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS __constant_htons(0x0010) -#define IB_PMA_SEL_PORT_XMIT_DISCARDS __constant_htons(0x0040) -#define IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS __constant_htons(0x0200) -#define IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS __constant_htons(0x0400) -#define IB_PMA_SEL_PORT_VL15_DROPPED __constant_htons(0x0800) -#define IB_PMA_SEL_PORT_XMIT_DATA __constant_htons(0x1000) -#define IB_PMA_SEL_PORT_RCV_DATA __constant_htons(0x2000) -#define IB_PMA_SEL_PORT_XMIT_PACKETS __constant_htons(0x4000) -#define IB_PMA_SEL_PORT_RCV_PACKETS __constant_htons(0x8000) +#define IB_PMA_SEL_SYMBOL_ERROR cpu_to_be16(0x0001) +#define IB_PMA_SEL_LINK_ERROR_RECOVERY cpu_to_be16(0x0002) +#define IB_PMA_SEL_LINK_DOWNED cpu_to_be16(0x0004) +#define IB_PMA_SEL_PORT_RCV_ERRORS cpu_to_be16(0x0008) +#define IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS cpu_to_be16(0x0010) +#define IB_PMA_SEL_PORT_XMIT_DISCARDS cpu_to_be16(0x0040) +#define IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS cpu_to_be16(0x0200) +#define IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS cpu_to_be16(0x0400) +#define IB_PMA_SEL_PORT_VL15_DROPPED cpu_to_be16(0x0800) +#define IB_PMA_SEL_PORT_XMIT_DATA cpu_to_be16(0x1000) +#define IB_PMA_SEL_PORT_RCV_DATA cpu_to_be16(0x2000) +#define IB_PMA_SEL_PORT_XMIT_PACKETS cpu_to_be16(0x4000) +#define IB_PMA_SEL_PORT_RCV_PACKETS cpu_to_be16(0x8000) struct ib_pma_portcounters_ext { u8 reserved; @@ -913,14 +913,14 @@ struct ib_pma_portcounters_ext { __be64 port_multicast_rcv_packets; } __attribute__ ((packed)); -#define IB_PMA_SELX_PORT_XMIT_DATA __constant_htons(0x0001) -#define IB_PMA_SELX_PORT_RCV_DATA __constant_htons(0x0002) -#define IB_PMA_SELX_PORT_XMIT_PACKETS __constant_htons(0x0004) -#define IB_PMA_SELX_PORT_RCV_PACKETS __constant_htons(0x0008) -#define IB_PMA_SELX_PORT_UNI_XMIT_PACKETS __constant_htons(0x0010) -#define IB_PMA_SELX_PORT_UNI_RCV_PACKETS __constant_htons(0x0020) -#define IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS __constant_htons(0x0040) -#define IB_PMA_SELX_PORT_MULTI_RCV_PACKETS __constant_htons(0x0080) +#define IB_PMA_SELX_PORT_XMIT_DATA cpu_to_be16(0x0001) +#define IB_PMA_SELX_PORT_RCV_DATA cpu_to_be16(0x0002) +#define IB_PMA_SELX_PORT_XMIT_PACKETS cpu_to_be16(0x0004) +#define IB_PMA_SELX_PORT_RCV_PACKETS cpu_to_be16(0x0008) +#define IB_PMA_SELX_PORT_UNI_XMIT_PACKETS cpu_to_be16(0x0010) +#define IB_PMA_SELX_PORT_UNI_RCV_PACKETS cpu_to_be16(0x0020) +#define IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS cpu_to_be16(0x0040) +#define IB_PMA_SELX_PORT_MULTI_RCV_PACKETS cpu_to_be16(0x0080) static int recv_pma_get_classportinfo(struct ib_perf *pmp) { @@ -933,7 +933,7 @@ static int recv_pma_get_classportinfo(struct ib_perf *pmp) pmp->status |= IB_SMP_INVALID_FIELD; /* Indicate AllPortSelect is valid (only one port anyway) */ - p->cap_mask = __constant_cpu_to_be16(1 << 8); + p->cap_mask = cpu_to_be16(1 << 8); p->base_version = 1; p->class_version = 1; /* @@ -951,12 +951,11 @@ static int recv_pma_get_classportinfo(struct ib_perf *pmp) * We support 5 counters which only count the mandatory quantities. */ #define COUNTER_MASK(q, n) (q << ((9 - n) * 3)) -#define COUNTER_MASK0_9 \ - __constant_cpu_to_be32(COUNTER_MASK(1, 0) | \ - COUNTER_MASK(1, 1) | \ - COUNTER_MASK(1, 2) | \ - COUNTER_MASK(1, 3) | \ - COUNTER_MASK(1, 4)) +#define COUNTER_MASK0_9 cpu_to_be32(COUNTER_MASK(1, 0) | \ + COUNTER_MASK(1, 1) | \ + COUNTER_MASK(1, 2) | \ + COUNTER_MASK(1, 3) | \ + COUNTER_MASK(1, 4)) static int recv_pma_get_portsamplescontrol(struct ib_perf *pmp, struct ib_device *ibdev, u8 port) @@ -1137,7 +1136,7 @@ static int recv_pma_get_portsamplesresult_ext(struct ib_perf *pmp, status = dev->pma_sample_status; p->sample_status = cpu_to_be16(status); /* 64 bits */ - p->extended_width = __constant_cpu_to_be32(0x80000000); + p->extended_width = cpu_to_be32(0x80000000); for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++) p->counter[i] = (status != IB_PMA_SAMPLE_STATUS_DONE) ? 0 : cpu_to_be64( @@ -1185,7 +1184,7 @@ static int recv_pma_get_portcounters(struct ib_perf *pmp, pmp->status |= IB_SMP_INVALID_FIELD; if (cntrs.symbol_error_counter > 0xFFFFUL) - p->symbol_error_counter = __constant_cpu_to_be16(0xFFFF); + p->symbol_error_counter = cpu_to_be16(0xFFFF); else p->symbol_error_counter = cpu_to_be16((u16)cntrs.symbol_error_counter); @@ -1199,17 +1198,17 @@ static int recv_pma_get_portcounters(struct ib_perf *pmp, else p->link_downed_counter = (u8)cntrs.link_downed_counter; if (cntrs.port_rcv_errors > 0xFFFFUL) - p->port_rcv_errors = __constant_cpu_to_be16(0xFFFF); + p->port_rcv_errors = cpu_to_be16(0xFFFF); else p->port_rcv_errors = cpu_to_be16((u16) cntrs.port_rcv_errors); if (cntrs.port_rcv_remphys_errors > 0xFFFFUL) - p->port_rcv_remphys_errors = __constant_cpu_to_be16(0xFFFF); + p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF); else p->port_rcv_remphys_errors = cpu_to_be16((u16)cntrs.port_rcv_remphys_errors); if (cntrs.port_xmit_discards > 0xFFFFUL) - p->port_xmit_discards = __constant_cpu_to_be16(0xFFFF); + p->port_xmit_discards = cpu_to_be16(0xFFFF); else p->port_xmit_discards = cpu_to_be16((u16)cntrs.port_xmit_discards); @@ -1220,24 +1219,24 @@ static int recv_pma_get_portcounters(struct ib_perf *pmp, p->lli_ebor_errors = (cntrs.local_link_integrity_errors << 4) | cntrs.excessive_buffer_overrun_errors; if (cntrs.vl15_dropped > 0xFFFFUL) - p->vl15_dropped = __constant_cpu_to_be16(0xFFFF); + p->vl15_dropped = cpu_to_be16(0xFFFF); else p->vl15_dropped = cpu_to_be16((u16)cntrs.vl15_dropped); if (cntrs.port_xmit_data > 0xFFFFFFFFUL) - p->port_xmit_data = __constant_cpu_to_be32(0xFFFFFFFF); + p->port_xmit_data = cpu_to_be32(0xFFFFFFFF); else p->port_xmit_data = cpu_to_be32((u32)cntrs.port_xmit_data); if (cntrs.port_rcv_data > 0xFFFFFFFFUL) - p->port_rcv_data = __constant_cpu_to_be32(0xFFFFFFFF); + p->port_rcv_data = cpu_to_be32(0xFFFFFFFF); else p->port_rcv_data = cpu_to_be32((u32)cntrs.port_rcv_data); if (cntrs.port_xmit_packets > 0xFFFFFFFFUL) - p->port_xmit_packets = __constant_cpu_to_be32(0xFFFFFFFF); + p->port_xmit_packets = cpu_to_be32(0xFFFFFFFF); else p->port_xmit_packets = cpu_to_be32((u32)cntrs.port_xmit_packets); if (cntrs.port_rcv_packets > 0xFFFFFFFFUL) - p->port_rcv_packets = __constant_cpu_to_be32(0xFFFFFFFF); + p->port_rcv_packets = cpu_to_be32(0xFFFFFFFF); else p->port_rcv_packets = cpu_to_be32((u32) cntrs.port_rcv_packets); diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index 9170710b950d..79b3dbc97179 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -1744,7 +1744,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, /* Signal completion event if the solicited bit is set. */ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, (ohdr->bth[0] & - __constant_cpu_to_be32(1 << 23)) != 0); + cpu_to_be32(1 << 23)) != 0); break; case OP(RDMA_WRITE_FIRST): diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c index 8e255adf5d9b..4b0698590850 100644 --- a/drivers/infiniband/hw/ipath/ipath_sdma.c +++ b/drivers/infiniband/hw/ipath/ipath_sdma.c @@ -781,10 +781,10 @@ retry: descqp = &dd->ipath_sdma_descq[dd->ipath_sdma_descq_cnt].qw[0]; descqp -= 2; /* SDmaLastDesc */ - descqp[0] |= __constant_cpu_to_le64(1ULL << 11); + descqp[0] |= cpu_to_le64(1ULL << 11); if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_INTREQ) { /* SDmaIntReq */ - descqp[0] |= __constant_cpu_to_le64(1ULL << 15); + descqp[0] |= cpu_to_le64(1ULL << 15); } /* Commit writes to memory and advance the tail on the chip */ diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c index 82cc588b8bf2..22e60998f1a7 100644 --- a/drivers/infiniband/hw/ipath/ipath_uc.c +++ b/drivers/infiniband/hw/ipath/ipath_uc.c @@ -419,7 +419,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, /* Signal completion event if the solicited bit is set. */ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, (ohdr->bth[0] & - __constant_cpu_to_be32(1 << 23)) != 0); + cpu_to_be32(1 << 23)) != 0); break; case OP(RDMA_WRITE_FIRST): diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c index 91c74cc797ae..6076cb61bf6a 100644 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ b/drivers/infiniband/hw/ipath/ipath_ud.c @@ -370,7 +370,7 @@ int ipath_make_ud_req(struct ipath_qp *qp) */ ohdr->bth[1] = ah_attr->dlid >= IPATH_MULTICAST_LID_BASE && ah_attr->dlid != IPATH_PERMISSIVE_LID ? - __constant_cpu_to_be32(IPATH_MULTICAST_QPN) : + cpu_to_be32(IPATH_MULTICAST_QPN) : cpu_to_be32(wqe->wr.wr.ud.remote_qpn); ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & IPATH_PSN_MASK); /* @@ -573,7 +573,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, /* Signal completion event if the solicited bit is set. */ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, (ohdr->bth[0] & - __constant_cpu_to_be32(1 << 23)) != 0); + cpu_to_be32(1 << 23)) != 0); bail:; } diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.c b/drivers/infiniband/hw/ipath/ipath_user_sdma.c index 82d9a0b5ca2f..7bff4b9baa0a 100644 --- a/drivers/infiniband/hw/ipath/ipath_user_sdma.c +++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.c @@ -667,13 +667,13 @@ static inline __le64 ipath_sdma_make_desc0(struct ipath_devdata *dd, static inline __le64 ipath_sdma_make_first_desc0(__le64 descq) { - return descq | __constant_cpu_to_le64(1ULL << 12); + return descq | cpu_to_le64(1ULL << 12); } static inline __le64 ipath_sdma_make_last_desc0(__le64 descq) { /* last */ /* dma head */ - return descq | __constant_cpu_to_le64(1ULL << 11 | 1ULL << 13); + return descq | cpu_to_le64(1ULL << 11 | 1ULL << 13); } static inline __le64 ipath_sdma_make_desc1(u64 addr) @@ -763,7 +763,7 @@ static int ipath_user_sdma_push_pkts(struct ipath_devdata *dd, if (ofs >= IPATH_SMALLBUF_DWORDS) { for (i = 0; i < pkt->naddr; i++) { dd->ipath_sdma_descq[dtail].qw[0] |= - __constant_cpu_to_le64(1ULL << 14); + cpu_to_le64(1ULL << 14); if (++dtail == dd->ipath_sdma_descq_cnt) dtail = 0; } diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index cdf0e6abd34d..9289ab4b0ae8 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -1585,7 +1585,7 @@ static int ipath_query_port(struct ib_device *ibdev, u64 ibcstat; memset(props, 0, sizeof(*props)); - props->lid = lid ? lid : __constant_be16_to_cpu(IB_LID_PERMISSIVE); + props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE); props->lmc = dd->ipath_lmc; props->sm_lid = dev->sm_lid; props->sm_sl = dev->sm_sl; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index 11e3f613df93..ae6cff4abffc 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -86,11 +86,11 @@ #define IB_PMA_SAMPLE_STATUS_RUNNING 0x02 /* Mandatory IB performance counter select values. */ -#define IB_PMA_PORT_XMIT_DATA __constant_htons(0x0001) -#define IB_PMA_PORT_RCV_DATA __constant_htons(0x0002) -#define IB_PMA_PORT_XMIT_PKTS __constant_htons(0x0003) -#define IB_PMA_PORT_RCV_PKTS __constant_htons(0x0004) -#define IB_PMA_PORT_XMIT_WAIT __constant_htons(0x0005) +#define IB_PMA_PORT_XMIT_DATA cpu_to_be16(0x0001) +#define IB_PMA_PORT_RCV_DATA cpu_to_be16(0x0002) +#define IB_PMA_PORT_XMIT_PKTS cpu_to_be16(0x0003) +#define IB_PMA_PORT_RCV_PKTS cpu_to_be16(0x0004) +#define IB_PMA_PORT_XMIT_WAIT cpu_to_be16(0x0005) struct ib_reth { __be64 vaddr; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index a91cb4c3fa5c..f385a24d31d2 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -71,17 +71,17 @@ enum { }; static const __be32 mlx4_ib_opcode[] = { - [IB_WR_SEND] = __constant_cpu_to_be32(MLX4_OPCODE_SEND), - [IB_WR_LSO] = __constant_cpu_to_be32(MLX4_OPCODE_LSO), - [IB_WR_SEND_WITH_IMM] = __constant_cpu_to_be32(MLX4_OPCODE_SEND_IMM), - [IB_WR_RDMA_WRITE] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE), - [IB_WR_RDMA_WRITE_WITH_IMM] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM), - [IB_WR_RDMA_READ] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_READ), - [IB_WR_ATOMIC_CMP_AND_SWP] = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_CS), - [IB_WR_ATOMIC_FETCH_AND_ADD] = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_FA), - [IB_WR_SEND_WITH_INV] = __constant_cpu_to_be32(MLX4_OPCODE_SEND_INVAL), - [IB_WR_LOCAL_INV] = __constant_cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL), - [IB_WR_FAST_REG_MR] = __constant_cpu_to_be32(MLX4_OPCODE_FMR), + [IB_WR_SEND] = cpu_to_be32(MLX4_OPCODE_SEND), + [IB_WR_LSO] = cpu_to_be32(MLX4_OPCODE_LSO), + [IB_WR_SEND_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_SEND_IMM), + [IB_WR_RDMA_WRITE] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE), + [IB_WR_RDMA_WRITE_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM), + [IB_WR_RDMA_READ] = cpu_to_be32(MLX4_OPCODE_RDMA_READ), + [IB_WR_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_ATOMIC_CS), + [IB_WR_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_ATOMIC_FA), + [IB_WR_SEND_WITH_INV] = cpu_to_be32(MLX4_OPCODE_SEND_INVAL), + [IB_WR_LOCAL_INV] = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL), + [IB_WR_FAST_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR), }; static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp) diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index ec7c6d99ed3f..938858304300 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -314,12 +314,12 @@ struct ib_cm_id *ib_create_cm_id(struct ib_device *device, */ void ib_destroy_cm_id(struct ib_cm_id *cm_id); -#define IB_SERVICE_ID_AGN_MASK __constant_cpu_to_be64(0xFF00000000000000ULL) -#define IB_CM_ASSIGN_SERVICE_ID __constant_cpu_to_be64(0x0200000000000000ULL) -#define IB_CMA_SERVICE_ID __constant_cpu_to_be64(0x0000000001000000ULL) -#define IB_CMA_SERVICE_ID_MASK __constant_cpu_to_be64(0xFFFFFFFFFF000000ULL) -#define IB_SDP_SERVICE_ID __constant_cpu_to_be64(0x0000000000010000ULL) -#define IB_SDP_SERVICE_ID_MASK __constant_cpu_to_be64(0xFFFFFFFFFFFF0000ULL) +#define IB_SERVICE_ID_AGN_MASK cpu_to_be64(0xFF00000000000000ULL) +#define IB_CM_ASSIGN_SERVICE_ID cpu_to_be64(0x0200000000000000ULL) +#define IB_CMA_SERVICE_ID cpu_to_be64(0x0000000001000000ULL) +#define IB_CMA_SERVICE_ID_MASK cpu_to_be64(0xFFFFFFFFFF000000ULL) +#define IB_SDP_SERVICE_ID cpu_to_be64(0x0000000000010000ULL) +#define IB_SDP_SERVICE_ID_MASK cpu_to_be64(0xFFFFFFFFFFFF0000ULL) struct ib_cm_compare_data { u8 data[IB_CM_COMPARE_SIZE]; diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 5f6c40fffcf4..8cc71a130d1b 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -107,7 +107,7 @@ #define IB_MGMT_RMPP_STATUS_ABORT_MAX 127 #define IB_QP0 0 -#define IB_QP1 __constant_htonl(1) +#define IB_QP1 cpu_to_be32(1) #define IB_QP1_QKEY 0x80010000 #define IB_QP_SET_QKEY 0x80000000 diff --git a/include/rdma/ib_smi.h b/include/rdma/ib_smi.h index aaca0878668f..98b9086d769a 100644 --- a/include/rdma/ib_smi.h +++ b/include/rdma/ib_smi.h @@ -63,25 +63,25 @@ struct ib_smp { u8 return_path[IB_SMP_MAX_PATH_HOPS]; } __attribute__ ((packed)); -#define IB_SMP_DIRECTION __constant_htons(0x8000) +#define IB_SMP_DIRECTION cpu_to_be16(0x8000) /* Subnet management attributes */ -#define IB_SMP_ATTR_NOTICE __constant_htons(0x0002) -#define IB_SMP_ATTR_NODE_DESC __constant_htons(0x0010) -#define IB_SMP_ATTR_NODE_INFO __constant_htons(0x0011) -#define IB_SMP_ATTR_SWITCH_INFO __constant_htons(0x0012) -#define IB_SMP_ATTR_GUID_INFO __constant_htons(0x0014) -#define IB_SMP_ATTR_PORT_INFO __constant_htons(0x0015) -#define IB_SMP_ATTR_PKEY_TABLE __constant_htons(0x0016) -#define IB_SMP_ATTR_SL_TO_VL_TABLE __constant_htons(0x0017) -#define IB_SMP_ATTR_VL_ARB_TABLE __constant_htons(0x0018) -#define IB_SMP_ATTR_LINEAR_FORWARD_TABLE __constant_htons(0x0019) -#define IB_SMP_ATTR_RANDOM_FORWARD_TABLE __constant_htons(0x001A) -#define IB_SMP_ATTR_MCAST_FORWARD_TABLE __constant_htons(0x001B) -#define IB_SMP_ATTR_SM_INFO __constant_htons(0x0020) -#define IB_SMP_ATTR_VENDOR_DIAG __constant_htons(0x0030) -#define IB_SMP_ATTR_LED_INFO __constant_htons(0x0031) -#define IB_SMP_ATTR_VENDOR_MASK __constant_htons(0xFF00) +#define IB_SMP_ATTR_NOTICE cpu_to_be16(0x0002) +#define IB_SMP_ATTR_NODE_DESC cpu_to_be16(0x0010) +#define IB_SMP_ATTR_NODE_INFO cpu_to_be16(0x0011) +#define IB_SMP_ATTR_SWITCH_INFO cpu_to_be16(0x0012) +#define IB_SMP_ATTR_GUID_INFO cpu_to_be16(0x0014) +#define IB_SMP_ATTR_PORT_INFO cpu_to_be16(0x0015) +#define IB_SMP_ATTR_PKEY_TABLE cpu_to_be16(0x0016) +#define IB_SMP_ATTR_SL_TO_VL_TABLE cpu_to_be16(0x0017) +#define IB_SMP_ATTR_VL_ARB_TABLE cpu_to_be16(0x0018) +#define IB_SMP_ATTR_LINEAR_FORWARD_TABLE cpu_to_be16(0x0019) +#define IB_SMP_ATTR_RANDOM_FORWARD_TABLE cpu_to_be16(0x001A) +#define IB_SMP_ATTR_MCAST_FORWARD_TABLE cpu_to_be16(0x001B) +#define IB_SMP_ATTR_SM_INFO cpu_to_be16(0x0020) +#define IB_SMP_ATTR_VENDOR_DIAG cpu_to_be16(0x0030) +#define IB_SMP_ATTR_LED_INFO cpu_to_be16(0x0031) +#define IB_SMP_ATTR_VENDOR_MASK cpu_to_be16(0xFF00) struct ib_port_info { __be64 mkey; From f0f6f346a1edaec23b990c25f53478669e56fa70 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Wed, 28 Jan 2009 14:54:35 -0800 Subject: [PATCH 02/30] IB/mlx4: Fix dispatch of IB_EVENT_LID_CHANGE event When snooping a PortInfo MAD, its client_reregister bit is checked. If the bit is ON then a CLIENT_REREGISTER event is dispatched, otherwise a LID_CHANGE event is dispatched. This way of decision ignores the cases where the MAD changes the LID along with an instruction to reregister (so a necessary LID_CHANGE event won't be dispatched) or the MAD is neither of these (and an unnecessary LID_CHANGE event will be dispatched). This causes problems at least with IPoIB, which will do a "light" flush on reregister, rather than the "heavy" flush required due to a LID change. Fix this by dispatching a CLIENT_REREGISTER event if the client_reregister bit is set, but also compare the LID in the MAD to the current LID. If and only if they are not identical then a LID_CHANGE event is dispatched. Signed-off-by: Moni Shoua Signed-off-by: Jack Morgenstein Signed-off-by: Yossi Etigin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/mad.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 606f1e2ef284..19e68ab66168 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -147,7 +147,8 @@ static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl) * Snoop SM MADs for port info and P_Key table sets, so we can * synthesize LID change and P_Key change events. */ -static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad) +static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad, + u16 prev_lid) { struct ib_event event; @@ -157,6 +158,7 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad) if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) { struct ib_port_info *pinfo = (struct ib_port_info *) ((struct ib_smp *) mad)->data; + u16 lid = be16_to_cpu(pinfo->lid); update_sm_ah(to_mdev(ibdev), port_num, be16_to_cpu(pinfo->sm_lid), @@ -165,12 +167,15 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad) event.device = ibdev; event.element.port_num = port_num; - if (pinfo->clientrereg_resv_subnetto & 0x80) + if (pinfo->clientrereg_resv_subnetto & 0x80) { event.event = IB_EVENT_CLIENT_REREGISTER; - else - event.event = IB_EVENT_LID_CHANGE; + ib_dispatch_event(&event); + } - ib_dispatch_event(&event); + if (prev_lid != lid) { + event.event = IB_EVENT_LID_CHANGE; + ib_dispatch_event(&event); + } } if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PKEY_TABLE) { @@ -228,8 +233,9 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, struct ib_wc *in_wc, struct ib_grh *in_grh, struct ib_mad *in_mad, struct ib_mad *out_mad) { - u16 slid; + u16 slid, prev_lid = 0; int err; + struct ib_port_attr pattr; slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); @@ -263,6 +269,13 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, } else return IB_MAD_RESULT_SUCCESS; + if ((in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || + in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && + in_mad->mad_hdr.method == IB_MGMT_METHOD_SET && + in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO && + !ib_query_port(ibdev, port_num, &pattr)) + prev_lid = pattr.lid; + err = mlx4_MAD_IFC(to_mdev(ibdev), mad_flags & IB_MAD_IGNORE_MKEY, mad_flags & IB_MAD_IGNORE_BKEY, @@ -271,7 +284,7 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, return IB_MAD_RESULT_FAILURE; if (!out_mad->mad_hdr.status) { - smp_snoop(ibdev, port_num, in_mad); + smp_snoop(ibdev, port_num, in_mad, prev_lid); node_desc_override(ibdev, out_mad); } From 270b8b85134c299799dddec624ceeb5671330131 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Wed, 28 Jan 2009 15:15:56 -0800 Subject: [PATCH 03/30] IB/mthca: Fix dispatch of IB_EVENT_LID_CHANGE event When snooping a PortInfo MAD, its client_reregister bit is checked. If the bit is ON then a CLIENT_REREGISTER event is dispatched, otherwise a LID_CHANGE event is dispatched. This way of decision ignores the cases where the MAD changes the LID along with an instruction to reregister (so a necessary LID_CHANGE event won't be dispatched) or the MAD is neither of these (and an unnecessary LID_CHANGE event will be dispatched). This causes problems at least with IPoIB, which will do a "light" flush on reregister, rather than the "heavy" flush required due to a LID change. Fix this by dispatching a CLIENT_REREGISTER event if the client_reregister bit is set, but also compare the LID in the MAD to the current LID. If and only if they are not identical then a LID_CHANGE event is dispatched. Signed-off-by: Moni Shoua Signed-off-by: Jack Morgenstein Signed-off-by: Yossi Etigin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_mad.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 640449582aba..5648659ff0b0 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -104,7 +104,8 @@ static void update_sm_ah(struct mthca_dev *dev, */ static void smp_snoop(struct ib_device *ibdev, u8 port_num, - struct ib_mad *mad) + struct ib_mad *mad, + u16 prev_lid) { struct ib_event event; @@ -114,6 +115,7 @@ static void smp_snoop(struct ib_device *ibdev, if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) { struct ib_port_info *pinfo = (struct ib_port_info *) ((struct ib_smp *) mad)->data; + u16 lid = be16_to_cpu(pinfo->lid); mthca_update_rate(to_mdev(ibdev), port_num); update_sm_ah(to_mdev(ibdev), port_num, @@ -123,12 +125,15 @@ static void smp_snoop(struct ib_device *ibdev, event.device = ibdev; event.element.port_num = port_num; - if (pinfo->clientrereg_resv_subnetto & 0x80) + if (pinfo->clientrereg_resv_subnetto & 0x80) { event.event = IB_EVENT_CLIENT_REREGISTER; - else - event.event = IB_EVENT_LID_CHANGE; + ib_dispatch_event(&event); + } - ib_dispatch_event(&event); + if (prev_lid != lid) { + event.event = IB_EVENT_LID_CHANGE; + ib_dispatch_event(&event); + } } if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PKEY_TABLE) { @@ -196,6 +201,8 @@ int mthca_process_mad(struct ib_device *ibdev, int err; u8 status; u16 slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); + u16 prev_lid = 0; + struct ib_port_attr pattr; /* Forward locally generated traps to the SM */ if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && @@ -233,6 +240,12 @@ int mthca_process_mad(struct ib_device *ibdev, return IB_MAD_RESULT_SUCCESS; } else return IB_MAD_RESULT_SUCCESS; + if ((in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || + in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && + in_mad->mad_hdr.method == IB_MGMT_METHOD_SET && + in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO && + !ib_query_port(ibdev, port_num, &pattr)) + prev_lid = pattr.lid; err = mthca_MAD_IFC(to_mdev(ibdev), mad_flags & IB_MAD_IGNORE_MKEY, @@ -252,7 +265,7 @@ int mthca_process_mad(struct ib_device *ibdev, } if (!out_mad->mad_hdr.status) { - smp_snoop(ibdev, port_num, in_mad); + smp_snoop(ibdev, port_num, in_mad, prev_lid); node_desc_override(ibdev, out_mad); } From 900f4c16c338f742b80f3aa500e12ceb017e86af Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 10 Feb 2009 16:38:22 -0800 Subject: [PATCH 04/30] RDMA/cxgb3: sgl/pbl offset calculation needs 64 bits The variable 'offset' in iwch_sgl2pbl_map() needs to be a u64. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/iwch_qp.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index 19661b2f0406..c84ac5bfb107 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -195,15 +195,12 @@ static int build_inv_stag(union t3_wr *wqe, struct ib_send_wr *wr, return 0; } -/* - * TBD: this is going to be moved to firmware. Missing pdid/qpid check for now. - */ static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list, u32 num_sgle, u32 * pbl_addr, u8 * page_size) { int i; struct iwch_mr *mhp; - u32 offset; + u64 offset; for (i = 0; i < num_sgle; i++) { mhp = get_mhp(rhp, (sg_list[i].lkey) >> 8); @@ -235,8 +232,8 @@ static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list, return -EINVAL; } offset = sg_list[i].addr - mhp->attr.va_fbo; - offset += ((u32) mhp->attr.va_fbo) % - (1UL << (12 + mhp->attr.page_size)); + offset += mhp->attr.va_fbo & + ((1UL << (12 + mhp->attr.page_size)) - 1); pbl_addr[i] = ((mhp->attr.pbl_addr - rhp->rdev.rnic_info.pbl_base) >> 3) + (offset >> (12 + mhp->attr.page_size)); From 42fb61f02f9bdc476c7a76d3cce0400d989f44c5 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 10 Feb 2009 16:38:57 -0800 Subject: [PATCH 05/30] RDMA/cxgb3: Connection termination fixes The poll and flush code needs to handle all send opcodes: SEND, SEND_WITH_SE, SEND_WITH_INV, and SEND_WITH_SE_INV. Ignore TERM indications if the connection already gone. Ignore HW receive completions if the RQ is empty. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/cxio_hal.c | 13 +++++++++++-- drivers/infiniband/hw/cxgb3/cxio_wr.h | 6 ++++++ drivers/infiniband/hw/cxgb3/iwch_cm.c | 3 +++ drivers/infiniband/hw/cxgb3/iwch_ev.c | 5 ----- 4 files changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index 4dcf08b3fd83..c2740e790f73 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -450,7 +450,7 @@ static int cqe_completes_wr(struct t3_cqe *cqe, struct t3_wq *wq) if ((CQE_OPCODE(*cqe) == T3_READ_RESP) && SQ_TYPE(*cqe)) return 0; - if ((CQE_OPCODE(*cqe) == T3_SEND) && RQ_TYPE(*cqe) && + if (CQE_SEND_OPCODE(*cqe) && RQ_TYPE(*cqe) && Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) return 0; @@ -1204,11 +1204,12 @@ int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe, } /* incoming SEND with no receive posted failures */ - if ((CQE_OPCODE(*hw_cqe) == T3_SEND) && RQ_TYPE(*hw_cqe) && + if (CQE_SEND_OPCODE(*hw_cqe) && RQ_TYPE(*hw_cqe) && Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) { ret = -1; goto skip_cqe; } + BUG_ON((*cqe_flushed == 0) && !SW_CQE(*hw_cqe)); goto proc_cqe; } @@ -1223,6 +1224,13 @@ int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe, * then we complete this with TPT_ERR_MSN and mark the wq in * error. */ + + if (Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) { + wq->error = 1; + ret = -1; + goto skip_cqe; + } + if (unlikely((CQE_WRID_MSN(*hw_cqe) != (wq->rq_rptr + 1)))) { wq->error = 1; hw_cqe->header |= htonl(V_CQE_STATUS(TPT_ERR_MSN)); @@ -1277,6 +1285,7 @@ proc_cqe: cxio_hal_pblpool_free(wq->rdev, wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].pbl_addr, T3_STAG0_PBL_SIZE); + BUG_ON(Q_EMPTY(wq->rq_rptr, wq->rq_wptr)); wq->rq_rptr++; } diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h index 04618f7bfbb3..ff9be1a13106 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_wr.h +++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h @@ -604,6 +604,12 @@ struct t3_cqe { #define CQE_STATUS(x) (G_CQE_STATUS(be32_to_cpu((x).header))) #define CQE_OPCODE(x) (G_CQE_OPCODE(be32_to_cpu((x).header))) +#define CQE_SEND_OPCODE(x)( \ + (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND) || \ + (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_SE) || \ + (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_INV) || \ + (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_SE_INV)) + #define CQE_LEN(x) (be32_to_cpu((x).len)) /* used for RQ completion processing */ diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 44e936e48a31..8699947aaf6c 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -1678,6 +1678,9 @@ static int terminate(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) { struct iwch_ep *ep = ctx; + if (state_read(&ep->com) != FPDU_MODE) + return CPL_RET_BUF_DONE; + PDBG("%s ep %p\n", __func__, ep); skb_pull(skb, sizeof(struct cpl_rdma_terminate)); PDBG("%s saving %d bytes of term msg\n", __func__, skb->len); diff --git a/drivers/infiniband/hw/cxgb3/iwch_ev.c b/drivers/infiniband/hw/cxgb3/iwch_ev.c index 7b67a6771720..743c5d8b8806 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_ev.c +++ b/drivers/infiniband/hw/cxgb3/iwch_ev.c @@ -179,11 +179,6 @@ void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb) case TPT_ERR_BOUND: case TPT_ERR_INVALIDATE_SHARED_MR: case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND: - printk(KERN_ERR "%s - CQE Err qpid 0x%x opcode %d status 0x%x " - "type %d wrid.hi 0x%x wrid.lo 0x%x \n", __func__, - CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe), - CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe), - CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe)); (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_ACCESS_ERR, 1); break; From 426328963078f644c7194403a308588cf684d4c6 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Mon, 16 Feb 2009 21:23:32 -0800 Subject: [PATCH 06/30] RDMA/cxgb3: Remove modulo math from build_rdma_recv() Remove modulo usage to avoid a divide in the fast path (not all gcc versions do strength reduction here). Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/iwch_qp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index c84ac5bfb107..0e18f6fc23e2 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -263,8 +263,8 @@ static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe, wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); /* to in the WQE == the offset into the page */ - wqe->recv.sgl[i].to = cpu_to_be64(((u32) wr->sg_list[i].addr) % - (1UL << (12 + page_size[i]))); + wqe->recv.sgl[i].to = cpu_to_be64(((u32)wr->sg_list[i].addr) & + ((1UL << (12 + page_size[i])) - 1)); /* pbl_addr is the adapters address in the PBL */ wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]); From 71d98b4628ee869d62814f6d8607d76cab4b9ec5 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 17 Feb 2009 14:51:47 -0800 Subject: [PATCH 07/30] IPoIB: In unicast_arp_send(), only free newly-created paths If path_rec_start() returns error, call path_free() only if the path was newly-created. If we free an existing path whose valid flag was zero, (but do not detach it from the list) we cause corruption of the path list (of which it is a member), and get a kernel crash. The simplest solution is to not free an existing path -- just leave it in the list as-is (i.e., with its valid flag cleared). Thanks to Yossi Etigin of Voltaire for identifying the problem flow which caused the kernel crash. Signed-off-by: Jack Morgenstein Signed-off-by: Moni Shua Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 0bd2a4ff0842..353c13b91e8f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -660,8 +660,12 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, path = __path_find(dev, phdr->hwaddr + 4); if (!path || !path->valid) { - if (!path) + int new_path = 0; + + if (!path) { path = path_rec_create(dev, phdr->hwaddr + 4); + new_path = 1; + } if (path) { /* put pseudoheader back on for next time */ skb_push(skb, sizeof *phdr); @@ -669,7 +673,8 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, if (!path->query && path_rec_start(dev, path)) { spin_unlock_irqrestore(&priv->lock, flags); - path_free(dev, path); + if (new_path) + path_free(dev, path); return; } else __path_add(dev, path); From 71c4512201575c7cc008b364e2e2c75cc7085c26 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Sun, 22 Feb 2009 20:04:34 -0800 Subject: [PATCH 08/30] IB/ipath: Fix memory leak in init_shadow_tids() error path If the second vmalloc() fails, the wrong pointer is pased to vfree(), so the first vmalloc() ends up getting leaked. This was spotted by the Coverity checker (CID 2709). Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_init_chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index 64aeefbd2a5d..077879c0bdb5 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -455,7 +455,7 @@ static void init_shadow_tids(struct ipath_devdata *dd) if (!addrs) { ipath_dev_err(dd, "failed to allocate shadow dma handle " "array, no expected sends!\n"); - vfree(dd->ipath_pageshadow); + vfree(pages); dd->ipath_pageshadow = NULL; return; } From e538052746d570c874650a24eed89fca6e4c93dc Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Sun, 22 Feb 2009 20:14:37 -0800 Subject: [PATCH 09/30] IB/ipath: Really run work in ipath_release_user_pages_on_close() ipath_release_user_pages_on_close() just allocated a structure to schedule work with but just returned (leaking the structure) rather than actually doing schedule_work(). Fix the logic to what was intended. This was spotted by the Coverity checker (CID 2700). Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_user_pages.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c index 0190edc8044e..855911e7396d 100644 --- a/drivers/infiniband/hw/ipath/ipath_user_pages.c +++ b/drivers/infiniband/hw/ipath/ipath_user_pages.c @@ -209,20 +209,20 @@ void ipath_release_user_pages_on_close(struct page **p, size_t num_pages) mm = get_task_mm(current); if (!mm) - goto bail; + return; work = kmalloc(sizeof(*work), GFP_KERNEL); if (!work) goto bail_mm; - goto bail; - INIT_WORK(&work->work, user_pages_account); work->mm = mm; work->num_pages = num_pages; + schedule_work(&work->work); + return; + bail_mm: mmput(mm); -bail: return; } From 9206dff15705267c19f8fed391c4fb95975540a3 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 25 Feb 2009 13:27:46 -0800 Subject: [PATCH 10/30] IB: Remove sysfs files before unregistering device Move the ib_device_unregister_sysfs() call from ib_dealloc_device() to ib_unregister_device(). The old code allows device unregister to proceed even if some sysfs files are open, which leaves a window where userspace can open a file before a device is removed but then end up reading the file after the device is removed, which leads to various kernel crashes either because the device data structure is freed or because the low-level driver code is gone after module removal. By not returning from ib_unregister_device() until after all sysfs entries are removed, we make sure that data structures and/or module code is not freed until after all sysfs access is done. Reported-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/core/device.c | 4 +++- drivers/infiniband/core/sysfs.c | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 7913b804311e..d1fba4153332 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -193,7 +193,7 @@ void ib_dealloc_device(struct ib_device *device) BUG_ON(device->reg_state != IB_DEV_UNREGISTERED); - ib_device_unregister_sysfs(device); + kobject_put(&device->dev.kobj); } EXPORT_SYMBOL(ib_dealloc_device); @@ -348,6 +348,8 @@ void ib_unregister_device(struct ib_device *device) mutex_unlock(&device_mutex); + ib_device_unregister_sysfs(device); + spin_lock_irqsave(&device->client_data_lock, flags); list_for_each_entry_safe(context, tmp, &device->client_data_list, list) kfree(context); diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index b43f7d3682d3..5270aeb56e9e 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -848,6 +848,9 @@ void ib_device_unregister_sysfs(struct ib_device *device) struct kobject *p, *t; struct ib_port *port; + /* Hold kobject until ib_dealloc_device() */ + kobject_get(&device->dev.kobj); + list_for_each_entry_safe(p, t, &device->port_list, entry) { list_del(&p->entry); port = container_of(p, struct ib_port, kobj); From 085343b47592b33276abf58a71c52ea29f0a338b Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 27 Feb 2009 10:29:13 -0800 Subject: [PATCH 11/30] mlx4_core: Add device IDs for MT25458 10GigE devices Add device IDs for Mellanox MT25458 ConnectX+10GBaseT 10GigE adapters. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/net/mlx4/main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index 6ef2490d5c3e..8480f0346844 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c @@ -1230,6 +1230,8 @@ static struct pci_device_id mlx4_pci_table[] = { { PCI_VDEVICE(MELLANOX, 0x673c) }, /* MT25408 "Hermon" QDR PCIe gen2 */ { PCI_VDEVICE(MELLANOX, 0x6368) }, /* MT25408 "Hermon" EN 10GigE */ { PCI_VDEVICE(MELLANOX, 0x6750) }, /* MT25408 "Hermon" EN 10GigE PCIe gen2 */ + { PCI_VDEVICE(MELLANOX, 0x6372) }, /* MT25458 ConnectX EN 10GBASE-T 10GigE */ + { PCI_VDEVICE(MELLANOX, 0x675a) }, /* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */ { 0, } }; From 1aedb7721f05461f777fdee25b50d8a168c425ed Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 27 Feb 2009 10:30:46 -0800 Subject: [PATCH 12/30] IB/iser: Remove hard setting of path MTU Remove hard setting of the IB MTU used by iSER's RC queue-pair to 1K, as this was done due to inter-op issues with an old iser target which is not used any more. Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iser_verbs.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 319b188145be..ea9e1556e0d6 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -401,13 +401,6 @@ static void iser_route_handler(struct rdma_cm_id *cma_id) if (ret) goto failure; - iser_dbg("path.mtu is %d setting it to %d\n", - cma_id->route.path_rec->mtu, IB_MTU_1024); - - /* we must set the MTU to 1024 as this is what the target is assuming */ - if (cma_id->route.path_rec->mtu > IB_MTU_1024) - cma_id->route.path_rec->mtu = IB_MTU_1024; - memset(&conn_param, 0, sizeof conn_param); conn_param.responder_resources = 4; conn_param.initiator_depth = 1; From 7020cb0fe216fdcec246cdc2412614a3190fbb2f Mon Sep 17 00:00:00 2001 From: Ramachandra K Date: Fri, 27 Feb 2009 10:33:12 -0800 Subject: [PATCH 13/30] IB/mad: Fix RMPP header RRespTime manipulation Fix ib_set_rmpp_flags() to use the correct bit mask for RRespTime. In the 8-bit field of the RMPP header, the first 5 bits are RRespTime and next 3 bits are RMPPFlags. Hence to retain the first 5 bits, the mask should be 0xF8 instead of 0xF1. ack_recv()-->format_ack() calls ib_set_rmpp_flags() and due to the incorrect ANDing with 0xF1, RRespTime got changed incorrectly and RMPP Acks sent back always had a RRespTime of 0x1E (30) which caused the other end to consider the time outs to be approximately 4297 seconds (i.e. in the order of 4*2^30) instead of the usual ~4 seconds (order of 4*2^20). Signed-off-by: Ramachandra K Acked-by: Sean Hefty Signed-off-by: Roland Dreier --- include/rdma/ib_mad.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 5f6c40fffcf4..1a0f409f1700 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -290,7 +290,7 @@ static inline void ib_set_rmpp_resptime(struct ib_rmpp_hdr *rmpp_hdr, u8 rtime) */ static inline void ib_set_rmpp_flags(struct ib_rmpp_hdr *rmpp_hdr, u8 flags) { - rmpp_hdr->rmpp_rtime_flags = (rmpp_hdr->rmpp_rtime_flags & 0xF1) | + rmpp_hdr->rmpp_rtime_flags = (rmpp_hdr->rmpp_rtime_flags & 0xF8) | (flags & 0x7); } From 1d9bc6d648ece77ffb41c5a577eab81fac5ad4de Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Fri, 27 Feb 2009 10:34:30 -0800 Subject: [PATCH 14/30] IB/mad: Fix null pointer dereference in local_completions() handle_outgoing_dr_smp() can queue a struct ib_mad_local_private *local on the mad_agent_priv->local_work work queue with local->mad_priv == NULL if device->process_mad() returns IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY and (!ib_response_mad(&mad_priv->mad.mad) || !mad_agent_priv->agent.recv_handler). In this case, local_completions() will be called with local->mad_priv == NULL. The code does check for this case and skips calling recv_mad_agent->agent.recv_handler() but recv == 0 so kmem_cache_free() is called with a NULL pointer. Also, since recv isn't reinitialized each time through the loop, it can cause a memory leak if recv should have been zero. Signed-off-by: Ralph Campbell --- drivers/infiniband/core/mad.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 5c54fc2350be..735ad4ea10f0 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -2356,7 +2356,7 @@ static void local_completions(struct work_struct *work) struct ib_mad_local_private *local; struct ib_mad_agent_private *recv_mad_agent; unsigned long flags; - int recv = 0; + int free_mad; struct ib_wc wc; struct ib_mad_send_wc mad_send_wc; @@ -2370,14 +2370,15 @@ static void local_completions(struct work_struct *work) completion_list); list_del(&local->completion_list); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + free_mad = 0; if (local->mad_priv) { recv_mad_agent = local->recv_mad_agent; if (!recv_mad_agent) { printk(KERN_ERR PFX "No receive MAD agent for local completion\n"); + free_mad = 1; goto local_send_completion; } - recv = 1; /* * Defined behavior is to complete response * before request @@ -2422,7 +2423,7 @@ local_send_completion: spin_lock_irqsave(&mad_agent_priv->lock, flags); atomic_dec(&mad_agent_priv->refcount); - if (!recv) + if (free_mad) kmem_cache_free(ib_mad_cache, local->mad_priv); kfree(local); } From d9620a4c82c61a91c9313f80ba951c902573c028 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Fri, 27 Feb 2009 14:44:32 -0800 Subject: [PATCH 15/30] IB/mad: initialize mad_agent_priv before putting on lists There is a potential race in ib_register_mad_agent() where the struct ib_mad_agent_private is not fully initialized before it is added to the list of agents per IB port. This means the ib_mad_agent_private could be seen before the refcount, spin locks, and linked lists are initialized. The fix is to initialize the structure earlier. Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/core/mad.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 735ad4ea10f0..dbcd285405ec 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -301,6 +301,16 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, mad_agent_priv->agent.context = context; mad_agent_priv->agent.qp = port_priv->qp_info[qpn].qp; mad_agent_priv->agent.port_num = port_num; + spin_lock_init(&mad_agent_priv->lock); + INIT_LIST_HEAD(&mad_agent_priv->send_list); + INIT_LIST_HEAD(&mad_agent_priv->wait_list); + INIT_LIST_HEAD(&mad_agent_priv->done_list); + INIT_LIST_HEAD(&mad_agent_priv->rmpp_list); + INIT_DELAYED_WORK(&mad_agent_priv->timed_work, timeout_sends); + INIT_LIST_HEAD(&mad_agent_priv->local_list); + INIT_WORK(&mad_agent_priv->local_work, local_completions); + atomic_set(&mad_agent_priv->refcount, 1); + init_completion(&mad_agent_priv->comp); spin_lock_irqsave(&port_priv->reg_lock, flags); mad_agent_priv->agent.hi_tid = ++ib_mad_client_id; @@ -350,17 +360,6 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, list_add_tail(&mad_agent_priv->agent_list, &port_priv->agent_list); spin_unlock_irqrestore(&port_priv->reg_lock, flags); - spin_lock_init(&mad_agent_priv->lock); - INIT_LIST_HEAD(&mad_agent_priv->send_list); - INIT_LIST_HEAD(&mad_agent_priv->wait_list); - INIT_LIST_HEAD(&mad_agent_priv->done_list); - INIT_LIST_HEAD(&mad_agent_priv->rmpp_list); - INIT_DELAYED_WORK(&mad_agent_priv->timed_work, timeout_sends); - INIT_LIST_HEAD(&mad_agent_priv->local_list); - INIT_WORK(&mad_agent_priv->local_work, local_completions); - atomic_set(&mad_agent_priv->refcount, 1); - init_completion(&mad_agent_priv->comp); - return &mad_agent_priv->agent; error4: From 4780c1953f9bef07365b13af01ae4e8238ecd3de Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Tue, 3 Mar 2009 14:22:17 -0800 Subject: [PATCH 16/30] IB/mad: Fix ib_post_send_mad() returning 0 with no generate send comp If ib_post_send_mad() returns 0, the API guarantees that there will be a callback to send_buf->mad_agent->send_handler() so that the sender can call ib_free_send_mad(). Otherwise, the ib_mad_send_buf will be leaked and the mad_agent reference count will never go to zero and the IB device module cannot be unloaded. The above can happen without this patch if process_mad() returns (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED). If process_mad() returns IB_MAD_RESULT_SUCCESS and there is no agent registered to receive the mad being sent, handle_outgoing_dr_smp() returns zero which causes a MAD packet which is at the end of the directed route to be incorrectly sent on the wire but doesn't cause a hang since the HCA generates a send completion. Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/core/mad.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index dbcd285405ec..de922a04ca2d 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -742,9 +742,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, break; case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED: kmem_cache_free(ib_mad_cache, mad_priv); - kfree(local); - ret = 1; - goto out; + break; case IB_MAD_RESULT_SUCCESS: /* Treat like an incoming receive MAD */ port_priv = ib_get_mad_port(mad_agent_priv->agent.device, @@ -755,10 +753,12 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, &mad_priv->mad.mad); } if (!port_priv || !recv_mad_agent) { + /* + * No receiving agent so drop packet and + * generate send completion. + */ kmem_cache_free(ib_mad_cache, mad_priv); - kfree(local); - ret = 0; - goto out; + break; } local->mad_priv = mad_priv; local->recv_mad_agent = recv_mad_agent; From 6b708b3dde0ab3a10a0eea7774c1d6482f32f587 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 3 Mar 2009 14:30:01 -0800 Subject: [PATCH 17/30] IB/sa_query: Fix AH leak due to update_sm_ah() race Our testing uncovered a race condition in ib_sa_event(): spin_lock_irqsave(&port->ah_lock, flags); if (port->sm_ah) kref_put(&port->sm_ah->ref, free_sm_ah); port->sm_ah = NULL; spin_unlock_irqrestore(&port->ah_lock, flags); schedule_work(&sa_dev->port[event->element.port_num - sa_dev->start_port].update_task); If two events occur back-to-back (e.g., client-reregister and LID change), both may pass the spinlock-protected code above before the scheduled work updates the port->sm_ah handle. Then if the scheduled work ends up running twice, the second operation will then find a non-NULL port->sm_ah, and will simply overwrite it in update_sm_ah -- resulting in an AH leak. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/core/sa_query.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 7863a50d56f2..1865049e80f7 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -395,6 +395,8 @@ static void update_sm_ah(struct work_struct *work) } spin_lock_irq(&port->ah_lock); + if (port->sm_ah) + kref_put(&port->sm_ah->ref, free_sm_ah); port->sm_ah = new_ah; spin_unlock_irq(&port->ah_lock); From 6432f366842c78d17b2df974c68f39cb5794f2c2 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 4 Mar 2009 15:22:39 -0800 Subject: [PATCH 18/30] IB: Remove useless ibdev_is_alive() tests from sysfs code Some attribute show functions test ibdev_is_alive() to make sure that it's OK to access device state. However, the sysfs attributes will not be registered until the device is fully initialized, and they'll be unregistered before anything is torn down, so ibdev_is_alive() doesn't do anything useful. Remove it. Signed-off-by: Roland Dreier --- drivers/infiniband/core/sysfs.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 5270aeb56e9e..5c04cfb54cb9 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -66,11 +66,6 @@ struct port_table_attribute { int index; }; -static inline int ibdev_is_alive(const struct ib_device *dev) -{ - return dev->reg_state == IB_DEV_REGISTERED; -} - static ssize_t port_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -80,8 +75,6 @@ static ssize_t port_attr_show(struct kobject *kobj, if (!port_attr->show) return -EIO; - if (!ibdev_is_alive(p->ibdev)) - return -ENODEV; return port_attr->show(p, port_attr, buf); } @@ -562,9 +555,6 @@ static ssize_t show_node_type(struct device *device, { struct ib_device *dev = container_of(device, struct ib_device, dev); - if (!ibdev_is_alive(dev)) - return -ENODEV; - switch (dev->node_type) { case RDMA_NODE_IB_CA: return sprintf(buf, "%d: CA\n", dev->node_type); case RDMA_NODE_RNIC: return sprintf(buf, "%d: RNIC\n", dev->node_type); @@ -581,9 +571,6 @@ static ssize_t show_sys_image_guid(struct device *device, struct ib_device_attr attr; ssize_t ret; - if (!ibdev_is_alive(dev)) - return -ENODEV; - ret = ib_query_device(dev, &attr); if (ret) return ret; @@ -600,9 +587,6 @@ static ssize_t show_node_guid(struct device *device, { struct ib_device *dev = container_of(device, struct ib_device, dev); - if (!ibdev_is_alive(dev)) - return -ENODEV; - return sprintf(buf, "%04x:%04x:%04x:%04x\n", be16_to_cpu(((__be16 *) &dev->node_guid)[0]), be16_to_cpu(((__be16 *) &dev->node_guid)[1]), From dae5d13a7e6efcf6e0e00c0febb530b894fa13a3 Mon Sep 17 00:00:00 2001 From: Don Wood Date: Fri, 6 Mar 2009 15:12:09 -0800 Subject: [PATCH 19/30] RDMA/nes: Account for freed PBL after HW operation Fix occurrences where the software PBL counts were changed before the hardware was updated. This bug allowed another thread to overallocate the hardware resources. Add proper PBL accounting in case nes_reg_mr() fails. Signed-off-by: Don Wood Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_verbs.c | 70 ++++++++++++++++----------- 1 file changed, 42 insertions(+), 28 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 4fdb72454f94..e66681349c92 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -551,6 +551,7 @@ static int nes_dealloc_fmr(struct ib_fmr *ibfmr) struct nes_device *nesdev = nesvnic->nesdev; struct nes_adapter *nesadapter = nesdev->nesadapter; int i = 0; + int rc; /* free the resources */ if (nesfmr->leaf_pbl_cnt == 0) { @@ -572,7 +573,9 @@ static int nes_dealloc_fmr(struct ib_fmr *ibfmr) nesmr->ibmw.rkey = ibfmr->rkey; nesmr->ibmw.uobject = NULL; - if (nesfmr->nesmr.pbls_used != 0) { + rc = nes_dealloc_mw(&nesmr->ibmw); + + if ((rc == 0) && (nesfmr->nesmr.pbls_used != 0)) { spin_lock_irqsave(&nesadapter->pbl_lock, flags); if (nesfmr->nesmr.pbl_4k) { nesadapter->free_4kpbl += nesfmr->nesmr.pbls_used; @@ -584,7 +587,7 @@ static int nes_dealloc_fmr(struct ib_fmr *ibfmr) spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); } - return nes_dealloc_mw(&nesmr->ibmw); + return rc; } @@ -1993,7 +1996,16 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, stag, ret, cqp_request->major_code, cqp_request->minor_code); major_code = cqp_request->major_code; nes_put_cqp_request(nesdev, cqp_request); - + if ((!ret || major_code) && pbl_count != 0) { + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + if (pbl_count > 1) + nesadapter->free_4kpbl += pbl_count+1; + else if (residual_page_count > 32) + nesadapter->free_4kpbl += pbl_count; + else + nesadapter->free_256pbl += pbl_count; + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + } if (!ret) return -ETIME; else if (major_code) @@ -2607,24 +2619,6 @@ static int nes_dereg_mr(struct ib_mr *ib_mr) cqp_request->waiting = 1; cqp_wqe = &cqp_request->cqp_wqe; - spin_lock_irqsave(&nesadapter->pbl_lock, flags); - if (nesmr->pbls_used != 0) { - if (nesmr->pbl_4k) { - nesadapter->free_4kpbl += nesmr->pbls_used; - if (nesadapter->free_4kpbl > nesadapter->max_4kpbl) { - printk(KERN_ERR PFX "free 4KB PBLs(%u) has exceeded the max(%u)\n", - nesadapter->free_4kpbl, nesadapter->max_4kpbl); - } - } else { - nesadapter->free_256pbl += nesmr->pbls_used; - if (nesadapter->free_256pbl > nesadapter->max_256pbl) { - printk(KERN_ERR PFX "free 256B PBLs(%u) has exceeded the max(%u)\n", - nesadapter->free_256pbl, nesadapter->max_256pbl); - } - } - } - - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); nes_fill_init_cqp_wqe(cqp_wqe, nesdev); set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, NES_CQP_DEALLOCATE_STAG | NES_CQP_STAG_VA_TO | @@ -2642,11 +2636,6 @@ static int nes_dereg_mr(struct ib_mr *ib_mr) " CQP Major:Minor codes = 0x%04X:0x%04X\n", ib_mr->rkey, ret, cqp_request->major_code, cqp_request->minor_code); - nes_free_resource(nesadapter, nesadapter->allocated_mrs, - (ib_mr->rkey & 0x0fffff00) >> 8); - - kfree(nesmr); - major_code = cqp_request->major_code; minor_code = cqp_request->minor_code; @@ -2662,8 +2651,33 @@ static int nes_dereg_mr(struct ib_mr *ib_mr) " to destroy STag, ib_mr=%p, rkey = 0x%08X\n", major_code, minor_code, ib_mr, ib_mr->rkey); return -EIO; - } else - return 0; + } + + if (nesmr->pbls_used != 0) { + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + if (nesmr->pbl_4k) { + nesadapter->free_4kpbl += nesmr->pbls_used; + if (nesadapter->free_4kpbl > nesadapter->max_4kpbl) + printk(KERN_ERR PFX "free 4KB PBLs(%u) has " + "exceeded the max(%u)\n", + nesadapter->free_4kpbl, + nesadapter->max_4kpbl); + } else { + nesadapter->free_256pbl += nesmr->pbls_used; + if (nesadapter->free_256pbl > nesadapter->max_256pbl) + printk(KERN_ERR PFX "free 256B PBLs(%u) has " + "exceeded the max(%u)\n", + nesadapter->free_256pbl, + nesadapter->max_256pbl); + } + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + } + nes_free_resource(nesadapter, nesadapter->allocated_mrs, + (ib_mr->rkey & 0x0fffff00) >> 8); + + kfree(nesmr); + + return 0; } From cd6853d3eb453aee2574521b7ce2cd5a45492a59 Mon Sep 17 00:00:00 2001 From: Chien Tung Date: Fri, 6 Mar 2009 15:12:10 -0800 Subject: [PATCH 20/30] RDMA/nes: Update copyright to new legal entity and year Update copyright to the new legal entity, Intel-NE, Inc., an Intel company. Update copyright for the new year. Signed-off-by: Chien Tung Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes.c | 2 +- drivers/infiniband/hw/nes/nes.h | 2 +- drivers/infiniband/hw/nes/nes_cm.c | 2 +- drivers/infiniband/hw/nes/nes_cm.h | 2 +- drivers/infiniband/hw/nes/nes_context.h | 2 +- drivers/infiniband/hw/nes/nes_hw.c | 2 +- drivers/infiniband/hw/nes/nes_hw.h | 2 +- drivers/infiniband/hw/nes/nes_nic.c | 2 +- drivers/infiniband/hw/nes/nes_user.h | 2 +- drivers/infiniband/hw/nes/nes_utils.c | 2 +- drivers/infiniband/hw/nes/nes_verbs.c | 2 +- drivers/infiniband/hw/nes/nes_verbs.h | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index b9611ade9eab..ca599767ffbd 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved. + * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved. * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index 13a5bb1a7bcf..04b12ad23390 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved. + * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved. * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index a01b4488208b..6f42ab6bd021 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved. + * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h index fafa35042ebd..4ab2bebf12ac 100644 --- a/drivers/infiniband/hw/nes/nes_cm.h +++ b/drivers/infiniband/hw/nes/nes_cm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved. + * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/nes/nes_context.h b/drivers/infiniband/hw/nes/nes_context.h index da9daba8e668..0fb8d81d9a62 100644 --- a/drivers/infiniband/hw/nes/nes_context.h +++ b/drivers/infiniband/hw/nes/nes_context.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved. + * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 5d139db1b771..cb4a5f32f2f7 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved. + * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h index bc0b4de04450..6f8712d7a2cc 100644 --- a/drivers/infiniband/hw/nes/nes_hw.h +++ b/drivers/infiniband/hw/nes/nes_hw.h @@ -1,5 +1,5 @@ /* -* Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved. +* Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 57a47cf7e513..8e1d073913ec 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved. + * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/nes/nes_user.h b/drivers/infiniband/hw/nes/nes_user.h index e64306bce80b..cc90c14b49eb 100644 --- a/drivers/infiniband/hw/nes/nes_user.h +++ b/drivers/infiniband/hw/nes/nes_user.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2008 NetEffect. All rights reserved. + * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved. * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Cisco Systems. All rights reserved. * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c index 6f3bc1b6bf22..a282031d15c7 100644 --- a/drivers/infiniband/hw/nes/nes_utils.c +++ b/drivers/infiniband/hw/nes/nes_utils.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved. + * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index e66681349c92..b42b17ac7712 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved. + * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h index 6c6b4da5184f..da3c368f1ef8 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.h +++ b/drivers/infiniband/hw/nes/nes_verbs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved. + * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved. * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two From b9c367e7e694502085f21d9d2686645798080aa3 Mon Sep 17 00:00:00 2001 From: Chien Tung Date: Fri, 6 Mar 2009 15:12:10 -0800 Subject: [PATCH 21/30] RDMA/nes: Report correct vendor_id and vendor_part_id ibv_devinfo displays 0 for vendor_id and vendor_part_id. Fill in OUI and device_id for those two fields. Signed-off-by: Chien Tung Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_hw.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index cb4a5f32f2f7..3a72a06f1df8 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -254,6 +254,7 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { u32 adapter_size; u32 arp_table_size; u16 vendor_id; + u16 device_id; u8 OneG_Mode; u8 func_index; @@ -356,6 +357,13 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { return NULL; } + nesadapter->vendor_id = (((u32) nesadapter->mac_addr_high) << 8) | + (nesadapter->mac_addr_low >> 24); + + pci_bus_read_config_word(nesdev->pcidev->bus, nesdev->pcidev->devfn, + PCI_DEVICE_ID, &device_id); + nesadapter->vendor_part_id = device_id; + if (nes_init_serdes(nesdev, hw_rev, port_count, nesadapter, OneG_Mode)) { kfree(nesadapter); From 7b14ab0b438f20c7d6599985b036bb2864fd2524 Mon Sep 17 00:00:00 2001 From: Chien Tung Date: Fri, 6 Mar 2009 15:12:11 -0800 Subject: [PATCH 22/30] RDMA/nes: Fix tmp_addr compilation warning In find_node(), tmp_addr causes an "unused variable" warning when INFINIBAND_NES_DEBUG is not defined. It's only used in a nes_debug() and the print does not make sense. So take out the whole thing. Reported-by: Manish Katiyar Signed-off-by: Chien Tung Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_cm.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 6f42ab6bd021..bd918df3d22c 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -778,14 +778,10 @@ static struct nes_cm_node *find_node(struct nes_cm_core *cm_core, unsigned long flags; struct list_head *hte; struct nes_cm_node *cm_node; - __be32 tmp_addr = cpu_to_be32(loc_addr); /* get a handle on the hte */ hte = &cm_core->connected_nodes; - nes_debug(NES_DBG_CM, "Searching for an owner node: %pI4:%x from core %p->%p\n", - &tmp_addr, loc_port, cm_core, hte); - /* walk list and find cm_node associated with this session ID */ spin_lock_irqsave(&cm_core->ht_lock, flags); list_for_each_entry(cm_node, hte, list) { From fd87778cb99429f5e2e041213a5c9c564bbe7b78 Mon Sep 17 00:00:00 2001 From: Don Wood Date: Fri, 6 Mar 2009 15:12:11 -0800 Subject: [PATCH 23/30] RDMA/nes: Inform hardware that asynchronous event has been handled When asynchronous events are processed by software, it is necessary to let the hardware know that software has handled the event. This frees up the entry in the asynchronous event queue. Signed-off-by: Don Wood Signed-off-by: Chien Tung Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_hw.c | 2 ++ drivers/infiniband/hw/nes/nes_hw.h | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 3a72a06f1df8..1c5e946ce226 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -2269,6 +2269,8 @@ static void nes_process_aeq(struct nes_device *nesdev, struct nes_hw_aeq *aeq) if (++head >= aeq_size) head = 0; + + nes_write32(nesdev->regs + NES_AEQ_ALLOC, 1 << 16); } while (1); aeq->aeq_head = head; diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h index 6f8712d7a2cc..bf7ecfa5f976 100644 --- a/drivers/infiniband/hw/nes/nes_hw.h +++ b/drivers/infiniband/hw/nes/nes_hw.h @@ -61,6 +61,7 @@ enum pci_regs { NES_CQ_ACK = 0x0034, NES_WQE_ALLOC = 0x0040, NES_CQE_ALLOC = 0x0044, + NES_AEQ_ALLOC = 0x0048 }; enum indexed_regs { From 2869975cfbd58dc6591d8c3ba1f171e7f758be28 Mon Sep 17 00:00:00 2001 From: Faisal Latif Date: Fri, 6 Mar 2009 15:12:11 -0800 Subject: [PATCH 24/30] RDMA/nes: Remove LLTX NETIF_F_LLTX is deprecated. Remove private TX locking from the driver and remove the NETIF_F_LLTX feature flag. This also fixes a warning in some configs that comes from doing skb_linearize() call in the hard_start_xmit method with IRQs disabled (if HIGHMEM is enabled, skb_linearize() may end up enabling BHs, which is a no-no if hard IRQs are disabled in that context). By getting rid of LLTX, we do not disable IRQs when skb_linearize() is called. Remove the sq_lock as it is not needed for non-LLTX. Fix ethtool not to show the counter for sq_lock. Reported-by: aluno3@poczta.onet.pl Signed-off-by: Faisal Latif Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_hw.c | 5 +- drivers/infiniband/hw/nes/nes_hw.h | 2 - drivers/infiniband/hw/nes/nes_nic.c | 140 ++++++++++++---------------- 3 files changed, 63 insertions(+), 84 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 1c5e946ce226..9a51f25c6cee 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -1644,7 +1644,6 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) nesvnic->post_cqp_request = nes_post_cqp_request; nesvnic->mcrq_mcast_filter = NULL; - spin_lock_init(&nesvnic->nic.sq_lock); spin_lock_init(&nesvnic->nic.rq_lock); /* setup the RQ */ @@ -2632,9 +2631,9 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) } else break; } - if (skb) - dev_kfree_skb_any(skb); } + if (skb) + dev_kfree_skb_any(skb); nesnic->sq_tail++; nesnic->sq_tail &= nesnic->sq_size-1; if (sq_cqes > 128) { diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h index bf7ecfa5f976..f41a8710d2a8 100644 --- a/drivers/infiniband/hw/nes/nes_hw.h +++ b/drivers/infiniband/hw/nes/nes_hw.h @@ -876,7 +876,6 @@ struct nes_hw_nic { u8 replenishing_rq; u8 reserved; - spinlock_t sq_lock; spinlock_t rq_lock; }; @@ -1148,7 +1147,6 @@ struct nes_ib_device; struct nes_vnic { struct nes_ib_device *nesibdev; u64 sq_full; - u64 sq_locked; u64 tso_requests; u64 segmented_tso_requests; u64 linearized_skbs; diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 8e1d073913ec..025ed9f7d9c2 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -400,8 +400,7 @@ static int nes_nic_send(struct sk_buff *skb, struct net_device *netdev) if (skb_headlen(skb) == skb->len) { if (skb_headlen(skb) <= NES_FIRST_FRAG_SIZE) { nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_2_1_IDX] = 0; - nesnic->tx_skb[nesnic->sq_head] = NULL; - dev_kfree_skb(skb); + nesnic->tx_skb[nesnic->sq_head] = skb; } } else { /* Deal with Fragments */ @@ -453,7 +452,6 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) u32 wqe_count=1; u32 send_rc; struct iphdr *iph; - unsigned long flags; __le16 *wqe_fragment_length; u32 nr_frags; u32 original_first_length; @@ -480,13 +478,6 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) if (netif_queue_stopped(netdev)) return NETDEV_TX_BUSY; - local_irq_save(flags); - if (!spin_trylock(&nesnic->sq_lock)) { - local_irq_restore(flags); - nesvnic->sq_locked++; - return NETDEV_TX_LOCKED; - } - /* Check if SQ is full */ if ((((nesnic->sq_tail+(nesnic->sq_size*2))-nesnic->sq_head) & (nesnic->sq_size - 1)) == 1) { if (!netif_queue_stopped(netdev)) { @@ -498,7 +489,6 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) } } nesvnic->sq_full++; - spin_unlock_irqrestore(&nesnic->sq_lock, flags); return NETDEV_TX_BUSY; } @@ -531,7 +521,6 @@ sq_no_longer_full: } } nesvnic->sq_full++; - spin_unlock_irqrestore(&nesnic->sq_lock, flags); nes_debug(NES_DBG_NIC_TX, "%s: HNIC SQ full- TSO request has too many frags!\n", netdev->name); return NETDEV_TX_BUSY; @@ -656,17 +645,13 @@ tso_sq_no_longer_full: skb_set_transport_header(skb, hoffset); skb_set_network_header(skb, nhoffset); send_rc = nes_nic_send(skb, netdev); - if (send_rc != NETDEV_TX_OK) { - spin_unlock_irqrestore(&nesnic->sq_lock, flags); + if (send_rc != NETDEV_TX_OK) return NETDEV_TX_OK; - } } } else { send_rc = nes_nic_send(skb, netdev); - if (send_rc != NETDEV_TX_OK) { - spin_unlock_irqrestore(&nesnic->sq_lock, flags); + if (send_rc != NETDEV_TX_OK) return NETDEV_TX_OK; - } } barrier(); @@ -676,7 +661,6 @@ tso_sq_no_longer_full: (wqe_count << 24) | (1 << 23) | nesvnic->nic.qp_id); netdev->trans_start = jiffies; - spin_unlock_irqrestore(&nesnic->sq_lock, flags); return NETDEV_TX_OK; } @@ -1012,7 +996,6 @@ static const char nes_ethtool_stringset[][ETH_GSTRING_LEN] = { "Pause Frames Received", "Internal Routing Errors", "SQ SW Dropped SKBs", - "SQ Locked", "SQ Full", "Segmented TSO Requests", "Rx Symbol Errors", @@ -1129,16 +1112,17 @@ static void nes_netdev_get_ethtool_stats(struct net_device *netdev, struct nes_device *nesdev = nesvnic->nesdev; u32 nic_count; u32 u32temp; + u32 index = 0; target_ethtool_stats->n_stats = NES_ETHTOOL_STAT_COUNT; - target_stat_values[0] = nesvnic->nesdev->link_status_interrupts; - target_stat_values[1] = nesvnic->linearized_skbs; - target_stat_values[2] = nesvnic->tso_requests; + target_stat_values[index] = nesvnic->nesdev->link_status_interrupts; + target_stat_values[++index] = nesvnic->linearized_skbs; + target_stat_values[++index] = nesvnic->tso_requests; u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_FRAMES + (nesvnic->nesdev->mac_index*0x200)); nesvnic->nesdev->mac_pause_frames_sent += u32temp; - target_stat_values[3] = nesvnic->nesdev->mac_pause_frames_sent; + target_stat_values[++index] = nesvnic->nesdev->mac_pause_frames_sent; u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_RX_PAUSE_FRAMES + (nesvnic->nesdev->mac_index*0x200)); @@ -1209,60 +1193,59 @@ static void nes_netdev_get_ethtool_stats(struct net_device *netdev, nesvnic->endnode_ipv4_tcp_retransmits += u32temp; } - target_stat_values[4] = nesvnic->nesdev->mac_pause_frames_received; - target_stat_values[5] = nesdev->nesadapter->nic_rx_eth_route_err; - target_stat_values[6] = nesvnic->tx_sw_dropped; - target_stat_values[7] = nesvnic->sq_locked; - target_stat_values[8] = nesvnic->sq_full; - target_stat_values[9] = nesvnic->segmented_tso_requests; - target_stat_values[10] = nesvnic->nesdev->mac_rx_symbol_err_frames; - target_stat_values[11] = nesvnic->nesdev->mac_rx_jabber_frames; - target_stat_values[12] = nesvnic->nesdev->mac_rx_oversized_frames; - target_stat_values[13] = nesvnic->nesdev->mac_rx_short_frames; - target_stat_values[14] = nesvnic->endnode_nstat_rx_discard; - target_stat_values[15] = nesvnic->endnode_nstat_rx_octets; - target_stat_values[16] = nesvnic->endnode_nstat_rx_frames; - target_stat_values[17] = nesvnic->endnode_nstat_tx_octets; - target_stat_values[18] = nesvnic->endnode_nstat_tx_frames; - target_stat_values[19] = mh_detected; - target_stat_values[20] = mh_pauses_sent; - target_stat_values[21] = nesvnic->endnode_ipv4_tcp_retransmits; - target_stat_values[22] = atomic_read(&cm_connects); - target_stat_values[23] = atomic_read(&cm_accepts); - target_stat_values[24] = atomic_read(&cm_disconnects); - target_stat_values[25] = atomic_read(&cm_connecteds); - target_stat_values[26] = atomic_read(&cm_connect_reqs); - target_stat_values[27] = atomic_read(&cm_rejects); - target_stat_values[28] = atomic_read(&mod_qp_timouts); - target_stat_values[29] = atomic_read(&qps_created); - target_stat_values[30] = atomic_read(&sw_qps_destroyed); - target_stat_values[31] = atomic_read(&qps_destroyed); - target_stat_values[32] = atomic_read(&cm_closes); - target_stat_values[33] = cm_packets_sent; - target_stat_values[34] = cm_packets_bounced; - target_stat_values[35] = cm_packets_created; - target_stat_values[36] = cm_packets_received; - target_stat_values[37] = cm_packets_dropped; - target_stat_values[38] = cm_packets_retrans; - target_stat_values[39] = cm_listens_created; - target_stat_values[40] = cm_listens_destroyed; - target_stat_values[41] = cm_backlog_drops; - target_stat_values[42] = atomic_read(&cm_loopbacks); - target_stat_values[43] = atomic_read(&cm_nodes_created); - target_stat_values[44] = atomic_read(&cm_nodes_destroyed); - target_stat_values[45] = atomic_read(&cm_accel_dropped_pkts); - target_stat_values[46] = atomic_read(&cm_resets_recvd); - target_stat_values[47] = int_mod_timer_init; - target_stat_values[48] = int_mod_cq_depth_1; - target_stat_values[49] = int_mod_cq_depth_4; - target_stat_values[50] = int_mod_cq_depth_16; - target_stat_values[51] = int_mod_cq_depth_24; - target_stat_values[52] = int_mod_cq_depth_32; - target_stat_values[53] = int_mod_cq_depth_128; - target_stat_values[54] = int_mod_cq_depth_256; - target_stat_values[55] = nesvnic->lro_mgr.stats.aggregated; - target_stat_values[56] = nesvnic->lro_mgr.stats.flushed; - target_stat_values[57] = nesvnic->lro_mgr.stats.no_desc; + target_stat_values[++index] = nesvnic->nesdev->mac_pause_frames_received; + target_stat_values[++index] = nesdev->nesadapter->nic_rx_eth_route_err; + target_stat_values[++index] = nesvnic->tx_sw_dropped; + target_stat_values[++index] = nesvnic->sq_full; + target_stat_values[++index] = nesvnic->segmented_tso_requests; + target_stat_values[++index] = nesvnic->nesdev->mac_rx_symbol_err_frames; + target_stat_values[++index] = nesvnic->nesdev->mac_rx_jabber_frames; + target_stat_values[++index] = nesvnic->nesdev->mac_rx_oversized_frames; + target_stat_values[++index] = nesvnic->nesdev->mac_rx_short_frames; + target_stat_values[++index] = nesvnic->endnode_nstat_rx_discard; + target_stat_values[++index] = nesvnic->endnode_nstat_rx_octets; + target_stat_values[++index] = nesvnic->endnode_nstat_rx_frames; + target_stat_values[++index] = nesvnic->endnode_nstat_tx_octets; + target_stat_values[++index] = nesvnic->endnode_nstat_tx_frames; + target_stat_values[++index] = mh_detected; + target_stat_values[++index] = mh_pauses_sent; + target_stat_values[++index] = nesvnic->endnode_ipv4_tcp_retransmits; + target_stat_values[++index] = atomic_read(&cm_connects); + target_stat_values[++index] = atomic_read(&cm_accepts); + target_stat_values[++index] = atomic_read(&cm_disconnects); + target_stat_values[++index] = atomic_read(&cm_connecteds); + target_stat_values[++index] = atomic_read(&cm_connect_reqs); + target_stat_values[++index] = atomic_read(&cm_rejects); + target_stat_values[++index] = atomic_read(&mod_qp_timouts); + target_stat_values[++index] = atomic_read(&qps_created); + target_stat_values[++index] = atomic_read(&sw_qps_destroyed); + target_stat_values[++index] = atomic_read(&qps_destroyed); + target_stat_values[++index] = atomic_read(&cm_closes); + target_stat_values[++index] = cm_packets_sent; + target_stat_values[++index] = cm_packets_bounced; + target_stat_values[++index] = cm_packets_created; + target_stat_values[++index] = cm_packets_received; + target_stat_values[++index] = cm_packets_dropped; + target_stat_values[++index] = cm_packets_retrans; + target_stat_values[++index] = cm_listens_created; + target_stat_values[++index] = cm_listens_destroyed; + target_stat_values[++index] = cm_backlog_drops; + target_stat_values[++index] = atomic_read(&cm_loopbacks); + target_stat_values[++index] = atomic_read(&cm_nodes_created); + target_stat_values[++index] = atomic_read(&cm_nodes_destroyed); + target_stat_values[++index] = atomic_read(&cm_accel_dropped_pkts); + target_stat_values[++index] = atomic_read(&cm_resets_recvd); + target_stat_values[++index] = int_mod_timer_init; + target_stat_values[++index] = int_mod_cq_depth_1; + target_stat_values[++index] = int_mod_cq_depth_4; + target_stat_values[++index] = int_mod_cq_depth_16; + target_stat_values[++index] = int_mod_cq_depth_24; + target_stat_values[++index] = int_mod_cq_depth_32; + target_stat_values[++index] = int_mod_cq_depth_128; + target_stat_values[++index] = int_mod_cq_depth_256; + target_stat_values[++index] = nesvnic->lro_mgr.stats.aggregated; + target_stat_values[++index] = nesvnic->lro_mgr.stats.flushed; + target_stat_values[++index] = nesvnic->lro_mgr.stats.no_desc; } @@ -1616,7 +1599,6 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, nes_debug(NES_DBG_INIT, "Enabling VLAN Insert/Delete.\n"); netdev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; netdev->vlan_rx_register = nes_netdev_vlan_rx_register; - netdev->features |= NETIF_F_LLTX; /* Fill in the port structure */ nesvnic->netdev = netdev; From 0145f341a951b998d6d0fa38992a42d2a90b5bab Mon Sep 17 00:00:00 2001 From: Don Wood Date: Fri, 6 Mar 2009 15:15:00 -0800 Subject: [PATCH 25/30] RDMA/nes: Improve use of PBLs Two level 256 byte PBLs was not implemented so the driver could report out of memory when in fact there were PBLs still available. This solution prefers to use 4KB PBLs over two level 256B PBLs until the number of 4KB PBLs falls below a threshold. At this point the 4KB PBL structure is converted to use 256B PBLs which prevents the driver from running out of 4KB PBLs too quickly. Signed-off-by: Don Wood Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_verbs.c | 191 ++++++++++++++++++-------- 1 file changed, 134 insertions(+), 57 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index b42b17ac7712..96d953540a2c 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -1887,21 +1887,75 @@ static int nes_destroy_cq(struct ib_cq *ib_cq) return ret; } +/** + * root_256 + */ +static u32 root_256(struct nes_device *nesdev, + struct nes_root_vpbl *root_vpbl, + struct nes_root_vpbl *new_root, + u16 pbl_count_4k, + u16 pbl_count_256) +{ + u64 leaf_pbl; + int i, j, k; + + if (pbl_count_4k == 1) { + new_root->pbl_vbase = pci_alloc_consistent(nesdev->pcidev, + 512, &new_root->pbl_pbase); + + if (new_root->pbl_vbase == NULL) + return 0; + + leaf_pbl = (u64)root_vpbl->pbl_pbase; + for (i = 0; i < 16; i++) { + new_root->pbl_vbase[i].pa_low = + cpu_to_le32((u32)leaf_pbl); + new_root->pbl_vbase[i].pa_high = + cpu_to_le32((u32)((((u64)leaf_pbl) >> 32))); + leaf_pbl += 256; + } + } else { + for (i = 3; i >= 0; i--) { + j = i * 16; + root_vpbl->pbl_vbase[j] = root_vpbl->pbl_vbase[i]; + leaf_pbl = le32_to_cpu(root_vpbl->pbl_vbase[j].pa_low) + + (((u64)le32_to_cpu(root_vpbl->pbl_vbase[j].pa_high)) + << 32); + for (k = 1; k < 16; k++) { + leaf_pbl += 256; + root_vpbl->pbl_vbase[j + k].pa_low = + cpu_to_le32((u32)leaf_pbl); + root_vpbl->pbl_vbase[j + k].pa_high = + cpu_to_le32((u32)((((u64)leaf_pbl) >> 32))); + } + } + } + + return 1; +} + /** * nes_reg_mr */ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, u32 stag, u64 region_length, struct nes_root_vpbl *root_vpbl, - dma_addr_t single_buffer, u16 pbl_count, u16 residual_page_count, - int acc, u64 *iova_start) + dma_addr_t single_buffer, u16 pbl_count_4k, + u16 residual_page_count_4k, int acc, u64 *iova_start, + u16 *actual_pbl_cnt, u8 *used_4k_pbls) { struct nes_hw_cqp_wqe *cqp_wqe; struct nes_cqp_request *cqp_request; unsigned long flags; int ret; struct nes_adapter *nesadapter = nesdev->nesadapter; - /* int count; */ + uint pg_cnt = 0; + u16 pbl_count_256; + u16 pbl_count = 0; + u8 use_256_pbls = 0; + u8 use_4k_pbls = 0; + u16 use_two_level = (pbl_count_4k > 1) ? 1 : 0; + struct nes_root_vpbl new_root = {0, 0, 0}; u32 opcode = 0; u16 major_code; @@ -1914,41 +1968,70 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, cqp_request->waiting = 1; cqp_wqe = &cqp_request->cqp_wqe; - spin_lock_irqsave(&nesadapter->pbl_lock, flags); - /* track PBL resources */ - if (pbl_count != 0) { - if (pbl_count > 1) { - /* Two level PBL */ - if ((pbl_count+1) > nesadapter->free_4kpbl) { - nes_debug(NES_DBG_MR, "Out of 4KB Pbls for two level request.\n"); - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - nes_free_cqp_request(nesdev, cqp_request); - return -ENOMEM; - } else { - nesadapter->free_4kpbl -= pbl_count+1; - } - } else if (residual_page_count > 32) { - if (pbl_count > nesadapter->free_4kpbl) { - nes_debug(NES_DBG_MR, "Out of 4KB Pbls.\n"); - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - nes_free_cqp_request(nesdev, cqp_request); - return -ENOMEM; - } else { - nesadapter->free_4kpbl -= pbl_count; + if (pbl_count_4k) { + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + + pg_cnt = ((pbl_count_4k - 1) * 512) + residual_page_count_4k; + pbl_count_256 = (pg_cnt + 31) / 32; + if (pg_cnt <= 32) { + if (pbl_count_256 <= nesadapter->free_256pbl) + use_256_pbls = 1; + else if (pbl_count_4k <= nesadapter->free_4kpbl) + use_4k_pbls = 1; + } else if (pg_cnt <= 2048) { + if (((pbl_count_4k + use_two_level) <= nesadapter->free_4kpbl) && + (nesadapter->free_4kpbl > (nesadapter->max_4kpbl >> 1))) { + use_4k_pbls = 1; + } else if ((pbl_count_256 + 1) <= nesadapter->free_256pbl) { + use_256_pbls = 1; + use_two_level = 1; + } else if ((pbl_count_4k + use_two_level) <= nesadapter->free_4kpbl) { + use_4k_pbls = 1; } } else { - if (pbl_count > nesadapter->free_256pbl) { - nes_debug(NES_DBG_MR, "Out of 256B Pbls.\n"); - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - nes_free_cqp_request(nesdev, cqp_request); - return -ENOMEM; - } else { - nesadapter->free_256pbl -= pbl_count; - } + if ((pbl_count_4k + 1) <= nesadapter->free_4kpbl) + use_4k_pbls = 1; } + + if (use_256_pbls) { + pbl_count = pbl_count_256; + nesadapter->free_256pbl -= pbl_count + use_two_level; + } else if (use_4k_pbls) { + pbl_count = pbl_count_4k; + nesadapter->free_4kpbl -= pbl_count + use_two_level; + } else { + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + nes_debug(NES_DBG_MR, "Out of Pbls\n"); + nes_free_cqp_request(nesdev, cqp_request); + return -ENOMEM; + } + + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); } - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + if (use_256_pbls && use_two_level) { + if (root_256(nesdev, root_vpbl, &new_root, pbl_count_4k, pbl_count_256) == 1) { + if (new_root.pbl_pbase != 0) + root_vpbl = &new_root; + } else { + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + nesadapter->free_256pbl += pbl_count_256 + use_two_level; + use_256_pbls = 0; + + if (pbl_count_4k == 1) + use_two_level = 0; + pbl_count = pbl_count_4k; + + if ((pbl_count_4k + use_two_level) <= nesadapter->free_4kpbl) { + nesadapter->free_4kpbl -= pbl_count + use_two_level; + use_4k_pbls = 1; + } + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + + if (use_4k_pbls == 0) + return -ENOMEM; + } + } opcode = NES_CQP_REGISTER_STAG | NES_CQP_STAG_RIGHTS_LOCAL_READ | NES_CQP_STAG_VA_TO | NES_CQP_STAG_MR; @@ -1977,10 +2060,9 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, } else { set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PA_LOW_IDX, root_vpbl->pbl_pbase); set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX, pbl_count); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_LEN_IDX, - (((pbl_count - 1) * 4096) + (residual_page_count*8))); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_LEN_IDX, (pg_cnt * 8)); - if ((pbl_count > 1) || (residual_page_count > 32)) + if (use_4k_pbls) cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_STAG_PBL_BLK_SIZE); } barrier(); @@ -1996,23 +2078,26 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, stag, ret, cqp_request->major_code, cqp_request->minor_code); major_code = cqp_request->major_code; nes_put_cqp_request(nesdev, cqp_request); + if ((!ret || major_code) && pbl_count != 0) { spin_lock_irqsave(&nesadapter->pbl_lock, flags); - if (pbl_count > 1) - nesadapter->free_4kpbl += pbl_count+1; - else if (residual_page_count > 32) - nesadapter->free_4kpbl += pbl_count; - else - nesadapter->free_256pbl += pbl_count; + if (use_256_pbls) + nesadapter->free_256pbl += pbl_count + use_two_level; + else if (use_4k_pbls) + nesadapter->free_4kpbl += pbl_count + use_two_level; spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); } + if (new_root.pbl_pbase) + pci_free_consistent(nesdev->pcidev, 512, new_root.pbl_vbase, + new_root.pbl_pbase); + if (!ret) return -ETIME; else if (major_code) return -EIO; - else - return 0; + *actual_pbl_cnt = pbl_count + use_two_level; + *used_4k_pbls = use_4k_pbls; return 0; } @@ -2177,18 +2262,14 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, pbl_count = root_pbl_index; } ret = nes_reg_mr(nesdev, nespd, stag, region_length, &root_vpbl, - buffer_list[0].addr, pbl_count, (u16)cur_pbl_index, acc, iova_start); + buffer_list[0].addr, pbl_count, (u16)cur_pbl_index, acc, iova_start, + &nesmr->pbls_used, &nesmr->pbl_4k); if (ret == 0) { nesmr->ibmr.rkey = stag; nesmr->ibmr.lkey = stag; nesmr->mode = IWNES_MEMREG_TYPE_MEM; ibmr = &nesmr->ibmr; - nesmr->pbl_4k = ((pbl_count > 1) || (cur_pbl_index > 32)) ? 1 : 0; - nesmr->pbls_used = pbl_count; - if (pbl_count > 1) { - nesmr->pbls_used++; - } } else { kfree(nesmr); ibmr = ERR_PTR(-ENOMEM); @@ -2466,8 +2547,9 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, stag, (unsigned int)iova_start, (unsigned int)region_length, stag_index, (unsigned long long)region->length, pbl_count); - ret = nes_reg_mr( nesdev, nespd, stag, region->length, &root_vpbl, - first_dma_addr, pbl_count, (u16)cur_pbl_index, acc, &iova_start); + ret = nes_reg_mr(nesdev, nespd, stag, region->length, &root_vpbl, + first_dma_addr, pbl_count, (u16)cur_pbl_index, acc, + &iova_start, &nesmr->pbls_used, &nesmr->pbl_4k); nes_debug(NES_DBG_MR, "ret=%d\n", ret); @@ -2476,11 +2558,6 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, nesmr->ibmr.lkey = stag; nesmr->mode = IWNES_MEMREG_TYPE_MEM; ibmr = &nesmr->ibmr; - nesmr->pbl_4k = ((pbl_count > 1) || (cur_pbl_index > 32)) ? 1 : 0; - nesmr->pbls_used = pbl_count; - if (pbl_count > 1) { - nesmr->pbls_used++; - } } else { ib_umem_release(region); kfree(nesmr); From 9d5ab13325d0bb855cf856946c140a68ceb20e32 Mon Sep 17 00:00:00 2001 From: Faisal Latif Date: Fri, 6 Mar 2009 15:15:01 -0800 Subject: [PATCH 26/30] RDMA/nes: Handle MPA Reject message properly While doing testing, there are failures as MPA Reject call is not handled. To handle MPA Reject call, following changes are done: *Handle inbound/outbound MPA Reject response message. When nes_reject() is called for pending MPA request reply, send the MPA Reject message to its peer (active side)cm_node. The peer cm_node (active side) will indicate Reject message event for the pending Connect Request. *Handle MPA Reject response message for loopback connections and listener. When MPA Request is rejected, check if it is a loopback connection and if it is then it will send Reject message event to its peer loopback node. Also when destroying listener, check if the cm_nodes for that listener are loopback or not. *Add gracefull connection close with the MPA Reject response message. Send gracefull close (FIN, FIN ACK..) to terminate the cm_nodes. *Some code re-org while making the above changes. Removed recv_list and recv_list_lock from the cm_node structure as there can be only one receive close entry on the timer. Also implemented handle_recv_entry() as receive close entry is processed from both nes_rem_ref_cm_node() as well as nes_cm_timer_tick(). Signed-off-by: Faisal Latif Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_cm.c | 588 +++++++++++++++++++---------- drivers/infiniband/hw/nes/nes_cm.h | 10 +- 2 files changed, 403 insertions(+), 195 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index bd918df3d22c..5327f2bec6bf 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -103,6 +103,7 @@ static int nes_disconnect(struct nes_qp *nesqp, int abrupt); static void nes_disconnect_worker(struct work_struct *work); static int send_mpa_request(struct nes_cm_node *, struct sk_buff *); +static int send_mpa_reject(struct nes_cm_node *); static int send_syn(struct nes_cm_node *, u32, struct sk_buff *); static int send_reset(struct nes_cm_node *, struct sk_buff *); static int send_ack(struct nes_cm_node *cm_node, struct sk_buff *skb); @@ -113,8 +114,7 @@ static void process_packet(struct nes_cm_node *, struct sk_buff *, static void active_open_err(struct nes_cm_node *, struct sk_buff *, int); static void passive_open_err(struct nes_cm_node *, struct sk_buff *, int); static void cleanup_retrans_entry(struct nes_cm_node *); -static void handle_rcv_mpa(struct nes_cm_node *, struct sk_buff *, - enum nes_cm_event_type); +static void handle_rcv_mpa(struct nes_cm_node *, struct sk_buff *); static void free_retrans_entry(struct nes_cm_node *cm_node); static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph, struct sk_buff *skb, int optionsize, int passive); @@ -124,6 +124,8 @@ static void cm_event_connected(struct nes_cm_event *); static void cm_event_connect_error(struct nes_cm_event *); static void cm_event_reset(struct nes_cm_event *); static void cm_event_mpa_req(struct nes_cm_event *); +static void cm_event_mpa_reject(struct nes_cm_event *); +static void handle_recv_entry(struct nes_cm_node *cm_node, u32 rem_node); static void print_core(struct nes_cm_core *core); @@ -196,7 +198,6 @@ static struct nes_cm_event *create_event(struct nes_cm_node *cm_node, */ static int send_mpa_request(struct nes_cm_node *cm_node, struct sk_buff *skb) { - int ret; if (!skb) { nes_debug(NES_DBG_CM, "skb set to NULL\n"); return -1; @@ -206,11 +207,27 @@ static int send_mpa_request(struct nes_cm_node *cm_node, struct sk_buff *skb) form_cm_frame(skb, cm_node, NULL, 0, &cm_node->mpa_frame, cm_node->mpa_frame_size, SET_ACK); - ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0); - if (ret < 0) - return ret; + return schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0); +} - return 0; + + +static int send_mpa_reject(struct nes_cm_node *cm_node) +{ + struct sk_buff *skb = NULL; + + skb = dev_alloc_skb(MAX_CM_BUFFER); + if (!skb) { + nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); + return -ENOMEM; + } + + /* send an MPA reject frame */ + form_cm_frame(skb, cm_node, NULL, 0, &cm_node->mpa_frame, + cm_node->mpa_frame_size, SET_ACK | SET_FIN); + + cm_node->state = NES_CM_STATE_FIN_WAIT1; + return schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0); } @@ -218,14 +235,17 @@ static int send_mpa_request(struct nes_cm_node *cm_node, struct sk_buff *skb) * recv_mpa - process a received TCP pkt, we are expecting an * IETF MPA frame */ -static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 len) +static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type, + u32 len) { struct ietf_mpa_frame *mpa_frame; + *type = NES_MPA_REQUEST_ACCEPT; + /* assume req frame is in tcp data payload */ if (len < sizeof(struct ietf_mpa_frame)) { nes_debug(NES_DBG_CM, "The received ietf buffer was too small (%x)\n", len); - return -1; + return -EINVAL; } mpa_frame = (struct ietf_mpa_frame *)buffer; @@ -234,14 +254,25 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 len) if (cm_node->mpa_frame_size + sizeof(struct ietf_mpa_frame) != len) { nes_debug(NES_DBG_CM, "The received ietf buffer was not right" " complete (%x + %x != %x)\n", - cm_node->mpa_frame_size, (u32)sizeof(struct ietf_mpa_frame), len); - return -1; + cm_node->mpa_frame_size, + (u32)sizeof(struct ietf_mpa_frame), len); + return -EINVAL; + } + /* make sure it does not exceed the max size */ + if (len > MAX_CM_BUFFER) { + nes_debug(NES_DBG_CM, "The received ietf buffer was too large" + " (%x + %x != %x)\n", + cm_node->mpa_frame_size, + (u32)sizeof(struct ietf_mpa_frame), len); + return -EINVAL; } /* copy entire MPA frame to our cm_node's frame */ memcpy(cm_node->mpa_frame_buf, buffer + sizeof(struct ietf_mpa_frame), cm_node->mpa_frame_size); + if (mpa_frame->flags & IETF_MPA_FLAGS_REJECT) + *type = NES_MPA_REQUEST_REJECT; return 0; } @@ -380,7 +411,7 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC); if (!new_send) - return -1; + return -ENOMEM; /* new_send->timetosend = currenttime */ new_send->retrycount = NES_DEFAULT_RETRYS; @@ -394,9 +425,11 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, if (type == NES_TIMER_TYPE_CLOSE) { new_send->timetosend += (HZ/10); - spin_lock_irqsave(&cm_node->recv_list_lock, flags); - list_add_tail(&new_send->list, &cm_node->recv_list); - spin_unlock_irqrestore(&cm_node->recv_list_lock, flags); + if (cm_node->recv_entry) { + WARN_ON(1); + return -EINVAL; + } + cm_node->recv_entry = new_send; } if (type == NES_TIMER_TYPE_SEND) { @@ -435,24 +468,78 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, return ret; } +static void nes_retrans_expired(struct nes_cm_node *cm_node) +{ + switch (cm_node->state) { + case NES_CM_STATE_SYN_RCVD: + case NES_CM_STATE_CLOSING: + rem_ref_cm_node(cm_node->cm_core, cm_node); + break; + case NES_CM_STATE_LAST_ACK: + case NES_CM_STATE_FIN_WAIT1: + case NES_CM_STATE_MPAREJ_RCVD: + send_reset(cm_node, NULL); + break; + default: + create_event(cm_node, NES_CM_EVENT_ABORTED); + } +} + +static void handle_recv_entry(struct nes_cm_node *cm_node, u32 rem_node) +{ + struct nes_timer_entry *recv_entry = cm_node->recv_entry; + struct iw_cm_id *cm_id = cm_node->cm_id; + struct nes_qp *nesqp; + unsigned long qplockflags; + + if (!recv_entry) + return; + nesqp = (struct nes_qp *)recv_entry->skb; + if (nesqp) { + spin_lock_irqsave(&nesqp->lock, qplockflags); + if (nesqp->cm_id) { + nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, " + "refcount = %d: HIT A " + "NES_TIMER_TYPE_CLOSE with something " + "to do!!!\n", nesqp->hwqp.qp_id, cm_id, + atomic_read(&nesqp->refcount)); + nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; + nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT; + nesqp->ibqp_state = IB_QPS_ERR; + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + nes_cm_disconn(nesqp); + } else { + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, " + "refcount = %d: HIT A " + "NES_TIMER_TYPE_CLOSE with nothing " + "to do!!!\n", nesqp->hwqp.qp_id, cm_id, + atomic_read(&nesqp->refcount)); + } + } else if (rem_node) { + /* TIME_WAIT state */ + rem_ref_cm_node(cm_node->cm_core, cm_node); + } + if (cm_node->cm_id) + cm_id->rem_ref(cm_id); + kfree(recv_entry); + cm_node->recv_entry = NULL; +} /** * nes_cm_timer_tick */ static void nes_cm_timer_tick(unsigned long pass) { - unsigned long flags, qplockflags; + unsigned long flags; unsigned long nexttimeout = jiffies + NES_LONG_TIME; - struct iw_cm_id *cm_id; struct nes_cm_node *cm_node; struct nes_timer_entry *send_entry, *recv_entry; - struct list_head *list_core, *list_core_temp; - struct list_head *list_node, *list_node_temp; + struct list_head *list_core_temp; + struct list_head *list_node; struct nes_cm_core *cm_core = g_cm_core; - struct nes_qp *nesqp; u32 settimer = 0; int ret = NETDEV_TX_OK; - enum nes_cm_node_state last_state; struct list_head timer_list; INIT_LIST_HEAD(&timer_list); @@ -461,7 +548,7 @@ static void nes_cm_timer_tick(unsigned long pass) list_for_each_safe(list_node, list_core_temp, &cm_core->connected_nodes) { cm_node = container_of(list_node, struct nes_cm_node, list); - if (!list_empty(&cm_node->recv_list) || (cm_node->send_entry)) { + if ((cm_node->recv_entry) || (cm_node->send_entry)) { add_ref_cm_node(cm_node); list_add(&cm_node->timer_entry, &timer_list); } @@ -471,54 +558,18 @@ static void nes_cm_timer_tick(unsigned long pass) list_for_each_safe(list_node, list_core_temp, &timer_list) { cm_node = container_of(list_node, struct nes_cm_node, timer_entry); - spin_lock_irqsave(&cm_node->recv_list_lock, flags); - list_for_each_safe(list_core, list_node_temp, - &cm_node->recv_list) { - recv_entry = container_of(list_core, - struct nes_timer_entry, list); - if (!recv_entry) - break; + recv_entry = cm_node->recv_entry; + + if (recv_entry) { if (time_after(recv_entry->timetosend, jiffies)) { if (nexttimeout > recv_entry->timetosend || - !settimer) { + !settimer) { nexttimeout = recv_entry->timetosend; settimer = 1; } - continue; - } - list_del(&recv_entry->list); - cm_id = cm_node->cm_id; - spin_unlock_irqrestore(&cm_node->recv_list_lock, flags); - nesqp = (struct nes_qp *)recv_entry->skb; - spin_lock_irqsave(&nesqp->lock, qplockflags); - if (nesqp->cm_id) { - nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, " - "refcount = %d: HIT A " - "NES_TIMER_TYPE_CLOSE with something " - "to do!!!\n", nesqp->hwqp.qp_id, cm_id, - atomic_read(&nesqp->refcount)); - nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; - nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT; - nesqp->ibqp_state = IB_QPS_ERR; - spin_unlock_irqrestore(&nesqp->lock, - qplockflags); - nes_cm_disconn(nesqp); - } else { - spin_unlock_irqrestore(&nesqp->lock, - qplockflags); - nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, " - "refcount = %d: HIT A " - "NES_TIMER_TYPE_CLOSE with nothing " - "to do!!!\n", nesqp->hwqp.qp_id, cm_id, - atomic_read(&nesqp->refcount)); - } - if (cm_id) - cm_id->rem_ref(cm_id); - - kfree(recv_entry); - spin_lock_irqsave(&cm_node->recv_list_lock, flags); + } else + handle_recv_entry(cm_node, 1); } - spin_unlock_irqrestore(&cm_node->recv_list_lock, flags); spin_lock_irqsave(&cm_node->retrans_list_lock, flags); do { @@ -533,12 +584,11 @@ static void nes_cm_timer_tick(unsigned long pass) nexttimeout = send_entry->timetosend; settimer = 1; - break; } } else { free_retrans_entry(cm_node); - break; } + break; } if ((cm_node->state == NES_CM_STATE_TSA) || @@ -550,16 +600,12 @@ static void nes_cm_timer_tick(unsigned long pass) if (!send_entry->retranscount || !send_entry->retrycount) { cm_packets_dropped++; - last_state = cm_node->state; - cm_node->state = NES_CM_STATE_CLOSED; free_retrans_entry(cm_node); + spin_unlock_irqrestore( &cm_node->retrans_list_lock, flags); - if (last_state == NES_CM_STATE_SYN_RCVD) - rem_ref_cm_node(cm_core, cm_node); - else - create_event(cm_node, - NES_CM_EVENT_ABORTED); + nes_retrans_expired(cm_node); + cm_node->state = NES_CM_STATE_CLOSED; spin_lock_irqsave(&cm_node->retrans_list_lock, flags); break; @@ -714,7 +760,7 @@ static int send_reset(struct nes_cm_node *cm_node, struct sk_buff *skb) skb = dev_alloc_skb(MAX_CM_BUFFER); if (!skb) { nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); - return -1; + return -ENOMEM; } form_cm_frame(skb, cm_node, NULL, 0, NULL, 0, flags); @@ -871,7 +917,8 @@ static int add_hte_node(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core, struct nes_cm_listener *listener, int free_hanging_nodes) { - int ret = 1; + int ret = -EINVAL; + int err = 0; unsigned long flags; struct list_head *list_pos = NULL; struct list_head *list_temp = NULL; @@ -900,10 +947,60 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core, list_for_each_safe(list_pos, list_temp, &reset_list) { cm_node = container_of(list_pos, struct nes_cm_node, - reset_entry); - cleanup_retrans_entry(cm_node); - send_reset(cm_node, NULL); - rem_ref_cm_node(cm_node->cm_core, cm_node); + reset_entry); + { + struct nes_cm_node *loopback = cm_node->loopbackpartner; + if (NES_CM_STATE_FIN_WAIT1 <= cm_node->state) { + rem_ref_cm_node(cm_node->cm_core, cm_node); + } else { + if (!loopback) { + cleanup_retrans_entry(cm_node); + err = send_reset(cm_node, NULL); + if (err) { + cm_node->state = + NES_CM_STATE_CLOSED; + WARN_ON(1); + } else { + cm_node->state = + NES_CM_STATE_CLOSED; + rem_ref_cm_node( + cm_node->cm_core, + cm_node); + } + } else { + struct nes_cm_event event; + + event.cm_node = loopback; + event.cm_info.rem_addr = + loopback->rem_addr; + event.cm_info.loc_addr = + loopback->loc_addr; + event.cm_info.rem_port = + loopback->rem_port; + event.cm_info.loc_port = + loopback->loc_port; + event.cm_info.cm_id = loopback->cm_id; + cm_event_connect_error(&event); + loopback->state = NES_CM_STATE_CLOSED; + + event.cm_node = cm_node; + event.cm_info.rem_addr = + cm_node->rem_addr; + event.cm_info.loc_addr = + cm_node->loc_addr; + event.cm_info.rem_port = + cm_node->rem_port; + event.cm_info.loc_port = + cm_node->loc_port; + event.cm_info.cm_id = cm_node->cm_id; + cm_event_reset(&event); + + rem_ref_cm_node(cm_node->cm_core, + cm_node); + + } + } + } } spin_lock_irqsave(&cm_core->listen_list_lock, flags); @@ -964,6 +1061,7 @@ static inline int mini_cm_accelerated(struct nes_cm_core *cm_core, if (cm_node->accept_pend) { BUG_ON(!cm_node->listener); atomic_dec(&cm_node->listener->pend_accepts_cnt); + cm_node->accept_pend = 0; BUG_ON(atomic_read(&cm_node->listener->pend_accepts_cnt) < 0); } @@ -990,7 +1088,7 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip) memset(&fl, 0, sizeof fl); fl.nl_u.ip4_u.daddr = htonl(dst_ip); if (ip_route_output_key(&init_net, &rt, &fl)) { - printk("%s: ip_route_output_key failed for 0x%08X\n", + printk(KERN_ERR "%s: ip_route_output_key failed for 0x%08X\n", __func__, dst_ip); return rc; } @@ -1053,8 +1151,6 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, cm_node->cm_id); spin_lock_init(&cm_node->retrans_list_lock); - INIT_LIST_HEAD(&cm_node->recv_list); - spin_lock_init(&cm_node->recv_list_lock); cm_node->loopbackpartner = NULL; atomic_set(&cm_node->ref_count, 1); @@ -1122,10 +1218,7 @@ static int add_ref_cm_node(struct nes_cm_node *cm_node) static int rem_ref_cm_node(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node) { - unsigned long flags, qplockflags; - struct nes_timer_entry *recv_entry; - struct iw_cm_id *cm_id; - struct list_head *list_core, *list_node_temp; + unsigned long flags; struct nes_qp *nesqp; if (!cm_node) @@ -1146,38 +1239,9 @@ static int rem_ref_cm_node(struct nes_cm_core *cm_core, atomic_dec(&cm_node->listener->pend_accepts_cnt); BUG_ON(atomic_read(&cm_node->listener->pend_accepts_cnt) < 0); } - BUG_ON(cm_node->send_entry); - spin_lock_irqsave(&cm_node->recv_list_lock, flags); - list_for_each_safe(list_core, list_node_temp, &cm_node->recv_list) { - recv_entry = container_of(list_core, struct nes_timer_entry, - list); - list_del(&recv_entry->list); - cm_id = cm_node->cm_id; - spin_unlock_irqrestore(&cm_node->recv_list_lock, flags); - nesqp = (struct nes_qp *)recv_entry->skb; - spin_lock_irqsave(&nesqp->lock, qplockflags); - if (nesqp->cm_id) { - nes_debug(NES_DBG_CM, "QP%u: cm_id = %p: HIT A " - "NES_TIMER_TYPE_CLOSE with something to do!\n", - nesqp->hwqp.qp_id, cm_id); - nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; - nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT; - nesqp->ibqp_state = IB_QPS_ERR; - spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_cm_disconn(nesqp); - } else { - spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_debug(NES_DBG_CM, "QP%u: cm_id = %p: HIT A " - "NES_TIMER_TYPE_CLOSE with nothing to do!\n", - nesqp->hwqp.qp_id, cm_id); - } - cm_id->rem_ref(cm_id); - - kfree(recv_entry); - spin_lock_irqsave(&cm_node->recv_list_lock, flags); - } - spin_unlock_irqrestore(&cm_node->recv_list_lock, flags); - + WARN_ON(cm_node->send_entry); + if (cm_node->recv_entry) + handle_recv_entry(cm_node, 0); if (cm_node->listener) { mini_cm_dec_refcnt_listen(cm_core, cm_node->listener, 0); } else { @@ -1262,8 +1326,7 @@ static void drop_packet(struct sk_buff *skb) dev_kfree_skb_any(skb); } -static void handle_fin_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, - struct tcphdr *tcph) +static void handle_fin_pkt(struct nes_cm_node *cm_node) { nes_debug(NES_DBG_CM, "Received FIN, cm_node = %p, state = %u. " "refcnt=%d\n", cm_node, cm_node->state, @@ -1275,23 +1338,30 @@ static void handle_fin_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, case NES_CM_STATE_SYN_SENT: case NES_CM_STATE_ESTABLISHED: case NES_CM_STATE_MPAREQ_SENT: + case NES_CM_STATE_MPAREJ_RCVD: cm_node->state = NES_CM_STATE_LAST_ACK; - send_fin(cm_node, skb); + send_fin(cm_node, NULL); break; case NES_CM_STATE_FIN_WAIT1: cm_node->state = NES_CM_STATE_CLOSING; - send_ack(cm_node, skb); + send_ack(cm_node, NULL); + /* Wait for ACK as this is simultanous close.. + * After we receive ACK, do not send anything.. + * Just rm the node.. Done.. */ break; case NES_CM_STATE_FIN_WAIT2: cm_node->state = NES_CM_STATE_TIME_WAIT; - send_ack(cm_node, skb); + send_ack(cm_node, NULL); + schedule_nes_timer(cm_node, NULL, NES_TIMER_TYPE_CLOSE, 1, 0); + break; + case NES_CM_STATE_TIME_WAIT: cm_node->state = NES_CM_STATE_CLOSED; + rem_ref_cm_node(cm_node->cm_core, cm_node); break; case NES_CM_STATE_TSA: default: nes_debug(NES_DBG_CM, "Error Rcvd FIN for node-%p state = %d\n", cm_node, cm_node->state); - drop_packet(skb); break; } } @@ -1337,23 +1407,35 @@ static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, cleanup_retrans_entry(cm_node); drop_packet(skb); break; + case NES_CM_STATE_TIME_WAIT: + cleanup_retrans_entry(cm_node); + cm_node->state = NES_CM_STATE_CLOSED; + rem_ref_cm_node(cm_node->cm_core, cm_node); + drop_packet(skb); + break; + case NES_CM_STATE_FIN_WAIT1: + cleanup_retrans_entry(cm_node); + nes_debug(NES_DBG_CM, "Bad state %s[%u]\n", __func__, __LINE__); default: drop_packet(skb); break; } } -static void handle_rcv_mpa(struct nes_cm_node *cm_node, struct sk_buff *skb, - enum nes_cm_event_type type) + +static void handle_rcv_mpa(struct nes_cm_node *cm_node, struct sk_buff *skb) { - int ret; + int ret = 0; int datasize = skb->len; u8 *dataloc = skb->data; - ret = parse_mpa(cm_node, dataloc, datasize); - if (ret < 0) { + + enum nes_cm_event_type type = NES_CM_EVENT_UNKNOWN; + u32 res_type; + ret = parse_mpa(cm_node, dataloc, &res_type, datasize); + if (ret) { nes_debug(NES_DBG_CM, "didn't like MPA Request\n"); - if (type == NES_CM_EVENT_CONNECTED) { + if (cm_node->state == NES_CM_STATE_MPAREQ_SENT) { nes_debug(NES_DBG_CM, "%s[%u] create abort for " "cm_node=%p listener=%p state=%d\n", __func__, __LINE__, cm_node, cm_node->listener, @@ -1362,18 +1444,38 @@ static void handle_rcv_mpa(struct nes_cm_node *cm_node, struct sk_buff *skb, } else { passive_open_err(cm_node, skb, 1); } - } else { - cleanup_retrans_entry(cm_node); - dev_kfree_skb_any(skb); - if (type == NES_CM_EVENT_CONNECTED) - cm_node->state = NES_CM_STATE_TSA; - else - atomic_set(&cm_node->passive_state, - NES_PASSIVE_STATE_INDICATED); - create_event(cm_node, type); - + return; } - return ; + + switch (cm_node->state) { + case NES_CM_STATE_ESTABLISHED: + if (res_type == NES_MPA_REQUEST_REJECT) { + /*BIG problem as we are receiving the MPA.. So should + * not be REJECT.. This is Passive Open.. We can + * only receive it Reject for Active Open...*/ + WARN_ON(1); + } + cm_node->state = NES_CM_STATE_MPAREQ_RCVD; + type = NES_CM_EVENT_MPA_REQ; + atomic_set(&cm_node->passive_state, + NES_PASSIVE_STATE_INDICATED); + break; + case NES_CM_STATE_MPAREQ_SENT: + if (res_type == NES_MPA_REQUEST_REJECT) { + type = NES_CM_EVENT_MPA_REJECT; + cm_node->state = NES_CM_STATE_MPAREJ_RCVD; + } else { + type = NES_CM_EVENT_CONNECTED; + cm_node->state = NES_CM_STATE_TSA; + } + + break; + default: + WARN_ON(1); + break; + } + dev_kfree_skb_any(skb); + create_event(cm_node, type); } static void indicate_pkt_err(struct nes_cm_node *cm_node, struct sk_buff *skb) @@ -1461,8 +1563,6 @@ static void handle_syn_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, break; case NES_CM_STATE_LISTENING: /* Passive OPEN */ - cm_node->accept_pend = 1; - atomic_inc(&cm_node->listener->pend_accepts_cnt); if (atomic_read(&cm_node->listener->pend_accepts_cnt) > cm_node->listener->backlog) { nes_debug(NES_DBG_CM, "drop syn due to backlog " @@ -1480,6 +1580,9 @@ static void handle_syn_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, } cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1; BUG_ON(cm_node->send_entry); + cm_node->accept_pend = 1; + atomic_inc(&cm_node->listener->pend_accepts_cnt); + cm_node->state = NES_CM_STATE_SYN_RCVD; send_syn(cm_node, 1, skb); break; @@ -1514,6 +1617,7 @@ static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, inc_sequence = ntohl(tcph->seq); switch (cm_node->state) { case NES_CM_STATE_SYN_SENT: + cleanup_retrans_entry(cm_node); /* active open */ if (check_syn(cm_node, tcph, skb)) return; @@ -1563,10 +1667,7 @@ static void handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, u32 rem_seq; int ret; int optionsize; - u32 temp_seq = cm_node->tcp_cntxt.loc_seq_num; - optionsize = (tcph->doff << 2) - sizeof(struct tcphdr); - cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq); if (check_seq(cm_node, tcph, skb)) return; @@ -1576,7 +1677,7 @@ static void handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, rem_seq = ntohl(tcph->seq); rem_seq_ack = ntohl(tcph->ack_seq); datasize = skb->len; - + cleanup_retrans_entry(cm_node); switch (cm_node->state) { case NES_CM_STATE_SYN_RCVD: /* Passive OPEN */ @@ -1584,7 +1685,6 @@ static void handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, if (ret) break; cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq); - cm_node->tcp_cntxt.loc_seq_num = temp_seq; if (cm_node->tcp_cntxt.rem_ack_num != cm_node->tcp_cntxt.loc_seq_num) { nes_debug(NES_DBG_CM, "rem_ack_num != loc_seq_num\n"); @@ -1593,31 +1693,30 @@ static void handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, return; } cm_node->state = NES_CM_STATE_ESTABLISHED; + cleanup_retrans_entry(cm_node); if (datasize) { cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; - cm_node->state = NES_CM_STATE_MPAREQ_RCVD; - handle_rcv_mpa(cm_node, skb, NES_CM_EVENT_MPA_REQ); - } else { /* rcvd ACK only */ + handle_rcv_mpa(cm_node, skb); + } else { /* rcvd ACK only */ dev_kfree_skb_any(skb); cleanup_retrans_entry(cm_node); } break; case NES_CM_STATE_ESTABLISHED: /* Passive OPEN */ - /* We expect mpa frame to be received only */ + cleanup_retrans_entry(cm_node); if (datasize) { cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; - cm_node->state = NES_CM_STATE_MPAREQ_RCVD; - handle_rcv_mpa(cm_node, skb, - NES_CM_EVENT_MPA_REQ); + handle_rcv_mpa(cm_node, skb); } else drop_packet(skb); break; case NES_CM_STATE_MPAREQ_SENT: + cleanup_retrans_entry(cm_node); cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq); if (datasize) { cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; - handle_rcv_mpa(cm_node, skb, NES_CM_EVENT_CONNECTED); + handle_rcv_mpa(cm_node, skb); } else { /* Could be just an ack pkt.. */ cleanup_retrans_entry(cm_node); dev_kfree_skb_any(skb); @@ -1628,13 +1727,24 @@ static void handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, cleanup_retrans_entry(cm_node); send_reset(cm_node, skb); break; + case NES_CM_STATE_LAST_ACK: + cleanup_retrans_entry(cm_node); + cm_node->state = NES_CM_STATE_CLOSED; + cm_node->cm_id->rem_ref(cm_node->cm_id); + case NES_CM_STATE_CLOSING: + cleanup_retrans_entry(cm_node); + rem_ref_cm_node(cm_node->cm_core, cm_node); + drop_packet(skb); + break; case NES_CM_STATE_FIN_WAIT1: + cleanup_retrans_entry(cm_node); + drop_packet(skb); + cm_node->state = NES_CM_STATE_FIN_WAIT2; + break; case NES_CM_STATE_SYN_SENT: case NES_CM_STATE_FIN_WAIT2: case NES_CM_STATE_TSA: case NES_CM_STATE_MPAREQ_RCVD: - case NES_CM_STATE_LAST_ACK: - case NES_CM_STATE_CLOSING: case NES_CM_STATE_UNKNOWN: default: drop_packet(skb); @@ -1744,6 +1854,7 @@ static void process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb, { enum nes_tcpip_pkt_type pkt_type = NES_PKT_TYPE_UNKNOWN; struct tcphdr *tcph = tcp_hdr(skb); + u32 fin_set = 0; skb_pull(skb, ip_hdr(skb)->ihl << 2); nes_debug(NES_DBG_CM, "process_packet: cm_node=%p state =%d syn=%d " @@ -1756,10 +1867,10 @@ static void process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb, pkt_type = NES_PKT_TYPE_SYN; if (tcph->ack) pkt_type = NES_PKT_TYPE_SYNACK; - } else if (tcph->fin) - pkt_type = NES_PKT_TYPE_FIN; - else if (tcph->ack) + } else if (tcph->ack) pkt_type = NES_PKT_TYPE_ACK; + if (tcph->fin) + fin_set = 1; switch (pkt_type) { case NES_PKT_TYPE_SYN: @@ -1770,15 +1881,16 @@ static void process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb, break; case NES_PKT_TYPE_ACK: handle_ack_pkt(cm_node, skb, tcph); + if (fin_set) + handle_fin_pkt(cm_node); break; case NES_PKT_TYPE_RST: handle_rst_pkt(cm_node, skb, tcph); break; - case NES_PKT_TYPE_FIN: - handle_fin_pkt(cm_node, skb, tcph); - break; default: drop_packet(skb); + if (fin_set) + handle_fin_pkt(cm_node); break; } } @@ -1921,7 +2033,7 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, loopbackremotenode->tcp_cntxt.rcv_wscale; loopbackremotenode->tcp_cntxt.snd_wscale = cm_node->tcp_cntxt.rcv_wscale; - + loopbackremotenode->state = NES_CM_STATE_MPAREQ_RCVD; create_event(loopbackremotenode, NES_CM_EVENT_MPA_REQ); } return cm_node; @@ -1976,7 +2088,11 @@ static int mini_cm_reject(struct nes_cm_core *cm_core, struct ietf_mpa_frame *mpa_frame, struct nes_cm_node *cm_node) { int ret = 0; + int err = 0; int passive_state; + struct nes_cm_event event; + struct iw_cm_id *cm_id = cm_node->cm_id; + struct nes_cm_node *loopback = cm_node->loopbackpartner; nes_debug(NES_DBG_CM, "%s cm_node=%p type=%d state=%d\n", __func__, cm_node, cm_node->tcp_cntxt.client, cm_node->state); @@ -1985,12 +2101,38 @@ static int mini_cm_reject(struct nes_cm_core *cm_core, return ret; cleanup_retrans_entry(cm_node); - passive_state = atomic_add_return(1, &cm_node->passive_state); - cm_node->state = NES_CM_STATE_CLOSED; - if (passive_state == NES_SEND_RESET_EVENT) + if (!loopback) { + passive_state = atomic_add_return(1, &cm_node->passive_state); + if (passive_state == NES_SEND_RESET_EVENT) { + cm_node->state = NES_CM_STATE_CLOSED; + rem_ref_cm_node(cm_core, cm_node); + } else { + ret = send_mpa_reject(cm_node); + if (ret) { + cm_node->state = NES_CM_STATE_CLOSED; + err = send_reset(cm_node, NULL); + if (err) + WARN_ON(1); + } else + cm_id->add_ref(cm_id); + } + } else { + cm_node->cm_id = NULL; + event.cm_node = loopback; + event.cm_info.rem_addr = loopback->rem_addr; + event.cm_info.loc_addr = loopback->loc_addr; + event.cm_info.rem_port = loopback->rem_port; + event.cm_info.loc_port = loopback->loc_port; + event.cm_info.cm_id = loopback->cm_id; + cm_event_mpa_reject(&event); rem_ref_cm_node(cm_core, cm_node); - else - ret = send_reset(cm_node, NULL); + loopback->state = NES_CM_STATE_CLOSING; + + cm_id = loopback->cm_id; + rem_ref_cm_node(cm_core, loopback); + cm_id->rem_ref(cm_id); + } + return ret; } @@ -2027,6 +2169,7 @@ static int mini_cm_close(struct nes_cm_core *cm_core, struct nes_cm_node *cm_nod case NES_CM_STATE_CLOSING: ret = -1; break; + case NES_CM_STATE_MPAREJ_RCVD: case NES_CM_STATE_LISTENING: case NES_CM_STATE_UNKNOWN: case NES_CM_STATE_INITED: @@ -2223,15 +2366,15 @@ static int mini_cm_set(struct nes_cm_core *cm_core, u32 type, u32 value) int ret = 0; switch (type) { - case NES_CM_SET_PKT_SIZE: - cm_core->mtu = value; - break; - case NES_CM_SET_FREE_PKT_Q_SIZE: - cm_core->free_tx_pkt_max = value; - break; - default: - /* unknown set option */ - ret = -EINVAL; + case NES_CM_SET_PKT_SIZE: + cm_core->mtu = value; + break; + case NES_CM_SET_FREE_PKT_Q_SIZE: + cm_core->free_tx_pkt_max = value; + break; + default: + /* unknown set option */ + ret = -EINVAL; } return ret; @@ -2621,9 +2764,7 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) NES_QPCONTEXT_ORDIRD_WRPDU); } else { nesqp->nesqp_context->ird_ord_sizes |= - cpu_to_le32((NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | - NES_QPCONTEXT_ORDIRD_WRPDU | - NES_QPCONTEXT_ORDIRD_ALSMM)); + cpu_to_le32(NES_QPCONTEXT_ORDIRD_WRPDU); } nesqp->skip_lsmm = 1; @@ -2745,23 +2886,35 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) int nes_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) { struct nes_cm_node *cm_node; + struct nes_cm_node *loopback; + struct nes_cm_core *cm_core; atomic_inc(&cm_rejects); cm_node = (struct nes_cm_node *) cm_id->provider_data; + loopback = cm_node->loopbackpartner; cm_core = cm_node->cm_core; + cm_node->cm_id = cm_id; cm_node->mpa_frame_size = sizeof(struct ietf_mpa_frame) + pdata_len; - strcpy(&cm_node->mpa_frame.key[0], IEFT_MPA_KEY_REP); - memcpy(&cm_node->mpa_frame.priv_data, pdata, pdata_len); + if (cm_node->mpa_frame_size > MAX_CM_BUFFER) + return -EINVAL; + + strcpy(&cm_node->mpa_frame.key[0], IEFT_MPA_KEY_REP); + if (loopback) { + memcpy(&loopback->mpa_frame.priv_data, pdata, pdata_len); + loopback->mpa_frame.priv_data_len = pdata_len; + loopback->mpa_frame_size = sizeof(struct ietf_mpa_frame) + + pdata_len; + } else { + memcpy(&cm_node->mpa_frame.priv_data, pdata, pdata_len); + cm_node->mpa_frame.priv_data_len = cpu_to_be16(pdata_len); + } - cm_node->mpa_frame.priv_data_len = cpu_to_be16(pdata_len); cm_node->mpa_frame.rev = mpa_version; cm_node->mpa_frame.flags = IETF_MPA_FLAGS_CRC | IETF_MPA_FLAGS_REJECT; - cm_core->api->reject(cm_core, &cm_node->mpa_frame, cm_node); - - return 0; + return cm_core->api->reject(cm_core, &cm_node->mpa_frame, cm_node); } @@ -3270,13 +3423,56 @@ static void cm_event_mpa_req(struct nes_cm_event *event) cm_event.remote_addr.sin_family = AF_INET; cm_event.remote_addr.sin_port = htons(event->cm_info.rem_port); cm_event.remote_addr.sin_addr.s_addr = htonl(event->cm_info.rem_addr); - - cm_event.private_data = cm_node->mpa_frame_buf; - cm_event.private_data_len = (u8) cm_node->mpa_frame_size; + cm_event.private_data = cm_node->mpa_frame_buf; + cm_event.private_data_len = (u8) cm_node->mpa_frame_size; ret = cm_id->event_handler(cm_id, &cm_event); if (ret) - printk("%s[%u] OFA CM event_handler returned, ret=%d\n", + printk(KERN_ERR "%s[%u] OFA CM event_handler returned, ret=%d\n", + __func__, __LINE__, ret); + return; +} + + +static void cm_event_mpa_reject(struct nes_cm_event *event) +{ + struct iw_cm_id *cm_id; + struct iw_cm_event cm_event; + struct nes_cm_node *cm_node; + int ret; + + cm_node = event->cm_node; + if (!cm_node) + return; + cm_id = cm_node->cm_id; + + atomic_inc(&cm_connect_reqs); + nes_debug(NES_DBG_CM, "cm_node = %p - cm_id = %p, jiffies = %lu\n", + cm_node, cm_id, jiffies); + + cm_event.event = IW_CM_EVENT_CONNECT_REPLY; + cm_event.status = -ECONNREFUSED; + cm_event.provider_data = cm_id->provider_data; + + cm_event.local_addr.sin_family = AF_INET; + cm_event.local_addr.sin_port = htons(event->cm_info.loc_port); + cm_event.local_addr.sin_addr.s_addr = htonl(event->cm_info.loc_addr); + + cm_event.remote_addr.sin_family = AF_INET; + cm_event.remote_addr.sin_port = htons(event->cm_info.rem_port); + cm_event.remote_addr.sin_addr.s_addr = htonl(event->cm_info.rem_addr); + + cm_event.private_data = cm_node->mpa_frame_buf; + cm_event.private_data_len = (u8) cm_node->mpa_frame_size; + + nes_debug(NES_DBG_CM, "call CM_EVENT_MPA_REJECTED, local_addr=%08x, " + "remove_addr=%08x\n", + cm_event.local_addr.sin_addr.s_addr, + cm_event.remote_addr.sin_addr.s_addr); + + ret = cm_id->event_handler(cm_id, &cm_event); + if (ret) + printk(KERN_ERR "%s[%u] OFA CM event_handler returned, ret=%d\n", __func__, __LINE__, ret); return; @@ -3341,6 +3537,14 @@ static void nes_cm_event_handler(struct work_struct *work) cm_event_connected(event); nes_debug(NES_DBG_CM, "CM Event: CONNECTED\n"); break; + case NES_CM_EVENT_MPA_REJECT: + if ((!event->cm_node->cm_id) || + (event->cm_node->state == NES_CM_STATE_TSA)) + break; + cm_event_mpa_reject(event); + nes_debug(NES_DBG_CM, "CM Event: REJECT\n"); + break; + case NES_CM_EVENT_ABORTED: if ((!event->cm_node->cm_id) || (event->cm_node->state == NES_CM_STATE_TSA)) diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h index 4ab2bebf12ac..d5f778202eb7 100644 --- a/drivers/infiniband/hw/nes/nes_cm.h +++ b/drivers/infiniband/hw/nes/nes_cm.h @@ -39,6 +39,9 @@ #define NES_MANAGE_APBVT_DEL 0 #define NES_MANAGE_APBVT_ADD 1 +#define NES_MPA_REQUEST_ACCEPT 1 +#define NES_MPA_REQUEST_REJECT 2 + /* IETF MPA -- defines, enums, structs */ #define IEFT_MPA_KEY_REQ "MPA ID Req Frame" #define IEFT_MPA_KEY_REP "MPA ID Rep Frame" @@ -186,6 +189,7 @@ enum nes_cm_node_state { NES_CM_STATE_ACCEPTING, NES_CM_STATE_MPAREQ_SENT, NES_CM_STATE_MPAREQ_RCVD, + NES_CM_STATE_MPAREJ_RCVD, NES_CM_STATE_TSA, NES_CM_STATE_FIN_WAIT1, NES_CM_STATE_FIN_WAIT2, @@ -278,13 +282,12 @@ struct nes_cm_node { struct nes_timer_entry *send_entry; spinlock_t retrans_list_lock; - struct list_head recv_list; - spinlock_t recv_list_lock; + struct nes_timer_entry *recv_entry; int send_write0; union { struct ietf_mpa_frame mpa_frame; - u8 mpa_frame_buf[NES_CM_DEFAULT_MTU]; + u8 mpa_frame_buf[MAX_CM_BUFFER]; }; u16 mpa_frame_size; struct iw_cm_id *cm_id; @@ -326,6 +329,7 @@ enum nes_cm_event_type { NES_CM_EVENT_MPA_REQ, NES_CM_EVENT_MPA_CONNECT, NES_CM_EVENT_MPA_ACCEPT, + NES_CM_EVENT_MPA_REJECT, NES_CM_EVENT_MPA_ESTABLISHED, NES_CM_EVENT_CONNECTED, NES_CM_EVENT_CLOSED, From 793730bfb6711d6d14629e63845c25a3c14d205e Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Wed, 11 Mar 2009 15:47:18 -0700 Subject: [PATCH 27/30] mlx4_core: Don't perform SET_PORT command for Ethernet ports The same operation is performed when the Ethernet driver initializes the port. Signed-off-by: Yevgeny Petrilin Signed-off-by: Roland Dreier --- drivers/net/mlx4/port.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/net/mlx4/port.c b/drivers/net/mlx4/port.c index 0a057e5dc63b..7cce3342ef8c 100644 --- a/drivers/net/mlx4/port.c +++ b/drivers/net/mlx4/port.c @@ -298,20 +298,17 @@ int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port) { struct mlx4_cmd_mailbox *mailbox; int err; - u8 is_eth = dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); memset(mailbox->buf, 0, 256); - if (is_eth) { - ((u8 *) mailbox->buf)[3] = 6; - ((__be16 *) mailbox->buf)[4] = cpu_to_be16(1 << 15); - ((__be16 *) mailbox->buf)[6] = cpu_to_be16(1 << 15); - } else - ((__be32 *) mailbox->buf)[1] = dev->caps.ib_port_def_cap[port]; - err = mlx4_cmd(dev, mailbox->dma, port, is_eth, MLX4_CMD_SET_PORT, + if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) + return 0; + + ((__be32 *) mailbox->buf)[1] = dev->caps.ib_port_def_cap[port]; + err = mlx4_cmd(dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B); mlx4_free_cmd_mailbox(dev, mailbox); From 27bf91d6a0d5a9c7224e8687754249bba67dd4cf Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Wed, 18 Mar 2009 19:45:11 -0700 Subject: [PATCH 28/30] mlx4_core: Add link type autosensing When a port's link is down (except to driver restart) and the port is configured for auto sensing, we try to sense port link type (Ethernet or InfiniBand) in order to determine how to initialize the port. If the port type needs to be changed, all mlx4 for the device interfaces are unregistered and then registered again with the new port types. Sensing is done with intervals of 3 seconds. Signed-off-by: Yevgeny Petrilin Signed-off-by: Roland Dreier --- drivers/net/mlx4/Makefile | 2 +- drivers/net/mlx4/catas.c | 16 +--- drivers/net/mlx4/eq.c | 16 ++-- drivers/net/mlx4/main.c | 106 +++++++++++++++++------- drivers/net/mlx4/mlx4.h | 27 ++++++- drivers/net/mlx4/sense.c | 156 ++++++++++++++++++++++++++++++++++++ include/linux/mlx4/cmd.h | 1 + include/linux/mlx4/device.h | 6 +- 8 files changed, 278 insertions(+), 52 deletions(-) create mode 100644 drivers/net/mlx4/sense.c diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile index a7a97bf998f8..21040a0d81fe 100644 --- a/drivers/net/mlx4/Makefile +++ b/drivers/net/mlx4/Makefile @@ -1,7 +1,7 @@ obj-$(CONFIG_MLX4_CORE) += mlx4_core.o mlx4_core-y := alloc.o catas.o cmd.o cq.o eq.o fw.o icm.o intf.o main.o mcg.o \ - mr.o pd.o port.o profile.o qp.o reset.o srq.o + mr.o pd.o port.o profile.o qp.o reset.o sense.o srq.o obj-$(CONFIG_MLX4_EN) += mlx4_en.o diff --git a/drivers/net/mlx4/catas.c b/drivers/net/mlx4/catas.c index f094ee00c416..aa9674b7f19c 100644 --- a/drivers/net/mlx4/catas.c +++ b/drivers/net/mlx4/catas.c @@ -42,7 +42,6 @@ enum { static DEFINE_SPINLOCK(catas_lock); static LIST_HEAD(catas_list); -static struct workqueue_struct *catas_wq; static struct work_struct catas_work; static int internal_err_reset = 1; @@ -77,7 +76,7 @@ static void poll_catas(unsigned long dev_ptr) list_add(&priv->catas_err.list, &catas_list); spin_unlock(&catas_lock); - queue_work(catas_wq, &catas_work); + queue_work(mlx4_wq, &catas_work); } } else mod_timer(&priv->catas_err.timer, @@ -146,18 +145,7 @@ void mlx4_stop_catas_poll(struct mlx4_dev *dev) spin_unlock_irq(&catas_lock); } -int __init mlx4_catas_init(void) +void __init mlx4_catas_init(void) { INIT_WORK(&catas_work, catas_reset); - - catas_wq = create_singlethread_workqueue("mlx4_err"); - if (!catas_wq) - return -ENOMEM; - - return 0; -} - -void mlx4_catas_cleanup(void) -{ - destroy_workqueue(catas_wq); } diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c index 2c19bff7cbab..8830dcb92ec8 100644 --- a/drivers/net/mlx4/eq.c +++ b/drivers/net/mlx4/eq.c @@ -163,6 +163,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) int cqn; int eqes_found = 0; int set_ci = 0; + int port; while ((eqe = next_eqe_sw(eq))) { /* @@ -203,11 +204,16 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) break; case MLX4_EVENT_TYPE_PORT_CHANGE: - mlx4_dispatch_event(dev, - eqe->subtype == MLX4_PORT_CHANGE_SUBTYPE_ACTIVE ? - MLX4_DEV_EVENT_PORT_UP : - MLX4_DEV_EVENT_PORT_DOWN, - be32_to_cpu(eqe->event.port_change.port) >> 28); + port = be32_to_cpu(eqe->event.port_change.port) >> 28; + if (eqe->subtype == MLX4_PORT_CHANGE_SUBTYPE_DOWN) { + mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_DOWN, + port); + mlx4_priv(dev)->sense.do_sense_port[port] = 1; + } else { + mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_UP, + port); + mlx4_priv(dev)->sense.do_sense_port[port] = 0; + } break; case MLX4_EVENT_TYPE_CQ_ERROR: diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index 8480f0346844..a66f5b2fd288 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c @@ -51,6 +51,8 @@ MODULE_DESCRIPTION("Mellanox ConnectX HCA low-level driver"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(DRV_VERSION); +struct workqueue_struct *mlx4_wq; + #ifdef CONFIG_MLX4_DEBUG int mlx4_debug_level = 0; @@ -98,24 +100,23 @@ module_param_named(use_prio, use_prio, bool, 0444); MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports " "(0/1, default 0)"); -static int mlx4_check_port_params(struct mlx4_dev *dev, - enum mlx4_port_type *port_type) +int mlx4_check_port_params(struct mlx4_dev *dev, + enum mlx4_port_type *port_type) { int i; for (i = 0; i < dev->caps.num_ports - 1; i++) { - if (port_type[i] != port_type[i+1] && - !(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) { - mlx4_err(dev, "Only same port types supported " - "on this HCA, aborting.\n"); - return -EINVAL; + if (port_type[i] != port_type[i + 1]) { + if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) { + mlx4_err(dev, "Only same port types supported " + "on this HCA, aborting.\n"); + return -EINVAL; + } + if (port_type[i] == MLX4_PORT_TYPE_ETH && + port_type[i + 1] == MLX4_PORT_TYPE_IB) + return -EINVAL; } } - if ((port_type[0] == MLX4_PORT_TYPE_ETH) && - (port_type[1] == MLX4_PORT_TYPE_IB)) { - mlx4_err(dev, "eth-ib configuration is not supported.\n"); - return -EINVAL; - } for (i = 0; i < dev->caps.num_ports; i++) { if (!(port_type[i] & dev->caps.supported_type[i+1])) { @@ -225,6 +226,9 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.port_type[i] = MLX4_PORT_TYPE_IB; else dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH; + dev->caps.possible_type[i] = dev->caps.port_type[i]; + mlx4_priv(dev)->sense.sense_allowed[i] = + dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO; if (dev->caps.log_num_macs > dev_cap->log_max_macs[i]) { dev->caps.log_num_macs = dev_cap->log_max_macs[i]; @@ -263,14 +267,16 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) * Change the port configuration of the device. * Every user of this function must hold the port mutex. */ -static int mlx4_change_port_types(struct mlx4_dev *dev, - enum mlx4_port_type *port_types) +int mlx4_change_port_types(struct mlx4_dev *dev, + enum mlx4_port_type *port_types) { int err = 0; int change = 0; int port; for (port = 0; port < dev->caps.num_ports; port++) { + /* Change the port type only if the new type is different + * from the current, and not set to Auto */ if (port_types[port] != dev->caps.port_type[port + 1]) { change = 1; dev->caps.port_type[port + 1] = port_types[port]; @@ -302,10 +308,17 @@ static ssize_t show_port_type(struct device *dev, struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, port_attr); struct mlx4_dev *mdev = info->dev; + char type[8]; - return sprintf(buf, "%s\n", - mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB ? - "ib" : "eth"); + sprintf(type, "%s", + (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB) ? + "ib" : "eth"); + if (mdev->caps.possible_type[info->port] == MLX4_PORT_TYPE_AUTO) + sprintf(buf, "auto (%s)\n", type); + else + sprintf(buf, "%s\n", type); + + return strlen(buf); } static ssize_t set_port_type(struct device *dev, @@ -317,6 +330,7 @@ static ssize_t set_port_type(struct device *dev, struct mlx4_dev *mdev = info->dev; struct mlx4_priv *priv = mlx4_priv(mdev); enum mlx4_port_type types[MLX4_MAX_PORTS]; + enum mlx4_port_type new_types[MLX4_MAX_PORTS]; int i; int err = 0; @@ -324,26 +338,56 @@ static ssize_t set_port_type(struct device *dev, info->tmp_type = MLX4_PORT_TYPE_IB; else if (!strcmp(buf, "eth\n")) info->tmp_type = MLX4_PORT_TYPE_ETH; + else if (!strcmp(buf, "auto\n")) + info->tmp_type = MLX4_PORT_TYPE_AUTO; else { mlx4_err(mdev, "%s is not supported port type\n", buf); return -EINVAL; } + mlx4_stop_sense(mdev); mutex_lock(&priv->port_mutex); - for (i = 0; i < mdev->caps.num_ports; i++) - types[i] = priv->port[i+1].tmp_type ? priv->port[i+1].tmp_type : - mdev->caps.port_type[i+1]; + /* Possible type is always the one that was delivered */ + mdev->caps.possible_type[info->port] = info->tmp_type; - err = mlx4_check_port_params(mdev, types); + for (i = 0; i < mdev->caps.num_ports; i++) { + types[i] = priv->port[i+1].tmp_type ? priv->port[i+1].tmp_type : + mdev->caps.possible_type[i+1]; + if (types[i] == MLX4_PORT_TYPE_AUTO) + types[i] = mdev->caps.port_type[i+1]; + } + + if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) { + for (i = 1; i <= mdev->caps.num_ports; i++) { + if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) { + mdev->caps.possible_type[i] = mdev->caps.port_type[i]; + err = -EINVAL; + } + } + } + if (err) { + mlx4_err(mdev, "Auto sensing is not supported on this HCA. " + "Set only 'eth' or 'ib' for both ports " + "(should be the same)\n"); + goto out; + } + + mlx4_do_sense_ports(mdev, new_types, types); + + err = mlx4_check_port_params(mdev, new_types); if (err) goto out; - for (i = 1; i <= mdev->caps.num_ports; i++) - priv->port[i].tmp_type = 0; + /* We are about to apply the changes after the configuration + * was verified, no need to remember the temporary types + * any more */ + for (i = 0; i < mdev->caps.num_ports; i++) + priv->port[i + 1].tmp_type = 0; - err = mlx4_change_port_types(mdev, types); + err = mlx4_change_port_types(mdev, new_types); out: + mlx4_start_sense(mdev); mutex_unlock(&priv->port_mutex); return err ? err : count; } @@ -1117,6 +1161,9 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) if (err) goto err_port; + mlx4_sense_init(dev); + mlx4_start_sense(dev); + pci_set_drvdata(pdev, dev); return 0; @@ -1182,6 +1229,7 @@ static void mlx4_remove_one(struct pci_dev *pdev) int p; if (dev) { + mlx4_stop_sense(dev); mlx4_unregister_device(dev); for (p = 1; p <= dev->caps.num_ports; p++) { @@ -1266,9 +1314,11 @@ static int __init mlx4_init(void) if (mlx4_verify_params()) return -EINVAL; - ret = mlx4_catas_init(); - if (ret) - return ret; + mlx4_catas_init(); + + mlx4_wq = create_singlethread_workqueue("mlx4"); + if (!mlx4_wq) + return -ENOMEM; ret = pci_register_driver(&mlx4_driver); return ret < 0 ? ret : 0; @@ -1277,7 +1327,7 @@ static int __init mlx4_init(void) static void __exit mlx4_cleanup(void) { pci_unregister_driver(&mlx4_driver); - mlx4_catas_cleanup(); + destroy_workqueue(mlx4_wq); } module_init(mlx4_init); diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h index e0213bad61c7..5bd79c2b184f 100644 --- a/drivers/net/mlx4/mlx4.h +++ b/drivers/net/mlx4/mlx4.h @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -276,6 +277,13 @@ struct mlx4_port_info { struct mlx4_vlan_table vlan_table; }; +struct mlx4_sense { + struct mlx4_dev *dev; + u8 do_sense_port[MLX4_MAX_PORTS + 1]; + u8 sense_allowed[MLX4_MAX_PORTS + 1]; + struct delayed_work sense_poll; +}; + struct mlx4_priv { struct mlx4_dev dev; @@ -305,6 +313,7 @@ struct mlx4_priv { struct mlx4_uar driver_uar; void __iomem *kar; struct mlx4_port_info port[MLX4_MAX_PORTS + 1]; + struct mlx4_sense sense; struct mutex port_mutex; }; @@ -313,6 +322,10 @@ static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev) return container_of(dev, struct mlx4_priv, dev); } +#define MLX4_SENSE_RANGE (HZ * 3) + +extern struct workqueue_struct *mlx4_wq; + u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap); void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj); u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt, int align); @@ -346,8 +359,7 @@ void mlx4_cleanup_mcg_table(struct mlx4_dev *dev); void mlx4_start_catas_poll(struct mlx4_dev *dev); void mlx4_stop_catas_poll(struct mlx4_dev *dev); -int mlx4_catas_init(void); -void mlx4_catas_cleanup(void); +void mlx4_catas_init(void); int mlx4_restart_one(struct pci_dev *pdev); int mlx4_register_device(struct mlx4_dev *dev); void mlx4_unregister_device(struct mlx4_dev *dev); @@ -379,6 +391,17 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type); void mlx4_handle_catas_err(struct mlx4_dev *dev); +void mlx4_do_sense_ports(struct mlx4_dev *dev, + enum mlx4_port_type *stype, + enum mlx4_port_type *defaults); +void mlx4_start_sense(struct mlx4_dev *dev); +void mlx4_stop_sense(struct mlx4_dev *dev); +void mlx4_sense_init(struct mlx4_dev *dev); +int mlx4_check_port_params(struct mlx4_dev *dev, + enum mlx4_port_type *port_type); +int mlx4_change_port_types(struct mlx4_dev *dev, + enum mlx4_port_type *port_types); + void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table); void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table); diff --git a/drivers/net/mlx4/sense.c b/drivers/net/mlx4/sense.c new file mode 100644 index 000000000000..6d5089ecb5af --- /dev/null +++ b/drivers/net/mlx4/sense.c @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include +#include + +#include + +#include "mlx4.h" + +static int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port, + enum mlx4_port_type *type) +{ + u64 out_param; + int err = 0; + + err = mlx4_cmd_imm(dev, 0, &out_param, port, 0, + MLX4_CMD_SENSE_PORT, MLX4_CMD_TIME_CLASS_B); + if (err) { + mlx4_err(dev, "Sense command failed for port: %d\n", port); + return err; + } + + if (out_param > 2) { + mlx4_err(dev, "Sense returned illegal value: 0x%llx\n", out_param); + return EINVAL; + } + + *type = out_param; + return 0; +} + +void mlx4_do_sense_ports(struct mlx4_dev *dev, + enum mlx4_port_type *stype, + enum mlx4_port_type *defaults) +{ + struct mlx4_sense *sense = &mlx4_priv(dev)->sense; + int err; + int i; + + for (i = 1; i <= dev->caps.num_ports; i++) { + stype[i - 1] = 0; + if (sense->do_sense_port[i] && sense->sense_allowed[i] && + dev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) { + err = mlx4_SENSE_PORT(dev, i, &stype[i - 1]); + if (err) + stype[i - 1] = defaults[i - 1]; + } else + stype[i - 1] = defaults[i - 1]; + } + + /* + * Adjust port configuration: + * If port 1 sensed nothing and port 2 is IB, set both as IB + * If port 2 sensed nothing and port 1 is Eth, set both as Eth + */ + if (stype[0] == MLX4_PORT_TYPE_ETH) { + for (i = 1; i < dev->caps.num_ports; i++) + stype[i] = stype[i] ? stype[i] : MLX4_PORT_TYPE_ETH; + } + if (stype[dev->caps.num_ports - 1] == MLX4_PORT_TYPE_IB) { + for (i = 0; i < dev->caps.num_ports - 1; i++) + stype[i] = stype[i] ? stype[i] : MLX4_PORT_TYPE_IB; + } + + /* + * If sensed nothing, remain in current configuration. + */ + for (i = 0; i < dev->caps.num_ports; i++) + stype[i] = stype[i] ? stype[i] : defaults[i]; + +} + +static void mlx4_sense_port(struct work_struct *work) +{ + struct delayed_work *delay = container_of(work, struct delayed_work, work); + struct mlx4_sense *sense = container_of(delay, struct mlx4_sense, + sense_poll); + struct mlx4_dev *dev = sense->dev; + struct mlx4_priv *priv = mlx4_priv(dev); + enum mlx4_port_type stype[MLX4_MAX_PORTS]; + + mutex_lock(&priv->port_mutex); + mlx4_do_sense_ports(dev, stype, &dev->caps.port_type[1]); + + if (mlx4_check_port_params(dev, stype)) + goto sense_again; + + if (mlx4_change_port_types(dev, stype)) + mlx4_err(dev, "Failed to change port_types\n"); + +sense_again: + mutex_unlock(&priv->port_mutex); + queue_delayed_work(mlx4_wq , &sense->sense_poll, + round_jiffies_relative(MLX4_SENSE_RANGE)); +} + +void mlx4_start_sense(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_sense *sense = &priv->sense; + + if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) + return; + + queue_delayed_work(mlx4_wq , &sense->sense_poll, + round_jiffies_relative(MLX4_SENSE_RANGE)); +} + +void mlx4_stop_sense(struct mlx4_dev *dev) +{ + cancel_delayed_work_sync(&mlx4_priv(dev)->sense.sense_poll); +} + +void mlx4_sense_init(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_sense *sense = &priv->sense; + int port; + + sense->dev = dev; + for (port = 1; port <= dev->caps.num_ports; port++) + sense->do_sense_port[port] = 1; + + INIT_DELAYED_WORK_DEFERRABLE(&sense->sense_poll, mlx4_sense_port); +} diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index cf9c679ab38b..0f82293a82ed 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -55,6 +55,7 @@ enum { MLX4_CMD_CLOSE_PORT = 0xa, MLX4_CMD_QUERY_HCA = 0xb, MLX4_CMD_QUERY_PORT = 0x43, + MLX4_CMD_SENSE_PORT = 0x4d, MLX4_CMD_SET_PORT = 0xc, MLX4_CMD_ACCESS_DDR = 0x2e, MLX4_CMD_MAP_ICM = 0xffa, diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 8f659cc29960..3aff8a6a389e 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -155,8 +155,9 @@ enum mlx4_qp_region { }; enum mlx4_port_type { - MLX4_PORT_TYPE_IB = 1 << 0, - MLX4_PORT_TYPE_ETH = 1 << 1, + MLX4_PORT_TYPE_IB = 1, + MLX4_PORT_TYPE_ETH = 2, + MLX4_PORT_TYPE_AUTO = 3 }; enum mlx4_special_vlan_idx { @@ -237,6 +238,7 @@ struct mlx4_caps { enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1]; u8 supported_type[MLX4_MAX_PORTS + 1]; u32 port_mask; + enum mlx4_port_type possible_type[MLX4_MAX_PORTS + 1]; }; struct mlx4_buf_list { From a6a47771b113be8e694aedd80f66ea94d05bd8df Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Wed, 18 Mar 2009 19:49:54 -0700 Subject: [PATCH 29/30] IB/mlx4: Unregister IB device prior to CLOSE PORT command According to the ConnectX programmer's reference manual, all operations should be stopped, all QPs should be torn down and all WQEs flushed before the CLOSE_PORT command is invoked. In some cases reversing the order of operations (as implemented now) could cause a loss of completions. Signed-off-by: Yevgeny Petrilin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 61588bd273bd..2ccb9d31771f 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -699,11 +699,12 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) struct mlx4_ib_dev *ibdev = ibdev_ptr; int p; + mlx4_ib_mad_cleanup(ibdev); + ib_unregister_device(&ibdev->ib_dev); + for (p = 1; p <= ibdev->num_ports; ++p) mlx4_CLOSE_PORT(dev, p); - mlx4_ib_mad_cleanup(ibdev); - ib_unregister_device(&ibdev->ib_dev); iounmap(ibdev->uar_map); mlx4_uar_free(dev, &ibdev->priv_uar); mlx4_pd_free(dev, ibdev->priv_pdn); From d1fbe04eee32ed2642cff139b8592866f1d43f41 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 24 Mar 2009 20:44:18 -0700 Subject: [PATCH 30/30] RDMA/cxgb3: Enforce required firmware The cxgb3 NIC driver can handle more firmware versions than iw_cxgb3, and since commit 8207befa ("cxgb3: untie strict FW matching") cxgb3 will load with firmware versions that iw_cxgb3 can't handle. The FW major number indicates a specific interface between the FW and iw_cxgb3. Thus if the major number of the running firmware does not match the required version compiled into iw_cxgb3, then iw_cxgb3 must not register that device. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/cxio_hal.c | 17 +++++++++++++++++ drivers/infiniband/hw/cxgb3/cxio_hal.h | 3 +++ 2 files changed, 20 insertions(+) diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index c2740e790f73..d4d7204c11ed 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -938,6 +938,23 @@ int cxio_rdev_open(struct cxio_rdev *rdev_p) if (!rdev_p->t3cdev_p) rdev_p->t3cdev_p = dev2t3cdev(netdev_p); rdev_p->t3cdev_p->ulp = (void *) rdev_p; + + err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_EMBEDDED_INFO, + &(rdev_p->fw_info)); + if (err) { + printk(KERN_ERR "%s t3cdev_p(%p)->ctl returned error %d.\n", + __func__, rdev_p->t3cdev_p, err); + goto err1; + } + if (G_FW_VERSION_MAJOR(rdev_p->fw_info.fw_vers) != CXIO_FW_MAJ) { + printk(KERN_ERR MOD "fatal firmware version mismatch: " + "need version %u but adapter has version %u\n", + CXIO_FW_MAJ, + G_FW_VERSION_MAJOR(rdev_p->fw_info.fw_vers)); + err = -EINVAL; + goto err1; + } + err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_GET_PARAMS, &(rdev_p->rnic_info)); if (err) { diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h index 656fe47bc84f..e44dc2289471 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.h +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h @@ -61,6 +61,8 @@ #define T3_MAX_DEV_NAME_LEN 32 +#define CXIO_FW_MAJ 7 + struct cxio_hal_ctrl_qp { u32 wptr; u32 rptr; @@ -108,6 +110,7 @@ struct cxio_rdev { struct gen_pool *pbl_pool; struct gen_pool *rqt_pool; struct list_head entry; + struct ch_embedded_info fw_info; }; static inline int cxio_num_stags(struct cxio_rdev *rdev_p)