From: Doug Ledford <dledford@redhat.com> Date: Tue, 14 Apr 2009 15:23:37 -0400 Subject: [openib] qlgc_vnic: update to OFED 1.4.1-rc3 Message-id: 1239737023-31222-11-git-send-email-dledford@redhat.com O-Subject: [Patch RHEL5.4 10/16] [qlgc_vnic] Update to OFED 1.4.1-rc3 version Bugzilla: 476301 Signed-off-by: Doug Ledford <dledford@redhat.com> diff --git a/drivers/infiniband/ulp/qlgc_vnic/Kconfig b/drivers/infiniband/ulp/qlgc_vnic/Kconfig index 0d33757..7b4030e 100644 --- a/drivers/infiniband/ulp/qlgc_vnic/Kconfig +++ b/drivers/infiniband/ulp/qlgc_vnic/Kconfig @@ -1,28 +1,19 @@ config INFINIBAND_QLGC_VNIC - tristate "QLGC_VNIC - Support for QLogic Virtual Ethernet I/O Controller" + tristate "QLogic VNIC - Support for QLogic Ethernet Virtual I/O Controller" depends on INFINIBAND && NETDEVICES && INET ---help--- - Support for the QLogic Virtual Ethernet I/O Controller - (VEx). In conjunction with the VEx, this provides virtual + Support for the QLogic Ethernet Virtual I/O Controller + (EVIC). In conjunction with the EVIC, this provides virtual ethernet interfaces and transports ethernet packets over InfiniBand so that you can communicate with Ethernet networks using your IB device. -config INFINIBAND_QLGC_VNIC_DEBUG - bool "QLGC_VNIC Verbose debugging" - depends on INFINIBAND_QLGC_VNIC - default n - ---help--- - This option causes verbose debugging code to be compiled - into the QLGC_VNIC driver. The output can be turned on via the - vnic_debug module parameter. - config INFINIBAND_QLGC_VNIC_STATS - bool "QLGC_VNIC Statistics" + bool "QLogic VNIC Statistics" depends on INFINIBAND_QLGC_VNIC default n ---help--- This option compiles statistics collecting code into the - data path of the VNIC driver to help in profiling and fine + data path of the QLogic VNIC driver to help in profiling and fine tuning. This adds some overhead in the interest of gathering data. 
diff --git a/drivers/infiniband/ulp/qlgc_vnic/Makefile b/drivers/infiniband/ulp/qlgc_vnic/Makefile index 0586bf7..509dd67 100644 --- a/drivers/infiniband/ulp/qlgc_vnic/Makefile +++ b/drivers/infiniband/ulp/qlgc_vnic/Makefile @@ -7,6 +7,7 @@ qlgc_vnic-y := vnic_main.o \ vnic_data.o \ vnic_netpath.o \ vnic_config.o \ - vnic_sys.o + vnic_sys.o \ + vnic_multicast.o qlgc_vnic-$(CONFIG_INFINIBAND_QLGC_VNIC_STATS) += vnic_stats.o diff --git a/drivers/infiniband/ulp/qlgc_vnic/vnic_config.c b/drivers/infiniband/ulp/qlgc_vnic/vnic_config.c index e9db52d..b2dab66 100644 --- a/drivers/infiniband/ulp/qlgc_vnic/vnic_config.c +++ b/drivers/infiniband/ulp/qlgc_vnic/vnic_config.c @@ -35,23 +35,12 @@ #include <linux/utsname.h> #include <linux/if_vlan.h> -#include <rdma/ib_cache.h> - #include "vnic_util.h" #include "vnic_config.h" #include "vnic_trailer.h" +#include "vnic_main.h" -#define SST_AGN 0x10ULL -#define SST_OUI 0x00066AULL - -enum { - CONTROL_PATH_ID = 0x0, - DATA_PATH_ID = 0x1 -}; - -#define IOC_NUMBER(GUID) (((GUID) >> 32) & 0xFF) - -static u16 max_mtu = MAX_MTU; +u16 vnic_max_mtu = MAX_MTU; static u32 default_no_path_timeout = DEFAULT_NO_PATH_TIMEOUT; static u32 sa_path_rec_get_timeout = SA_PATH_REC_GET_TIMEOUT; @@ -64,8 +53,10 @@ static int use_rx_csum = VNIC_USE_RX_CSUM; static int use_tx_csum = VNIC_USE_TX_CSUM; static u32 control_response_timeout = CONTROL_RSP_TIMEOUT; -module_param(max_mtu, ushort, 0444); -MODULE_PARM_DESC(max_mtu, "Maximum MTU size (1500-9500). Default is 9500"); +static u32 completion_limit = DEFAULT_COMPLETION_LIMIT; + +module_param(vnic_max_mtu, ushort, 0444); +MODULE_PARM_DESC(vnic_max_mtu, "Maximum MTU size (1500-9500). 
Default is 9500"); module_param(default_prefer_primary, bool, 0444); MODULE_PARM_DESC(default_prefer_primary, "Determines if primary path is" @@ -95,6 +86,10 @@ module_param(control_response_timeout, uint, 0444); MODULE_PARM_DESC(control_response_timeout, "Time out value in milliseconds" " to wait for response to control requests"); +module_param(completion_limit, uint, 0444); +MODULE_PARM_DESC(completion_limit, "Maximum completions to process" + " in a single completion callback invocation. Default is 100" + " Minimum value is 10"); static void config_control_defaults(struct control_config *control_config, struct path_param *params) @@ -110,6 +105,9 @@ static void config_control_defaults(struct control_config *control_config, control_config->ib_config.conn_data.path_id = 0; control_config->ib_config.conn_data.vnic_instance = params->instance; control_config->ib_config.conn_data.path_num = 0; + control_config->ib_config.conn_data.features_supported = + __constant_cpu_to_be32((u32) (VNIC_FEAT_IGNORE_VLAN | + VNIC_FEAT_RDMA_IMMED)); dot = strchr(init_utsname()->nodename, '.'); if (dot) @@ -121,7 +119,23 @@ static void config_control_defaults(struct control_config *control_config, len = VNIC_MAX_NODENAME_LEN; memcpy(control_config->ib_config.conn_data.nodename, - init_utsname()->nodename, len); + init_utsname()->nodename, len); + + if (params->ib_multicast == 1) + control_config->ib_multicast = 1; + else if (params->ib_multicast == 0) + control_config->ib_multicast = 0; + else { + /* parameter is not set - enable it by default */ + control_config->ib_multicast = 1; + CONFIG_ERROR ("IOCGUID=%llx INSTANCE=%d IB_MULTICAST defaulted" + " to TRUE\n", be64_to_cpu(params->ioc_guid), + (char)params->instance); + } + + if (control_config->ib_multicast) + control_config->ib_config.conn_data.features_supported |= + __constant_cpu_to_be32(VNIC_FEAT_INBOUND_IB_MC); control_config->ib_config.retry_count = RETRY_COUNT; control_config->ib_config.rnr_retry_count = RETRY_COUNT; @@ 
-132,6 +146,7 @@ static void config_control_defaults(struct control_config *control_config, control_config->ib_config.num_sends = 1; control_config->ib_config.recv_scatter = 1; control_config->ib_config.send_gather = 1; + control_config->ib_config.completion_limit = completion_limit; control_config->num_recvs = control_config->ib_config.num_recvs; @@ -177,6 +192,7 @@ static void config_data_defaults(struct data_config *data_config, data_config->ib_config.recv_scatter = 1; /* not configurable */ data_config->ib_config.send_gather = 2; /* not configurable */ + data_config->ib_config.completion_limit = completion_limit; data_config->num_recvs = data_config->ib_config.num_recvs; data_config->path_id = data_config->ib_config.conn_data.path_id; @@ -187,7 +203,7 @@ static void config_data_defaults(struct data_config *data_config, data_config->host_min.size_recv_pool_entry = cpu_to_be32(BUFFER_SIZE(VLAN_ETH_HLEN + MIN_MTU)); data_config->host_max.size_recv_pool_entry = - cpu_to_be32(BUFFER_SIZE(VLAN_ETH_HLEN + max_mtu)); + cpu_to_be32(BUFFER_SIZE(VLAN_ETH_HLEN + vnic_max_mtu)); data_config->eioc_min.size_recv_pool_entry = cpu_to_be32(BUFFER_SIZE(VLAN_ETH_HLEN + MIN_MTU)); data_config->eioc_max.size_recv_pool_entry = @@ -242,11 +258,11 @@ static void config_path_info_defaults(struct viport_config *config, struct path_param *params) { int i; - ib_get_cached_gid(config->ibdev, config->port, 0, + ib_query_gid(config->ibdev, config->port, 0, &config->path_info.path.sgid); - for (i = 0; i < 16; i++) { + for (i = 0; i < 16; i++) config->path_info.path.dgid.raw[i] = params->dgid[i]; - } + config->path_info.path.pkey = params->pkey; config->path_info.path.numb_path = 1; config->sa_path_rec_get_timeout = sa_path_rec_get_timeout; @@ -263,10 +279,12 @@ static void config_viport_defaults(struct viport_config *config, config->hb_interval = msecs_to_jiffies(VIPORT_HEARTBEAT_INTERVAL); config->hb_timeout = VIPORT_HEARTBEAT_TIMEOUT * 1000; /*hb_timeout needs to be in usec*/ + 
strcpy(config->ioc_string, params->ioc_string); config_path_info_defaults(config, params); config_control_defaults(&config->control_config, params); config_data_defaults(&config->data_config, params); + config->path_info.path.service_id = config->control_config.ib_config.service_id; } static void config_vnic_defaults(struct vnic_config *config) @@ -328,8 +346,8 @@ char *config_viport_name(struct viport_config *config) int config_start(void) { - max_mtu = min_t(u16, max_mtu, MAX_MTU); - max_mtu = max_t(u16, max_mtu, MIN_MTU); + vnic_max_mtu = min_t(u16, vnic_max_mtu, MAX_MTU); + vnic_max_mtu = max_t(u16, vnic_max_mtu, MIN_MTU); sa_path_rec_get_timeout = min_t(u32, sa_path_rec_get_timeout, MAX_SA_TIMEOUT); @@ -337,10 +355,13 @@ int config_start(void) MIN_SA_TIMEOUT); control_response_timeout = min_t(u32, control_response_timeout, - MIN_CONTROL_RSP_TIMEOUT); + MAX_CONTROL_RSP_TIMEOUT); control_response_timeout = max_t(u32, control_response_timeout, - MAX_CONTROL_RSP_TIMEOUT); + MIN_CONTROL_RSP_TIMEOUT); + + completion_limit = max_t(u32, completion_limit, + MIN_COMPLETION_LIMIT); if (!default_no_path_timeout) default_no_path_timeout = DEFAULT_NO_PATH_TIMEOUT; diff --git a/drivers/infiniband/ulp/qlgc_vnic/vnic_config.h b/drivers/infiniband/ulp/qlgc_vnic/vnic_config.h index e0a473d..dca5f98 100644 --- a/drivers/infiniband/ulp/qlgc_vnic/vnic_config.h +++ b/drivers/infiniband/ulp/qlgc_vnic/vnic_config.h @@ -40,6 +40,16 @@ #include "vnic_control.h" #include "vnic_ib.h" +#define SST_AGN 0x10ULL +#define SST_OUI 0x00066AULL + +enum { + CONTROL_PATH_ID = 0x0, + DATA_PATH_ID = 0x1 +}; + +#define IOC_NUMBER(GUID) (((GUID) >> 32) & 0xFF) + enum { VNIC_CLASS_SUBCLASS = 0x2000066A, VNIC_PROTOCOL = 0, @@ -100,11 +110,22 @@ enum { }; enum { - CONTROL_RSP_TIMEOUT = 1000, /* 1 sec */ + /* 5 sec increased for EVIC support for large number of + * host connections + */ + CONTROL_RSP_TIMEOUT = 5000, MIN_CONTROL_RSP_TIMEOUT = 1000, /* 1 sec */ MAX_CONTROL_RSP_TIMEOUT = 60000 /* 60 sec */ }; 
+/* Maximum number of completions to be processed + * during a single completion callback invocation + */ +enum { + DEFAULT_COMPLETION_LIMIT = 100, + MIN_COMPLETION_LIMIT = 10 +}; + /* infiniband connection parameters */ enum { RETRY_COUNT = 3, @@ -123,8 +144,12 @@ enum { #define VNIC_USE_TX_CSUM 1 #define DEFAULT_PREFER_PRIMARY 0 +/* As per IBTA specification, IOCString Maximum length can be 512 bits. */ +#define MAX_IOC_STRING_LEN (512/8) + struct path_param { __be64 ioc_guid; + u8 ioc_string[MAX_IOC_STRING_LEN+1]; u8 port; u8 instance; struct ib_device *ibdev; @@ -135,6 +160,7 @@ struct path_param { int rx_csum; int tx_csum; int heartbeat; + int ib_multicast; }; struct vnic_ib_config { @@ -147,6 +173,7 @@ struct vnic_ib_config { u32 num_recvs; u32 recv_scatter; /* 1 */ u32 send_gather; /* 1 or 2 */ + u32 completion_limit; }; struct control_config { @@ -156,6 +183,7 @@ struct control_config { u16 max_address_entries; u16 min_address_entries; u32 rsp_timeout; + u32 ib_multicast; }; struct data_config { @@ -178,10 +206,11 @@ struct viport_config { u32 sa_path_rec_get_timeout; struct ib_device *ibdev; u32 port; - u32 stats_interval; + unsigned long stats_interval; u32 hb_interval; u32 hb_timeout; __be64 ioc_guid; + u8 ioc_string[MAX_IOC_STRING_LEN+1]; size_t path_idx; }; @@ -196,7 +225,7 @@ struct viport_config { struct vnic_config { struct vnic *vnic; char name[IFNAMSIZ]; - u32 no_path_timeout; + unsigned long no_path_timeout; u32 primary_connect_timeout; u32 primary_reconnect_timeout; u32 primary_switch_timeout; diff --git a/drivers/infiniband/ulp/qlgc_vnic/vnic_control.c b/drivers/infiniband/ulp/qlgc_vnic/vnic_control.c index 5d582db..df4d2a8 100644 --- a/drivers/infiniband/ulp/qlgc_vnic/vnic_control.c +++ b/drivers/infiniband/ulp/qlgc_vnic/vnic_control.c @@ -37,14 +37,14 @@ #include "vnic_util.h" #include "vnic_main.h" #include "vnic_viport.h" -#include "vnic_control.h" -#include "vnic_config.h" -#include "vnic_control_pkt.h" #include "vnic_stats.h" +#define 
vnic_multicast_address(rsp2_address, index) \ + ((rsp2_address)->list_address_ops[index].address[0] & 0x01) + static void control_log_control_packet(struct vnic_control_packet *pkt); -static inline char *control_ifcfg_name(struct control *control) +char *control_ifcfg_name(struct control *control) { if (!control) return "nctl"; @@ -56,7 +56,7 @@ static inline char *control_ifcfg_name(struct control *control) return "nppp"; if (!control->parent->parent->parent->config) return "npppc"; - return (control->parent->parent->parent->config->name); + return control->parent->parent->parent->config->name; } static void control_recv(struct control *control, struct recv_io *recv_io) @@ -101,8 +101,9 @@ static void control_recv_complete(struct io *io) case RSP_RECEIVED: case REQ_COMPLETED: CONTROL_ERROR("%s: Unexpected control" - "response received: CMD = %d\n", - control_ifcfg_name(control), c_hdr->pkt_cmd); + "response received: CMD = %d\n", + control_ifcfg_name(control), + c_hdr->pkt_cmd); control_log_control_packet(pkt); control->req_state = REQ_FAILED; fail = 1; @@ -111,7 +112,7 @@ static void control_recv_complete(struct io *io) case REQ_SENT: if (c_hdr->pkt_cmd != control->last_cmd || c_hdr->pkt_seq_num != control->seq_num) { - CONTROL_ERROR("%s: Incorrect Control Response" + CONTROL_ERROR("%s: Incorrect Control Response " "received\n", control_ifcfg_name(control)); CONTROL_ERROR("%s: Sent control request:\n", @@ -286,7 +287,7 @@ static int control_send(struct control *control, struct send_io *send_io) CONTROL_INFO("%s:Attempt to send in failed state." 
"New CMD: %d Last CMD: %d\n", control_ifcfg_name(control), pkt->hdr.pkt_cmd, - control->last_cmd ); + control->last_cmd); /* stay in REQ_FAILED state*/ break; } @@ -385,9 +386,9 @@ void control_process_async(struct control *control) } } if ((pkt->hdr.pkt_cmd != CMD_REPORT_STATUS) || - pkt->cmd.report_status.is_fatal) { + pkt->cmd.report_status.is_fatal) viport_failure(control->parent); - } + control_recv(control, recv_io); spin_lock_irqsave(&control->io_lock, flags); } @@ -406,9 +407,9 @@ void control_process_async(struct control *control) control_log_control_packet(pkt); if ((pkt->hdr.pkt_type != TYPE_ERR) || (pkt->hdr.pkt_cmd != CMD_REPORT_STATUS) - || pkt->cmd.report_status.is_fatal) { + || pkt->cmd.report_status.is_fatal) viport_failure(control->parent); - } + control_recv(control, recv_io); spin_lock_irqsave(&control->io_lock, flags); } @@ -417,7 +418,7 @@ void control_process_async(struct control *control) control->recv_dma, control->recv_len, DMA_FROM_DEVICE); - CONTROL_INFO("%s: done control_process_async\n", + CONTROL_FUNCTION("%s: done control_process_async\n", control_ifcfg_name(control)); } @@ -467,7 +468,7 @@ static struct recv_io *control_get_rsp(struct control *control) break; case REQ_COMPLETED: recv_io = control->response; - if (!recv_io){ + if (!recv_io) { control->req_state = REQ_FAILED; fail = 1; break; @@ -532,7 +533,8 @@ failure: static int control_chk_vnic_rsp_values(struct control *control, u16 *num_addrs, u8 num_data_paths, - u8 num_lan_switches) + u8 num_lan_switches, + u32 *features) { struct control_config *config = control->config; @@ -570,6 +572,14 @@ static int control_chk_vnic_rsp_values(struct control *control, control_ifcfg_name(control)); goto failure; } + CONTROL_ERROR("%s checking features %x ib_multicast:%d\n", + control_ifcfg_name(control), + *features, config->ib_multicast); + if ((*features & VNIC_FEAT_INBOUND_IB_MC) && !config->ib_multicast) { + /* disable multicast if it is not on in the cfg file, or + if we turned it off 
because join failed */ + *features &= ~VNIC_FEAT_INBOUND_IB_MC; + } return 0; failure: @@ -610,7 +620,8 @@ int control_init_vnic_rsp(struct control *control, u32 *features, if (control_chk_vnic_rsp_values(control, num_addrs, num_data_paths, - num_lan_switches)) + num_lan_switches, + features)) goto failure; control->lan_switch.lan_switch_num = @@ -721,14 +732,14 @@ static int check_recv_pool_config(struct vnic_recv_pool_config *src, "free_recv_pool_entries_per_update")) goto failure; - if (!is_power_of2(be32_to_cpu(dst->num_recv_pool_entries))) { + if (!is_power_of_2(be32_to_cpu(dst->num_recv_pool_entries))) { CONTROL_ERROR("num_recv_pool_entries (%d)" " must be power of 2\n", dst->num_recv_pool_entries); goto failure; } - if (!is_power_of2(be32_to_cpu(dst-> + if (!is_power_of_2(be32_to_cpu(dst-> free_recv_pool_entries_per_update))) { CONTROL_ERROR("free_recv_pool_entries_per_update (%d)" " must be power of 2\n", @@ -759,9 +770,9 @@ failure: return -1; } -int control_config_data_path_req(struct control * control, u64 path_id, - struct vnic_recv_pool_config * host, - struct vnic_recv_pool_config * eioc) +int control_config_data_path_req(struct control *control, u64 path_id, + struct vnic_recv_pool_config *host, + struct vnic_recv_pool_config *eioc) { struct send_io *send_io; struct vnic_control_packet *pkt; @@ -801,13 +812,13 @@ failure: return -1; } -int control_config_data_path_rsp(struct control * control, - struct vnic_recv_pool_config * host, - struct vnic_recv_pool_config * eioc, - struct vnic_recv_pool_config * max_host, - struct vnic_recv_pool_config * max_eioc, - struct vnic_recv_pool_config * min_host, - struct vnic_recv_pool_config * min_eioc) +int control_config_data_path_rsp(struct control *control, + struct vnic_recv_pool_config *host, + struct vnic_recv_pool_config *eioc, + struct vnic_recv_pool_config *max_host, + struct vnic_recv_pool_config *max_eioc, + struct vnic_recv_pool_config *min_host, + struct vnic_recv_pool_config *min_eioc) { struct 
recv_io *recv_io; struct vnic_control_packet *pkt; @@ -860,7 +871,7 @@ out: return -1; } -int control_exchange_pools_req(struct control * control, u64 addr, u32 rkey) +int control_exchange_pools_req(struct control *control, u64 addr, u32 rkey) { struct send_io *send_io; struct vnic_control_packet *pkt; @@ -895,8 +906,8 @@ failure: return -1; } -int control_exchange_pools_rsp(struct control * control, u64 * addr, - u32 * rkey) +int control_exchange_pools_rsp(struct control *control, u64 *addr, + u32 *rkey) { struct recv_io *recv_io; struct vnic_control_packet *pkt; @@ -942,7 +953,7 @@ out: return -1; } -int control_config_link_req(struct control * control, u16 flags, u16 mtu) +int control_config_link_req(struct control *control, u16 flags, u16 mtu) { struct send_io *send_io; struct vnic_cmd_config_link *config_link_req; @@ -997,8 +1008,7 @@ failure: return -1; } -int control_config_link_rsp(struct control * control, u16 * flags, - u16 * mtu) +int control_config_link_rsp(struct control *control, u16 *flags, u16 *mtu) { struct recv_io *recv_io; struct vnic_control_packet *pkt; @@ -1027,6 +1037,32 @@ int control_config_link_rsp(struct control * control, u16 * flags, *mtu = be16_to_cpu(config_link_rsp->mtu_size); + if (control->parent->features_supported & VNIC_FEAT_INBOUND_IB_MC) { + /* featuresSupported might include INBOUND_IB_MC but + MTU might cause it to be auto-disabled at embedded */ + if (config_link_rsp->cmd_flags & VNIC_FLAG_ENABLE_MCAST_ALL) { + union ib_gid mgid = config_link_rsp->allmulti_mgid; + if (mgid.raw[0] != 0xff) { + CONTROL_ERROR("%s: invalid formatprefix " + VNIC_GID_FMT "\n", + control_ifcfg_name(control), + VNIC_GID_RAW_ARG(mgid.raw)); + } else { + /* rather than issuing join here, which might + * arrive at SM before EVIC creates the MC + * group, postpone it. 
+ */ + vnic_mc_join_setup(control->parent, &mgid); + CONTROL_ERROR("join setup for ALL_MULTI\n"); + } + } + /* we don't want to leave mcast group if MCAST_ALL is disabled + * because there are no doubt multicast addresses set and we + * want to stay joined so we can get that traffic via the + * mcast group. + */ + } + control_recv(control, recv_io); ib_dma_sync_single_for_device(control->parent->config->ibdev, control->recv_dma, control->recv_len, @@ -1049,7 +1085,7 @@ out: * 1: complete */ int control_config_addrs_req(struct control *control, - struct vnic_address_op *addrs, u16 num) + struct vnic_address_op2 *addrs, u16 num) { u16 i; u8 j; @@ -1057,6 +1093,7 @@ int control_config_addrs_req(struct control *control, struct send_io *send_io; struct vnic_control_packet *pkt; struct vnic_cmd_config_addresses *config_addrs_req; + struct vnic_cmd_config_addresses2 *config_addrs_req2; CONTROL_FUNCTION("%s: control_config_addrs_req()\n", control_ifcfg_name(control)); @@ -1064,26 +1101,87 @@ int control_config_addrs_req(struct control *control, control->send_dma, control->send_len, DMA_TO_DEVICE); - send_io = control_init_hdr(control, CMD_CONFIG_ADDRESSES); - if (!send_io) - goto failure; - - pkt = control_packet(send_io); - config_addrs_req = &pkt->cmd.config_addresses_req; - config_addrs_req->lan_switch_num = - control->lan_switch.lan_switch_num; - for (i = 0, j = 0; (i < num) && (j < 16); i++) { - if (!addrs[i].operation) - continue; - config_addrs_req->list_address_ops[j].index = cpu_to_be16(i); - config_addrs_req->list_address_ops[j].operation = + if (control->parent->features_supported & VNIC_FEAT_INBOUND_IB_MC) { + CONTROL_INFO("Sending CMD_CONFIG_ADDRESSES2 %lx MAX:%d " + "sizes:%d %d(off:%d) sizes2:%d %d %d" + "(off:%d - %d %d %d %d %d %d %d)\n", jiffies, + (int)MAX_CONFIG_ADDR_ENTRIES2, + (int)sizeof(struct vnic_cmd_config_addresses), + (int)sizeof(struct vnic_address_op), + (int)offsetof(struct vnic_cmd_config_addresses, + list_address_ops), + (int)sizeof(struct 
vnic_cmd_config_addresses2), + (int)sizeof(struct vnic_address_op2), + (int)sizeof(union ib_gid), + (int)offsetof(struct vnic_cmd_config_addresses2, + list_address_ops), + (int)offsetof(struct vnic_address_op2, index), + (int)offsetof(struct vnic_address_op2, operation), + (int)offsetof(struct vnic_address_op2, valid), + (int)offsetof(struct vnic_address_op2, address), + (int)offsetof(struct vnic_address_op2, vlan), + (int)offsetof(struct vnic_address_op2, reserved), + (int)offsetof(struct vnic_address_op2, mgid) + ); + send_io = control_init_hdr(control, CMD_CONFIG_ADDRESSES2); + if (!send_io) + goto failure; + + pkt = control_packet(send_io); + config_addrs_req2 = &pkt->cmd.config_addresses_req2; + memset(pkt->cmd.cmd_data, 0, VNIC_MAX_CONTROLDATASZ); + config_addrs_req2->lan_switch_num = + control->lan_switch.lan_switch_num; + for (i = 0, j = 0; (i < num) && (j < MAX_CONFIG_ADDR_ENTRIES2); i++) { + if (!addrs[i].operation) + continue; + config_addrs_req2->list_address_ops[j].index = + cpu_to_be16(i); + config_addrs_req2->list_address_ops[j].operation = + VNIC_OP_SET_ENTRY; + config_addrs_req2->list_address_ops[j].valid = + addrs[i].valid; + memcpy(config_addrs_req2->list_address_ops[j].address, + addrs[i].address, ETH_ALEN); + config_addrs_req2->list_address_ops[j].vlan = + addrs[i].vlan; + addrs[i].operation = 0; + CONTROL_INFO("%s i=%d " + "addr[%d]=%02x:%02x:%02x:%02x:%02x:%02x " + "valid:%d\n", control_ifcfg_name(control), i, j, + addrs[i].address[0], addrs[i].address[1], + addrs[i].address[2], addrs[i].address[3], + addrs[i].address[4], addrs[i].address[5], + addrs[i].valid); + j++; + } + config_addrs_req2->num_address_ops = j; + } else { + send_io = control_init_hdr(control, CMD_CONFIG_ADDRESSES); + if (!send_io) + goto failure; + + pkt = control_packet(send_io); + config_addrs_req = &pkt->cmd.config_addresses_req; + config_addrs_req->lan_switch_num = + control->lan_switch.lan_switch_num; + for (i = 0, j = 0; (i < num) && (j < 16); i++) { + if 
(!addrs[i].operation) + continue; + config_addrs_req->list_address_ops[j].index = + cpu_to_be16(i); + config_addrs_req->list_address_ops[j].operation = VNIC_OP_SET_ENTRY; - config_addrs_req->list_address_ops[j].valid = addrs[i].valid; - memcpy(config_addrs_req->list_address_ops[j].address, - addrs[i].address, ETH_ALEN); - config_addrs_req->list_address_ops[j].vlan = addrs[i].vlan; - addrs[i].operation = 0; - j++; + config_addrs_req->list_address_ops[j].valid = + addrs[i].valid; + memcpy(config_addrs_req->list_address_ops[j].address, + addrs[i].address, ETH_ALEN); + config_addrs_req->list_address_ops[j].vlan = + addrs[i].vlan; + addrs[i].operation = 0; + j++; + } + config_addrs_req->num_address_ops = j; } for (; i < num; i++) { if (addrs[i].operation) { @@ -1091,7 +1189,6 @@ int control_config_addrs_req(struct control *control, break; } } - config_addrs_req->num_address_ops = j; control->last_cmd = pkt->hdr.pkt_cmd; ib_dma_sync_single_for_device(control->parent->config->ibdev, @@ -1108,11 +1205,112 @@ failure: return -1; } -int control_config_addrs_rsp(struct control * control) +static int process_cmd_config_address2_rsp(struct control *control, + struct vnic_control_packet *pkt, + struct recv_io *recv_io) +{ + struct vnic_cmd_config_addresses2 *config_addrs_rsp2; + int idx, mcaddrs, nomgid; + union ib_gid mgid, rsp_mgid; + + config_addrs_rsp2 = &pkt->cmd.config_addresses_rsp2; + CONTROL_INFO("%s rsp to CONFIG_ADDRESSES2\n", + control_ifcfg_name(control)); + + for (idx = 0, mcaddrs = 0, nomgid = 1; + idx < config_addrs_rsp2->num_address_ops; + idx++) { + if (!config_addrs_rsp2->list_address_ops[idx].valid) + continue; + + /* check if address is multicasts */ + if (!vnic_multicast_address(config_addrs_rsp2, idx)) + continue; + + mcaddrs++; + mgid = config_addrs_rsp2->list_address_ops[idx].mgid; + CONTROL_INFO("%s: got mgid " VNIC_GID_FMT + " MCAST_MSG_SIZE:%d mtu:%d\n", + control_ifcfg_name(control), + VNIC_GID_RAW_ARG(mgid.raw), + (int)MCAST_MSG_SIZE, + 
control->parent->mtu); + + /* Embedded should have turned off multicast + * due to large MTU size; mgid had better be 0. + */ + if (control->parent->mtu > MCAST_MSG_SIZE) { + if ((mgid.global.subnet_prefix != 0) || + (mgid.global.interface_id != 0)) { + CONTROL_ERROR("%s: invalid mgid; " + "expected 0 " + VNIC_GID_FMT "\n", + control_ifcfg_name(control), + VNIC_GID_RAW_ARG(mgid.raw)); + } + continue; + } + if (mgid.raw[0] != 0xff) { + CONTROL_ERROR("%s: invalid formatprefix " + VNIC_GID_FMT "\n", + control_ifcfg_name(control), + VNIC_GID_RAW_ARG(mgid.raw)); + continue; + } + nomgid = 0; /* got a valid mgid */ + + /* let's verify that all the mgids match this one */ + for (; idx < config_addrs_rsp2->num_address_ops; idx++) { + if (!config_addrs_rsp2->list_address_ops[idx].valid) + continue; + + /* check if address is multicasts */ + if (!vnic_multicast_address(config_addrs_rsp2, idx)) + continue; + + rsp_mgid = config_addrs_rsp2->list_address_ops[idx].mgid; + if (memcmp(&mgid, &rsp_mgid, sizeof(union ib_gid)) == 0) + continue; + + CONTROL_ERROR("%s: Multicast Group MGIDs not " + "unique; mgids: " VNIC_GID_FMT + " " VNIC_GID_FMT "\n", + control_ifcfg_name(control), + VNIC_GID_RAW_ARG(mgid.raw), + VNIC_GID_RAW_ARG(rsp_mgid.raw)); + return 1; + } + + /* rather than issuing join here, which might arrive + * at SM before EVIC creates the MC group, postpone it. + */ + vnic_mc_join_setup(control->parent, &mgid); + + /* there is only one multicast group to join, so we're done. */ + break; + } + + /* we sent atleast one multicast address but got no MGID + * back so, if it is not allmulti case, leave the group + * we joined before. 
(for allmulti case we have to stay + * joined) + */ + if ((config_addrs_rsp2->num_address_ops > 0) && (mcaddrs > 0) && + nomgid && !(control->parent->flags & IFF_ALLMULTI)) { + CONTROL_INFO("numaddrops:%d mcadrs:%d nomgid:%d\n", + config_addrs_rsp2->num_address_ops, + mcaddrs > 0, nomgid); + + vnic_mc_leave(control->parent); + } + + return 0; +} + +int control_config_addrs_rsp(struct control *control) { struct recv_io *recv_io; struct vnic_control_packet *pkt; - struct vnic_cmd_config_addresses *config_addrs_rsp; CONTROL_FUNCTION("%s: control_config_addrs_rsp()\n", control_ifcfg_name(control)); @@ -1125,9 +1323,28 @@ int control_config_addrs_rsp(struct control * control) goto out; pkt = control_packet(recv_io); - if (pkt->hdr.pkt_cmd != CMD_CONFIG_ADDRESSES) + if ((pkt->hdr.pkt_cmd != CMD_CONFIG_ADDRESSES) && + (pkt->hdr.pkt_cmd != CMD_CONFIG_ADDRESSES2)) + goto failure; + + if (((pkt->hdr.pkt_cmd == CMD_CONFIG_ADDRESSES2) && + !control->parent->features_supported & VNIC_FEAT_INBOUND_IB_MC) || + ((pkt->hdr.pkt_cmd == CMD_CONFIG_ADDRESSES) && + control->parent->features_supported & VNIC_FEAT_INBOUND_IB_MC)) { + CONTROL_ERROR("%s unexpected response pktCmd:%d flag:%x\n", + control_ifcfg_name(control), pkt->hdr.pkt_cmd, + control->parent->features_supported & + VNIC_FEAT_INBOUND_IB_MC); goto failure; - config_addrs_rsp = &pkt->cmd.config_addresses_rsp; + } + + if (pkt->hdr.pkt_cmd == CMD_CONFIG_ADDRESSES2) { + if (process_cmd_config_address2_rsp(control, pkt, recv_io)) + goto failure; + } else { + struct vnic_cmd_config_addresses *config_addrs_rsp; + config_addrs_rsp = &pkt->cmd.config_addresses_rsp; + } control_recv(control, recv_io); ib_dma_sync_single_for_device(control->parent->config->ibdev, @@ -1143,7 +1360,7 @@ out: return -1; } -int control_report_statistics_req(struct control * control) +int control_report_statistics_req(struct control *control) { struct send_io *send_io; struct vnic_control_packet *pkt; @@ -1176,8 +1393,8 @@ failure: return -1; } -int 
control_report_statistics_rsp(struct control * control, - struct vnic_cmd_report_stats_rsp * stats) +int control_report_statistics_rsp(struct control *control, + struct vnic_cmd_report_stats_rsp *stats) { struct recv_io *recv_io; struct vnic_control_packet *pkt; @@ -1237,7 +1454,7 @@ out: return -1; } -int control_reset_req(struct control * control) +int control_reset_req(struct control *control) { struct send_io *send_io; struct vnic_control_packet *pkt; @@ -1266,7 +1483,7 @@ failure: return -1; } -int control_reset_rsp(struct control * control) +int control_reset_rsp(struct control *control) { struct recv_io *recv_io; struct vnic_control_packet *pkt; @@ -1299,7 +1516,7 @@ out: return -1; } -int control_heartbeat_req(struct control * control, u32 hb_interval) +int control_heartbeat_req(struct control *control, u32 hb_interval) { struct send_io *send_io; struct vnic_control_packet *pkt; @@ -1331,7 +1548,7 @@ failure: return -1; } -int control_heartbeat_rsp(struct control * control) +int control_heartbeat_rsp(struct control *control) { struct recv_io *recv_io; struct vnic_control_packet *pkt; @@ -1367,9 +1584,9 @@ out: return -1; } -static int control_init_recv_ios(struct control * control, - struct viport * viport, - struct vnic_control_packet * pkt) +static int control_init_recv_ios(struct control *control, + struct viport *viport, + struct vnic_control_packet *pkt) { struct io *io; struct ib_device *ibdev = viport->config->ibdev; @@ -1413,7 +1630,7 @@ static int control_init_recv_ios(struct control * control, return 0; unmap_recv: ib_dma_unmap_single(control->parent->config->ibdev, - control->recv_dma, control->send_len, + control->recv_dma, control->recv_len, DMA_FROM_DEVICE); failure: return -1; @@ -1421,12 +1638,12 @@ failure: static int control_init_send_ios(struct control *control, struct viport *viport, - struct vnic_control_packet * pkt) + struct vnic_control_packet *pkt) { - struct io * io; + struct io *io; struct ib_device *ibdev = viport->config->ibdev; 
- control->send_io.virtual_addr = (u8*)pkt; + control->send_io.virtual_addr = (u8 *)pkt; control->send_len = sizeof *pkt; control->send_dma = ib_dma_map_single(ibdev, pkt, control->send_len, @@ -1456,8 +1673,8 @@ failure: return -1; } -int control_init(struct control * control, struct viport * viport, - struct control_config * config, struct ib_pd * pd) +int control_init(struct control *control, struct viport *viport, + struct control_config *config, struct ib_pd *pd) { struct vnic_control_packet *pkt; unsigned int sz; @@ -1469,6 +1686,8 @@ int control_init(struct control * control, struct viport * viport, control->ib_conn.viport = viport; control->ib_conn.ib_config = &config->ib_config; control->ib_conn.state = IB_CONN_UNINITTED; + control->ib_conn.callback_thread = NULL; + control->ib_conn.callback_thread_end = 0; control->req_state = REQ_INACTIVE; control->last_cmd = CMD_INVALID; control->seq_num = 0; @@ -1497,19 +1716,19 @@ int control_init(struct control * control, struct viport * viport, &control->ib_conn); if (IS_ERR(control->ib_conn.cm_id)) { CONTROL_ERROR("creating control CM ID failed\n"); - goto destroy_conn; + goto destroy_mr; } sz = sizeof(struct recv_io) * config->num_recvs; control->recv_ios = vmalloc(sz); - memset(control->recv_ios, 0, sz); if (!control->recv_ios) { CONTROL_ERROR("%s: failed allocating space for recv ios\n", control_ifcfg_name(control)); - goto destroy_conn; + goto destroy_cm_id; } + memset(control->recv_ios, 0, sz); /*One send buffer and num_recvs recv buffers */ control->local_storage = kzalloc(sizeof *pkt * (config->num_recvs + 1), @@ -1519,7 +1738,7 @@ int control_init(struct control * control, struct viport * viport, CONTROL_ERROR("%s: failed allocating space" " for local storage\n", control_ifcfg_name(control)); - goto destroy_conn; + goto free_recv_ios; } pkt = control->local_storage; @@ -1537,8 +1756,13 @@ unmap_send: control->send_dma, control->send_len, DMA_TO_DEVICE); free_storage: - vfree(control->recv_ios); 
kfree(control->local_storage); +free_recv_ios: + vfree(control->recv_ios); +destroy_cm_id: + ib_destroy_cm_id(control->ib_conn.cm_id); +destroy_mr: + ib_dereg_mr(control->mr); destroy_conn: ib_destroy_qp(control->ib_conn.qp); ib_destroy_cq(control->ib_conn.cq); @@ -1552,12 +1776,14 @@ void control_cleanup(struct control *control) control_ifcfg_name(control)); if (ib_send_cm_dreq(control->ib_conn.cm_id, NULL, 0)) - printk(KERN_DEBUG "control CM DREQ sending failed\n"); + CONTROL_ERROR("control CM DREQ sending failed\n"); + control->ib_conn.state = IB_CONN_DISCONNECTED; control_timer_stop(control); control->req_state = REQ_INACTIVE; control->response = NULL; control->last_cmd = CMD_INVALID; + vnic_completion_cleanup(&control->ib_conn); ib_destroy_cm_id(control->ib_conn.cm_id); ib_destroy_qp(control->ib_conn.qp); ib_destroy_cq(control->ib_conn.cq); @@ -1566,7 +1792,7 @@ void control_cleanup(struct control *control) control->send_dma, control->send_len, DMA_TO_DEVICE); ib_dma_unmap_single(control->parent->config->ibdev, - control->recv_dma, control->send_len, + control->recv_dma, control->recv_len, DMA_FROM_DEVICE); vfree(control->recv_ios); kfree(control->local_storage); @@ -1743,83 +1969,86 @@ static void control_log_config_link_pkt(struct vnic_control_packet *pkt) } } -static void control_log_config_addrs_pkt(struct vnic_control_packet *pkt) +static void print_config_addr(struct vnic_address_op *list, + int num_address_ops, size_t mgidoff) { - int i; + int i = 0; - printk(KERN_INFO - " pkt_cmd = CMD_CONFIG_ADDRESSES\n"); - printk(KERN_INFO - " pkt_seq_num = %u," - " pkt_retry_count = %u\n", - pkt->hdr.pkt_seq_num, - pkt->hdr.pkt_retry_count); - printk(KERN_INFO - " num_address_ops = %x," - " lan_switch_num = %d\n", - pkt->cmd.config_addresses_req.num_address_ops, - pkt->cmd.config_addresses_req.lan_switch_num); - for (i = 0; (i < pkt->cmd.config_addresses_req.num_address_ops) - && (i < 16); i++) { - printk(KERN_INFO - " list_address_ops[%u].index" - " = %u\n", - i, 
- be16_to_cpu(pkt->cmd.config_addresses_req. - list_address_ops[i].index)); - switch (pkt->cmd.config_addresses_req. - list_address_ops[i].operation) { + while (i < num_address_ops && i < 16) { + printk(KERN_INFO " list_address_ops[%u].index" + " = %u\n", i, be16_to_cpu(list->index)); + switch (list->operation) { case VNIC_OP_GET_ENTRY: - printk(KERN_INFO - " list_address_ops[%u]." - "operation = VNIC_OP_GET_ENTRY\n", - i); + printk(KERN_INFO " list_address_ops[%u]." + "operation = VNIC_OP_GET_ENTRY\n", i); break; case VNIC_OP_SET_ENTRY: - printk(KERN_INFO - " list_address_ops[%u]." - "operation = VNIC_OP_SET_ENTRY\n", - i); + printk(KERN_INFO " list_address_ops[%u]." + "operation = VNIC_OP_SET_ENTRY\n", i); break; default: - printk(KERN_INFO - " list_address_ops[%u]." - "operation = UNKNOWN(%d)\n", - i, - pkt->cmd.config_addresses_req. - list_address_ops[i].operation); + printk(KERN_INFO " list_address_ops[%u]." + "operation = UNKNOWN(%d)\n", i, + list->operation); break; } + printk(KERN_INFO " list_address_ops[%u].valid" + " = %u\n", i, list->valid); + printk(KERN_INFO " list_address_ops[%u].address" + " = %02x:%02x:%02x:%02x:%02x:%02x\n", i, + list->address[0], list->address[1], + list->address[2], list->address[3], + list->address[4], list->address[5]); + printk(KERN_INFO " list_address_ops[%u].vlan" + " = %u\n", i, be16_to_cpu(list->vlan)); + if (mgidoff) { + printk(KERN_INFO + " list_address_ops[%u].mgid" + " = " VNIC_GID_FMT "\n", i, + VNIC_GID_RAW_ARG((char *)list + mgidoff)); + list = (struct vnic_address_op *) + ((char *)list + sizeof(struct vnic_address_op2)); + } else + list = (struct vnic_address_op *) + ((char *)list + sizeof(struct vnic_address_op)); + i++; + } +} + +static void control_log_config_addrs_pkt(struct vnic_control_packet *pkt, + u8 addresses2) +{ + struct vnic_address_op *list; + int no_address_ops; + + if (addresses2) printk(KERN_INFO - " list_address_ops[%u].valid" - " = %u\n", - i, - pkt->cmd.config_addresses_req. 
- list_address_ops[i].valid); - printk(KERN_INFO - " list_address_ops[%u].address" - " = %02x:%02x:%02x:%02x:%02x:%02x\n", - i, - pkt->cmd.config_addresses_req. - list_address_ops[i].address[0], - pkt->cmd.config_addresses_req. - list_address_ops[i].address[1], - pkt->cmd.config_addresses_req. - list_address_ops[i].address[2], - pkt->cmd.config_addresses_req. - list_address_ops[i].address[3], - pkt->cmd.config_addresses_req. - list_address_ops[i].address[4], - pkt->cmd.config_addresses_req. - list_address_ops[i].address[5]); + " pkt_cmd = CMD_CONFIG_ADDRESSES2\n"); + else printk(KERN_INFO - " list_address_ops[%u].vlan" - " = %u\n", - i, - be16_to_cpu(pkt->cmd.config_addresses_req. - list_address_ops[i].vlan)); + " pkt_cmd = CMD_CONFIG_ADDRESSES\n"); + printk(KERN_INFO " pkt_seq_num = %u," + " pkt_retry_count = %u\n", + pkt->hdr.pkt_seq_num, pkt->hdr.pkt_retry_count); + if (addresses2) { + printk(KERN_INFO " num_address_ops = %x," + " lan_switch_num = %d\n", + pkt->cmd.config_addresses_req2.num_address_ops, + pkt->cmd.config_addresses_req2.lan_switch_num); + list = (struct vnic_address_op *) + pkt->cmd.config_addresses_req2.list_address_ops; + no_address_ops = pkt->cmd.config_addresses_req2.num_address_ops; + print_config_addr(list, no_address_ops, + offsetof(struct vnic_address_op2, mgid)); + } else { + printk(KERN_INFO " num_address_ops = %x," + " lan_switch_num = %d\n", + pkt->cmd.config_addresses_req.num_address_ops, + pkt->cmd.config_addresses_req.lan_switch_num); + list = pkt->cmd.config_addresses_req.list_address_ops; + no_address_ops = pkt->cmd.config_addresses_req.num_address_ops; + print_config_addr(list, no_address_ops, 0); } - } static void control_log_exch_pools_pkt(struct vnic_control_packet *pkt) @@ -1999,7 +2228,10 @@ static void control_log_control_packet(struct vnic_control_packet *pkt) control_log_exch_pools_pkt(pkt); break; case CMD_CONFIG_ADDRESSES: - control_log_config_addrs_pkt(pkt); + control_log_config_addrs_pkt(pkt, 0); + break; + case 
CMD_CONFIG_ADDRESSES2: + control_log_config_addrs_pkt(pkt, 1); break; case CMD_CONFIG_LINK: control_log_config_link_pkt(pkt); diff --git a/drivers/infiniband/ulp/qlgc_vnic/vnic_control.h b/drivers/infiniband/ulp/qlgc_vnic/vnic_control.h index e3ab27a..77307da 100644 --- a/drivers/infiniband/ulp/qlgc_vnic/vnic_control.h +++ b/drivers/infiniband/ulp/qlgc_vnic/vnic_control.h @@ -48,37 +48,38 @@ enum control_timer_state { }; enum control_request_state { - REQ_INACTIVE, /* quiet state, all previous operations done - * response is NULL - * last_cmd = CMD_INVALID - * timer_state = IDLE - */ - REQ_POSTED, /* REQ put on send Q - * response is NULL - * last_cmd = command issued - * timer_state = ACTIVE - */ - REQ_SENT, /* Send completed for REQ - * response is NULL - * last_cmd = command issued - * timer_state = ACTIVE - */ + REQ_INACTIVE, /* quiet state, all previous operations done + * response is NULL + * last_cmd = CMD_INVALID + * timer_state = IDLE + */ + REQ_POSTED, /* REQ put on send Q + * response is NULL + * last_cmd = command issued + * timer_state = ACTIVE + */ + REQ_SENT, /* Send completed for REQ + * response is NULL + * last_cmd = command issued + * timer_state = ACTIVE + */ RSP_RECEIVED, /* Received Resp, but no Send completion yet - * response is response buffer received - * last_cmd = command issued - * timer_state = ACTIVE - */ - REQ_COMPLETED, /* all processing for REQ completed, ready to be gotten - * response is response buffer received - * last_cmd = command issued - * timer_state = ACTIVE - */ - REQ_FAILED, /* processing of REQ/RSP failed. 
- * response is NULL - * last_cmd = CMD_INVALID - * timer_state = IDLE or EXPIRED - * viport has been moved to error state to force recovery - */ + * response is response buffer received + * last_cmd = command issued + * timer_state = ACTIVE + */ + REQ_COMPLETED, /* all processing for REQ completed, ready to be gotten + * response is response buffer received + * last_cmd = command issued + * timer_state = ACTIVE + */ + REQ_FAILED, /* processing of REQ/RSP failed. + * response is NULL + * last_cmd = CMD_INVALID + * timer_state = IDLE or EXPIRED + * viport has been moved to error state to force + * recovery + */ }; struct control { @@ -126,8 +127,8 @@ void control_cleanup(struct control *control); void control_process_async(struct control *control); int control_init_vnic_req(struct control *control); -int control_init_vnic_rsp(struct control *control, u32 * features, - u8 * mac_address, u16 * num_addrs, u16 * vlan); +int control_init_vnic_rsp(struct control *control, u32 *features, + u8 *mac_address, u16 *num_addrs, u16 *vlan); int control_config_data_path_req(struct control *control, u64 path_id, struct vnic_recv_pool_config *host, @@ -143,15 +144,15 @@ int control_config_data_path_rsp(struct control *control, int control_exchange_pools_req(struct control *control, u64 addr, u32 rkey); int control_exchange_pools_rsp(struct control *control, - u64 * addr, u32 * rkey); + u64 *addr, u32 *rkey); int control_config_link_req(struct control *control, u16 flags, u16 mtu); int control_config_link_rsp(struct control *control, - u16 * flags, u16 * mtu); + u16 *flags, u16 *mtu); int control_config_addrs_req(struct control *control, - struct vnic_address_op *addrs, u16 num); + struct vnic_address_op2 *addrs, u16 num); int control_config_addrs_rsp(struct control *control); int control_report_statistics_req(struct control *control); @@ -164,16 +165,16 @@ int control_heartbeat_rsp(struct control *control); int control_reset_req(struct control *control); int control_reset_rsp(struct 
control *control); +#define control_packet(io) \ + ((struct vnic_control_packet *)(io)->virtual_addr) -#define control_packet(io) \ - (struct vnic_control_packet *)(io)->virtual_addr #define control_is_connected(control) \ (vnic_ib_conn_connected(&((control)->ib_conn))) #define control_last_req(control) control_packet(&(control)->send_io) -#define control_features(control) (control)->features_supported +#define control_features(control) ((control)->features_supported) #define control_get_mac_address(control,addr) \ - memcpy(addr,(control)->lan_switch.hw_mac_address, ETH_ALEN) + memcpy(addr, (control)->lan_switch.hw_mac_address, ETH_ALEN) #endif /* VNIC_CONTROL_H_INCLUDED */ diff --git a/drivers/infiniband/ulp/qlgc_vnic/vnic_control_pkt.h b/drivers/infiniband/ulp/qlgc_vnic/vnic_control_pkt.h index 6e875a8..1fc62fb 100644 --- a/drivers/infiniband/ulp/qlgc_vnic/vnic_control_pkt.h +++ b/drivers/infiniband/ulp/qlgc_vnic/vnic_control_pkt.h @@ -34,6 +34,7 @@ #define VNIC_CONTROL_PKT_H_INCLUDED #include <linux/utsname.h> +#include <rdma/ib_verbs.h> #define VNIC_MAX_NODENAME_LEN 64 @@ -42,6 +43,8 @@ struct vnic_connection_data { u8 vnic_instance; u8 path_num; u8 nodename[VNIC_MAX_NODENAME_LEN + 1]; + u8 reserved; /* for alignment */ + __be32 features_supported; }; struct vnic_control_header { @@ -72,7 +75,8 @@ enum { CMD_CLEAR_STATISTICS = 7, CMD_REPORT_STATUS = 8, CMD_RESET = 9, - CMD_HEARTBEAT = 10 + CMD_HEARTBEAT = 10, + CMD_CONFIG_ADDRESSES2 = 11, }; /* pkt_cmd CMD_INIT_VNIC, pkt_type TYPE_REQ data format */ @@ -119,7 +123,13 @@ enum { VNIC_FEAT_FCS_PROPAGATE = 0x0800, VNIC_FEAT_PF_KICK = 0x1000, VNIC_FEAT_PF_FORCE_ROUTE = 0x2000, - VNIC_FEAT_CHASH_OFFLOAD = 0x4000 + VNIC_FEAT_CHASH_OFFLOAD = 0x4000, + /* host send with immediate data */ + VNIC_FEAT_RDMA_IMMED = 0x8000, + /* host ignore inbound PF_VLAN_INSERT flag */ + VNIC_FEAT_IGNORE_VLAN = 0x10000, + /* host supports IB multicast for inbound Ethernet mcast traffic */ + VNIC_FEAT_INBOUND_IB_MC = 0x20000, }; /* pkt_cmd 
CMD_CONFIG_DATA_PATH subdata format */ @@ -158,6 +168,17 @@ struct vnic_address_op { __be16 vlan; }; +/* pkt_cmd CMD_CONFIG_ADDRESSES2 subdata format */ +struct vnic_address_op2 { + __be16 index; + u8 operation; + u8 valid; + u8 address[6]; + __be16 vlan; + u32 reserved; /* for alignment */ + union ib_gid mgid; /* valid in rsp only if both ends support mcast */ +}; + /* operation values */ enum { VNIC_OP_SET_ENTRY = 0x01, @@ -171,6 +192,16 @@ struct vnic_cmd_config_addresses { struct vnic_address_op list_address_ops[1]; }; +/* pkt_cmd CMD_CONFIG_ADDRESSES2 data format */ +struct vnic_cmd_config_addresses2 { + u8 num_address_ops; + u8 lan_switch_num; + u8 reserved1; + u8 reserved2; + u8 reserved3; + struct vnic_address_op2 list_address_ops[1]; +}; + /* CMD_CONFIG_LINK data format */ struct vnic_cmd_config_link { u8 cmd_flags; @@ -178,6 +209,9 @@ struct vnic_cmd_config_link { __be16 mtu_size; __be16 default_vlan; u8 hw_mac_address[6]; + u32 reserved; /* for alignment */ + /* valid in rsp only if both ends support mcast */ + union ib_gid allmulti_mgid; }; /* cmd_flags values */ @@ -275,7 +309,9 @@ struct vnic_control_packet { struct vnic_cmd_exchange_pools exchange_pools_req; struct vnic_cmd_exchange_pools exchange_pools_rsp; struct vnic_cmd_config_addresses config_addresses_req; + struct vnic_cmd_config_addresses2 config_addresses_req2; struct vnic_cmd_config_addresses config_addresses_rsp; + struct vnic_cmd_config_addresses2 config_addresses_rsp2; struct vnic_cmd_config_link config_link_req; struct vnic_cmd_config_link config_link_rsp; struct vnic_cmd_report_stats_req report_statistics_req; @@ -290,4 +326,43 @@ struct vnic_control_packet { } cmd; }; +union ib_gid_cpu { + u8 raw[16]; + struct { + u64 subnet_prefix; + u64 interface_id; + } global; +}; + +static inline void bswap_ib_gid(union ib_gid *mgid1, union ib_gid_cpu *mgid2) +{ + /* swap hi & low */ + __be64 low = mgid1->global.subnet_prefix; + mgid2->global.subnet_prefix = 
be64_to_cpu(mgid1->global.interface_id); + mgid2->global.interface_id = be64_to_cpu(low); +} + +#define VNIC_GID_FMT "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x" + +#define VNIC_GID_RAW_ARG(gid) be16_to_cpu(*(__be16 *)&(gid)[0]), \ + be16_to_cpu(*(__be16 *)&(gid)[2]), \ + be16_to_cpu(*(__be16 *)&(gid)[4]), \ + be16_to_cpu(*(__be16 *)&(gid)[6]), \ + be16_to_cpu(*(__be16 *)&(gid)[8]), \ + be16_to_cpu(*(__be16 *)&(gid)[10]), \ + be16_to_cpu(*(__be16 *)&(gid)[12]), \ + be16_to_cpu(*(__be16 *)&(gid)[14]) + + +/* These defines are used to figure out how many address entries can be passed + * in config_addresses request. + */ +#define MAX_CONFIG_ADDR_ENTRIES \ + ((VNIC_MAX_CONTROLDATASZ - (sizeof(struct vnic_cmd_config_addresses) \ + - sizeof(struct vnic_address_op)))/sizeof(struct vnic_address_op)) +#define MAX_CONFIG_ADDR_ENTRIES2 \ + ((VNIC_MAX_CONTROLDATASZ - (sizeof(struct vnic_cmd_config_addresses2) \ + - sizeof(struct vnic_address_op2)))/sizeof(struct vnic_address_op2)) + + #endif /* VNIC_CONTROL_PKT_H_INCLUDED */ diff --git a/drivers/infiniband/ulp/qlgc_vnic/vnic_data.c b/drivers/infiniband/ulp/qlgc_vnic/vnic_data.c index 66fc15d..a5f20d2 100644 --- a/drivers/infiniband/ulp/qlgc_vnic/vnic_data.c +++ b/drivers/infiniband/ulp/qlgc_vnic/vnic_data.c @@ -38,7 +38,6 @@ #include "vnic_util.h" #include "vnic_viport.h" #include "vnic_main.h" -#include "vnic_config.h" #include "vnic_data.h" #include "vnic_trailer.h" #include "vnic_stats.h" @@ -46,20 +45,25 @@ static void data_received_kick(struct io *io); static void data_xmit_complete(struct io *io); -u32 min_rcv_skb = 60; +static void mc_data_recv_routine(struct io *io); +static void mc_data_post_recvs(struct mc_data *mc_data); +static void mc_data_recv_to_skbuff(struct viport *viport, struct sk_buff *skb, + struct viport_trailer *trailer); + +static u32 min_rcv_skb = 60; module_param(min_rcv_skb, int, 0444); MODULE_PARM_DESC(min_rcv_skb, "Packets of size (in bytes) less than" " or equal this value will be copied during 
receive." " Default 60"); -u32 min_xmt_skb = 60; +static u32 min_xmt_skb = 60; module_param(min_xmt_skb, int, 0444); MODULE_PARM_DESC(min_xmit_skb, "Packets of size (in bytes) less than" " or equal to this value will be copied during transmit." "Default 60"); -int data_init(struct data * data, struct viport * viport, - struct data_config * config, struct ib_pd *pd) +int data_init(struct data *data, struct viport *viport, + struct data_config *config, struct ib_pd *pd) { DATA_FUNCTION("data_init()\n"); @@ -68,6 +72,8 @@ int data_init(struct data * data, struct viport * viport, data->ib_conn.viport = viport; data->ib_conn.ib_config = &config->ib_config; data->ib_conn.state = IB_CONN_UNINITTED; + data->ib_conn.callback_thread = NULL; + data->ib_conn.callback_thread_end = 0; if ((min_xmt_skb < 60) || (min_xmt_skb > 9000)) { DATA_ERROR("min_xmt_skb (%d) must be between 60 and 9000\n", @@ -95,12 +101,15 @@ int data_init(struct data * data, struct viport * viport, if (IS_ERR(data->ib_conn.cm_id)) { DATA_ERROR("creating data CM ID failed\n"); - goto destroy_conn; + goto dereg_mr; } return 0; +dereg_mr: + ib_dereg_mr(data->mr); destroy_conn: + vnic_completion_cleanup(&data->ib_conn); ib_destroy_qp(data->ib_conn.qp); ib_destroy_cq(data->ib_conn.cq); failure: @@ -110,6 +119,7 @@ failure: static void data_post_recvs(struct data *data) { unsigned long flags; + int i = 0; DATA_FUNCTION("data_post_recvs()\n"); spin_lock_irqsave(&data->recv_ios_lock, flags); @@ -124,13 +134,15 @@ static void data_post_recvs(struct data *data) viport_failure(data->parent); return; } + i++; spin_lock_irqsave(&data->recv_ios_lock, flags); } spin_unlock_irqrestore(&data->recv_ios_lock, flags); + DATA_INFO("data posted %d %p\n", i, &data->recv_ios); } -static void data_init_pool_work_reqs(struct data * data, - struct recv_io * recv_io) +static void data_init_pool_work_reqs(struct data *data, + struct recv_io *recv_io) { struct recv_pool *recv_pool = &data->recv_pool; struct xmit_pool *xmit_pool = 
&data->xmit_pool; @@ -197,7 +209,7 @@ static void data_init_pool_work_reqs(struct data * data, xmit_pool->rdma_addr = xmit_pool->buf_pool_dma; } -static void data_init_free_bufs_swrs(struct data * data) +static void data_init_free_bufs_swrs(struct data *data) { struct rdma_io *rdma_io; struct send_io *send_io; @@ -231,7 +243,7 @@ static void data_init_free_bufs_swrs(struct data * data) send_io->io.type = SEND; } -static int data_init_buf_pools(struct data * data) +static int data_init_buf_pools(struct data *data) { struct recv_pool *recv_pool = &data->recv_pool; struct xmit_pool *xmit_pool = &data->xmit_pool; @@ -318,7 +330,7 @@ failure: return -1; } -static void data_init_xmit_pool(struct data * data) +static void data_init_xmit_pool(struct data *data) { struct xmit_pool *xmit_pool = &data->xmit_pool; @@ -333,18 +345,23 @@ static void data_init_xmit_pool(struct data * data) xmit_pool->num_xmit_bufs = xmit_pool->notify_bundle * 2; xmit_pool->next_xmit_buf = 0; xmit_pool->last_comp_buf = xmit_pool->num_xmit_bufs - 1; + /* This assumes that data_init_recv_pool has been called + * before. 
+ */ + data->max_mtu = MAX_PAYLOAD(min((data)->recv_pool.buffer_sz, + (data)->xmit_pool.buffer_sz)) - VLAN_ETH_HLEN; xmit_pool->kick_count = 0; xmit_pool->kick_byte_count = 0; xmit_pool->send_kicks = be32_to_cpu(data-> - eioc_pool_parms.num_recv_pool_entries_before_kick) + eioc_pool_parms.num_recv_pool_entries_before_kick) || be32_to_cpu(data-> - eioc_pool_parms.num_recv_pool_bytes_before_kick); + eioc_pool_parms.num_recv_pool_bytes_before_kick); xmit_pool->kick_bundle = be32_to_cpu(data-> - eioc_pool_parms.num_recv_pool_entries_before_kick); + eioc_pool_parms.num_recv_pool_entries_before_kick); xmit_pool->kick_byte_bundle = be32_to_cpu(data-> eioc_pool_parms.num_recv_pool_bytes_before_kick); @@ -355,7 +372,7 @@ static void data_init_xmit_pool(struct data * data) BUFFER_SIZE(min_xmt_skb) * xmit_pool->num_xmit_bufs; } -static void data_init_recv_pool(struct data * data) +static void data_init_recv_pool(struct data *data) { struct recv_pool *recv_pool = &data->recv_pool; @@ -380,16 +397,21 @@ static void data_init_recv_pool(struct data * data) recv_pool->kick_on_free = 0; } -int data_connect(struct data * data) +int data_connect(struct data *data) { struct xmit_pool *xmit_pool = &data->xmit_pool; struct recv_pool *recv_pool = &data->recv_pool; - struct recv_io * recv_io; + struct recv_io *recv_io; unsigned int sz; struct viport *viport = data->parent; DATA_FUNCTION("data_connect()\n"); + /* Do not interchange the order of the functions + * called below as this will affect the MAX MTU + * calculation + */ + data_init_recv_pool(data); data_init_xmit_pool(data); @@ -470,8 +492,9 @@ static void data_add_free_buffer(struct data *data, int index, bpe = &pool->buf_pool[index]; bpe->rkey = cpu_to_be32(data->mr->rkey); vaddr_dma = ib_dma_map_single(data->parent->config->ibdev, - rdma_dest->data, pool->buffer_sz, DMA_FROM_DEVICE); - if (ib_dma_mapping_error(data->parent->config->ibdev, vaddr_dma)) { + rdma_dest->data, pool->buffer_sz, + DMA_FROM_DEVICE); + if 
(ib_dma_mapping_error(data->parent->config->ibdev, vaddr_dma)) { DATA_ERROR("rdma_dest->data dma map error\n"); goto failure; } @@ -506,10 +529,8 @@ static void data_alloc_buffers(struct data *data, int initial_allocation) GFP_KERNEL); else skb = dev_alloc_skb(pool->buffer_sz + 2); - if (!skb) { - DATA_ERROR("failed to alloc skb\n"); + if (!skb) break; - } skb_reserve(skb, 2); skb_put(skb, pool->buffer_sz); rdma_dest->skb = skb; @@ -570,9 +591,8 @@ static void data_send_free_recv_buffers(struct data *data) next_increment = num_to_send + pool->sz_free_bundle; if ((next_increment <= pool->num_free_bufs) && (pool->next_free_buf + next_increment <= - pool->eioc_pool_sz)) { + pool->eioc_pool_sz)) continue; - } offset = pool->next_free_buf * sizeof(struct buff_pool_entry); @@ -586,7 +606,7 @@ static void data_send_free_recv_buffers(struct data *data) &data->free_bufs_io.io)) { DATA_ERROR("failed to post send\n"); viport_failure(data->parent); - break; + return; } INC(pool->next_free_buf, num_to_send, pool->eioc_pool_sz); pool->num_free_bufs -= num_to_send; @@ -599,8 +619,27 @@ static void data_send_free_recv_buffers(struct data *data) data_send_kick_message(data); } if (pool->num_posted_bufs == 0) { - DATA_ERROR("%s: unable to allocate receive buffers\n", - config_viport_name(data->parent->config)); + struct vnic *vnic = data->parent->vnic; + unsigned long flags; + + spin_lock_irqsave(&vnic->current_path_lock, flags); + if (vnic->current_path == &vnic->primary_path) { + spin_unlock_irqrestore(&vnic->current_path_lock, flags); + DATA_ERROR("%s: primary path: " + "unable to allocate receive buffers\n", + vnic->config->name); + } else { + if (vnic->current_path == &vnic->secondary_path) { + spin_unlock_irqrestore(&vnic->current_path_lock, + flags); + DATA_ERROR("%s: secondary path: " + "unable to allocate receive buffers\n", + vnic->config->name); + } else + spin_unlock_irqrestore(&vnic->current_path_lock, + flags); + } + data->ib_conn.state = IB_CONN_ERRORED; 
viport_failure(data->parent); } } @@ -629,6 +668,12 @@ void data_disconnect(struct data *data) data->kick_timer_on = 0; } + if (ib_send_cm_dreq(data->ib_conn.cm_id, NULL, 0)) + DATA_ERROR("data CM DREQ sending failed\n"); + data->ib_conn.state = IB_CONN_DISCONNECTED; + + vnic_completion_cleanup(&data->ib_conn); + for (i = 0; i < xmit_pool->num_xmit_bufs; i++) { if (xmit_pool->xmit_bufs[i].skb) dev_kfree_skb(xmit_pool->xmit_bufs[i].skb); @@ -672,10 +717,14 @@ void data_disconnect(struct data *data) void data_cleanup(struct data *data) { - if (ib_send_cm_dreq(data->ib_conn.cm_id, NULL, 0)) - printk(KERN_DEBUG "data CM DREQ sending failed\n"); - ib_destroy_cm_id(data->ib_conn.cm_id); + + /* Completion callback cleanup called again. + * This is to cleanup the threads in case there is an + * error before state LINK_DATACONNECT due to which + * data_disconnect is not called. + */ + vnic_completion_cleanup(&data->ib_conn); ib_destroy_qp(data->ib_conn.qp); ib_destroy_cq(data->ib_conn.cq); ib_dereg_mr(data->mr); @@ -775,8 +824,8 @@ static void data_rdma_packet(struct data *data, struct buff_pool_entry *bpe, swr->sg_list[0].lkey = data->mr->lkey; skb_data_dma = ib_dma_map_single(viport->config->ibdev, - skb->data, skb->len, - DMA_TO_DEVICE); + skb->data, skb->len, + DMA_TO_DEVICE); if (ib_dma_mapping_error(viport->config->ibdev, skb_data_dma)) { DATA_ERROR("skb data dma map error\n"); @@ -804,6 +853,15 @@ static void data_rdma_packet(struct data *data, struct buff_pool_entry *bpe, xmit_pool->buf_pool_dma, xmit_pool->buf_pool_len, DMA_TO_DEVICE); + /* If VNIC_FEAT_RDMA_IMMED is supported then change the work request + * opcode to IB_WR_RDMA_WRITE_WITH_IMM + */ + + if (data->parent->features_supported & VNIC_FEAT_RDMA_IMMED) { + swr->ex.imm_data = 0; + swr->opcode = IB_WR_RDMA_WRITE_WITH_IMM; + } + data->xmit_pool.notify_count++; if (data->xmit_pool.notify_count >= data->xmit_pool.notify_bundle) { data->xmit_pool.notify_count = 0; @@ -867,7 +925,7 @@ int 
data_xmit_packet(struct data *data, struct sk_buff *skb) if (skb->sk) trailer->connection_hash_and_valid = 0x40 | ((be16_to_cpu(inet_sk(skb->sk)->sport) + - be16_to_cpu( inet_sk(skb->sk)->dport)) & 0x3f); + be16_to_cpu(inet_sk(skb->sk)->dport)) & 0x3f); trailer->connection_hash_and_valid |= CHV_VALID; @@ -876,6 +934,7 @@ int data_xmit_packet(struct data *data, struct sk_buff *skb) trailer->vlan = *(__be16 *) (skb->data + 14); memmove(skb->data + 4, skb->data, 12); skb_pull(skb, 4); + sz -= 4; trailer->pkt_flags |= PF_VLAN_INSERT; } if (last) @@ -926,7 +985,7 @@ int data_xmit_packet(struct data *data, struct sk_buff *skb) return 0; } -void data_check_xmit_buffers(struct data *data) +static void data_check_xmit_buffers(struct data *data) { struct xmit_pool *pool = &data->xmit_pool; unsigned long flags; @@ -1006,7 +1065,8 @@ static struct sk_buff *data_recv_to_skbuff(struct data *data, else skb->ip_summed = CHECKSUM_NONE; - if (trailer->pkt_flags & PF_VLAN_INSERT) { + if ((trailer->pkt_flags & PF_VLAN_INSERT) && + !(data->parent->features_supported & VNIC_FEAT_IGNORE_VLAN)) { u8 *rv; rv = skb_push(skb, 4); @@ -1063,8 +1123,9 @@ static int data_incoming_recv(struct data *data) DMA_TO_DEVICE); bpe->valid = 0; - ib_dma_sync_single_for_device(data->parent->config->ibdev, pool->buf_pool_dma, - pool->buf_pool_len, DMA_TO_DEVICE); + ib_dma_sync_single_for_device(data->parent->config->ibdev, + pool->buf_pool_dma, pool->buf_pool_len, + DMA_TO_DEVICE); INC(pool->next_full_buf, 1, pool->eioc_pool_sz); pool->num_posted_bufs--; @@ -1118,3 +1179,314 @@ static void data_xmit_complete(struct io *io) data_check_xmit_buffers(data); } + +static int mc_data_alloc_skb(struct ud_recv_io *recv_io, u32 len, + int initial_allocation) +{ + struct sk_buff *skb; + struct mc_data *mc_data = &recv_io->io.viport->mc_data; + + DATA_FUNCTION("mc_data_alloc_skb\n"); + if (initial_allocation) + skb = alloc_skb(len, GFP_KERNEL); + else + skb = dev_alloc_skb(len); + if (!skb) { + DATA_ERROR("failed to 
alloc MULTICAST skb\n"); + return -1; + } + skb_put(skb, len); + recv_io->skb = skb; + + recv_io->skb_data_dma = ib_dma_map_single( + recv_io->io.viport->config->ibdev, + skb->data, skb->len, + DMA_FROM_DEVICE); + + if (ib_dma_mapping_error(recv_io->io.viport->config->ibdev, + recv_io->skb_data_dma)) { + DATA_ERROR("skb data dma map error\n"); + dev_kfree_skb(skb); + return -1; + } + + recv_io->list[0].addr = recv_io->skb_data_dma; + recv_io->list[0].length = sizeof(struct ib_grh); + recv_io->list[0].lkey = mc_data->mr->lkey; + + recv_io->list[1].addr = recv_io->skb_data_dma + sizeof(struct ib_grh); + recv_io->list[1].length = len - sizeof(struct ib_grh); + recv_io->list[1].lkey = mc_data->mr->lkey; + + recv_io->io.rwr.wr_id = (u64)&recv_io->io; + recv_io->io.rwr.sg_list = recv_io->list; + recv_io->io.rwr.num_sge = 2; + recv_io->io.rwr.next = NULL; + + return 0; +} + +static int mc_data_alloc_buffers(struct mc_data *mc_data) +{ + unsigned int i, num; + struct ud_recv_io *bufs = NULL, *recv_io; + + DATA_FUNCTION("mc_data_alloc_buffers\n"); + if (!mc_data->skb_len) { + unsigned int len; + /* align multicast msg buffer on viport_trailer boundary */ + len = (MCAST_MSG_SIZE + VIPORT_TRAILER_ALIGNMENT - 1) & + (~((unsigned int)VIPORT_TRAILER_ALIGNMENT - 1)); + /* + * Add size of grh and trailer - + * note, we don't need a + 4 for vlan because we have room in + * netbuf for grh & trailer and we'll strip them both, so there + * will be room enough to handle the 4 byte insertion for vlan. 
+ */ + len += sizeof(struct ib_grh) + + sizeof(struct viport_trailer); + mc_data->skb_len = len; + DATA_INFO("mc_data->skb_len %d (sizes:%d %d)\n", + len, (int)sizeof(struct ib_grh), + (int)sizeof(struct viport_trailer)); + } + mc_data->recv_len = sizeof(struct ud_recv_io) * mc_data->num_recvs; + bufs = kmalloc(mc_data->recv_len, GFP_KERNEL); + if (!bufs) { + DATA_ERROR("failed to allocate MULTICAST buffers size:%d\n", + mc_data->recv_len); + return -1; + } + DATA_INFO("allocated num_recvs:%d recv_len:%d \n", + mc_data->num_recvs, mc_data->recv_len); + for (num = 0; num < mc_data->num_recvs; num++) { + recv_io = &bufs[num]; + recv_io->len = mc_data->skb_len; + recv_io->io.type = RECV_UD; + recv_io->io.viport = mc_data->parent; + recv_io->io.routine = mc_data_recv_routine; + + if (mc_data_alloc_skb(recv_io, mc_data->skb_len, 1)) { + for (i = 0; i < num; i++) { + recv_io = &bufs[i]; + ib_dma_unmap_single(recv_io->io.viport->config->ibdev, + recv_io->skb_data_dma, + recv_io->skb->len, + DMA_FROM_DEVICE); + dev_kfree_skb(recv_io->skb); + } + kfree(bufs); + return -1; + } + list_add_tail(&recv_io->io.list_ptrs, + &mc_data->avail_recv_ios_list); + } + mc_data->recv_ios = bufs; + return 0; +} + +void vnic_mc_data_cleanup(struct mc_data *mc_data) +{ + unsigned int num; + + DATA_FUNCTION("vnic_mc_data_cleanup()\n"); + vnic_completion_cleanup(&mc_data->ib_conn); + if (!IS_ERR(mc_data->ib_conn.qp)) { + ib_destroy_qp(mc_data->ib_conn.qp); + mc_data->ib_conn.qp = (struct ib_qp *)ERR_PTR(-EINVAL); + } + if (!IS_ERR(mc_data->ib_conn.cq)) { + ib_destroy_cq(mc_data->ib_conn.cq); + mc_data->ib_conn.cq = (struct ib_cq *)ERR_PTR(-EINVAL); + } + if (mc_data->recv_ios) { + for (num = 0; num < mc_data->num_recvs; num++) { + if (mc_data->recv_ios[num].skb) + dev_kfree_skb(mc_data->recv_ios[num].skb); + mc_data->recv_ios[num].skb = NULL; + } + kfree(mc_data->recv_ios); + mc_data->recv_ios = (struct ud_recv_io *)NULL; + } + if (mc_data->mr) { + ib_dereg_mr(mc_data->mr); + mc_data->mr = 
(struct ib_mr *)NULL; + } + DATA_FUNCTION("vnic_mc_data_cleanup done\n"); + +} + +int mc_data_init(struct mc_data *mc_data, struct viport *viport, + struct data_config *config, struct ib_pd *pd) +{ + DATA_FUNCTION("mc_data_init()\n"); + + mc_data->num_recvs = viport->data.config->num_recvs; + + INIT_LIST_HEAD(&mc_data->avail_recv_ios_list); + spin_lock_init(&mc_data->recv_lock); + + mc_data->parent = viport; + mc_data->config = config; + + mc_data->ib_conn.cm_id = NULL; + mc_data->ib_conn.viport = viport; + mc_data->ib_conn.ib_config = &config->ib_config; + mc_data->ib_conn.state = IB_CONN_UNINITTED; + mc_data->ib_conn.callback_thread = NULL; + mc_data->ib_conn.callback_thread_end = 0; + + if (vnic_ib_mc_init(mc_data, viport, pd, + &config->ib_config)) { + DATA_ERROR("vnic_ib_mc_init failed\n"); + goto failure; + } + mc_data->mr = ib_get_dma_mr(pd, + IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE); + if (IS_ERR(mc_data->mr)) { + DATA_ERROR("failed to register memory for" + " mc_data connection\n"); + goto destroy_conn; + } + + if (mc_data_alloc_buffers(mc_data)) + goto dereg_mr; + + mc_data_post_recvs(mc_data); + if (vnic_ib_mc_mod_qp_to_rts(mc_data->ib_conn.qp)) + goto dereg_mr; + + return 0; + +dereg_mr: + ib_dereg_mr(mc_data->mr); + mc_data->mr = (struct ib_mr *)NULL; +destroy_conn: + vnic_completion_cleanup(&mc_data->ib_conn); + ib_destroy_qp(mc_data->ib_conn.qp); + mc_data->ib_conn.qp = (struct ib_qp *)ERR_PTR(-EINVAL); + ib_destroy_cq(mc_data->ib_conn.cq); + mc_data->ib_conn.cq = (struct ib_cq *)ERR_PTR(-EINVAL); +failure: + return -1; +} + +static void mc_data_post_recvs(struct mc_data *mc_data) +{ + unsigned long flags; + int i = 0; + DATA_FUNCTION("mc_data_post_recvs\n"); + spin_lock_irqsave(&mc_data->recv_lock, flags); + while (!list_empty(&mc_data->avail_recv_ios_list)) { + struct io *io = list_entry(mc_data->avail_recv_ios_list.next, + struct io, list_ptrs); + struct ud_recv_io *recv_io = + container_of(io, struct ud_recv_io, io); + 
list_del(&recv_io->io.list_ptrs); + spin_unlock_irqrestore(&mc_data->recv_lock, flags); + if (vnic_ib_mc_post_recv(mc_data, &recv_io->io)) { + viport_failure(mc_data->parent); + return; + } + spin_lock_irqsave(&mc_data->recv_lock, flags); + i++; + } + DATA_INFO("mcdata posted %d %p\n", i, &mc_data->avail_recv_ios_list); + spin_unlock_irqrestore(&mc_data->recv_lock, flags); +} + +static void mc_data_recv_routine(struct io *io) +{ + struct sk_buff *skb; + struct ib_grh *grh; + struct viport_trailer *trailer; + struct mc_data *mc_data; + unsigned long flags; + struct ud_recv_io *recv_io = container_of(io, struct ud_recv_io, io); + union ib_gid_cpu sgid; + + DATA_FUNCTION("mc_data_recv_routine\n"); + skb = recv_io->skb; + grh = (struct ib_grh *)skb->data; + mc_data = &recv_io->io.viport->mc_data; + + ib_dma_unmap_single(recv_io->io.viport->config->ibdev, + recv_io->skb_data_dma, recv_io->skb->len, + DMA_FROM_DEVICE); + + /* first - check if we've got our own mc packet */ + /* convert sgid from host to cpu form before comparing */ + bswap_ib_gid(&grh->sgid, &sgid); + if (cpu_to_be64(sgid.global.interface_id) == + io->viport->config->path_info.path.sgid.global.interface_id) { + DATA_ERROR("dropping - our mc packet\n"); + dev_kfree_skb(skb); + } else { + /* GRH is at head and trailer at end. Remove GRH from head. 
*/ + trailer = (struct viport_trailer *) + (skb->data + recv_io->len - + sizeof(struct viport_trailer)); + skb_pull(skb, sizeof(struct ib_grh)); + if (trailer->connection_hash_and_valid & CHV_VALID) { + mc_data_recv_to_skbuff(io->viport, skb, trailer); + vnic_recv_packet(io->viport->vnic, io->viport->parent, + skb); + vnic_multicast_recv_pkt_stats(io->viport->vnic); + } else { + DATA_ERROR("dropping - no CHV_VALID in HashAndValid\n"); + dev_kfree_skb(skb); + } + } + recv_io->skb = NULL; + if (mc_data_alloc_skb(recv_io, mc_data->skb_len, 0)) + return; + + spin_lock_irqsave(&mc_data->recv_lock, flags); + list_add_tail(&recv_io->io.list_ptrs, &mc_data->avail_recv_ios_list); + spin_unlock_irqrestore(&mc_data->recv_lock, flags); + mc_data_post_recvs(mc_data); + return; +} + +static void mc_data_recv_to_skbuff(struct viport *viport, struct sk_buff *skb, + struct viport_trailer *trailer) +{ + u8 rx_chksum_flags = trailer->rx_chksum_flags; + + /* drop alignment bytes at start */ + skb_pull(skb, trailer->data_alignment_offset); + /* drop excess from end */ + skb_trim(skb, __be16_to_cpu(trailer->data_length)); + + if ((rx_chksum_flags & RX_CHKSUM_FLAGS_LOOPBACK) + || ((rx_chksum_flags & RX_CHKSUM_FLAGS_IP_CHECKSUM_SUCCEEDED) + && ((rx_chksum_flags & RX_CHKSUM_FLAGS_TCP_CHECKSUM_SUCCEEDED) + || (rx_chksum_flags & + RX_CHKSUM_FLAGS_UDP_CHECKSUM_SUCCEEDED)))) + skb->ip_summed = CHECKSUM_UNNECESSARY; + else + skb->ip_summed = CHECKSUM_NONE; + + if ((trailer->pkt_flags & PF_VLAN_INSERT) && + !(viport->features_supported & VNIC_FEAT_IGNORE_VLAN)) { + u8 *rv; + + /* insert VLAN id between source & length */ + DATA_INFO("VLAN adjustment\n"); + rv = skb_push(skb, 4); + memmove(rv, rv + 4, 12); + *(__be16 *) (rv + 12) = __constant_cpu_to_be16(ETH_P_8021Q); + if (trailer->pkt_flags & PF_PVID_OVERRIDDEN) + /* + * Indicates VLAN is 0 but we keep the protocol id. 
+ */ + *(__be16 *) (rv + 14) = trailer->vlan & + __constant_cpu_to_be16(0xF000); + else + *(__be16 *) (rv + 14) = trailer->vlan; + DATA_INFO("vlan:%x\n", *(int *)(rv+14)); + } + + return; +} diff --git a/drivers/infiniband/ulp/qlgc_vnic/vnic_data.h b/drivers/infiniband/ulp/qlgc_vnic/vnic_data.h index 98ed90a..365251d 100644 --- a/drivers/infiniband/ulp/qlgc_vnic/vnic_data.h +++ b/drivers/infiniband/ulp/qlgc_vnic/vnic_data.h @@ -47,7 +47,7 @@ struct rdma_dest { struct list_head list_ptrs; struct sk_buff *skb; u8 *data; - struct viport_trailer *trailer; + struct viport_trailer *trailer __attribute__((aligned(32))); }; struct buff_pool_entry { @@ -120,6 +120,7 @@ struct data { spinlock_t xmit_buf_lock; int kick_timer_on; int connected; + u16 max_mtu; struct timer_list kick_timer; struct completion done; #ifdef CONFIG_INFINIBAND_QLGC_VNIC_STATS @@ -137,6 +138,20 @@ struct data { #endif /* CONFIG_INFINIBAND_QLGC_VNIC_STATS */ }; +struct mc_data { + struct viport *parent; + struct data_config *config; + struct ib_mr *mr; + struct vnic_ib_conn ib_conn; + + u32 num_recvs; + u32 skb_len; + spinlock_t recv_lock; + int recv_len; + struct ud_recv_io *recv_ios; + struct list_head avail_recv_ios_list; +}; + int data_init(struct data *data, struct viport *viport, struct data_config *config, struct ib_pd *pd); @@ -150,27 +165,26 @@ void data_cleanup(struct data *data); #define data_is_connected(data) \ (vnic_ib_conn_connected(&((data)->ib_conn))) -#define data_path_id(data) (data)->config->path_id -#define data_eioc_pool(data) &(data)->eioc_pool_parms -#define data_host_pool(data) &(data)->host_pool_parms -#define data_eioc_pool_min(data) &(data)->config->eioc_min -#define data_host_pool_min(data) &(data)->config->host_min -#define data_eioc_pool_max(data) &(data)->config->eioc_max -#define data_host_pool_max(data) &(data)->config->host_max -#define data_local_pool_addr(data) (data)->xmit_pool.rdma_addr -#define data_local_pool_rkey(data) (data)->xmit_pool.rdma_rkey -#define 
data_remote_pool_addr(data) &(data)->recv_pool.eioc_rdma_addr -#define data_remote_pool_rkey(data) &(data)->recv_pool.eioc_rdma_rkey - -#define data_max_mtu(data) \ - MAX_PAYLOAD(min((data)->recv_pool.buffer_sz, \ - (data)->xmit_pool.buffer_sz)) - VLAN_ETH_HLEN +#define data_path_id(data) ((data)->config->path_id) +#define data_eioc_pool(data) (&(data)->eioc_pool_parms) +#define data_host_pool(data) (&(data)->host_pool_parms) +#define data_eioc_pool_min(data) (&(data)->config->eioc_min) +#define data_host_pool_min(data) (&(data)->config->host_min) +#define data_eioc_pool_max(data) (&(data)->config->eioc_max) +#define data_host_pool_max(data) (&(data)->config->host_max) +#define data_local_pool_addr(data) ((data)->xmit_pool.rdma_addr) +#define data_local_pool_rkey(data) ((data)->xmit_pool.rdma_rkey) +#define data_remote_pool_addr(data) (&(data)->recv_pool.eioc_rdma_addr) +#define data_remote_pool_rkey(data) (&(data)->recv_pool.eioc_rdma_rkey) + +#define data_max_mtu(data) ((data)->max_mtu) + #define data_len(data, trailer) be16_to_cpu(trailer->data_length) #define data_offset(data, trailer) \ - data->recv_pool.buffer_sz - sizeof(struct viport_trailer) \ - - ALIGN(data_len(data, trailer), VIPORT_TRAILER_ALIGNMENT) \ - + trailer->data_alignment_offset + ((data)->recv_pool.buffer_sz - sizeof(struct viport_trailer) \ + - ALIGN(data_len((data), (trailer)), VIPORT_TRAILER_ALIGNMENT) \ + + (trailer->data_alignment_offset)) /* the following macros manipulate ring buffer indexes. * the ring buffer size must be a power of 2. 
@@ -179,4 +193,14 @@ void data_cleanup(struct data *data); #define NEXT(index, size) ADD(index, 1, size) #define INC(index, increment, size) (index) = ADD(index, increment, size) +/* this is max multicast msg embedded will send */ +#define MCAST_MSG_SIZE \ + (2048 - sizeof(struct ib_grh) - sizeof(struct viport_trailer)) + +int mc_data_init(struct mc_data *mc_data, struct viport *viport, + struct data_config *config, + struct ib_pd *pd); + +void vnic_mc_data_cleanup(struct mc_data *mc_data); + #endif /* VNIC_DATA_H_INCLUDED */ diff --git a/drivers/infiniband/ulp/qlgc_vnic/vnic_ib.c b/drivers/infiniband/ulp/qlgc_vnic/vnic_ib.c index 18e2a2a..a78156c 100644 --- a/drivers/infiniband/ulp/qlgc_vnic/vnic_ib.c +++ b/drivers/infiniband/ulp/qlgc_vnic/vnic_ib.c @@ -34,9 +34,9 @@ #include <linux/random.h> #include <linux/netdevice.h> #include <linux/list.h> -#include <rdma/ib_cache.h> #include "vnic_util.h" +#include "vnic_data.h" #include "vnic_config.h" #include "vnic_ib.h" #include "vnic_viport.h" @@ -44,11 +44,15 @@ #include "vnic_main.h" #include "vnic_stats.h" -static int vnic_ib_inited = 0; -extern struct list_head vnic_list; - +static int vnic_ib_inited; static void vnic_add_one(struct ib_device *device); static void vnic_remove_one(struct ib_device *device); +static int vnic_defer_completion(void *ptr); + +static int vnic_ib_mc_init_qp(struct mc_data *mc_data, + struct vnic_ib_config *config, + struct ib_pd *pd, + struct viport_config *viport_config); static struct ib_client vnic_client = { .name = "vnic", @@ -56,14 +60,87 @@ static struct ib_client vnic_client = { .remove = vnic_remove_one }; -static struct ib_sa_client vnic_sa_client; +struct ib_sa_client vnic_sa_client; + +int vnic_ib_init(void) +{ + int ret = -1; + + IB_FUNCTION("vnic_ib_init()\n"); + + /* class has to be registered before + * calling ib_register_client() because, that call + * will trigger vnic_add_port() which will register + * class_device for the port with the parent class + * as vnic_class + 
*/ + ret = class_register(&vnic_class); + if (ret) { + printk(KERN_ERR PFX "couldn't register class" + " infiniband_qlgc_vnic; error %d", ret); + goto out; + } + + ib_sa_register_client(&vnic_sa_client); + ret = ib_register_client(&vnic_client); + if (ret) { + printk(KERN_ERR PFX "couldn't register IB client;" + " error %d", ret); + goto err_ib_reg; + } + + interface_cdev.class_dev.class = &vnic_class; + snprintf(interface_cdev.class_dev.class_id, + BUS_ID_SIZE, "interfaces"); + init_completion(&interface_cdev.released); + ret = class_device_register(&interface_cdev.class_dev); + if (ret) { + printk(KERN_ERR PFX "couldn't register class interfaces;" + " error %d", ret); + goto err_class_dev; + } + ret = class_device_create_file(&interface_cdev.class_dev, + &class_device_attr_delete_vnic); + if (ret) { + printk(KERN_ERR PFX "couldn't create class file" + " 'delete_vnic'; error %d", ret); + goto err_class_file; + } + + ret = class_device_create_file(&interface_cdev.class_dev, + &class_device_attr_force_failover); + if (ret) { + printk(KERN_ERR PFX "couldn't create class file" + " 'force_failover'; error %d", ret); + goto err_force_failover_file; + } -static CLASS_DEVICE_ATTR(create_primary, S_IWUSR, NULL, - vnic_create_primary); -static CLASS_DEVICE_ATTR(create_secondary, S_IWUSR, NULL, - vnic_create_secondary); + ret = class_device_create_file(&interface_cdev.class_dev, + &class_device_attr_unfailover); + if (ret) { + printk(KERN_ERR PFX "couldn't create class file" + " 'unfailover'; error %d", ret); + goto err_unfailover_file; + } + vnic_ib_inited = 1; -static CLASS_DEVICE_ATTR(delete_vnic, S_IWUSR, NULL, vnic_delete); + return ret; +err_unfailover_file: + class_device_remove_file(&interface_cdev.class_dev, + &class_device_attr_force_failover); +err_force_failover_file: + class_device_remove_file(&interface_cdev.class_dev, + &class_device_attr_delete_vnic); +err_class_file: + class_device_unregister(&interface_cdev.class_dev); +err_class_dev: + 
ib_unregister_client(&vnic_client); +err_ib_reg: + ib_sa_unregister_client(&vnic_sa_client); + class_unregister(&vnic_class); +out: + return ret; +} static struct vnic_ib_port *vnic_add_port(struct vnic_ib_device *device, u8 port_num) @@ -83,14 +160,15 @@ static struct vnic_ib_port *vnic_add_port(struct vnic_ib_device *device, snprintf(port->cdev_info.class_dev.class_id, BUS_ID_SIZE, "vnic-%s-%d", device->dev->name, port_num); - if (class_device_register(&port->cdev_info.class_dev)) + if (class_device_register(&port->cdev_info.class_dev)) goto free_port; - if (class_device_create_file(&port->cdev_info.class_dev, - &class_device_attr_create_primary)) + if (class_device_create_file(&port->cdev_info.class_dev, + &class_device_attr_create_primary)) goto err_class; - if (class_device_create_file(&port->cdev_info.class_dev, - &class_device_attr_create_secondary)) + + if (class_device_create_file(&port->cdev_info.class_dev, + &class_device_attr_create_secondary)) goto err_class; return port; @@ -143,6 +221,11 @@ static void vnic_remove_one(struct ib_device *device) vnic_dev = ib_get_client_data(device, &vnic_client); list_for_each_entry_safe(port, tmp_port, &vnic_dev->port_list, list) { + + class_device_remove_file(&port->cdev_info.class_dev, + &class_device_attr_create_primary); + class_device_remove_file(&port->cdev_info.class_dev, + &class_device_attr_create_secondary); class_device_unregister(&port->cdev_info.class_dev); /* * wait for sysfs entries to go away, so that no new vnics @@ -156,8 +239,8 @@ static void vnic_remove_one(struct ib_device *device) /* TODO Only those vnic interfaces associated with * the HCA whose remove event is called should be freed - * Currently all the vnic interfaces are freeed - */ + * Currently all the vnic interfaces are freed + */ while (!list_empty(&vnic_list)) { struct vnic *vnic = @@ -170,74 +253,21 @@ static void vnic_remove_one(struct ib_device *device) } -int vnic_ib_init(void) -{ - int ret = -1; - - 
IB_FUNCTION("vnic_ib_init()\n"); - - /* class has to be registered before - * calling ib_register_client() because, that call - * will trigger vnic_add_port() which will register - * class_device for the port with the parent class - * as vnic_class - */ - ret = class_register(&vnic_class); - if (ret) { - printk(KERN_ERR PFX "couldn't register class" - " infiniband_vnic; error %d", ret); - goto out; - } - - ib_sa_register_client(&vnic_sa_client); - ret = ib_register_client(&vnic_client); - if (ret) { - printk(KERN_ERR PFX "couldn't register IB client;" - " error %d", ret); - goto err_ib_reg; - } - - interface_cdev.class_dev.class = &vnic_class; - snprintf(interface_cdev.class_dev.class_id, - BUS_ID_SIZE, "interfaces"); - init_completion(&interface_cdev.released); - ret = class_device_register(&interface_cdev.class_dev); - if (ret) { - printk(KERN_ERR PFX "couldn't register class interfaces;" - " error %d", ret); - goto err_class_dev; - } - ret = class_device_create_file(&interface_cdev.class_dev, - &class_device_attr_delete_vnic); - if (ret) { - printk(KERN_ERR PFX "couldn't create class file" - " 'delete_vnic'; error %d", ret); - goto err_class_file; - } - - vnic_ib_inited = 1; - - return ret; -err_class_file: - class_device_unregister(&interface_cdev.class_dev); -err_class_dev: - ib_unregister_client(&vnic_client); -err_ib_reg: - ib_sa_unregister_client(&vnic_sa_client); - class_unregister(&vnic_class); -out: - return ret; -} - void vnic_ib_cleanup(void) { IB_FUNCTION("vnic_ib_cleanup()\n"); if (!vnic_ib_inited) return; + class_device_remove_file(&interface_cdev.class_dev, + &class_device_attr_unfailover); + class_device_remove_file(&interface_cdev.class_dev, + &class_device_attr_force_failover); + class_device_remove_file(&interface_cdev.class_dev, + &class_device_attr_delete_vnic); - class_device_unregister(&interface_cdev.class_dev); - wait_for_completion(&interface_cdev.released); + class_device_unregister(&interface_cdev.class_dev); + 
wait_for_completion(&interface_cdev.released); ib_unregister_client(&vnic_client); ib_sa_unregister_client(&vnic_sa_client); @@ -250,13 +280,14 @@ static void vnic_path_rec_completion(int status, { struct vnic_ib_path_info *p = context; p->status = status; + IB_INFO("Service level for VNIC is %d\n", pathrec->sl); if (!status) p->path = *pathrec; complete(&p->done); } -int vnic_ib_get_path(struct netpath *netpath, struct vnic * vnic) +int vnic_ib_get_path(struct netpath *netpath, struct vnic *vnic) { struct viport_config *config = netpath->viport->config; int ret = 0; @@ -269,9 +300,10 @@ int vnic_ib_get_path(struct netpath *netpath, struct vnic * vnic) config->ibdev, config->port, &config->path_info.path, - IB_SA_PATH_REC_DGID | - IB_SA_PATH_REC_SGID | - IB_SA_PATH_REC_NUMB_PATH | + IB_SA_PATH_REC_SERVICE_ID | + IB_SA_PATH_REC_DGID | + IB_SA_PATH_REC_SGID | + IB_SA_PATH_REC_NUMB_PATH | IB_SA_PATH_REC_PKEY, config->sa_path_rec_get_timeout, GFP_KERNEL, @@ -282,15 +314,15 @@ int vnic_ib_get_path(struct netpath *netpath, struct vnic * vnic) if (config->path_info.path_query_id < 0) { IB_ERROR("SA path record query failed; error %d\n", config->path_info.path_query_id); - ret= config->path_info.path_query_id; + ret = config->path_info.path_query_id; goto out; } wait_for_completion(&config->path_info.done); if (config->path_info.status < 0) { - printk(KERN_WARNING PFX "path record query failed for dgid " - "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", + printk(KERN_WARNING PFX "connection not available to dgid " + "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x", (int)be16_to_cpu(*(__be16 *) &config->path_info.path. dgid.raw[0]), (int)be16_to_cpu(*(__be16 *) &config->path_info.path. 
@@ -309,14 +341,11 @@ int vnic_ib_get_path(struct netpath *netpath, struct vnic * vnic) dgid.raw[14])); if (config->path_info.status == -ETIMEDOUT) - printk(KERN_WARNING PFX - "reason: path record query timed out\n"); + printk(KERN_INFO " path query timed out\n"); else if (config->path_info.status == -EIO) - printk(KERN_WARNING PFX - "reason: error in sending path record query\n"); + printk(KERN_INFO " path query sending error\n"); else - printk(KERN_WARNING PFX "reason: error %d in sending" - " path record query\n", + printk(KERN_INFO " error %d\n", config->path_info.status); ret = config->path_info.status; @@ -328,6 +357,31 @@ out: return ret; } +static inline void vnic_ib_handle_completions(struct ib_wc *wc, + struct vnic_ib_conn *ib_conn, + u32 *comp_num, + cycles_t *comp_time) +{ + struct io *io; + + io = (struct io *)(wc->wr_id); + vnic_ib_comp_stats(ib_conn, comp_num); + if (wc->status) { + IB_INFO("completion error wc.status %d" + " wc.opcode %d vendor err 0x%x\n", + wc->status, wc->opcode, wc->vendor_err); + } else if (io) { + vnic_ib_io_stats(io, ib_conn, *comp_time); + if (io->type == RECV_UD) { + struct ud_recv_io *recv_io = + container_of(io, struct ud_recv_io, io); + recv_io->len = wc->byte_len; + } + if (io->routine) + (*io->routine) (io); + } +} + static void ib_qp_event(struct ib_event *event, void *context) { IB_ERROR("QP event %d\n", event->event); @@ -335,36 +389,69 @@ static void ib_qp_event(struct ib_event *event, void *context) static void vnic_ib_completion(struct ib_cq *cq, void *ptr) { - struct ib_wc wc; - struct io *io; struct vnic_ib_conn *ib_conn = ptr; - cycles_t comp_time; - u32 comp_num = 0; + unsigned long flags; + int compl_received; + struct ib_wc wc; + cycles_t comp_time; + u32 comp_num = 0; - vnic_ib_note_comptime_stats(&comp_time); - vnic_ib_callback_stats(ib_conn); - - ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); - while (ib_poll_cq(cq, 1, &wc) > 0) { - io = (struct io *)(wc.wr_id); - vnic_ib_comp_stats(ib_conn, &comp_num); - if 
(wc.status) { -#if 0 - IB_ERROR("completion error wc.status %d" - " wc.opcode %d vendor err 0x%x\n", - wc.status, wc.opcode, wc.vendor_err); -#endif - } else if (io) { - vnic_ib_io_stats(io, ib_conn, comp_time); - if (io->routine) - (*io->routine) (io); + /* for multicast, cm_id is NULL, so skip that test */ + if (ib_conn->cm_id && + (ib_conn->state != IB_CONN_CONNECTED)) + return; + + /* Check if completion processing is taking place in thread + * If not then process completions in this handler, + * else set compl_received if not set, to indicate that + * there are more completions to process in thread. + */ + + spin_lock_irqsave(&ib_conn->compl_received_lock, flags); + compl_received = ib_conn->compl_received; + spin_unlock_irqrestore(&ib_conn->compl_received_lock, flags); + + if (ib_conn->in_thread || compl_received) { + if (!compl_received) { + spin_lock_irqsave(&ib_conn->compl_received_lock, flags); + ib_conn->compl_received = 1; + spin_unlock_irqrestore(&ib_conn->compl_received_lock, + flags); } + wake_up(&(ib_conn->callback_wait_queue)); + } else { + vnic_ib_note_comptime_stats(&comp_time); + vnic_ib_callback_stats(ib_conn); + ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); + while (ib_poll_cq(cq, 1, &wc) > 0) { + vnic_ib_handle_completions(&wc, ib_conn, &comp_num, + &comp_time); + if (ib_conn->cm_id && + ib_conn->state != IB_CONN_CONNECTED) + break; + + /* If we get more completions than the completion limit + * defer completion to the thread + */ + if ((!ib_conn->in_thread) && + (comp_num >= ib_conn->ib_config->completion_limit)) { + ib_conn->in_thread = 1; + spin_lock_irqsave( + &ib_conn->compl_received_lock, flags); + ib_conn->compl_received = 1; + spin_unlock_irqrestore( + &ib_conn->compl_received_lock, flags); + wake_up(&(ib_conn->callback_wait_queue)); + break; + } + + } + vnic_ib_maxio_stats(ib_conn, comp_num); } - vnic_ib_maxio_stats(ib_conn, comp_num); } -static int vnic_ib_mod_qp_to_rts(struct ib_cm_id * cm_id, - struct vnic_ib_conn * ib_conn) +static int 
vnic_ib_mod_qp_to_rts(struct ib_cm_id *cm_id, + struct vnic_ib_conn *ib_conn) { int attr_mask = 0; int ret; @@ -376,25 +463,30 @@ static int vnic_ib_mod_qp_to_rts(struct ib_cm_id * cm_id, qp_attr->qp_state = IB_QPS_RTR; - if ((ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask))) + ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask); + if (ret) goto out; - if((ret = ib_modify_qp(ib_conn->qp, qp_attr, attr_mask))) + ret = ib_modify_qp(ib_conn->qp, qp_attr, attr_mask); + if (ret) goto out; IB_INFO("QP RTR\n"); qp_attr->qp_state = IB_QPS_RTS; - if((ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask))) + ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask); + if (ret) goto out; - if((ret=ib_modify_qp(ib_conn->qp, qp_attr, attr_mask))) + ret = ib_modify_qp(ib_conn->qp, qp_attr, attr_mask); + if (ret) goto out; IB_INFO("QP RTS\n"); - if((ret = ib_send_cm_rtu(cm_id, NULL, 0))) + ret = ib_send_cm_rtu(cm_id, NULL, 0); + if (ret) goto out; out: kfree(qp_attr); @@ -406,7 +498,6 @@ int vnic_ib_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) struct vnic_ib_conn *ib_conn = cm_id->context; struct viport *viport = ib_conn->viport; int err = 0; - int disconn = 0; switch (event->event) { case IB_CM_REQ_ERROR: @@ -425,7 +516,7 @@ int vnic_ib_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) } break; case IB_CM_REJ_RECEIVED: - printk(KERN_ERR PFX "CM rejected control connection \n"); + printk(KERN_ERR PFX " CM rejected control connection\n"); if (event->param.rej_rcvd.reason == IB_CM_REJ_INVALID_SERVICE_ID) printk(KERN_ERR "reason: invalid service ID. 
" @@ -435,7 +526,7 @@ int vnic_ib_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) event->param.rej_rcvd.reason); err = 1; - disconn = 1; + viport->retry = 1; break; case IB_CM_MRA_RECEIVED: IB_INFO("CM MRA received\n"); @@ -457,9 +548,6 @@ int vnic_ib_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) } - if (disconn) - viport->disconnect = 1; - if (err) { ib_conn->state = IB_CONN_DISCONNECTED; viport_failure(viport); @@ -530,10 +618,10 @@ int vnic_ib_cm_connect(struct vnic_ib_conn *ib_conn) return ret; } -static int vnic_ib_init_qp(struct vnic_ib_conn * ib_conn, +static int vnic_ib_init_qp(struct vnic_ib_conn *ib_conn, struct vnic_ib_config *config, struct ib_pd *pd, - struct viport_config * viport_config) + struct viport_config *viport_config) { struct ib_qp_init_attr *init_attr; struct ib_qp_attr *attr; @@ -567,13 +655,11 @@ static int vnic_ib_init_qp(struct vnic_ib_conn * ib_conn, goto destroy_qp; } - ret = ib_find_cached_pkey(viport_config->ibdev, - viport_config->port, - be16_to_cpu(viport_config->path_info.path. 
- pkey), - &attr->pkey_index); + ret = ib_find_pkey(viport_config->ibdev, viport_config->port, + be16_to_cpu(viport_config->path_info.path.pkey), + &attr->pkey_index); if (ret) { - printk(KERN_WARNING PFX "ib_find_cached_pkey() failed; " + printk(KERN_WARNING PFX "ib_find_pkey() failed; " "error %d\n", ret); goto freeattr; } @@ -620,31 +706,51 @@ int vnic_ib_conn_init(struct vnic_ib_conn *ib_conn, struct viport *viport, } ib_conn->cq = ib_create_cq(viport_config->ibdev, vnic_ib_completion, +#ifdef BUILD_FOR_OFED_1_2 + NULL, ib_conn, cq_size); +#else NULL, ib_conn, cq_size, 0); +#endif if (IS_ERR(ib_conn->cq)) { IB_ERROR("could not create CQ\n"); goto out; } + IB_INFO("cq created %p %d\n", ib_conn->cq, cq_size); ib_req_notify_cq(ib_conn->cq, IB_CQ_NEXT_COMP); + init_waitqueue_head(&(ib_conn->callback_wait_queue)); + init_completion(&(ib_conn->callback_thread_exit)); - ret = vnic_ib_init_qp(ib_conn, config, pd, viport_config); + spin_lock_init(&ib_conn->compl_received_lock); - if(ret) + ib_conn->callback_thread = kthread_run(vnic_defer_completion, ib_conn, + "qlgc_vnic_def_compl"); + if (IS_ERR(ib_conn->callback_thread)) { + IB_ERROR("Could not create vnic_callback_thread;" + " error %d\n", (int) PTR_ERR(ib_conn->callback_thread)); + ib_conn->callback_thread = NULL; goto destroy_cq; + } - ib_conn->conn_lock = SPIN_LOCK_UNLOCKED; + ret = vnic_ib_init_qp(ib_conn, config, pd, viport_config); + + if (ret) + goto destroy_thread; + + spin_lock_init(&ib_conn->conn_lock); ib_conn->state = IB_CONN_INITTED; return ret; +destroy_thread: + vnic_completion_cleanup(ib_conn); destroy_cq: ib_destroy_cq(ib_conn->cq); out: return ret; } -int vnic_ib_post_recv(struct vnic_ib_conn * ib_conn, struct io * io) +int vnic_ib_post_recv(struct vnic_ib_conn *ib_conn, struct io *io) { cycles_t post_time; struct ib_recv_wr *bad_wr; @@ -656,14 +762,17 @@ int vnic_ib_post_recv(struct vnic_ib_conn * ib_conn, struct io * io) spin_lock_irqsave(&ib_conn->conn_lock, flags); if 
(!vnic_ib_conn_initted(ib_conn) && - !vnic_ib_conn_connected(ib_conn)) - return -EINVAL; + !vnic_ib_conn_connected(ib_conn)) { + ret = -EINVAL; + goto out; + } vnic_ib_pre_rcvpost_stats(ib_conn, io, &post_time); io->type = RECV; ret = ib_post_recv(ib_conn->qp, &io->rwr, &bad_wr); if (ret) { IB_ERROR("error in posting rcv wr; error %d\n", ret); + ib_conn->state = IB_CONN_ERRORED; goto out; } @@ -674,7 +783,7 @@ out: } -int vnic_ib_post_send(struct vnic_ib_conn * ib_conn, struct io * io) +int vnic_ib_post_send(struct vnic_ib_conn *ib_conn, struct io *io) { cycles_t post_time; unsigned long flags; @@ -699,6 +808,7 @@ int vnic_ib_post_send(struct vnic_ib_conn * ib_conn, struct io * io) ret = ib_post_send(ib_conn->qp, &io->swr, &bad_wr); if (ret) { IB_ERROR("error in posting send wr; error %d\n", ret); + ib_conn->state = IB_CONN_ERRORED; goto out; } @@ -707,3 +817,261 @@ out: spin_unlock_irqrestore(&ib_conn->conn_lock, flags); return ret; } + +static int vnic_defer_completion(void *ptr) +{ + struct vnic_ib_conn *ib_conn = ptr; + struct ib_wc wc; + struct ib_cq *cq = ib_conn->cq; + cycles_t comp_time; + u32 comp_num = 0; + unsigned long flags; + + while (!ib_conn->callback_thread_end) { + wait_event_interruptible(ib_conn->callback_wait_queue, + ib_conn->compl_received || + ib_conn->callback_thread_end); + ib_conn->in_thread = 1; + spin_lock_irqsave(&ib_conn->compl_received_lock, flags); + ib_conn->compl_received = 0; + spin_unlock_irqrestore(&ib_conn->compl_received_lock, flags); + if (ib_conn->cm_id && + ib_conn->state != IB_CONN_CONNECTED) + goto out_thread; + + vnic_ib_note_comptime_stats(&comp_time); + vnic_ib_callback_stats(ib_conn); + ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); + while (ib_poll_cq(cq, 1, &wc) > 0) { + vnic_ib_handle_completions(&wc, ib_conn, &comp_num, + &comp_time); + if (ib_conn->cm_id && + ib_conn->state != IB_CONN_CONNECTED) + break; + } + vnic_ib_maxio_stats(ib_conn, comp_num); +out_thread: + ib_conn->in_thread = 0; + } + 
complete_and_exit(&(ib_conn->callback_thread_exit), 0); + return 0; +} + +void vnic_completion_cleanup(struct vnic_ib_conn *ib_conn) +{ + if (ib_conn->callback_thread) { + ib_conn->callback_thread_end = 1; + wake_up(&(ib_conn->callback_wait_queue)); + wait_for_completion(&(ib_conn->callback_thread_exit)); + ib_conn->callback_thread = NULL; + } +} + +int vnic_ib_mc_init(struct mc_data *mc_data, struct viport *viport, + struct ib_pd *pd, struct vnic_ib_config *config) +{ + struct viport_config *viport_config = viport->config; + int ret = -1; + unsigned int cq_size = config->num_recvs; /* recvs only */ + + IB_FUNCTION("vnic_ib_mc_init\n"); + + mc_data->ib_conn.cq = ib_create_cq(viport_config->ibdev, vnic_ib_completion, +#ifdef BUILD_FOR_OFED_1_2 + NULL, &mc_data->ib_conn, cq_size); +#else + NULL, &mc_data->ib_conn, cq_size, 0); +#endif + if (IS_ERR(mc_data->ib_conn.cq)) { + IB_ERROR("ib_create_cq failed\n"); + goto out; + } + IB_INFO("mc cq created %p %d\n", mc_data->ib_conn.cq, cq_size); + + ret = ib_req_notify_cq(mc_data->ib_conn.cq, IB_CQ_NEXT_COMP); + if (ret) { + IB_ERROR("ib_req_notify_cq failed %x \n", ret); + goto destroy_cq; + } + + init_waitqueue_head(&(mc_data->ib_conn.callback_wait_queue)); + init_completion(&(mc_data->ib_conn.callback_thread_exit)); + + spin_lock_init(&mc_data->ib_conn.compl_received_lock); + mc_data->ib_conn.callback_thread = kthread_run(vnic_defer_completion, + &mc_data->ib_conn, + "qlgc_vnic_mc_def_compl"); + if (IS_ERR(mc_data->ib_conn.callback_thread)) { + IB_ERROR("Could not create vnic_callback_thread for MULTICAST;" + " error %d\n", + (int) PTR_ERR(mc_data->ib_conn.callback_thread)); + mc_data->ib_conn.callback_thread = NULL; + goto destroy_cq; + } + IB_INFO("callback_thread created\n"); + + ret = vnic_ib_mc_init_qp(mc_data, config, pd, viport_config); + if (ret) + goto destroy_thread; + + spin_lock_init(&mc_data->ib_conn.conn_lock); + mc_data->ib_conn.state = IB_CONN_INITTED; /* stays in this state */ + + return ret; + 
+destroy_thread: + vnic_completion_cleanup(&mc_data->ib_conn); +destroy_cq: + ib_destroy_cq(mc_data->ib_conn.cq); + mc_data->ib_conn.cq = (struct ib_cq *)ERR_PTR(-EINVAL); +out: + return ret; +} + +static int vnic_ib_mc_init_qp(struct mc_data *mc_data, + struct vnic_ib_config *config, + struct ib_pd *pd, + struct viport_config *viport_config) +{ + struct ib_qp_init_attr *init_attr; + struct ib_qp_attr *qp_attr; + int ret; + + IB_FUNCTION("vnic_ib_mc_init_qp\n"); + + if (!mc_data->ib_conn.cq) { + IB_ERROR("cq is null\n"); + return -ENOMEM; + } + + init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL); + if (!init_attr) { + IB_ERROR("failed to alloc init_attr\n"); + return -ENOMEM; + } + + init_attr->cap.max_recv_wr = config->num_recvs; + init_attr->cap.max_send_wr = 1; + init_attr->cap.max_recv_sge = 2; + init_attr->cap.max_send_sge = 1; + + /* Completion for all work requests. */ + init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; + + init_attr->qp_type = IB_QPT_UD; + + init_attr->send_cq = mc_data->ib_conn.cq; + init_attr->recv_cq = mc_data->ib_conn.cq; + + IB_INFO("creating qp %d \n", config->num_recvs); + + mc_data->ib_conn.qp = ib_create_qp(pd, init_attr); + + if (IS_ERR(mc_data->ib_conn.qp)) { + ret = -1; + IB_ERROR("could not create QP\n"); + goto free_init_attr; + } + + qp_attr = kzalloc(sizeof *qp_attr, GFP_KERNEL); + if (!qp_attr) { + ret = -ENOMEM; + goto destroy_qp; + } + + qp_attr->qp_state = IB_QPS_INIT; + qp_attr->port_num = viport_config->port; + qp_attr->qkey = IOC_NUMBER(be64_to_cpu(viport_config->ioc_guid)); + qp_attr->pkey_index = 0; + /* cannot set access flags for UD qp + qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE; */ + + IB_INFO("port_num:%d qkey:%d pkey:%d\n", qp_attr->port_num, + qp_attr->qkey, qp_attr->pkey_index); + ret = ib_modify_qp(mc_data->ib_conn.qp, qp_attr, + IB_QP_STATE | + IB_QP_PKEY_INDEX | + IB_QP_QKEY | + + /* cannot set this for UD + IB_QP_ACCESS_FLAGS | */ + + IB_QP_PORT); + if (ret) { + IB_ERROR("ib_modify_qp to INIT failed %d 
\n", ret); + goto free_qp_attr; + } + + kfree(qp_attr); + kfree(init_attr); + return ret; + +free_qp_attr: + kfree(qp_attr); +destroy_qp: + ib_destroy_qp(mc_data->ib_conn.qp); + mc_data->ib_conn.qp = ERR_PTR(-EINVAL); +free_init_attr: + kfree(init_attr); + return ret; +} + +int vnic_ib_mc_mod_qp_to_rts(struct ib_qp *qp) +{ + int ret; + struct ib_qp_attr *qp_attr = NULL; + + IB_FUNCTION("vnic_ib_mc_mod_qp_to_rts\n"); + qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL); + if (!qp_attr) + return -ENOMEM; + + memset(qp_attr, 0, sizeof *qp_attr); + qp_attr->qp_state = IB_QPS_RTR; + + ret = ib_modify_qp(qp, qp_attr, IB_QP_STATE); + if (ret) { + IB_ERROR("ib_modify_qp to RTR failed %d\n", ret); + goto out; + } + IB_INFO("MC QP RTR\n"); + + memset(qp_attr, 0, sizeof *qp_attr); + qp_attr->qp_state = IB_QPS_RTS; + qp_attr->sq_psn = 0; + + ret = ib_modify_qp(qp, qp_attr, IB_QP_STATE | IB_QP_SQ_PSN); + if (ret) { + IB_ERROR("ib_modify_qp to RTS failed %d\n", ret); + goto out; + } + IB_INFO("MC QP RTS\n"); + + kfree(qp_attr); + return 0; + +out: + kfree(qp_attr); + return -1; +} + +int vnic_ib_mc_post_recv(struct mc_data *mc_data, struct io *io) +{ + cycles_t post_time; + struct ib_recv_wr *bad_wr; + int ret = -1; + + IB_FUNCTION("vnic_ib_mc_post_recv()\n"); + + vnic_ib_pre_rcvpost_stats(&mc_data->ib_conn, io, &post_time); + io->type = RECV_UD; + ret = ib_post_recv(mc_data->ib_conn.qp, &io->rwr, &bad_wr); + if (ret) { + IB_ERROR("error in posting rcv wr; error %d\n", ret); + goto out; + } + vnic_ib_post_rcvpost_stats(&mc_data->ib_conn, post_time); + +out: + return ret; +} diff --git a/drivers/infiniband/ulp/qlgc_vnic/vnic_ib.h b/drivers/infiniband/ulp/qlgc_vnic/vnic_ib.h index 0b7e637..4ed743f 100644 --- a/drivers/infiniband/ulp/qlgc_vnic/vnic_ib.h +++ b/drivers/infiniband/ulp/qlgc_vnic/vnic_ib.h @@ -45,14 +45,15 @@ #define PFX "qlgc_vnic: " struct io; -typedef void (comp_routine_t) (struct io * io); +typedef void (comp_routine_t) (struct io *io); enum vnic_ib_conn_state { 
IB_CONN_UNINITTED = 0, IB_CONN_INITTED = 1, IB_CONN_CONNECTING = 2, IB_CONN_CONNECTED = 3, - IB_CONN_DISCONNECTED = 4 + IB_CONN_DISCONNECTED = 4, + IB_CONN_ERRORED = 5 }; struct vnic_ib_conn { @@ -63,6 +64,13 @@ struct vnic_ib_conn { struct ib_qp *qp; struct ib_cq *cq; struct ib_cm_id *cm_id; + int callback_thread_end; + struct task_struct *callback_thread; + wait_queue_head_t callback_wait_queue; + u32 in_thread; + u32 compl_received; + struct completion callback_thread_exit; + spinlock_t compl_received_lock; #ifdef CONFIG_INFINIBAND_QLGC_VNIC_STATS struct { cycles_t connection_time; @@ -114,7 +122,7 @@ struct io { #ifdef CONFIG_INFINIBAND_QLGC_VNIC_STATS cycles_t time; #endif /* CONFIG_INFINIBAND_QLGC_VNIC_STATS */ - enum {RECV, RDMA, SEND} type; + enum {RECV, RDMA, SEND, RECV_UD} type; }; struct rdma_io { @@ -142,11 +150,20 @@ struct recv_io { u8 *virtual_addr; }; +struct ud_recv_io { + struct io io; + u16 len; + dma_addr_t skb_data_dma; + struct ib_sge list[2]; /* one for grh and other for rest of pkt. */ + struct sk_buff *skb; +}; + int vnic_ib_init(void); void vnic_ib_cleanup(void); +void vnic_completion_cleanup(struct vnic_ib_conn *ib_conn); struct vnic; -int vnic_ib_get_path(struct netpath *netpath, struct vnic * vnic); +int vnic_ib_get_path(struct netpath *netpath, struct vnic *vnic); int vnic_ib_conn_init(struct vnic_ib_conn *ib_conn, struct viport *viport, struct ib_pd *pd, struct vnic_ib_config *config); @@ -166,4 +183,25 @@ int vnic_ib_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event); #define vnic_ib_conn_disconnected(ib_conn) \ ((ib_conn)->state == IB_CONN_DISCONNECTED) +#define MCAST_GROUP_INVALID 0x00 /* viport failed to join or left mc group */ +#define MCAST_GROUP_JOINING 0x01 /* wait for completion */ +#define MCAST_GROUP_JOINED 0x02 /* join process completed successfully */ + +/* vnic_sa_client is used to register with sa once. It is needed to join and + * leave multicast groups. 
+ */ +extern struct ib_sa_client vnic_sa_client; + +/* The following functions are using initialize and handle multicast + * components. + */ +struct mc_data; /* forward declaration */ +/* Initialize all necessary mc components */ +int vnic_ib_mc_init(struct mc_data *mc_data, struct viport *viport, + struct ib_pd *pd, struct vnic_ib_config *config); +/* Put multicast qp in RTS */ +int vnic_ib_mc_mod_qp_to_rts(struct ib_qp *qp); +/* Post multicast receive buffers */ +int vnic_ib_mc_post_recv(struct mc_data *mc_data, struct io *io); + #endif /* VNIC_IB_H_INCLUDED */ diff --git a/drivers/infiniband/ulp/qlgc_vnic/vnic_main.c b/drivers/infiniband/ulp/qlgc_vnic/vnic_main.c index 2a27ed3..02a88a7 100644 --- a/drivers/infiniband/ulp/qlgc_vnic/vnic_main.c +++ b/drivers/infiniband/ulp/qlgc_vnic/vnic_main.c @@ -40,8 +40,6 @@ #include <linux/list.h> #include <linux/completion.h> -#include <rdma/ib_cache.h> - #include "vnic_util.h" #include "vnic_main.h" #include "vnic_netpath.h" @@ -49,30 +47,58 @@ #include "vnic_ib.h" #include "vnic_stats.h" -#define MODULEVERSION "0.6.1" -#define MODULEDETAILS "QLogic Corp. Virtual NIC (VNIC) driver version " MODULEVERSION +#define MODULEVERSION "1.3.0.0.4" +#define MODULEDETAILS \ + "QLogic Corp. 
Virtual NIC (VNIC) driver version " MODULEVERSION MODULE_AUTHOR("QLogic Corp."); MODULE_DESCRIPTION(MODULEDETAILS); MODULE_LICENSE("Dual BSD/GPL"); MODULE_SUPPORTED_DEVICE("QLogic Ethernet Virtual I/O Controller"); -u32 vnic_debug = 0; +u32 vnic_debug; module_param(vnic_debug, uint, 0444); MODULE_PARM_DESC(vnic_debug, "Enable debug tracing if > 0"); LIST_HEAD(vnic_list); -const char driver[] = "qlgc_vnic"; +static DECLARE_WAIT_QUEUE_HEAD(vnic_npevent_queue); +static LIST_HEAD(vnic_npevent_list); +static DECLARE_COMPLETION(vnic_npevent_thread_exit); +static spinlock_t vnic_npevent_list_lock; +static struct task_struct *vnic_npevent_thread; +static int vnic_npevent_thread_end; -DECLARE_WAIT_QUEUE_HEAD(vnic_npevent_queue); -LIST_HEAD(vnic_npevent_list); -DECLARE_COMPLETION(vnic_npevent_thread_exit); -spinlock_t vnic_npevent_list_lock = SPIN_LOCK_UNLOCKED; -int vnic_npevent_thread = -1; -int vnic_npevent_thread_end = 0; +static const char *const vnic_npevent_str[] = { + "PRIMARY CONNECTED", + "PRIMARY DISCONNECTED", + "PRIMARY CARRIER", + "PRIMARY NO CARRIER", + "PRIMARY TIMER EXPIRED", + "PRIMARY SETLINK", + "SECONDARY CONNECTED", + "SECONDARY DISCONNECTED", + "SECONDARY CARRIER", + "SECONDARY NO CARRIER", + "SECONDARY TIMER EXPIRED", + "SECONDARY SETLINK", + "FORCED FAILOVER", + "UNFAILOVER", + "FREE VNIC", +}; +void vnic_force_failover(struct vnic *vnic) +{ + VNIC_FUNCTION("vnic_force_failover()\n"); + vnic_npevent_queue_evt(&vnic->primary_path, VNIC_FORCE_FAILOVER); +} + +void vnic_unfailover(struct vnic *vnic) +{ + VNIC_FUNCTION("vnic_unfailover()\n"); + vnic_npevent_queue_evt(&vnic->primary_path, VNIC_UNFAILOVER); +} void vnic_connected(struct vnic *vnic, struct netpath *netpath) { @@ -114,42 +140,49 @@ void vnic_link_down(struct vnic *vnic, struct netpath *netpath) void vnic_stop_xmit(struct vnic *vnic, struct netpath *netpath) { + unsigned long flags; + VNIC_FUNCTION("vnic_stop_xmit()\n"); + spin_lock_irqsave(&vnic->current_path_lock, flags); if (netpath == 
vnic->current_path) { - if (vnic->xmit_started) { - netif_stop_queue(&vnic->netdevice); - vnic->xmit_started = 0; + if (!netif_queue_stopped(vnic->netdevice)) { + netif_stop_queue(vnic->netdevice); + vnic->failed_over = 0; } vnic_stop_xmit_stats(vnic); } + spin_unlock_irqrestore(&vnic->current_path_lock, flags); } void vnic_restart_xmit(struct vnic *vnic, struct netpath *netpath) { + unsigned long flags; + VNIC_FUNCTION("vnic_restart_xmit()\n"); + spin_lock_irqsave(&vnic->current_path_lock, flags); if (netpath == vnic->current_path) { - if (!vnic->xmit_started) { - netif_wake_queue(&vnic->netdevice); - vnic->xmit_started = 1; - } + if (netif_queue_stopped(vnic->netdevice)) + netif_wake_queue(vnic->netdevice); vnic_restart_xmit_stats(vnic); } + spin_unlock_irqrestore(&vnic->current_path_lock, flags); } void vnic_recv_packet(struct vnic *vnic, struct netpath *netpath, struct sk_buff *skb) { VNIC_FUNCTION("vnic_recv_packet()\n"); - if ((netpath != vnic->current_path) || !vnic->open) { + if ((netpath != vnic->current_path) || + !netif_running(vnic->netdevice)) { VNIC_INFO("tossing packet\n"); dev_kfree_skb(skb); return; } - vnic->netdevice.last_rx = jiffies; - skb->dev = &vnic->netdevice; + vnic->netdevice->last_rx = jiffies; + skb->dev = vnic->netdevice; skb->protocol = eth_type_trans(skb, skb->dev); if (!vnic->config->use_rx_csum) skb->ip_summed = CHECKSUM_NONE; @@ -161,13 +194,22 @@ static struct net_device_stats *vnic_get_stats(struct net_device *device) { struct vnic *vnic; struct netpath *np; + unsigned long flags; VNIC_FUNCTION("vnic_get_stats()\n"); - vnic = (struct vnic *)device->priv; + vnic = netdev_priv(device); + spin_lock_irqsave(&vnic->current_path_lock, flags); np = vnic->current_path; - if (np && np->viport) + if (np && np->viport) { + atomic_inc(&np->viport->reference_count); + spin_unlock_irqrestore(&vnic->current_path_lock, flags); viport_get_stats(np->viport, &vnic->stats); + atomic_dec(&np->viport->reference_count); + 
wake_up(&np->viport->reference_queue); + } else + spin_unlock_irqrestore(&vnic->current_path_lock, flags); + return &vnic->stats; } @@ -176,12 +218,10 @@ static int vnic_open(struct net_device *device) struct vnic *vnic; VNIC_FUNCTION("vnic_open()\n"); - vnic = (struct vnic *)device->priv; + vnic = netdev_priv(device); - vnic->open++; - vnic_npevent_queue_evt(&vnic->primary_path, VNIC_NP_SETLINK); - vnic->xmit_started = 1; - netif_start_queue(&vnic->netdevice); + vnic_npevent_queue_evt(&vnic->primary_path, VNIC_PRINP_SETLINK); + netif_start_queue(vnic->netdevice); return 0; } @@ -192,11 +232,9 @@ static int vnic_stop(struct net_device *device) int ret = 0; VNIC_FUNCTION("vnic_stop()\n"); - vnic = (struct vnic *)device->priv; + vnic = netdev_priv(device); netif_stop_queue(device); - vnic->xmit_started = 0; - vnic->open--; - vnic_npevent_queue_evt(&vnic->primary_path, VNIC_NP_SETLINK); + vnic_npevent_queue_evt(&vnic->primary_path, VNIC_PRINP_SETLINK); return ret; } @@ -210,7 +248,7 @@ static int vnic_hard_start_xmit(struct sk_buff *skb, int ret = -1; VNIC_FUNCTION("vnic_hard_start_xmit()\n"); - vnic = (struct vnic *)device->priv; + vnic = netdev_priv(device); np = vnic->current_path; vnic_pre_pkt_xmit_stats(&xmit_time); @@ -234,13 +272,28 @@ out: static void vnic_tx_timeout(struct net_device *device) { struct vnic *vnic; + struct viport *viport = NULL; + unsigned long flags; VNIC_FUNCTION("vnic_tx_timeout()\n"); - vnic = (struct vnic *)device->priv; + vnic = netdev_priv(device); device->trans_start = jiffies; - if (vnic->current_path->viport) - viport_failure(vnic->current_path->viport); + spin_lock_irqsave(&vnic->current_path_lock, flags); + if (vnic->current_path && vnic->current_path->viport) { + if (vnic->failed_over) { + if (vnic->current_path == &vnic->primary_path) + viport = vnic->secondary_path.viport; + else if (vnic->current_path == &vnic->secondary_path) + viport = vnic->primary_path.viport; + } else + viport = vnic->current_path->viport; + + 
spin_unlock_irqrestore(&vnic->current_path_lock, flags); + if (viport) + viport_failure(viport); + } else + spin_unlock_irqrestore(&vnic->current_path_lock, flags); VNIC_ERROR("vnic_tx_timeout\n"); } @@ -251,19 +304,9 @@ static void vnic_set_multicast_list(struct net_device *device) unsigned long flags; VNIC_FUNCTION("vnic_set_multicast_list()\n"); - vnic = (struct vnic *)device->priv; + vnic = netdev_priv(device); spin_lock_irqsave(&vnic->lock, flags); - /* the vnic_link_evt thread also needs to be able to access - * mc_list. it is only safe to access the mc_list - * in the netdevice from this call, so make a local - * copy of it in the vnic. the mc_list is a linked - * list, but my copy is an array where each element's - * next pointer points to the next element. when I - * reallocate the list, I always size it with 10 - * extra elements so I don't have to resize it as - * often. I only downsize the list when it goes empty. - */ if (device->mc_count == 0) { if (vnic->mc_list_len) { vnic->mc_list_len = vnic->mc_count = 0; @@ -302,12 +345,16 @@ static void vnic_set_multicast_list(struct net_device *device) viport_set_multicast(vnic->secondary_path.viport, vnic->mc_list, vnic->mc_count); - vnic_npevent_queue_evt(&vnic->primary_path, VNIC_NP_SETLINK); + vnic_npevent_queue_evt(&vnic->primary_path, VNIC_PRINP_SETLINK); return; failure: spin_unlock_irqrestore(&vnic->lock, flags); } +/** + * Following set of functions queues up the events for EVIC and the + * kernel thread queuing up the event might return. 
+ */ static int vnic_set_mac_address(struct net_device *device, void *addr) { struct vnic *vnic; @@ -316,7 +363,7 @@ static int vnic_set_mac_address(struct net_device *device, void *addr) int ret = -1; VNIC_FUNCTION("vnic_set_mac_address()\n"); - vnic = (struct vnic *)device->priv; + vnic = netdev_priv(device); if (!is_valid_ether_addr(sockaddr->sa_data)) return -EADDRNOTAVAIL; @@ -334,18 +381,10 @@ static int vnic_set_mac_address(struct net_device *device, void *addr) if (ret) return ret; - /* Ignore result of set unicast for secondary path viport. - * Consider the operation a success if we are able to atleast - * set the primary path viport address - */ if (vnic->secondary_path.viport) viport_set_unicast(vnic->secondary_path.viport, address); vnic->mac_set = 1; - /* I'm assuming that this should work even if nothing is connected - * at the moment. note that this might return before the address has - * actually been changed. - */ return 0; } @@ -357,7 +396,7 @@ static int vnic_change_mtu(struct net_device *device, int mtu) int sec_max_mtu; VNIC_FUNCTION("vnic_change_mtu()\n"); - vnic = (struct vnic *)device->priv; + vnic = netdev_priv(device); if (vnic->primary_path.viport) pri_max_mtu = viport_max_mtu(vnic->primary_path.viport); @@ -372,8 +411,19 @@ static int vnic_change_mtu(struct net_device *device, int mtu) if ((mtu < pri_max_mtu) && (mtu < sec_max_mtu)) { device->mtu = mtu; vnic_npevent_queue_evt(&vnic->primary_path, - VNIC_NP_SETLINK); - } + VNIC_PRINP_SETLINK); + vnic_npevent_queue_evt(&vnic->secondary_path, + VNIC_SECNP_SETLINK); + } else if (pri_max_mtu < sec_max_mtu) + printk(KERN_WARNING PFX "%s: Maximum " + "supported MTU size is %d. " + "Cannot set MTU to %d\n", + vnic->config->name, pri_max_mtu, mtu); + else + printk(KERN_WARNING PFX "%s: Maximum " + "supported MTU size is %d. 
" + "Cannot set MTU to %d\n", + vnic->config->name, sec_max_mtu, mtu); return ret; } @@ -388,8 +438,8 @@ static int vnic_npevent_register(struct vnic *vnic, struct netpath *netpath) * connected. MAC address will be set when the primary * connects. */ - netpath_get_hw_addr(netpath, vnic->netdevice.dev_addr); - address = vnic->netdevice.dev_addr; + netpath_get_hw_addr(netpath, vnic->netdevice->dev_addr); + address = vnic->netdevice->dev_addr; if (vnic->secondary_path.viport) viport_set_unicast(vnic->secondary_path.viport, @@ -397,11 +447,15 @@ static int vnic_npevent_register(struct vnic *vnic, struct netpath *netpath) vnic->mac_set = 1; } - - ret = register_netdev(&vnic->netdevice); + ret = register_netdev(vnic->netdevice); if (ret) { - printk(KERN_WARNING PFX "failed registering netdev " - "error %d\n", ret); + printk(KERN_ERR PFX "%s failed registering netdev " + "error %d - calling viport_failure\n", + config_viport_name(vnic->primary_path.viport->config), + ret); + vnic_free(vnic); + printk(KERN_ERR PFX "%s DELETED : register_netdev failure\n", + config_viport_name(vnic->primary_path.viport->config)); return ret; } @@ -429,22 +483,6 @@ out: spin_unlock_irqrestore(&vnic_npevent_list_lock, flags); } - -static const char *const vnic_npevent_str[] = { - "PRIMARY CONNECTED", - "PRIMARY DISCONNECTED", - "PRIMARY CARRIER", - "PRIMARY NO CARRIER", - "PRIMARY TIMER EXPIRED", - "SECONDARY CONNECTED", - "SECONDARY DISCONNECTED", - "SECONDARY CARRIER", - "SECONDARY NO CARRIER", - "SECONDARY TIMER EXPIRED", - "SETLINK", - "FREE VNIC", -}; - static void update_path_and_reconnect(struct netpath *netpath, struct vnic *vnic) { @@ -460,8 +498,8 @@ static void update_path_and_reconnect(struct netpath *netpath, * This prevents flooding connect requests to a path (or set * of paths) that aren't successfully connecting for some reason. 
*/ - if (jiffies > netpath->connect_time + - vnic->config->no_path_timeout) { + if (time_after(jiffies, + (netpath->connect_time + vnic->config->no_path_timeout))) { netpath->path_idx = config->path_idx; netpath->connect_time = jiffies; netpath->delay_reconnect = 0; @@ -475,14 +513,29 @@ static void update_path_and_reconnect(struct netpath *netpath, viport_connect(netpath->viport, delay); } -static void vnic_set_uni_multicast(struct vnic * vnic, - struct netpath * netpath) +static inline void vnic_set_checksum_flag(struct vnic *vnic, + struct netpath *target_path) +{ + unsigned long flags; + + spin_lock_irqsave(&vnic->current_path_lock, flags); + vnic->current_path = target_path; + vnic->failed_over = 1; + if (vnic->config->use_tx_csum && + netpath_can_tx_csum(vnic->current_path)) + vnic->netdevice->features |= NETIF_F_IP_CSUM; + + spin_unlock_irqrestore(&vnic->current_path_lock, flags); +} + +static void vnic_set_uni_multicast(struct vnic *vnic, + struct netpath *netpath) { unsigned long flags; u8 *address; if (vnic->mac_set) { - address = vnic->netdevice.dev_addr; + address = vnic->netdevice->dev_addr; if (netpath->viport) viport_set_unicast(netpath->viport, address); @@ -498,8 +551,8 @@ static void vnic_set_uni_multicast(struct vnic * vnic, if (!netpath->viport) return; viport_set_link(netpath->viport, - vnic->netdevice.flags & ~IFF_UP, - vnic->netdevice.mtu); + vnic->netdevice->flags & ~IFF_UP, + vnic->netdevice->mtu); } } @@ -522,14 +575,14 @@ static void vnic_set_netpath_timers(struct vnic *vnic, /*nothing to do*/ break; case NETPATH_TS_EXPIRED: - if (vnic->state == VNIC_UNINITIALIZED) { + if (vnic->state == VNIC_UNINITIALIZED) vnic_npevent_register(vnic, netpath); - } + break; } } -static void vnic_check_primary_path_timer(struct vnic * vnic) +static void vnic_check_primary_path_timer(struct vnic *vnic) { switch (vnic->primary_path.timer_state) { case NETPATH_TS_ACTIVE: @@ -545,23 +598,58 @@ static void vnic_check_primary_path_timer(struct vnic * vnic) "%s: 
switching to primary path\n", vnic->config->name); - vnic->current_path = &vnic->primary_path; - if (vnic->config->use_tx_csum - && netpath_can_tx_csum(vnic-> - current_path)) { - vnic->netdevice.features |= - NETIF_F_IP_CSUM; - } + vnic_set_checksum_flag(vnic, &vnic->primary_path); break; } } -static void vnic_carrier_loss(struct vnic * vnic, +static void vnic_forced_failover(struct vnic *vnic) +{ + if (vnic->current_path == &vnic->primary_path) { + if (vnic->secondary_path.carrier && + vnic->secondary_path.timer_state != NETPATH_TS_ACTIVE) { + printk(KERN_INFO PFX "%s: Forced failover to " + "secondary path.\n", + vnic->config->name); + vnic->forced_failover = 1; + vnic_set_checksum_flag(vnic, &vnic->secondary_path); + if (vnic->config->prefer_primary) + printk(KERN_INFO "%s: To enable failback use " + "command - echo -n %s > " + "/sys/class/infiniband_qlgc_vnic/" + "interfaces/unfailover\n", + vnic->config->name, vnic->config->name); + } else + printk(KERN_INFO PFX "%s: Unable to force failover to " + "secondary path.\n", + vnic->config->name); + } else if (vnic->current_path == &vnic->secondary_path) { + if (vnic->primary_path.carrier && + vnic->primary_path.timer_state != NETPATH_TS_ACTIVE) { + printk(KERN_INFO PFX "%s: Forced failover to " + "primary path.\n", + vnic->config->name); + vnic->forced_failover = 1; + vnic_set_checksum_flag(vnic, &vnic->primary_path); + if (vnic->config->prefer_primary) + printk(KERN_INFO "%s: To enable failback use " + "command - echo -n %s > " + "/sys/class/infiniband_qlgc_vnic/" + "interfaces/unfailover\n", + vnic->config->name, vnic->config->name); + } else + printk(KERN_INFO PFX "%s: Unable to force failover to " + "primary path.\n", + vnic->config->name); + } +} + +static void vnic_carrier_loss(struct vnic *vnic, struct netpath *last_path) { if (vnic->primary_path.carrier) { vnic->carrier = 1; - vnic->current_path = &vnic->primary_path; + vnic_set_checksum_flag(vnic, &vnic->primary_path); if (last_path && last_path != 
vnic->current_path) printk(KERN_INFO PFX @@ -571,14 +659,10 @@ static void vnic_carrier_loss(struct vnic * vnic, printk(KERN_INFO PFX "%s: using primary path\n", vnic->config->name); - if (vnic->config->use_tx_csum && - netpath_can_tx_csum(vnic->current_path)) - vnic->netdevice.features |= NETIF_F_IP_CSUM; - } else if ((vnic->secondary_path.carrier) && (vnic->secondary_path.timer_state != NETPATH_TS_ACTIVE)) { vnic->carrier = 1; - vnic->current_path = &vnic->secondary_path; + vnic_set_checksum_flag(vnic, &vnic->secondary_path); if (last_path && last_path != vnic->current_path) printk(KERN_INFO PFX @@ -588,18 +672,14 @@ static void vnic_carrier_loss(struct vnic * vnic, printk(KERN_INFO PFX "%s: using secondary path\n", vnic->config->name); - if (vnic->config->use_tx_csum && - netpath_can_tx_csum(vnic->current_path)) - vnic->netdevice.features |= NETIF_F_IP_CSUM; - } } -static void vnic_handle_path_change(struct vnic * vnic, +static void vnic_handle_path_change(struct vnic *vnic, struct netpath **path) { - struct netpath * last_path = *path; + struct netpath *last_path = *path; if (!last_path) { if (vnic->current_path == &vnic->primary_path) @@ -611,18 +691,18 @@ static void vnic_handle_path_change(struct vnic * vnic, if (vnic->current_path && vnic->current_path->viport) viport_set_link(vnic->current_path->viport, - vnic->netdevice.flags, - vnic->netdevice.mtu); + vnic->netdevice->flags, + vnic->netdevice->mtu); if (last_path->viport) viport_set_link(last_path->viport, - vnic->netdevice.flags & - ~IFF_UP, vnic->netdevice.mtu); + vnic->netdevice->flags & + ~IFF_UP, vnic->netdevice->mtu); vnic_restart_xmit(vnic, vnic->current_path); } -static void vnic_report_path_change(struct vnic * vnic, +static void vnic_report_path_change(struct vnic *vnic, struct netpath *last_path, int other_path_ok) { @@ -666,15 +746,23 @@ static void vnic_report_path_change(struct vnic * vnic, } } -static void vnic_handle_free_vnic_evt(struct vnic * vnic) +static void 
vnic_handle_free_vnic_evt(struct vnic *vnic) { + unsigned long flags; + + if (!netif_queue_stopped(vnic->netdevice)) + netif_stop_queue(vnic->netdevice); + netpath_timer_stop(&vnic->primary_path); netpath_timer_stop(&vnic->secondary_path); + spin_lock_irqsave(&vnic->current_path_lock, flags); vnic->current_path = NULL; + spin_unlock_irqrestore(&vnic->current_path_lock, flags); netpath_free(&vnic->primary_path); netpath_free(&vnic->secondary_path); if (vnic->state == VNIC_REGISTERED) - unregister_netdev(&vnic->netdevice); + unregister_netdev(vnic->netdevice); + vnic_npevent_dequeue_all(vnic); kfree(vnic->config); if (vnic->mc_list_len) { @@ -682,22 +770,31 @@ static void vnic_handle_free_vnic_evt(struct vnic * vnic) kfree(vnic->mc_list); } - sysfs_remove_group(&vnic->class_dev_info.class_dev.kobj, - &vnic_dev_attr_group); - vnic_cleanup_stats_files(vnic); - class_device_unregister(&vnic->class_dev_info.class_dev); - wait_for_completion(&vnic->class_dev_info.released); + sysfs_remove_group(&vnic->class_dev_info.class_dev.kobj, + &vnic_dev_attr_group); + vnic_cleanup_stats_files(vnic); + class_device_unregister(&vnic->class_dev_info.class_dev); + wait_for_completion(&vnic->class_dev_info.released); + free_netdev(vnic->netdevice); } -static struct vnic * vnic_handle_npevent(struct vnic *vnic, - enum vnic_npevent_type npevt_type) +static struct vnic *vnic_handle_npevent(struct vnic *vnic, + enum vnic_npevent_type npevt_type, + int *failover_forced) { struct netpath *netpath; + const char *netpath_str; + + if (npevt_type <= VNIC_PRINP_LASTTYPE) + netpath_str = netpath_to_string(vnic, &vnic->primary_path); + else if (npevt_type <= VNIC_SECNP_LASTTYPE) + netpath_str = netpath_to_string(vnic, &vnic->secondary_path); + else + netpath_str = netpath_to_string(vnic, vnic->current_path); VNIC_INFO("%s: processing %s, netpath=%s, carrier=%d\n", vnic->config->name, vnic_npevent_str[npevt_type], - netpath_to_string(vnic, vnic->current_path), - vnic->carrier); + netpath_str, 
vnic->carrier); switch (npevt_type) { case VNIC_PRINP_CONNECTED: @@ -756,18 +853,34 @@ static struct vnic * vnic_handle_npevent(struct vnic *vnic, netpath->carrier = 0; update_path_and_reconnect(netpath, vnic); break; - case VNIC_NP_FREEVNIC: - vnic_handle_free_vnic_evt(vnic); - kfree(vnic); - vnic = NULL; - break; - case VNIC_NP_SETLINK: + case VNIC_PRINP_SETLINK: netpath = vnic->current_path; if (!netpath || !netpath->viport) break; viport_set_link(netpath->viport, - vnic->netdevice.flags, - vnic->netdevice.mtu); + vnic->netdevice->flags, + vnic->netdevice->mtu); + break; + case VNIC_SECNP_SETLINK: + netpath = &vnic->secondary_path; + if (!netpath || !netpath->viport) + break; + viport_set_link(netpath->viport, + vnic->netdevice->flags, + vnic->netdevice->mtu); + break; + case VNIC_FORCE_FAILOVER: + *failover_forced = 1; + break; + case VNIC_UNFAILOVER: + vnic->forced_failover = 0; + printk(KERN_INFO PFX "%s: Forced failover cleared.\n", + vnic->config->name); + break; + + case VNIC_NP_FREEVNIC: + vnic_handle_free_vnic_evt(vnic); + vnic = NULL; break; } return vnic; @@ -781,8 +894,7 @@ static int vnic_npevent_statemachine(void *context) int last_carrier; int other_path_ok = 0; struct netpath *last_path; - - daemonize("vnic_link_evt"); + int forced_failover; while (!vnic_npevent_thread_end || !list_empty(&vnic_npevent_list)) { @@ -791,6 +903,7 @@ static int vnic_npevent_statemachine(void *context) wait_event_interruptible(vnic_npevent_queue, !list_empty(&vnic_npevent_list) || vnic_npevent_thread_end); + forced_failover = 0; spin_lock_irqsave(&vnic_npevent_list_lock, flags); if (list_empty(&vnic_npevent_list)) { spin_unlock_irqrestore(&vnic_npevent_list_lock, @@ -801,8 +914,8 @@ static int vnic_npevent_statemachine(void *context) } vnic_link_evt = list_entry(vnic_npevent_list.next, - struct vnic_npevent, - list_ptrs); + struct vnic_npevent, + list_ptrs); list_del(&vnic_link_evt->list_ptrs); spin_unlock_irqrestore(&vnic_npevent_list_lock, flags); vnic = 
vnic_link_evt->vnic; @@ -814,7 +927,7 @@ static int vnic_npevent_statemachine(void *context) else if (vnic->current_path == &vnic->primary_path) other_path_ok = vnic->secondary_path.carrier; - vnic = vnic_handle_npevent(vnic, npevt_type); + vnic = vnic_handle_npevent(vnic, npevt_type, &forced_failover); if (!vnic) continue; @@ -826,13 +939,16 @@ static int vnic_npevent_statemachine(void *context) !vnic->current_path->carrier) { vnic->carrier = 0; vnic->current_path = NULL; - vnic->netdevice.features &= ~NETIF_F_IP_CSUM; + vnic->netdevice->features &= ~NETIF_F_IP_CSUM; } if (!vnic->carrier) vnic_carrier_loss(vnic, last_path); + else if (forced_failover) + vnic_forced_failover(vnic); else if ((vnic->current_path != &vnic->primary_path) && (vnic->config->prefer_primary) && + (!vnic->forced_failover) && (vnic->primary_path.carrier)) vnic_check_primary_path_timer(vnic); @@ -850,11 +966,11 @@ static int vnic_npevent_statemachine(void *context) if (vnic->carrier != last_carrier) { if (vnic->carrier) { VNIC_INFO("netif_carrier_on\n"); - netif_carrier_on(&vnic->netdevice); + netif_carrier_on(vnic->netdevice); vnic_carrier_loss_stats(vnic); } else { VNIC_INFO("netif_carrier_off\n"); - netif_carrier_off(&vnic->netdevice); + netif_carrier_off(vnic->netdevice); vnic_disconn_stats(vnic); } @@ -889,7 +1005,7 @@ void vnic_npevent_dequeue_evt(struct netpath *netpath, { unsigned long flags; struct vnic_npevent *npevt, *tmp; - struct vnic * vnic = netpath->parent; + struct vnic *vnic = netpath->parent; spin_lock_irqsave(&vnic_npevent_list_lock, flags); if (list_empty(&vnic_npevent_list)) @@ -911,11 +1027,15 @@ static int vnic_npevent_start(void) { VNIC_FUNCTION("vnic_npevent_start()\n"); - if ((vnic_npevent_thread = - kernel_thread(vnic_npevent_statemachine, NULL, 0)) < 0) { + spin_lock_init(&vnic_npevent_list_lock); + vnic_npevent_thread = kthread_run(vnic_npevent_statemachine, NULL, + "qlgc_vnic_npevent_s_m"); + if (IS_ERR(vnic_npevent_thread)) { printk(KERN_WARNING PFX "failed to 
create vnic npevent" - " thread; error %d\n", vnic_npevent_thread); - return vnic_npevent_thread; + " thread; error %d\n", + (int) PTR_ERR(vnic_npevent_thread)); + vnic_npevent_thread = NULL; + return 1; } return 0; @@ -923,37 +1043,23 @@ static int vnic_npevent_start(void) void vnic_npevent_cleanup(void) { - if (vnic_npevent_thread >= 0) { + if (vnic_npevent_thread) { vnic_npevent_thread_end = 1; wake_up(&vnic_npevent_queue); wait_for_completion(&vnic_npevent_thread_exit); - vnic_npevent_thread = -1; + vnic_npevent_thread = NULL; } } -struct vnic *vnic_allocate(struct vnic_config *config) +static void vnic_setup(struct net_device *device) { - struct vnic *vnic = NULL; - struct net_device *device; - - VNIC_FUNCTION("vnic_allocate()\n"); - vnic = kzalloc(sizeof *vnic, GFP_KERNEL); - if (!vnic) { - VNIC_ERROR("failed allocating vnic structure\n"); - return NULL; - } - - vnic->lock = SPIN_LOCK_UNLOCKED; - vnic_alloc_stats(vnic); - vnic->state = VNIC_UNINITIALIZED; - vnic->config = config; - device = &vnic->netdevice; - - strcpy(device->name, config->name); - ether_setup(device); - device->priv = (void *)vnic; + /* ether_setup is used to fill + * device parameters for ethernet devices. + * We override some of the parameters + * which are specific to VNIC. 
+ */ device->get_stats = vnic_get_stats; device->open = vnic_open; device->stop = vnic_stop; @@ -964,11 +1070,34 @@ struct vnic *vnic_allocate(struct vnic_config *config) device->change_mtu = vnic_change_mtu; device->watchdog_timeo = 10 * HZ; device->features = 0; +} + +struct vnic *vnic_allocate(struct vnic_config *config) +{ + struct vnic *vnic = NULL; + struct net_device *netdev; + + VNIC_FUNCTION("vnic_allocate()\n"); + netdev = alloc_netdev((int) sizeof(*vnic), config->name, vnic_setup); + if (!netdev) { + VNIC_ERROR("failed allocating vnic structure\n"); + return NULL; + } + + vnic = netdev_priv(netdev); + vnic->netdevice = netdev; + spin_lock_init(&vnic->lock); + spin_lock_init(&vnic->current_path_lock); + vnic_alloc_stats(vnic); + vnic->state = VNIC_UNINITIALIZED; + vnic->config = config; + netpath_init(&vnic->primary_path, vnic, 0); netpath_init(&vnic->secondary_path, vnic, 1); vnic->current_path = NULL; + vnic->failed_over = 0; list_add_tail(&vnic->list_ptrs, &vnic_list); @@ -1005,22 +1134,26 @@ static int __init vnic_init(void) VNIC_FUNCTION("vnic_init()\n"); VNIC_INIT("Initializing %s\n", MODULEDETAILS); - if ((ret=config_start())) { + ret = config_start(); + if (ret) { VNIC_ERROR("config_start failed\n"); goto failure; } - if ((ret=vnic_ib_init())) { + ret = vnic_ib_init(); + if (ret) { VNIC_ERROR("ib_start failed\n"); goto failure; } - if ((ret=viport_start())) { + ret = viport_start(); + if (ret) { VNIC_ERROR("viport_start failed\n"); goto failure; } - if ((ret=vnic_npevent_start())) { + ret = vnic_npevent_start(); + if (ret) { VNIC_ERROR("vnic_npevent_start failed\n"); goto failure; } diff --git a/drivers/infiniband/ulp/qlgc_vnic/vnic_main.h b/drivers/infiniband/ulp/qlgc_vnic/vnic_main.h index 1931889..697aad0 100644 --- a/drivers/infiniband/ulp/qlgc_vnic/vnic_main.h +++ b/drivers/infiniband/ulp/qlgc_vnic/vnic_main.h @@ -35,23 +35,44 @@ #include <linux/timex.h> #include <linux/netdevice.h> +#include <linux/kthread.h> #include "vnic_config.h" 
#include "vnic_netpath.h" +extern u16 vnic_max_mtu; +extern struct list_head vnic_list; +extern struct attribute_group vnic_stats_attr_group; +extern cycles_t vnic_recv_ref; + enum vnic_npevent_type { VNIC_PRINP_CONNECTED = 0, VNIC_PRINP_DISCONNECTED = 1, VNIC_PRINP_LINKUP = 2, VNIC_PRINP_LINKDOWN = 3, VNIC_PRINP_TIMEREXPIRED = 4, - VNIC_SECNP_CONNECTED = 5, - VNIC_SECNP_DISCONNECTED = 6, - VNIC_SECNP_LINKUP = 7, - VNIC_SECNP_LINKDOWN = 8, - VNIC_SECNP_TIMEREXPIRED = 9, - VNIC_NP_SETLINK = 10, - VNIC_NP_FREEVNIC = 11 + VNIC_PRINP_SETLINK = 5, + + /* used to figure out PRI vs SEC types for dbg msg*/ + VNIC_PRINP_LASTTYPE = VNIC_PRINP_SETLINK, + + VNIC_SECNP_CONNECTED = 6, + VNIC_SECNP_DISCONNECTED = 7, + VNIC_SECNP_LINKUP = 8, + VNIC_SECNP_LINKDOWN = 9, + VNIC_SECNP_TIMEREXPIRED = 10, + VNIC_SECNP_SETLINK = 11, + + /* used to figure out PRI vs SEC types for dbg msg*/ + VNIC_SECNP_LASTTYPE = VNIC_SECNP_SETLINK, + + VNIC_FORCE_FAILOVER = 12, + VNIC_UNFAILOVER = 13, + VNIC_NP_FREEVNIC = 14, + /* + * NOTE : If any new netpath event is being added, don't forget to + * add corresponding netpath event string into vnic_main.c. 
+ */ }; struct vnic_npevent { @@ -77,17 +98,18 @@ struct vnic { struct netpath *current_path; struct netpath primary_path; struct netpath secondary_path; - int open; int carrier; - int xmit_started; + int forced_failover; + int failed_over; int mac_set; struct net_device_stats stats; - struct net_device netdevice; + struct net_device *netdevice; struct class_dev_info class_dev_info; struct dev_mc_list *mc_list; int mc_list_len; int mc_count; spinlock_t lock; + spinlock_t current_path_lock; #ifdef CONFIG_INFINIBAND_QLGC_VNIC_STATS struct { cycles_t start_time; @@ -100,6 +122,7 @@ struct vnic { u32 xmit_fail; cycles_t recv_time; u32 recv_num; + u32 multicast_recv_num; cycles_t xmit_ref; /* intermediate time */ cycles_t xmit_off_time; u32 xmit_off_num; @@ -115,6 +138,9 @@ struct vnic *vnic_allocate(struct vnic_config *config); void vnic_free(struct vnic *vnic); +void vnic_force_failover(struct vnic *vnic); +void vnic_unfailover(struct vnic *vnic); + void vnic_connected(struct vnic *vnic, struct netpath *netpath); void vnic_disconnected(struct vnic *vnic, struct netpath *netpath); @@ -127,5 +153,4 @@ void vnic_restart_xmit(struct vnic *vnic, struct netpath *netpath); void vnic_recv_packet(struct vnic *vnic, struct netpath *netpath, struct sk_buff *skb); void vnic_npevent_cleanup(void); - #endif /* VNIC_MAIN_H_INCLUDED */ diff --git a/drivers/infiniband/ulp/qlgc_vnic/vnic_multicast.c b/drivers/infiniband/ulp/qlgc_vnic/vnic_multicast.c new file mode 100644 index 0000000..f40ea20 --- /dev/null +++ b/drivers/infiniband/ulp/qlgc_vnic/vnic_multicast.c @@ -0,0 +1,319 @@ +/* + * Copyright (c) 2008 QLogic, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/net.h> +#include <linux/netdevice.h> +#include <linux/jiffies.h> +#include <rdma/ib_sa.h> +#include "vnic_viport.h" +#include "vnic_main.h" +#include "vnic_util.h" + +static inline void vnic_set_multicast_state_invalid(struct viport *viport) +{ + viport->mc_info.state = MCAST_STATE_INVALID; + viport->mc_info.mc = NULL; + memset(&viport->mc_info.mgid, 0, sizeof(union ib_gid)); +} + +int vnic_mc_init(struct viport *viport) +{ + MCAST_FUNCTION("vnic_mc_init %p\n", viport); + vnic_set_multicast_state_invalid(viport); + viport->mc_info.retries = 0; + spin_lock_init(&viport->mc_info.lock); + + return 0; +} + +void vnic_mc_uninit(struct viport *viport) +{ + unsigned long flags; + MCAST_FUNCTION("vnic_mc_uninit %p\n", viport); + + spin_lock_irqsave(&viport->mc_info.lock, flags); + if ((viport->mc_info.state != MCAST_STATE_INVALID) && + (viport->mc_info.state != MCAST_STATE_RETRIED)) { + MCAST_ERROR("%s mcast state is not INVALID or RETRIED %d\n", + control_ifcfg_name(&viport->control), + viport->mc_info.state); + } + spin_unlock_irqrestore(&viport->mc_info.lock, flags); + MCAST_FUNCTION("vnic_mc_uninit done\n"); +} + + +/* This function is called when NEED_MCAST_COMPLETION is set. + * It finishes off the join multicast work. 
+ */ +int vnic_mc_join_handle_completion(struct viport *viport) +{ + unsigned int ret = 0; + + MCAST_FUNCTION("vnic_mc_join_handle_completion()\n"); + if (viport->mc_info.state != MCAST_STATE_JOINING) { + MCAST_ERROR("%s unexpected mcast state in handle_completion: " + " %d\n", control_ifcfg_name(&viport->control), + viport->mc_info.state); + ret = -1; + goto out; + } + viport->mc_info.state = MCAST_STATE_ATTACHING; + MCAST_INFO("%s Attaching QP %lx mgid:" + VNIC_GID_FMT " mlid:%x\n", + control_ifcfg_name(&viport->control), jiffies, + VNIC_GID_RAW_ARG(viport->mc_info.mgid.raw), + viport->mc_info.mlid); + ret = ib_attach_mcast(viport->mc_data.ib_conn.qp, &viport->mc_info.mgid, + viport->mc_info.mlid); + if (ret) { + MCAST_ERROR("%s Attach mcast qp failed %d\n", + control_ifcfg_name(&viport->control), ret); + ret = -1; + goto out; + } + viport->mc_info.state = MCAST_STATE_JOINED_ATTACHED; + MCAST_INFO("%s UD QP successfully attached to mcast group\n", + control_ifcfg_name(&viport->control)); + +out: + return ret; +} + +/* NOTE: ib_sa.h says "returning a non-zero value from this callback will + * result in destroying the multicast tracking structure. 
+ */ +static int vnic_mc_join_complete(int status, + struct ib_sa_multicast *multicast) +{ + struct viport *viport = (struct viport *)multicast->context; + unsigned long flags; + + MCAST_FUNCTION("vnic_mc_join_complete() status:%x\n", status); + if (status) { + spin_lock_irqsave(&viport->mc_info.lock, flags); + if (status == -ENETRESET) { + vnic_set_multicast_state_invalid(viport); + viport->mc_info.retries = 0; + spin_unlock_irqrestore(&viport->mc_info.lock, flags); + MCAST_ERROR("%s got ENETRESET\n", + control_ifcfg_name(&viport->control)); + goto out; + } + /* perhaps the mcgroup hasn't yet been created - retry */ + viport->mc_info.retries++; + viport->mc_info.mc = NULL; + if (viport->mc_info.retries > MAX_MCAST_JOIN_RETRIES) { + viport->mc_info.state = MCAST_STATE_RETRIED; + spin_unlock_irqrestore(&viport->mc_info.lock, flags); + MCAST_ERROR("%s join failed 0x%x - max retries:%d " + "exceeded\n", + control_ifcfg_name(&viport->control), + status, viport->mc_info.retries); + } else { + viport->mc_info.state = MCAST_STATE_INVALID; + spin_unlock_irqrestore(&viport->mc_info.lock, flags); + spin_lock_irqsave(&viport->lock, flags); + viport->updates |= NEED_MCAST_JOIN; + spin_unlock_irqrestore(&viport->lock, flags); + viport_kick(viport); + MCAST_ERROR("%s join failed 0x%x - retrying; " + "retries:%d\n", + control_ifcfg_name(&viport->control), + status, viport->mc_info.retries); + } + goto out; + } + + /* finish join work from main state loop for viport - in case + * the work itself cannot be done in a callback environment */ + spin_lock_irqsave(&viport->lock, flags); + viport->mc_info.mlid = be16_to_cpu(multicast->rec.mlid); + viport->updates |= NEED_MCAST_COMPLETION; + spin_unlock_irqrestore(&viport->lock, flags); + viport_kick(viport); + MCAST_INFO("%s setting NEED_MCAST_COMPLETION %x %x\n", + control_ifcfg_name(&viport->control), + multicast->rec.mlid, viport->mc_info.mlid); +out: + return status; +} + +void vnic_mc_join_setup(struct viport *viport, union ib_gid 
*mgid) +{ + unsigned long flags; + + MCAST_FUNCTION("in vnic_mc_join_setup\n"); + spin_lock_irqsave(&viport->mc_info.lock, flags); + if (viport->mc_info.state != MCAST_STATE_INVALID) { + if (viport->mc_info.state == MCAST_STATE_DETACHING) + MCAST_ERROR("%s detach in progress\n", + control_ifcfg_name(&viport->control)); + else if (viport->mc_info.state == MCAST_STATE_RETRIED) + MCAST_ERROR("%s max join retries exceeded\n", + control_ifcfg_name(&viport->control)); + else { + /* join/attach in progress or done */ + /* verify that the current mgid is same as prev mgid */ + if (memcmp(mgid, &viport->mc_info.mgid, sizeof(union ib_gid)) != 0) { + /* Separate MGID for each IOC */ + MCAST_ERROR("%s Multicast Group MGIDs not " + "unique; mgids: " VNIC_GID_FMT + " " VNIC_GID_FMT "\n", + control_ifcfg_name(&viport->control), + VNIC_GID_RAW_ARG(mgid->raw), + VNIC_GID_RAW_ARG(viport->mc_info.mgid.raw)); + } else + MCAST_INFO("%s join already issued: %d\n", + control_ifcfg_name(&viport->control), + viport->mc_info.state); + + } + spin_unlock_irqrestore(&viport->mc_info.lock, flags); + return; + } + viport->mc_info.mgid = *mgid; + spin_unlock_irqrestore(&viport->mc_info.lock, flags); + spin_lock_irqsave(&viport->lock, flags); + viport->updates |= NEED_MCAST_JOIN; + spin_unlock_irqrestore(&viport->lock, flags); + viport_kick(viport); + MCAST_INFO("%s setting NEED_MCAST_JOIN \n", + control_ifcfg_name(&viport->control)); +} + +int vnic_mc_join(struct viport *viport) +{ + struct ib_sa_mcmember_rec rec; + ib_sa_comp_mask comp_mask; + unsigned long flags; + int ret = 0; + + MCAST_FUNCTION("vnic_mc_join()\n"); + if (!viport->mc_data.ib_conn.qp) { + MCAST_ERROR("%s qp is NULL\n", + control_ifcfg_name(&viport->control)); + ret = -1; + goto out; + } + spin_lock_irqsave(&viport->mc_info.lock, flags); + if (viport->mc_info.state != MCAST_STATE_INVALID) { + spin_unlock_irqrestore(&viport->mc_info.lock, flags); + MCAST_INFO("%s Multicast join already issued\n", + 
control_ifcfg_name(&viport->control)); + goto out; + } + viport->mc_info.state = MCAST_STATE_JOINING; + spin_unlock_irqrestore(&viport->mc_info.lock, flags); + + memset(&rec, 0, sizeof(rec)); + rec.join_state = 2; /* bit 1 is Nonmember */ + rec.mgid = viport->mc_info.mgid; + rec.port_gid = viport->config->path_info.path.sgid; + + comp_mask = IB_SA_MCMEMBER_REC_MGID | + IB_SA_MCMEMBER_REC_PORT_GID | + IB_SA_MCMEMBER_REC_JOIN_STATE; + + MCAST_INFO("%s Joining Multicast group%lx mgid:" + VNIC_GID_FMT " port_gid: " VNIC_GID_FMT "\n", + control_ifcfg_name(&viport->control), jiffies, + VNIC_GID_RAW_ARG(rec.mgid.raw), + VNIC_GID_RAW_ARG(rec.port_gid.raw)); + + viport->mc_info.mc = ib_sa_join_multicast(&vnic_sa_client, + viport->config->ibdev, viport->config->port, + &rec, comp_mask, GFP_KERNEL, + vnic_mc_join_complete, viport); + + if (IS_ERR(viport->mc_info.mc)) { + MCAST_ERROR("%s Multicast joining failed " VNIC_GID_FMT + ".\n", + control_ifcfg_name(&viport->control), + VNIC_GID_RAW_ARG(rec.mgid.raw)); + viport->mc_info.state = MCAST_STATE_INVALID; + ret = -1; + goto out; + } + MCAST_INFO("%s Multicast group join issued mgid:" + VNIC_GID_FMT " port_gid: " VNIC_GID_FMT "\n", + control_ifcfg_name(&viport->control), + VNIC_GID_RAW_ARG(rec.mgid.raw), + VNIC_GID_RAW_ARG(rec.port_gid.raw)); +out: + return ret; +} + +void vnic_mc_leave(struct viport *viport) +{ + unsigned long flags; + unsigned int ret; + struct ib_sa_multicast *mc; + + MCAST_FUNCTION("vnic_mc_leave()\n"); + + spin_lock_irqsave(&viport->mc_info.lock, flags); + if ((viport->mc_info.state == MCAST_STATE_INVALID) || + (viport->mc_info.state == MCAST_STATE_RETRIED)) { + spin_unlock_irqrestore(&viport->mc_info.lock, flags); + return; + } + + if (viport->mc_info.state == MCAST_STATE_JOINED_ATTACHED) { + + viport->mc_info.state = MCAST_STATE_DETACHING; + spin_unlock_irqrestore(&viport->mc_info.lock, flags); + ret = ib_detach_mcast(viport->mc_data.ib_conn.qp, + &viport->mc_info.mgid, + viport->mc_info.mlid); + if 
(ret) {
+		MCAST_ERROR("%s UD QP Detach failed %d\n",
+			    control_ifcfg_name(&viport->control), ret);
+			return;
+		}
+		MCAST_INFO("%s UD QP detached successfully\n",
+			   control_ifcfg_name(&viport->control));
+		spin_lock_irqsave(&viport->mc_info.lock, flags);
+	}
+	mc = viport->mc_info.mc;
+	vnic_set_multicast_state_invalid(viport);
+	viport->mc_info.retries = 0;
+	spin_unlock_irqrestore(&viport->mc_info.lock, flags);
+
+	if (mc) {
+		MCAST_INFO("%s Freeing up multicast structure.\n",
+			   control_ifcfg_name(&viport->control));
+		ib_sa_free_multicast(mc);
+	}
+	MCAST_FUNCTION("vnic_mc_leave done\n");
+	return;
+}
diff --git a/drivers/infiniband/ulp/qlgc_vnic/vnic_multicast.h b/drivers/infiniband/ulp/qlgc_vnic/vnic_multicast.h
new file mode 100644
index 0000000..e049180
--- /dev/null
+++ b/drivers/infiniband/ulp/qlgc_vnic/vnic_multicast.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2008 QLogic, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __VNIC_MULTICAST_H__
+#define __VNIC_MULTICAST_H__
+
+enum {
+	MCAST_STATE_INVALID = 0x00,	/* join not attempted or failed */
+	MCAST_STATE_JOINING = 0x01,	/* join mcgroup in progress */
+	MCAST_STATE_ATTACHING = 0x02,	/* join completed with success,
+					 * attach qp to mcgroup in progress
+					 */
+	MCAST_STATE_JOINED_ATTACHED = 0x03, /* join completed with success */
+	MCAST_STATE_DETACHING = 0x04,	/* detach qp in progress */
+	MCAST_STATE_RETRIED = 0x05,	/* retried join and failed */
+};
+
+#define MAX_MCAST_JOIN_RETRIES	5	/* used to retry join */
+
+struct mc_info {
+	u8			state;
+	spinlock_t		lock;
+	union ib_gid		mgid;
+	u16			mlid;
+	struct ib_sa_multicast	*mc;
+	u8			retries;
+};
+
+
+int vnic_mc_init(struct viport *viport);
+void vnic_mc_uninit(struct viport *viport);
+extern char *control_ifcfg_name(struct control *control);
+
+/* This function is called when a viport gets a multicast mgid from EVIC
+   and must join the multicast group. It sets up NEED_MCAST_JOIN flag, which
+   results in vnic_mc_join being called later. */
+void vnic_mc_join_setup(struct viport *viport, union ib_gid *mgid);
+
+/* This function is called when NEED_MCAST_JOIN flag is set. */
+int vnic_mc_join(struct viport *viport);
+
+/* This function is called when NEED_MCAST_COMPLETION is set.
+   It finishes off the join multicast work. 
*/ +int vnic_mc_join_handle_completion(struct viport *viport); + +void vnic_mc_leave(struct viport *viport); + +#endif /* __VNIC_MULTICAST_H__ */ diff --git a/drivers/infiniband/ulp/qlgc_vnic/vnic_netpath.c b/drivers/infiniband/ulp/qlgc_vnic/vnic_netpath.c index ce54608..195202e 100644 --- a/drivers/infiniband/ulp/qlgc_vnic/vnic_netpath.c +++ b/drivers/infiniband/ulp/qlgc_vnic/vnic_netpath.c @@ -38,7 +38,7 @@ #include "vnic_viport.h" #include "vnic_netpath.h" -void vnic_npevent_timeout(unsigned long data) +static void vnic_npevent_timeout(unsigned long data) { struct netpath *netpath = (struct netpath *)data; diff --git a/drivers/infiniband/ulp/qlgc_vnic/vnic_netpath.h b/drivers/infiniband/ulp/qlgc_vnic/vnic_netpath.h index cc43c83..0a259eb 100644 --- a/drivers/infiniband/ulp/qlgc_vnic/vnic_netpath.h +++ b/drivers/infiniband/ulp/qlgc_vnic/vnic_netpath.h @@ -51,7 +51,7 @@ struct netpath { struct vnic *parent; struct viport *viport; size_t path_idx; - u32 connect_time; + unsigned long connect_time; int second_bias; u8 is_primary_path; u8 delay_reconnect; diff --git a/drivers/infiniband/ulp/qlgc_vnic/vnic_stats.c b/drivers/infiniband/ulp/qlgc_vnic/vnic_stats.c index 8981eed..4f89451 100644 --- a/drivers/infiniband/ulp/qlgc_vnic/vnic_stats.c +++ b/drivers/infiniband/ulp/qlgc_vnic/vnic_stats.c @@ -36,7 +36,7 @@ #include "vnic_main.h" -cycles_t recv_ref; +cycles_t vnic_recv_ref; /* * TODO: Statistics reporting for control path, data path, @@ -170,6 +170,17 @@ static ssize_t show_recvs(struct class_device *class_dev, char *buf) static CLASS_DEVICE_ATTR(recvs, S_IRUGO, show_recvs, NULL); +static ssize_t show_multicast_recvs(struct class_device *class_dev, char *buf) +{ + struct class_dev_info *info = + container_of(class_dev, struct class_dev_info, class_dev); + struct vnic *vnic = container_of(info, struct vnic, stat_info); + + return sprintf(buf, "%d\n", vnic->statistics.multicast_recv_num); +} + +static CLASS_DEVICE_ATTR(multicast_recvs, S_IRUGO, show_multicast_recvs, 
NULL); + static ssize_t show_total_xmit_time(struct class_device *class_dev, char *buf) { @@ -206,12 +217,13 @@ static ssize_t show_failed_xmits(struct class_device *class_dev, char *buf) static CLASS_DEVICE_ATTR(failed_xmits, S_IRUGO, show_failed_xmits, NULL); -static struct attribute * vnic_stats_attrs[] = { +static struct attribute *vnic_stats_attrs[] = { &class_device_attr_lifetime.attr, &class_device_attr_xmits.attr, &class_device_attr_total_xmit_time.attr, &class_device_attr_failed_xmits.attr, &class_device_attr_recvs.attr, + &class_device_attr_multicast_recvs.attr, &class_device_attr_total_recv_time.attr, &class_device_attr_connection_time.attr, &class_device_attr_disconnects.attr, diff --git a/drivers/infiniband/ulp/qlgc_vnic/vnic_stats.h b/drivers/infiniband/ulp/qlgc_vnic/vnic_stats.h index 2933173..40d5291 100644 --- a/drivers/infiniband/ulp/qlgc_vnic/vnic_stats.h +++ b/drivers/infiniband/ulp/qlgc_vnic/vnic_stats.h @@ -38,9 +38,6 @@ #ifdef CONFIG_INFINIBAND_QLGC_VNIC_STATS -extern struct attribute_group vnic_stats_attr_group; -extern cycles_t recv_ref; - static inline void vnic_connected_stats(struct vnic *vnic) { if (vnic->statistics.conn_time == 0) { @@ -57,7 +54,7 @@ static inline void vnic_connected_stats(struct vnic *vnic) } -static inline void vnic_stop_xmit_stats(struct vnic * vnic) +static inline void vnic_stop_xmit_stats(struct vnic *vnic) { if (vnic->statistics.xmit_ref == 0) vnic->statistics.xmit_ref = get_cycles(); @@ -75,10 +72,15 @@ static inline void vnic_restart_xmit_stats(struct vnic *vnic) static inline void vnic_recv_pkt_stats(struct vnic *vnic) { - vnic->statistics.recv_time += get_cycles() - recv_ref; + vnic->statistics.recv_time += get_cycles() - vnic_recv_ref; vnic->statistics.recv_num++; } +static inline void vnic_multicast_recv_pkt_stats(struct vnic *vnic) +{ + vnic->statistics.multicast_recv_num++; +} + static inline void vnic_pre_pkt_xmit_stats(cycles_t *time) { *time = get_cycles(); @@ -133,7 +135,7 @@ stats_out: return -1; } 
-static inline void vnic_cleanup_stats_files(struct vnic * vnic) +static inline void vnic_cleanup_stats_files(struct vnic *vnic) { sysfs_remove_group(&vnic->class_dev_info.class_dev.kobj, &vnic_stats_attr_group); @@ -161,7 +163,7 @@ static inline void control_note_rsptime_stats(cycles_t *time) } static inline void control_update_rsptime_stats(struct control *control, - cycles_t response_time) + cycles_t response_time) { response_time -= control->statistics.request_time; control->statistics.response_time += response_time; @@ -174,7 +176,7 @@ static inline void control_update_rsptime_stats(struct control *control, } -static inline void control_note_reqtime_stats(struct control * control) +static inline void control_note_reqtime_stats(struct control *control) { control->statistics.request_time = get_cycles(); } @@ -184,32 +186,32 @@ static inline void control_timeout_stats(struct control *control) control->statistics.timeout_num++; } -static inline void data_kickreq_stats(struct data * data) +static inline void data_kickreq_stats(struct data *data) { data->statistics.kick_reqs++; } -static inline void data_no_xmitbuf_stats(struct data * data) +static inline void data_no_xmitbuf_stats(struct data *data) { data->statistics.no_xmit_bufs++; } -static inline void data_xmits_stats(struct data * data) +static inline void data_xmits_stats(struct data *data) { data->statistics.xmit_num++; } -static inline void data_recvs_stats(struct data * data) +static inline void data_recvs_stats(struct data *data) { data->statistics.recv_num++; } static inline void data_note_kickrcv_time(void) { - recv_ref = get_cycles(); + vnic_recv_ref = get_cycles(); } -static inline void data_rcvkicks_stats(struct data * data) +static inline void data_rcvkicks_stats(struct data *data) { data->statistics.kick_recvs++; } @@ -238,11 +240,11 @@ static inline void vnic_ib_comp_stats(struct vnic_ib_conn *ib_conn, } -static inline void vnic_ib_io_stats(struct io * io, +static inline void 
vnic_ib_io_stats(struct io *io, struct vnic_ib_conn *ib_conn, cycles_t comp_time) { - if (io->type == RECV) + if ((io->type == RECV) || (io->type == RECV_UD)) io->time = comp_time; else if (io->type == RDMA) { ib_conn->statistics.rdma_comp_time += comp_time - io->time; @@ -312,7 +314,7 @@ static inline void vnic_connected_stats(struct vnic *vnic) ; } -static inline void vnic_stop_xmit_stats(struct vnic * vnic) +static inline void vnic_stop_xmit_stats(struct vnic *vnic) { ; } @@ -327,6 +329,11 @@ static inline void vnic_recv_pkt_stats(struct vnic *vnic) ; } +static inline void vnic_multicast_recv_pkt_stats(struct vnic *vnic) +{ + ; +} + static inline void vnic_pre_pkt_xmit_stats(cycles_t *time) { ; @@ -348,7 +355,7 @@ static inline int vnic_setup_stats_files(struct vnic *vnic) return 0; } -static inline void vnic_cleanup_stats_files(struct vnic * vnic) +static inline void vnic_cleanup_stats_files(struct vnic *vnic) { ; } @@ -374,12 +381,12 @@ static inline void control_note_rsptime_stats(cycles_t *time) } static inline void control_update_rsptime_stats(struct control *control, - cycles_t response_time) + cycles_t response_time) { ; } -static inline void control_note_reqtime_stats(struct control * control) +static inline void control_note_reqtime_stats(struct control *control) { ; } @@ -389,22 +396,22 @@ static inline void control_timeout_stats(struct control *control) ; } -static inline void data_kickreq_stats(struct data * data) +static inline void data_kickreq_stats(struct data *data) { ; } -static inline void data_no_xmitbuf_stats(struct data * data) +static inline void data_no_xmitbuf_stats(struct data *data) { ; } -static inline void data_xmits_stats(struct data * data) +static inline void data_xmits_stats(struct data *data) { ; } -static inline void data_recvs_stats(struct data * data) +static inline void data_recvs_stats(struct data *data) { ; } @@ -414,7 +421,7 @@ static inline void data_note_kickrcv_time(void) ; } -static inline void 
data_rcvkicks_stats(struct data * data) +static inline void data_rcvkicks_stats(struct data *data) { ; } @@ -440,7 +447,7 @@ static inline void vnic_ib_comp_stats(struct vnic_ib_conn *ib_conn, ; } -static inline void vnic_ib_io_stats(struct io * io, +static inline void vnic_ib_io_stats(struct io *io, struct vnic_ib_conn *ib_conn, cycles_t comp_time) { diff --git a/drivers/infiniband/ulp/qlgc_vnic/vnic_sys.c b/drivers/infiniband/ulp/qlgc_vnic/vnic_sys.c index 508f3d2..a2f054a 100644 --- a/drivers/infiniband/ulp/qlgc_vnic/vnic_sys.c +++ b/drivers/infiniband/ulp/qlgc_vnic/vnic_sys.c @@ -30,8 +30,8 @@ * SOFTWARE. */ -#include <linux/parser.h> #include <linux/netdevice.h> +#include <linux/parser.h> #include <linux/if.h> #include "vnic_util.h" @@ -41,8 +41,6 @@ #include "vnic_main.h" #include "vnic_stats.h" -extern struct list_head vnic_list; - /* * target eiocs are added by writing * @@ -59,6 +57,8 @@ enum { VNIC_OPT_RXCSUM = 1 << 5, VNIC_OPT_TXCSUM = 1 << 6, VNIC_OPT_HEARTBEAT = 1 << 7, + VNIC_OPT_IOC_STRING = 1 << 8, + VNIC_OPT_IB_MULTICAST = 1 << 9, VNIC_OPT_ALL = (VNIC_OPT_IOC_GUID | VNIC_OPT_DGID | VNIC_OPT_NAME | VNIC_OPT_PKEY), }; @@ -72,6 +72,8 @@ static match_table_t vnic_opt_tokens = { {VNIC_OPT_RXCSUM, "rx_csum=%s"}, {VNIC_OPT_TXCSUM, "tx_csum=%s"}, {VNIC_OPT_HEARTBEAT, "heartbeat=%d"}, + {VNIC_OPT_IOC_STRING, "ioc_string=\"%s"}, + {VNIC_OPT_IB_MULTICAST, "ib_multicast=%s"}, {VNIC_OPT_ERR, NULL} }; @@ -81,7 +83,6 @@ static void vnic_release_class_dev(struct class_device *class_dev) container_of(class_dev, struct class_dev_info, class_dev); complete(&cdev_info->released); - } struct class vnic_class = { @@ -100,7 +101,7 @@ static int vnic_parse_options(const char *buf, struct path_param *param) int opt_mask = 0; int token; int ret = -EINVAL; - int i; + int i, len; options = kstrdup(buf, GFP_KERNEL); if (!options) @@ -170,7 +171,7 @@ static int vnic_parse_options(const char *buf, struct path_param *param) if (token > 255 || token < 0) { printk(KERN_WARNING PFX 
"instance parameter must be" - " > 0 and <= 255\n"); + " >= 0 and <= 255\n"); goto out; } @@ -221,6 +222,42 @@ static int vnic_parse_options(const char *buf, struct path_param *param) } param->heartbeat = token; break; + case VNIC_OPT_IOC_STRING: + p = match_strdup(args); + len = strlen(p); + if (len > MAX_IOC_STRING_LEN) { + printk(KERN_WARNING PFX + "ioc string parameter too long\n"); + kfree(p); + goto out; + } + strcpy(param->ioc_string, p); + if (*(p + len - 1) != '\"') { + strcat(param->ioc_string, ","); + kfree(p); + p = strsep(&sep_opt, "\""); + strcat(param->ioc_string, p); + sep_opt++; + } else { + *(param->ioc_string + len - 1) = '\0'; + kfree(p); + } + break; + case VNIC_OPT_IB_MULTICAST: + p = match_strdup(args); + if (!strncmp(p, "true", 4)) + param->ib_multicast = 1; + else if (!strncmp(p, "false", 5)) + param->ib_multicast = 0; + else { + printk(KERN_WARNING PFX + "bad ib_multicast parameter." + " must be 'true' or 'false'\n"); + kfree(p); + goto out; + } + kfree(p); + break; default: printk(KERN_WARNING PFX "unknown parameter or missing value " @@ -294,23 +331,27 @@ static ssize_t show_tx_csum(struct class_device *class_dev, char *buf) static CLASS_DEVICE_ATTR(tx_csum, S_IRUGO, show_tx_csum, NULL); static ssize_t show_current_path(struct class_device *class_dev, char *buf) -{ - struct class_dev_info *info = - container_of(class_dev, struct class_dev_info, class_dev); - struct vnic *vnic = container_of(info, struct vnic, class_dev_info); - + { + struct class_dev_info *info = + container_of(class_dev, struct class_dev_info, class_dev); + struct vnic *vnic = container_of(info, struct vnic, class_dev_info); + unsigned long flags; + size_t length; + + spin_lock_irqsave(&vnic->current_path_lock, flags); if (vnic->current_path == &vnic->primary_path) - return sprintf(buf, "primary path\n"); + length = sprintf(buf, "primary_path\n"); else if (vnic->current_path == &vnic->secondary_path) - return sprintf(buf, "secondary path\n"); + length = sprintf(buf, 
"secondary path\n"); else - return sprintf(buf, "none\n"); - + length = sprintf(buf, "none\n"); + spin_unlock_irqrestore(&vnic->current_path_lock, flags); + return length; } static CLASS_DEVICE_ATTR(current_path, S_IRUGO, show_current_path, NULL); -static struct attribute * vnic_dev_attrs[] = { +static struct attribute *vnic_dev_attrs[] = { &class_device_attr_vnic_state.attr, &class_device_attr_rx_csum.attr, &class_device_attr_tx_csum.attr, @@ -322,6 +363,25 @@ struct attribute_group vnic_dev_attr_group = { .attrs = vnic_dev_attrs, }; +static inline void print_dgid(u8 *dgid) +{ + int i; + + for (i = 0; i < 16; i += 2) + printk("%04x", be16_to_cpu(*(__be16 *)&dgid[i])); +} + +static inline int is_dgid_zero(u8 *dgid) +{ + int i; + + for (i = 0; i < 16; i++) { + if (dgid[i] != 0) + return 1; + } + return 0; +} + static int create_netpath(struct netpath *npdest, struct path_param *p_params) { @@ -337,7 +397,8 @@ static int create_netpath(struct netpath *npdest, viport_config = vnic->primary_path.viport->config; if ((viport_config->ioc_guid == p_params->ioc_guid) && (viport_config->control_config.vnic_instance - == p_params->instance)) { + == p_params->instance) + && (be64_to_cpu(p_params->ioc_guid))) { SYS_ERROR("GUID %llx," " INSTANCE %d already in use\n", be64_to_cpu(p_params->ioc_guid), @@ -351,7 +412,8 @@ static int create_netpath(struct netpath *npdest, viport_config = vnic->secondary_path.viport->config; if ((viport_config->ioc_guid == p_params->ioc_guid) && (viport_config->control_config.vnic_instance - == p_params->instance)) { + == p_params->instance) + && (be64_to_cpu(p_params->ioc_guid))) { SYS_ERROR("GUID %llx," " INSTANCE %d already in use\n", be64_to_cpu(p_params->ioc_guid), @@ -396,13 +458,26 @@ static int create_netpath(struct netpath *npdest, npdest->viport = viport; viport->parent = npdest; viport->vnic = npdest->parent; - viport_kick(viport); - vnic_disconnected(npdest->parent, npdest); + + if (is_dgid_zero(p_params->dgid) && p_params->ioc_guid != 0 
+ && p_params->pkey != 0) { + viport_kick(viport); + vnic_disconnected(npdest->parent, npdest); + } else { + printk(KERN_WARNING "Specified parameters IOCGUID=%llx, " + "P_Key=%x, DGID=", be64_to_cpu(p_params->ioc_guid), + p_params->pkey); + print_dgid(p_params->dgid); + printk(" insufficient for establishing %s path for interface " + "%s. Hence, path will not be established.\n", + (npdest->second_bias ? "secondary" : "primary"), + p_params->name); + } out: return ret; } -struct vnic *create_vnic(struct path_param *param) +static struct vnic *create_vnic(struct path_param *param) { struct vnic_config *vnic_config; struct vnic *vnic; @@ -476,8 +551,62 @@ free_vnic_config: return NULL; } -ssize_t vnic_delete(struct class_device * class_dev, - const char *buf, size_t count) +static ssize_t vnic_sysfs_force_failover(struct class_device *class_dev, + const char *buf, size_t count) +{ + struct vnic *vnic; + struct list_head *ptr; + int ret = -EINVAL; + + if (count > IFNAMSIZ) { + printk(KERN_WARNING PFX "invalid vnic interface name\n"); + return ret; + } + + SYS_INFO("vnic_sysfs_force_failover: name = %s\n", buf); + list_for_each(ptr, &vnic_list) { + vnic = list_entry(ptr, struct vnic, list_ptrs); + if (!strcmp(vnic->config->name, buf)) { + vnic_force_failover(vnic); + return count; + } + } + + printk(KERN_WARNING PFX "vnic interface '%s' does not exist\n", buf); + return ret; +} + +CLASS_DEVICE_ATTR(force_failover, S_IWUSR, NULL, vnic_sysfs_force_failover); + +static ssize_t vnic_sysfs_unfailover(struct class_device *class_dev, + const char *buf, size_t count) +{ + struct vnic *vnic; + struct list_head *ptr; + int ret = -EINVAL; + + if (count > IFNAMSIZ) { + printk(KERN_WARNING PFX "invalid vnic interface name\n"); + return ret; + } + + SYS_INFO("vnic_sysfs_unfailover: name = %s\n", buf); + list_for_each(ptr, &vnic_list) { + vnic = list_entry(ptr, struct vnic, list_ptrs); + if (!strcmp(vnic->config->name, buf)) { + vnic_unfailover(vnic); + return count; + } + } + + 
printk(KERN_WARNING PFX "vnic interface '%s' does not exist\n", buf); + return ret; +} + +CLASS_DEVICE_ATTR(unfailover, S_IWUSR, NULL, vnic_sysfs_unfailover); + +static ssize_t vnic_delete(struct class_device *class_dev, + const char *buf, size_t count) { struct vnic *vnic; struct list_head *ptr; @@ -501,6 +630,8 @@ ssize_t vnic_delete(struct class_device * class_dev, return ret; } +CLASS_DEVICE_ATTR(delete_vnic, S_IWUSR, NULL, vnic_delete); + static ssize_t show_viport_state(struct class_device *class_dev, char *buf) { struct class_dev_info *info = @@ -625,10 +756,137 @@ static ssize_t show_heartbeat(struct class_device *class_dev, char *buf) static CLASS_DEVICE_ATTR(heartbeat, S_IRUGO, show_heartbeat, NULL); -static struct attribute * vnic_path_attrs[] = { +static ssize_t show_ioc_guid(struct class_device *class_dev, char *buf) +{ + struct class_dev_info *info = + container_of(class_dev, struct class_dev_info, class_dev); + + struct netpath *path = + container_of(info, struct netpath, class_dev_info); + + return sprintf(buf, "%llx\n", + __be64_to_cpu(path->viport->config->ioc_guid)); +} + +static CLASS_DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL); + +static inline void get_dgid_string(u8 *dgid, char *buf) +{ + int i; + char holder[5]; + + for (i = 0; i < 16; i += 2) { + sprintf(holder, "%04x", be16_to_cpu(*(__be16 *)&dgid[i])); + strcat(buf, holder); + } + + strcat(buf, "\n"); +} + +static ssize_t show_dgid(struct class_device *class_dev, char *buf) +{ + struct class_dev_info *info = + container_of(class_dev, struct class_dev_info, class_dev); + + struct netpath *path = + container_of(info, struct netpath, class_dev_info); + + get_dgid_string(path->viport->config->path_info.path.dgid.raw, buf); + + return strlen(buf); +} + +static CLASS_DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL); + +static ssize_t show_pkey(struct class_device *class_dev, char *buf) +{ + struct class_dev_info *info = + container_of(class_dev, struct class_dev_info, class_dev); + + struct 
netpath *path = + container_of(info, struct netpath, class_dev_info); + + return sprintf(buf, "%x\n", path->viport->config->path_info.path.pkey); +} + +static CLASS_DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); + +static ssize_t show_hca_info(struct class_device *class_dev, char *buf) +{ + struct class_dev_info *info = + container_of(class_dev, struct class_dev_info, class_dev); + + struct netpath *path = + container_of(info, struct netpath, class_dev_info); + + return sprintf(buf, "vnic-%s-%d\n", path->viport->config->ibdev->name, + path->viport->config->port); +} + +static CLASS_DEVICE_ATTR(hca_info, S_IRUGO, show_hca_info, NULL); + +static ssize_t show_ioc_string(struct class_device *class_dev, char *buf) +{ + struct class_dev_info *info = + container_of(class_dev, struct class_dev_info, class_dev); + + struct netpath *path = + container_of(info, struct netpath, class_dev_info); + + return sprintf(buf, "%s\n", path->viport->config->ioc_string); +} + +static CLASS_DEVICE_ATTR(ioc_string, S_IRUGO, show_ioc_string, NULL); + +static ssize_t show_multicast_state(struct class_device *class_dev, char *buf) +{ + struct class_dev_info *info = + container_of(class_dev, struct class_dev_info, class_dev); + + struct netpath *path = + container_of(info, struct netpath, class_dev_info); + + if (!(path->viport->features_supported & VNIC_FEAT_INBOUND_IB_MC)) + return sprintf(buf, "feature not enabled\n"); + + switch (path->viport->mc_info.state) { + case MCAST_STATE_INVALID: + return sprintf(buf, "state=Invalid\n"); + case MCAST_STATE_JOINING: + return sprintf(buf, "state=Joining MGID:" VNIC_GID_FMT "\n", + VNIC_GID_RAW_ARG(path->viport->mc_info.mgid.raw)); + case MCAST_STATE_ATTACHING: + return sprintf(buf, "state=Attaching MGID:" VNIC_GID_FMT + " MLID:%X\n", + VNIC_GID_RAW_ARG(path->viport->mc_info.mgid.raw), + path->viport->mc_info.mlid); + case MCAST_STATE_JOINED_ATTACHED: + return sprintf(buf, + "state=Joined & Attached MGID:" VNIC_GID_FMT + " MLID:%X\n", + 
VNIC_GID_RAW_ARG(path->viport->mc_info.mgid.raw), + path->viport->mc_info.mlid); + case MCAST_STATE_DETACHING: + return sprintf(buf, "state=Detaching MGID: " VNIC_GID_FMT "\n", + VNIC_GID_RAW_ARG(path->viport->mc_info.mgid.raw)); + case MCAST_STATE_RETRIED: + return sprintf(buf, "state=Retries Exceeded\n"); + } + return sprintf(buf, "invalid state\n"); +} + +static CLASS_DEVICE_ATTR(multicast_state, S_IRUGO, show_multicast_state, NULL); + +static struct attribute *vnic_path_attrs[] = { &class_device_attr_viport_state.attr, &class_device_attr_link_state.attr, &class_device_attr_heartbeat.attr, + &class_device_attr_ioc_guid.attr, + &class_device_attr_dgid.attr, + &class_device_attr_pkey.attr, + &class_device_attr_hca_info.attr, + &class_device_attr_ioc_string.attr, + &class_device_attr_multicast_state.attr, NULL }; @@ -668,8 +926,133 @@ out: } -ssize_t vnic_create_primary(struct class_device * class_dev, - const char *buf, size_t count) +static inline void update_dgids(u8 *old, u8 *new, char *vnic_name, + char *path_name) +{ + int i; + + if (!memcmp(old, new, 16)) + return; + + printk(KERN_INFO PFX "Changing dgid from 0x"); + print_dgid(old); + printk(" to 0x"); + print_dgid(new); + printk(" for %s path of %s\n", path_name, vnic_name); + for (i = 0; i < 16; i++) + old[i] = new[i]; +} + +static inline void update_ioc_guids(struct path_param *params, + struct netpath *path, + char *vnic_name, char *path_name) +{ + u64 sid; + + if (path->viport->config->ioc_guid == params->ioc_guid) + return; + + printk(KERN_INFO PFX "Changing IOC GUID from 0x%llx to 0x%llx " + "for %s path of %s\n", + __be64_to_cpu(path->viport->config->ioc_guid), + __be64_to_cpu(params->ioc_guid), path_name, vnic_name); + + path->viport->config->ioc_guid = params->ioc_guid; + + sid = (SST_AGN << 56) | (SST_OUI << 32) | (CONTROL_PATH_ID << 8) + | IOC_NUMBER(be64_to_cpu(params->ioc_guid)); + + path->viport->config->control_config.ib_config.service_id = + cpu_to_be64(sid); + + sid = (SST_AGN << 56) | 
(SST_OUI << 32) | (DATA_PATH_ID << 8) + | IOC_NUMBER(be64_to_cpu(params->ioc_guid)); + + path->viport->config->data_config.ib_config.service_id = + cpu_to_be64(sid); +} + +static inline void update_pkeys(__be16 *old, __be16 *new, char *vnic_name, + char *path_name) +{ + if (*old == *new) + return; + + printk(KERN_INFO PFX "Changing P_Key from 0x%x to 0x%x " + "for %s path of %s\n", *old, *new, + path_name, vnic_name); + *old = *new; +} + +static void update_ioc_strings(struct path_param *params, struct netpath *path, + char *path_name) +{ + if (!strcmp(params->ioc_string, path->viport->config->ioc_string)) + return; + + printk(KERN_INFO PFX "Changing ioc_string to %s for %s path of %s\n", + params->ioc_string, path_name, params->name); + + strcpy(path->viport->config->ioc_string, params->ioc_string); +} + +static void update_path_parameters(struct path_param *params, + struct netpath *path) +{ + update_dgids(path->viport->config->path_info.path.dgid.raw, + params->dgid, params->name, + (path->second_bias ? "secondary" : "primary")); + + update_ioc_guids(params, path, params->name, + (path->second_bias ? "secondary" : "primary")); + + update_pkeys(&path->viport->config->path_info.path.pkey, + ¶ms->pkey, params->name, + (path->second_bias ? "secondary" : "primary")); + + update_ioc_strings(params, path, + (path->second_bias ? "secondary" : "primary")); +} + +static ssize_t update_params_and_connect(struct path_param *params, + struct netpath *path, size_t count) +{ + if (is_dgid_zero(params->dgid) && params->ioc_guid != 0 && + params->pkey != 0) { + + if (!memcmp(path->viport->config->path_info.path.dgid.raw, + params->dgid, 16) && + params->ioc_guid == path->viport->config->ioc_guid && + params->pkey == path->viport->config->path_info.path.pkey) { + + printk(KERN_WARNING PFX "All of the dgid, ioc_guid and " + "pkeys are same as the existing" + " one. 
Not updating values.\n"); + return -EINVAL; + } else { + if (path->viport->state == VIPORT_CONNECTED) { + printk(KERN_WARNING PFX "%s path of %s " + "interface is already in connected " + "state. Not updating values.\n", + (path->second_bias ? "Secondary" : "Primary"), + path->parent->config->name); + return -EINVAL; + } else { + update_path_parameters(params, path); + viport_kick(path->viport); + vnic_disconnected(path->parent, path); + return count; + } + } + } else { + printk(KERN_WARNING PFX "Either dgid, iocguid, pkey is zero. " + "No update.\n"); + return -EINVAL; + } +} + +static ssize_t vnic_create_primary(struct class_device *class_dev, + const char *buf, size_t count) { struct class_dev_info *cdev = container_of(class_dev, struct class_dev_info, class_dev); @@ -679,17 +1062,30 @@ ssize_t vnic_create_primary(struct class_device * class_dev, struct path_param param; int ret = -EINVAL; struct vnic *vnic; + struct list_head *ptr; param.instance = 0; param.rx_csum = -1; param.tx_csum = -1; param.heartbeat = -1; + param.ib_multicast = -1; + *param.ioc_string = '\0'; ret = vnic_parse_options(buf, ¶m); if (ret) goto out; + list_for_each(ptr, &vnic_list) { + vnic = list_entry(ptr, struct vnic, list_ptrs); + if (!strcmp(vnic->config->name, param.name)) { + ret = update_params_and_connect(¶m, + &vnic->primary_path, + count); + goto out; + } + } + param.ibdev = target->dev->dev; param.ibport = target; param.port = target->port_num; @@ -723,8 +1119,10 @@ out: return ret; } -ssize_t vnic_create_secondary(struct class_device * class_dev, - const char *buf, size_t count) +CLASS_DEVICE_ATTR(create_primary, S_IWUSR, NULL, vnic_create_primary); + +static ssize_t vnic_create_secondary(struct class_device *class_dev, + const char *buf, size_t count) { struct class_dev_info *cdev = container_of(class_dev, struct class_dev_info, class_dev); @@ -732,7 +1130,7 @@ ssize_t vnic_create_secondary(struct class_device * class_dev, container_of(cdev, struct vnic_ib_port, cdev_info); 
struct path_param param; - struct vnic *vnic; + struct vnic *vnic = NULL; int ret = -EINVAL; struct list_head *ptr; int found = 0; @@ -741,6 +1139,8 @@ ssize_t vnic_create_secondary(struct class_device * class_dev, param.rx_csum = -1; param.tx_csum = -1; param.heartbeat = -1; + param.ib_multicast = -1; + *param.ioc_string = '\0'; ret = vnic_parse_options(buf, ¶m); @@ -750,6 +1150,12 @@ ssize_t vnic_create_secondary(struct class_device * class_dev, list_for_each(ptr, &vnic_list) { vnic = list_entry(ptr, struct vnic, list_ptrs); if (!strncmp(vnic->config->name, param.name, IFNAMSIZ)) { + if (vnic->secondary_path.viport) { + ret = update_params_and_connect(¶m, + &vnic->secondary_path, + count); + goto out; + } found = 1; break; } @@ -784,3 +1190,5 @@ free_vnic: out: return ret; } + +CLASS_DEVICE_ATTR(create_secondary, S_IWUSR, NULL, vnic_create_secondary); diff --git a/drivers/infiniband/ulp/qlgc_vnic/vnic_sys.h b/drivers/infiniband/ulp/qlgc_vnic/vnic_sys.h index 5835c4a..bf0f379 100644 --- a/drivers/infiniband/ulp/qlgc_vnic/vnic_sys.h +++ b/drivers/infiniband/ulp/qlgc_vnic/vnic_sys.h @@ -42,13 +42,10 @@ extern struct class vnic_class; extern struct class_dev_info interface_cdev; extern struct attribute_group vnic_dev_attr_group; extern struct attribute_group vnic_path_attr_group; +extern struct class_device_attribute class_device_attr_create_primary; +extern struct class_device_attribute class_device_attr_create_secondary; +extern struct class_device_attribute class_device_attr_delete_vnic; +extern struct class_device_attribute class_device_attr_force_failover; +extern struct class_device_attribute class_device_attr_unfailover; -extern ssize_t vnic_create_primary(struct class_device *class_dev, - const char *buf, size_t count); - -extern ssize_t vnic_create_secondary(struct class_device *class_dev, - const char *buf, size_t count); - -extern ssize_t vnic_delete(struct class_device *class_dev, - const char *buf, size_t count); #endif /*VNIC_SYS_H_INCLUDED*/ diff --git 
a/drivers/infiniband/ulp/qlgc_vnic/vnic_util.h b/drivers/infiniband/ulp/qlgc_vnic/vnic_util.h index 6e27d82..095fa3a 100644 --- a/drivers/infiniband/ulp/qlgc_vnic/vnic_util.h +++ b/drivers/infiniband/ulp/qlgc_vnic/vnic_util.h @@ -38,7 +38,6 @@ #define VNIC_MAJORVERSION 1 #define VNIC_MINORVERSION 1 -#define is_power_of2(value) (((value) & ((value - 1))) == 0) #define ALIGN_DOWN(x, a) ((x)&(~((a)-1))) extern u32 vnic_debug; @@ -49,34 +48,23 @@ enum { DEBUG_IB_FSTATUS = 0x00000004, DEBUG_IB_ASSERTS = 0x00000008, DEBUG_CONTROL_INFO = 0x00000010, - DEBUG_CONTROL_FUNCTION = 0x00000020, - DEBUG_CONTROL_PACKET = 0x00000040, + DEBUG_CONTROL_FUNCTION = 0x00000020, + DEBUG_CONTROL_PACKET = 0x00000040, DEBUG_CONFIG_INFO = 0x00000100, DEBUG_DATA_INFO = 0x00001000, DEBUG_DATA_FUNCTION = 0x00002000, DEBUG_NETPATH_INFO = 0x00010000, DEBUG_VIPORT_INFO = 0x00100000, - DEBUG_VIPORT_FUNCTION = 0x00200000, + DEBUG_VIPORT_FUNCTION = 0x00200000, DEBUG_LINK_STATE = 0x00400000, DEBUG_VNIC_INFO = 0x01000000, DEBUG_VNIC_FUNCTION = 0x02000000, + DEBUG_MCAST_INFO = 0x04000000, + DEBUG_MCAST_FUNCTION = 0x08000000, DEBUG_SYS_INFO = 0x10000000, DEBUG_SYS_VERBOSE = 0x40000000 }; -#ifdef CONFIG_INFINIBAND_QLGC_VNIC_DEBUG -#define PRINT(level, x, fmt, arg...) \ - printk(level "%s: %s: %s, line %d: " fmt, \ - MODULE_NAME, x, __FILE__, __LINE__, ##arg) - -#define PRINT_CONDITIONAL(level, x, condition, fmt, arg...) \ - do { \ - if (condition) \ - printk(level "%s: %s: %s, line %d: " fmt, \ - MODULE_NAME, x, __FILE__, __LINE__, \ - ##arg); \ - } while(0) -#else #define PRINT(level, x, fmt, arg...) \ printk(level "%s: " fmt, MODULE_NAME, ##arg) @@ -85,8 +73,7 @@ enum { if (condition) \ printk(level "%s: %s: " fmt, \ MODULE_NAME, x, ##arg); \ - } while(0) -#endif /*CONFIG_INFINIBAND_QLGC_VNIC_DEBUG*/ + } while (0) #define IB_PRINT(fmt, arg...) 
\ PRINT(KERN_INFO, "IB", fmt, ##arg) @@ -110,8 +97,8 @@ enum { if ((vnic_debug & DEBUG_IB_ASSERTS) && !(x)) \ panic("%s assertion failed, file: %s," \ " line %d: ", \ - MODULE_NAME,__FILE__,__LINE__) \ - } while(0) + MODULE_NAME, __FILE__, __LINE__) \ + } while (0) #define CONTROL_PRINT(fmt, arg...) \ PRINT(KERN_INFO, "CONTROL", fmt, ##arg) @@ -126,15 +113,15 @@ enum { #define CONTROL_FUNCTION(fmt, arg...) \ PRINT_CONDITIONAL(KERN_INFO, \ - "CONTROL", \ - (vnic_debug & DEBUG_CONTROL_FUNCTION), \ - fmt, ##arg) + "CONTROL", \ + (vnic_debug & DEBUG_CONTROL_FUNCTION), \ + fmt, ##arg) #define CONTROL_PACKET(pkt) \ do { \ if (vnic_debug & DEBUG_CONTROL_PACKET) \ control_log_control_packet(pkt); \ - } while(0) + } while (0) #define CONFIG_PRINT(fmt, arg...) \ PRINT(KERN_INFO, "CONFIG", fmt, ##arg) @@ -164,6 +151,24 @@ enum { (vnic_debug & DEBUG_DATA_FUNCTION), \ fmt, ##arg) + +#define MCAST_PRINT(fmt, arg...) \ + PRINT(KERN_INFO, "MCAST", fmt, ##arg) +#define MCAST_ERROR(fmt, arg...) \ + PRINT(KERN_ERR, "MCAST", fmt, ##arg) + +#define MCAST_INFO(fmt, arg...) \ + PRINT_CONDITIONAL(KERN_INFO, \ + "MCAST", \ + (vnic_debug & DEBUG_MCAST_INFO), \ + fmt, ##arg) + +#define MCAST_FUNCTION(fmt, arg...) \ + PRINT_CONDITIONAL(KERN_INFO, \ + "MCAST", \ + (vnic_debug & DEBUG_MCAST_FUNCTION), \ + fmt, ##arg) + #define NETPATH_PRINT(fmt, arg...) \ PRINT(KERN_INFO, "NETPATH", fmt, ##arg) #define NETPATH_ERROR(fmt, arg...) 
\ diff --git a/drivers/infiniband/ulp/qlgc_vnic/vnic_viport.c b/drivers/infiniband/ulp/qlgc_vnic/vnic_viport.c index 75e02e5..7462403 100644 --- a/drivers/infiniband/ulp/qlgc_vnic/vnic_viport.c +++ b/drivers/infiniband/ulp/qlgc_vnic/vnic_viport.c @@ -46,23 +46,22 @@ #include "vnic_config.h" #include "vnic_control_pkt.h" -#define VIPORT_DISCONN_TIMER 10000 /*in ms*/ +#define VIPORT_DISCONN_TIMER 10000 /* 10 seconds */ -#define MAX_RETRY_INTERVAL 20000 /* 20 seconds*/ -#define RETRY_INCREMENT 5000 /* 5 seconds */ -#define MAX_CONNECT_RETRY_TIMEOUT 600000 /* 10 minutes*/ +#define MAX_RETRY_INTERVAL 20000 /* 20 seconds */ +#define RETRY_INCREMENT 5000 /* 5 seconds */ +#define MAX_CONNECT_RETRY_TIMEOUT 600000 /* 10 minutes */ -DECLARE_WAIT_QUEUE_HEAD(viport_queue); -LIST_HEAD(viport_list); -DECLARE_COMPLETION(viport_thread_exit); -spinlock_t viport_list_lock = SPIN_LOCK_UNLOCKED; +static DECLARE_WAIT_QUEUE_HEAD(viport_queue); +static LIST_HEAD(viport_list); +static DECLARE_COMPLETION(viport_thread_exit); +static spinlock_t viport_list_lock; -int viport_thread = -1; -int viport_thread_end = 0; -static u32 total_retry_duration = 0; -static u32 retry_duration = 0; +static struct task_struct *viport_thread; +static int viport_thread_end; static void viport_timer(struct viport *viport, int timeout); + struct viport *viport_allocate(struct viport_config *config) { struct viport *viport; @@ -81,34 +80,36 @@ struct viport *viport_allocate(struct viport_config *config) viport->new_flags = 0; viport->config = config; viport->connect = DELAY; - + viport->data.max_mtu = vnic_max_mtu; spin_lock_init(&viport->lock); init_waitqueue_head(&viport->stats_queue); init_waitqueue_head(&viport->disconnect_queue); + init_waitqueue_head(&viport->reference_queue); INIT_LIST_HEAD(&viport->list_ptrs); + vnic_mc_init(viport); + return viport; } -void viport_connect(struct viport * viport, int delay) +void viport_connect(struct viport *viport, int delay) { VIPORT_FUNCTION("viport_connect()\n"); if 
(viport->connect != DELAY) viport->connect = (delay) ? DELAY : NOW; - if (viport->link_state == LINK_FIRSTCONNECT){ + if (viport->link_state == LINK_FIRSTCONNECT) { u32 duration; duration = (net_random() & 0x1ff); if (!viport->parent->is_primary_path) duration += 0x1ff; viport->link_state = LINK_RETRYWAIT; - viport_timer(viport,duration); - } - else + viport_timer(viport, duration); + } else viport_kick(viport); } -void viport_disconnect(struct viport *viport) +static void viport_disconnect(struct viport *viport) { VIPORT_FUNCTION("viport_disconnect()\n"); viport->disconnect = 1; @@ -120,13 +121,15 @@ void viport_free(struct viport *viport) { VIPORT_FUNCTION("viport_free()\n"); viport_disconnect(viport); /* NOTE: this can sleep */ + vnic_mc_uninit(viport); kfree(viport->config); kfree(viport); } -void viport_set_link(struct viport * viport, u16 flags, u16 mtu) +void viport_set_link(struct viport *viport, u16 flags, u16 mtu) { unsigned long localflags; + int i; VIPORT_FUNCTION("viport_set_link()\n"); if (mtu > data_max_mtu(&viport->data)) { @@ -143,6 +146,40 @@ void viport_set_link(struct viport * viport, u16 flags, u16 mtu) viport->new_flags = flags; viport->new_mtu = mtu; viport->updates |= NEED_LINK_CONFIG; + if (viport->features_supported & VNIC_FEAT_INBOUND_IB_MC) { + if (((viport->mtu <= MCAST_MSG_SIZE) && (mtu > MCAST_MSG_SIZE)) || + ((viport->mtu > MCAST_MSG_SIZE) && (mtu <= MCAST_MSG_SIZE))) { + /* + * MTU value will enable/disable the multicast. In + * either case, need to send the CMD_CONFIG_ADDRESS2 to + * EVIC. Hence, setting the NEED_ADDRESS_CONFIG flag. 
+ */ + viport->updates |= NEED_ADDRESS_CONFIG; + if (mtu <= MCAST_MSG_SIZE) { + VIPORT_PRINT("%s: MTU changed; " + "old:%d new:%d (threshold:%d);" + " MULTICAST will be enabled.\n", + config_viport_name(viport->config), + viport->mtu, mtu, + (int)MCAST_MSG_SIZE); + } else { + VIPORT_PRINT("%s: MTU changed; " + "old:%d new:%d (threshold:%d); " + "MULTICAST will be disabled.\n", + config_viport_name(viport->config), + viport->mtu, mtu, + (int)MCAST_MSG_SIZE); + } + /* When we resend these addresses, EVIC will + * send mgid=0 back in response. So no need to + * shutoff ib_multicast. + */ + for (i = MCAST_ADDR_START; i < viport->num_mac_addresses; i++) { + if (viport->mac_addresses[i].valid) + viport->mac_addresses[i].operation = VNIC_OP_SET_ENTRY; + } + } + } viport_kick(viport); } @@ -152,7 +189,7 @@ failure: viport_failure(viport); } -int viport_set_unicast(struct viport * viport, u8 * address) +int viport_set_unicast(struct viport *viport, u8 *address) { unsigned long flags; int ret = -1; @@ -177,8 +214,8 @@ out: return ret; } -int viport_set_multicast(struct viport * viport, - struct dev_mc_list * mc_list, int mc_count) +int viport_set_multicast(struct viport *viport, + struct dev_mc_list *mc_list, int mc_count) { u32 old_update_list; int i; @@ -195,33 +232,33 @@ int viport_set_multicast(struct viport * viport, if (mc_count > viport->num_mac_addresses - MCAST_ADDR_START) viport->updates |= NEED_LINK_CONFIG | MCAST_OVERFLOW; else { + if (mc_count == 0) { + ret = 0; + goto out; + } if (viport->updates & MCAST_OVERFLOW) { viport->updates &= ~MCAST_OVERFLOW; viport->updates |= NEED_LINK_CONFIG; } - /* brute force algorithm */ - for (i = MCAST_ADDR_START; - i < mc_count + MCAST_ADDR_START; - i++, mc_list = mc_list->next) { + for (i = MCAST_ADDR_START; i < mc_count + MCAST_ADDR_START; + i++, mc_list = mc_list->next) { if (viport->mac_addresses[i].valid && - !memcmp(viport->mac_addresses[i].address, - mc_list->dmi_addr, ETH_ALEN)) - continue; - 
memcpy(viport->mac_addresses[i].address, - mc_list->dmi_addr, ETH_ALEN); - viport->mac_addresses[i].valid = 1; - viport->mac_addresses[i].operation = - VNIC_OP_SET_ENTRY; - } - for (; i < viport->num_mac_addresses; i++) { - if (!viport->mac_addresses[i].valid) - continue; - viport->mac_addresses[i].valid = 0; - viport->mac_addresses[i].operation = - VNIC_OP_SET_ENTRY; - } - if (mc_count) - viport->updates |= NEED_ADDRESS_CONFIG; + !memcmp(viport->mac_addresses[i].address, + mc_list->dmi_addr, ETH_ALEN)) + continue; + memcpy(viport->mac_addresses[i].address, + mc_list->dmi_addr, ETH_ALEN); + viport->mac_addresses[i].valid = 1; + viport->mac_addresses[i].operation = VNIC_OP_SET_ENTRY; + } + for (; i < viport->num_mac_addresses; i++) { + if (!viport->mac_addresses[i].valid) + continue; + viport->mac_addresses[i].valid = 0; + viport->mac_addresses[i].operation = VNIC_OP_SET_ENTRY; + } + if (mc_count) + viport->updates |= NEED_ADDRESS_CONFIG; } if (viport->updates != old_update_list) @@ -232,20 +269,35 @@ out: return ret; } -void viport_get_stats(struct viport * viport, - struct net_device_stats * stats) +static inline void viport_disable_multicast(struct viport *viport) +{ + VIPORT_INFO("turned off IB_MULTICAST\n"); + viport->config->control_config.ib_multicast = 0; + viport->config->control_config.ib_config.conn_data.features_supported &= + __constant_cpu_to_be32((u32)~VNIC_FEAT_INBOUND_IB_MC); + viport->link_state = LINK_RESET; +} + +void viport_get_stats(struct viport *viport, + struct net_device_stats *stats) { unsigned long flags; VIPORT_FUNCTION("viport_get_stats()\n"); - if (jiffies > viport->last_stats_time + - viport->config->stats_interval) { + /* Reference count has been already incremented indicating + * that viport structure is being used, which prevents its + * freeing when this task sleeps + */ + if (time_after(jiffies, + (viport->last_stats_time + viport->config->stats_interval))) { + spin_lock_irqsave(&viport->lock, flags); viport->updates |= 
NEED_STATS; spin_unlock_irqrestore(&viport->lock, flags); viport_kick(viport); wait_event(viport->stats_queue, - !(viport->updates & NEED_STATS)); + !(viport->updates & NEED_STATS) + || (viport->disconnect == 1)); if (viport->stats.ethernet_status) vnic_link_up(viport->vnic, viport->parent); @@ -265,7 +317,7 @@ void viport_get_stats(struct viport * viport, stats->collisions = 0; /* EIOC doesn't track */ } -int viport_xmit_packet(struct viport * viport, struct sk_buff * skb) +int viport_xmit_packet(struct viport *viport, struct sk_buff *skb) { int status = -1; unsigned long flags; @@ -297,6 +349,7 @@ void viport_failure(struct viport *viport) unsigned long flags; VIPORT_FUNCTION("viport_failure()\n"); + vnic_stop_xmit(viport->vnic, viport->parent); spin_lock_irqsave(&viport_list_lock, flags); viport->errored = 1; if (list_empty(&viport->list_ptrs)) { @@ -339,7 +392,7 @@ static void viport_timer_stop(struct viport *viport) static int viport_init_mac_addresses(struct viport *viport) { - struct vnic_address_op *temp; + struct vnic_address_op2 *temp; unsigned long flags; int i; @@ -371,24 +424,53 @@ static int viport_init_mac_addresses(struct viport *viport) return 0; } +static inline void viport_match_mac_address(struct vnic *vnic, + struct viport *viport) +{ + if (vnic && vnic->current_path && + viport == vnic->current_path->viport && + vnic->mac_set && + memcmp(vnic->netdevice->dev_addr, viport->hw_mac_address, ETH_ALEN)) { + VIPORT_ERROR("*** ERROR MAC address mismatch; " + "current = %02x:%02x:%02x:%02x:%02x:%02x " + "From EVIC = %02x:%02x:%02x:%02x:%02x:%02x\n", + vnic->netdevice->dev_addr[0], + vnic->netdevice->dev_addr[1], + vnic->netdevice->dev_addr[2], + vnic->netdevice->dev_addr[3], + vnic->netdevice->dev_addr[4], + vnic->netdevice->dev_addr[5], + viport->hw_mac_address[0], + viport->hw_mac_address[1], + viport->hw_mac_address[2], + viport->hw_mac_address[3], + viport->hw_mac_address[4], + viport->hw_mac_address[5]); + } +} + static int 
viport_handle_init_states(struct viport *viport) { enum link_state old_state; do { - switch(old_state = viport->link_state) { + switch (old_state = viport->link_state) { case LINK_UNINITIALIZED: LINK_STATE("state LINK_UNINITIALIZED\n"); viport->updates = 0; - wake_up(&viport->stats_queue); - /* in case of going to - * uninitialized put this viport - * back on the serviceQ, delete - * it off again. - */ spin_lock_irq(&viport_list_lock); list_del_init(&viport->list_ptrs); spin_unlock_irq(&viport_list_lock); + if (atomic_read(&viport->reference_count)) { + wake_up(&viport->stats_queue); + wait_event(viport->reference_queue, + atomic_read(&viport->reference_count) == 0); + } + /* No more references to viport structure + * so it is safe to delete it by waking disconnect + * queue + */ + viport->disconnect = 0; wake_up(&viport->disconnect_queue); break; @@ -413,8 +495,7 @@ static int viport_handle_init_states(struct viport *viport) ib_dealloc_pd(viport->pd); viport->link_state = LINK_DISCONNECTED; - } - else + } else viport->link_state = LINK_INITIALIZEDATA; break; case LINK_INITIALIZEDATA: @@ -437,9 +518,10 @@ static int viport_handle_init_states(struct viport *viport) static int viport_handle_control_states(struct viport *viport) { enum link_state old_state; + struct vnic *vnic; do { - switch(old_state = viport->link_state) { + switch (old_state = viport->link_state) { case LINK_CONTROLCONNECT: if (vnic_ib_cm_connect(&viport->control.ib_conn)) viport->link_state = LINK_CLEANUPDATA; @@ -474,9 +556,18 @@ static int viport_handle_control_states(struct viport *viport) if (viport_init_mac_addresses(viport)) viport->link_state = LINK_RESETCONTROL; - else + else { viport->link_state = LINK_BEGINDATAPATH; + /* + * Ensure that the current path's MAC + * address matches the one returned by + * EVIC - we've had cases of mismatch + * which then caused havoc. 
+ */ + vnic = viport->parent->parent; + viport_match_mac_address(vnic, viport); + } } if (viport->errored) { @@ -487,7 +578,7 @@ static int viport_handle_control_states(struct viport *viport) default: return -1; } - } while(viport->link_state != old_state); + } while (viport->link_state != old_state); return 0; } @@ -497,7 +588,7 @@ static int viport_handle_data_states(struct viport *viport) enum link_state old_state; do { - switch(old_state = viport->link_state) { + switch (old_state = viport->link_state) { case LINK_BEGINDATAPATH: LINK_STATE("state LINK_BEGINDATAPATH\n"); viport->link_state = LINK_CONFIGDATAPATHREQ; @@ -560,7 +651,7 @@ static int viport_handle_data_states(struct viport *viport) default: return -1; } - } while(viport->link_state != old_state); + } while (viport->link_state != old_state); return 0; } @@ -570,7 +661,7 @@ static int viport_handle_xchgpool_states(struct viport *viport) enum link_state old_state; do { - switch(old_state = viport->link_state) { + switch (old_state = viport->link_state) { case LINK_XCHGPOOLREQ: LINK_STATE("state LINK_XCHGPOOLREQ\n"); if (control_exchange_pools_req(&viport->control, @@ -607,6 +698,17 @@ static int viport_handle_xchgpool_states(struct viport *viport) data_connected(&viport->data); vnic_connected(viport->parent->parent, viport->parent); + if (viport->features_supported & VNIC_FEAT_INBOUND_IB_MC) { + printk(KERN_INFO PFX "%s: Supports Inbound IB " + "Multicast\n", + config_viport_name(viport->config)); + if (mc_data_init(&viport->mc_data, viport, + &viport->config->data_config, + viport->pd)) { + viport_disable_multicast(viport); + break; + } + } spin_lock_irq(&viport->lock); viport->mtu = 1500; viport->flags = 0; @@ -615,13 +717,13 @@ static int viport_handle_xchgpool_states(struct viport *viport) viport->updates |= NEED_LINK_CONFIG; spin_unlock_irq(&viport->lock); viport->link_state = LINK_IDLE; - retry_duration = 0; - total_retry_duration = 0; + viport->retry_duration = 0; + viport->total_retry_duration = 
0; break; default: return -1; } - } while(viport->link_state != old_state); + } while (viport->link_state != old_state); return 0; } @@ -629,9 +731,10 @@ static int viport_handle_xchgpool_states(struct viport *viport) static int viport_handle_idle_states(struct viport *viport) { enum link_state old_state; + int handle_mc_join_compl, handle_mc_join; do { - switch(old_state = viport->link_state) { + switch (old_state = viport->link_state) { case LINK_IDLE: LINK_STATE("state LINK_IDLE\n"); if (viport->config->hb_interval) @@ -650,6 +753,15 @@ static int viport_handle_idle_states(struct viport *viport) } spin_lock_irq(&viport->lock); + handle_mc_join = (viport->updates & NEED_MCAST_JOIN); + handle_mc_join_compl = + (viport->updates & NEED_MCAST_COMPLETION); + /* + * Turn off both flags, the handler functions will + * rearm them if necessary. + */ + viport->updates &= ~(NEED_MCAST_JOIN | NEED_MCAST_COMPLETION); + if (viport->updates & NEED_LINK_CONFIG) { viport_timer_stop(viport); viport->link_state = LINK_CONFIGLINKREQ; @@ -665,11 +777,18 @@ static int viport_handle_idle_states(struct viport *viport) LINK_HEARTBEATREQ; } spin_unlock_irq(&viport->lock); + if (handle_mc_join) { + if (vnic_mc_join(viport)) + viport_disable_multicast(viport); + } + if (handle_mc_join_compl) + vnic_mc_join_handle_completion(viport); + break; default: return -1; } - } while(viport->link_state != old_state); + } while (viport->link_state != old_state); return 0; } @@ -680,7 +799,7 @@ static int viport_handle_config_states(struct viport *viport) int res; do { - switch(old_state = viport->link_state) { + switch (old_state = viport->link_state) { case LINK_CONFIGLINKREQ: LINK_STATE("state LINK_CONFIGLINKREQ\n"); spin_lock_irq(&viport->lock); @@ -744,7 +863,7 @@ static int viport_handle_config_states(struct viport *viport) default: return -1; } - } while(viport->link_state != old_state); + } while (viport->link_state != old_state); return 0; } @@ -754,7 +873,7 @@ static int 
viport_handle_stat_states(struct viport *viport) enum link_state old_state; do { - switch(old_state = viport->link_state) { + switch (old_state = viport->link_state) { case LINK_REPORTSTATREQ: LINK_STATE("state LINK_REPORTSTATREQ\n"); if (control_report_statistics_req(&viport->control)) @@ -785,7 +904,7 @@ static int viport_handle_stat_states(struct viport *viport) default: return -1; } - } while(viport->link_state != old_state); + } while (viport->link_state != old_state); return 0; } @@ -795,7 +914,7 @@ static int viport_handle_heartbeat_states(struct viport *viport) enum link_state old_state; do { - switch(old_state = viport->link_state) { + switch (old_state = viport->link_state) { case LINK_HEARTBEATREQ: LINK_STATE("state LINK_HEARTBEATREQ\n"); if (control_heartbeat_req(&viport->control, @@ -819,7 +938,7 @@ static int viport_handle_heartbeat_states(struct viport *viport) default: return -1; } - } while(viport->link_state != old_state); + } while (viport->link_state != old_state); return 0; } @@ -827,19 +946,37 @@ static int viport_handle_heartbeat_states(struct viport *viport) static int viport_handle_reset_states(struct viport *viport) { enum link_state old_state; + int handle_mc_join_compl = 0, handle_mc_join = 0; do { - switch(old_state = viport->link_state) { + switch (old_state = viport->link_state) { case LINK_RESET: LINK_STATE("state LINK_RESET\n"); viport->errored = 0; spin_lock_irq(&viport->lock); viport->state = VIPORT_DISCONNECTED; + /* + * Turn off both flags, the handler functions will + * rearm them if necessary + */ + viport->updates &= ~(NEED_MCAST_JOIN | NEED_MCAST_COMPLETION); + spin_unlock_irq(&viport->lock); vnic_link_down(viport->vnic, viport->parent); printk(KERN_INFO PFX "%s: connection lost\n", config_viport_name(viport->config)); + if (handle_mc_join) { + if (vnic_mc_join(viport)) + viport_disable_multicast(viport); + } + if (handle_mc_join_compl) + vnic_mc_join_handle_completion(viport); + if (viport->features_supported & 
VNIC_FEAT_INBOUND_IB_MC) { + vnic_mc_leave(viport); + vnic_mc_data_cleanup(&viport->mc_data); + } + if (control_reset_req(&viport->control)) viport->link_state = LINK_DATADISCONNECT; else @@ -879,7 +1016,7 @@ static int viport_handle_reset_states(struct viport *viport) default: return -1; } - } while(viport->link_state != old_state); + } while (viport->link_state != old_state); return 0; } @@ -889,7 +1026,7 @@ static int viport_handle_disconn_states(struct viport *viport) enum link_state old_state; do { - switch(old_state = viport->link_state) { + switch (old_state = viport->link_state) { case LINK_DATADISCONNECT: LINK_STATE("state LINK_DATADISCONNECT\n"); data_disconnect(&viport->data); @@ -907,10 +1044,8 @@ static int viport_handle_disconn_states(struct viport *viport) case LINK_CLEANUPCONTROL: LINK_STATE("state LINK_CLEANUPCONTROL\n"); spin_lock_irq(&viport->lock); - if (viport->mac_addresses) { - kfree(viport->mac_addresses); - viport->mac_addresses = NULL; - } + kfree(viport->mac_addresses); + viport->mac_addresses = NULL; spin_unlock_irq(&viport->lock); control_cleanup(&viport->control); ib_dealloc_pd(viport->pd); @@ -928,31 +1063,31 @@ static int viport_handle_disconn_states(struct viport *viport) * Check if the initial retry interval has crossed * 20 seconds. * The retry interval is initially 5 seconds which - * is incremented by 5. Once it is 20 the interval - * is fixed to 20 seconds till 10 minutes, + * is incremented by 5. 
Once it is 20 the interval + * is fixed to 20 seconds till 10 minutes, * after which retrying is stopped */ - if (retry_duration < MAX_RETRY_INTERVAL) - retry_duration += RETRY_INCREMENT; + if (viport->retry_duration < MAX_RETRY_INTERVAL) + viport->retry_duration += + RETRY_INCREMENT; - total_retry_duration += retry_duration; + viport->total_retry_duration += + viport->retry_duration; - if (total_retry_duration >= + if (viport->total_retry_duration >= MAX_CONNECT_RETRY_TIMEOUT) { viport->link_state = LINK_UNINITIALIZED; printk("Timed out after retrying" " for retry_duration %d msecs\n" - , retry_duration); - } - else { + , viport->total_retry_duration); + } else { viport->connect = DELAY; viport->link_state = LINK_RETRYWAIT; } viport_timer(viport, - msecs_to_jiffies(retry_duration)); - } - else { - u32 duration = 500 + ((net_random()) & 0x1FF); + msecs_to_jiffies(viport->retry_duration)); + } else { + u32 duration = 5000 + ((net_random()) & 0x1FF); if (!viport->parent->is_primary_path) duration += 0x1ff; viport_timer(viport, @@ -970,9 +1105,8 @@ static int viport_handle_disconn_states(struct viport *viport) viport_timer_stop(viport); viport->link_state = LINK_UNINITIALIZED; } else if (viport->connect == DELAY) { - if (!viport->timer_active) { + if (!viport->timer_active) viport->link_state = LINK_INITIALIZE; - } } else if (viport->connect == NOW) { viport_timer_stop(viport); viport->link_state = LINK_INITIALIZE; @@ -982,7 +1116,7 @@ static int viport_handle_disconn_states(struct viport *viport) viport->stats.ethernet_status = 0; viport->updates = 0; wake_up(&viport->stats_queue); - if (viport->disconnect !=0) { + if (viport->disconnect != 0) { viport_timer_stop(viport); viport->link_state = LINK_UNINITIALIZED; } @@ -991,7 +1125,7 @@ static int viport_handle_disconn_states(struct viport *viport) default: return -1; } - } while(viport->link_state != old_state); + } while (viport->link_state != old_state); return 0; } @@ -1002,7 +1136,6 @@ static int 
viport_statemachine(void *context) enum link_state old_link_state; VIPORT_FUNCTION("viport_statemachine()\n"); - daemonize("vnic_viport"); while (!viport_thread_end || !list_empty(&viport_list)) { wait_event_interruptible(viport_queue, !list_empty(&viport_list) @@ -1056,11 +1189,14 @@ int viport_start(void) { VIPORT_FUNCTION("viport_start()\n"); - viport_thread = kernel_thread(viport_statemachine, NULL, 0); - if (viport_thread < 0) { + spin_lock_init(&viport_list_lock); + viport_thread = kthread_run(viport_statemachine, NULL, + "qlgc_vnic_viport_s_m"); + if (IS_ERR(viport_thread)) { printk(KERN_WARNING PFX "Could not create viport_thread;" - " error %d\n", viport_thread); - return viport_thread; + " error %d\n", (int) PTR_ERR(viport_thread)); + viport_thread = NULL; + return 1; } return 0; @@ -1069,10 +1205,10 @@ int viport_start(void) void viport_cleanup(void) { VIPORT_FUNCTION("viport_cleanup()\n"); - if (viport_thread > 0) { + if (viport_thread) { viport_thread_end = 1; wake_up(&viport_queue); wait_for_completion(&viport_thread_exit); - viport_thread = -1; + viport_thread = NULL; } } diff --git a/drivers/infiniband/ulp/qlgc_vnic/vnic_viport.h b/drivers/infiniband/ulp/qlgc_vnic/vnic_viport.h index b144fac..70cdc9f 100644 --- a/drivers/infiniband/ulp/qlgc_vnic/vnic_viport.h +++ b/drivers/infiniband/ulp/qlgc_vnic/vnic_viport.h @@ -35,6 +35,7 @@ #include "vnic_control.h" #include "vnic_data.h" +#include "vnic_multicast.h" enum viport_state { VIPORT_DISCONNECTED = 0, @@ -90,10 +91,12 @@ enum { #define current_mac_address mac_addresses[UNICAST_ADDR].address enum { - NEED_STATS = 0x00000001, - NEED_ADDRESS_CONFIG = 0x00000002, - NEED_LINK_CONFIG = 0x00000004, - MCAST_OVERFLOW = 0x00000008 + NEED_STATS = 0x00000001, + NEED_ADDRESS_CONFIG = 0x00000002, + NEED_LINK_CONFIG = 0x00000004, + MCAST_OVERFLOW = 0x00000008, + NEED_MCAST_COMPLETION = 0x00000010, + NEED_MCAST_JOIN = 0x00000020 }; struct viport { @@ -109,12 +112,12 @@ struct viport { enum link_state link_state; 
struct vnic_cmd_report_stats_rsp stats; wait_queue_head_t stats_queue; - u32 last_stats_time; + unsigned long last_stats_time; u32 features_supported; u8 hw_mac_address[ETH_ALEN]; u16 default_vlan; u16 num_mac_addresses; - struct vnic_address_op *mac_addresses; + struct vnic_address_op2 *mac_addresses; u32 updates; u16 flags; u16 new_flags; @@ -127,6 +130,12 @@ struct viport { wait_queue_head_t disconnect_queue; int timer_active; struct timer_list timer; + u32 retry_duration; + u32 total_retry_duration; + atomic_t reference_count; + wait_queue_head_t reference_queue; + struct mc_info mc_info; + struct mc_data mc_data; }; int viport_start(void); @@ -136,7 +145,6 @@ struct viport *viport_allocate(struct viport_config *config); void viport_free(struct viport *viport); void viport_connect(struct viport *viport, int delay); -void viport_disconnect(struct viport *viport); void viport_set_link(struct viport *viport, u16 flags, u16 mtu); void viport_get_stats(struct viport *viport, @@ -146,7 +154,7 @@ void viport_kick(struct viport *viport); void viport_failure(struct viport *viport); -int viport_set_unicast(struct viport *viport, u8 * address); +int viport_set_unicast(struct viport *viport, u8 *address); int viport_set_multicast(struct viport *viport, struct dev_mc_list *mc_list, int mc_count);