svn commit: r289550 - head/sys/dev/vnic
Zbigniew Bodek
zbb at FreeBSD.org
Sun Oct 18 21:39:16 UTC 2015
Author: zbb
Date: Sun Oct 18 21:39:15 2015
New Revision: 289550
URL: https://svnweb.freebsd.org/changeset/base/289550
Log:
Raw import of ThunderX VNIC networking driver components
This import brings following components of the Linux driver:
- Thunder BGX (programmable MAC)
- Physical Function driver
- Virtual Function driver
- Headers
Revision: 1.0
Obtained from: Cavium
License information: Cavium provided these files under BSD license
Added:
head/sys/dev/vnic/
head/sys/dev/vnic/nic.h (contents, props changed)
head/sys/dev/vnic/nic_main.c (contents, props changed)
head/sys/dev/vnic/nic_reg.h (contents, props changed)
head/sys/dev/vnic/nicvf_main.c (contents, props changed)
head/sys/dev/vnic/nicvf_queues.c (contents, props changed)
head/sys/dev/vnic/nicvf_queues.h (contents, props changed)
head/sys/dev/vnic/q_struct.h (contents, props changed)
head/sys/dev/vnic/thunder_bgx.c (contents, props changed)
head/sys/dev/vnic/thunder_bgx.h (contents, props changed)
Added: head/sys/dev/vnic/nic.h
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/sys/dev/vnic/nic.h Sun Oct 18 21:39:15 2015 (r289550)
@@ -0,0 +1,539 @@
+/*
+ * Copyright (C) 2015 Cavium Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
+
+#ifndef NIC_H
+#define NIC_H
+
+#include <linux/netdevice.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include "thunder_bgx.h"
+
+/* PCI device IDs */
+#define PCI_DEVICE_ID_THUNDER_NIC_PF 0xA01E
+#define PCI_DEVICE_ID_THUNDER_PASS1_NIC_VF 0x0011
+#define PCI_DEVICE_ID_THUNDER_NIC_VF 0xA034
+#define PCI_DEVICE_ID_THUNDER_BGX 0xA026
+
+/* PCI BAR nos */
+#define PCI_CFG_REG_BAR_NUM 0
+#define PCI_MSIX_REG_BAR_NUM 4
+
+/* NIC SRIOV VF count */
+#define MAX_NUM_VFS_SUPPORTED 128
+#define DEFAULT_NUM_VF_ENABLED 8
+
+#define NIC_TNS_BYPASS_MODE 0
+#define NIC_TNS_MODE 1
+
+/* NIC priv flags */
+#define NIC_SRIOV_ENABLED BIT(0)
+#define NIC_TNS_ENABLED BIT(1)
+
+/* VNIC HW optimization features */
+#define VNIC_RSS_SUPPORT
+#define VNIC_MULTI_QSET_SUPPORT
+
+/* Min/Max packet size */
+#define NIC_HW_MIN_FRS 64
+#define NIC_HW_MAX_FRS 9200 /* 9216 max packet including FCS */
+
+/* Max pkinds */
+#define NIC_MAX_PKIND 16
+
+/* Rx Channels */
+/* Receive channel configuration in TNS bypass mode
+ * Below is configuration in TNS bypass mode
+ * BGX0-LMAC0-CHAN0 - VNIC CHAN0
+ * BGX0-LMAC1-CHAN0 - VNIC CHAN16
+ * ...
+ * BGX1-LMAC0-CHAN0 - VNIC CHAN128
+ * ...
+ * BGX1-LMAC3-CHAN0 - VNIC CHAN176
+ */
+#define NIC_INTF_COUNT 2 /* Interfaces btw VNIC and TNS/BGX */
+#define NIC_CHANS_PER_INF 128
+#define NIC_MAX_CHANS (NIC_INTF_COUNT * NIC_CHANS_PER_INF)
+#define NIC_CPI_COUNT 2048 /* No of channel parse indices */
+
+/* TNS bypass mode: 1-1 mapping between VNIC and BGX:LMAC */
+#define NIC_MAX_BGX MAX_BGX_PER_CN88XX
+#define NIC_CPI_PER_BGX (NIC_CPI_COUNT / NIC_MAX_BGX)
+#define NIC_MAX_CPI_PER_LMAC 64 /* Max when CPI_ALG is IP diffserv */
+#define NIC_RSSI_PER_BGX (NIC_RSSI_COUNT / NIC_MAX_BGX)
+
+/* Tx scheduling */
+#define NIC_MAX_TL4 1024
+#define NIC_MAX_TL4_SHAPERS 256 /* 1 shaper for 4 TL4s */
+#define NIC_MAX_TL3 256
+#define NIC_MAX_TL3_SHAPERS 64 /* 1 shaper for 4 TL3s */
+#define NIC_MAX_TL2 64
+#define NIC_MAX_TL2_SHAPERS 2 /* 1 shaper for 32 TL2s */
+#define NIC_MAX_TL1 2
+
+/* TNS bypass mode */
+#define NIC_TL2_PER_BGX 32
+#define NIC_TL4_PER_BGX (NIC_MAX_TL4 / NIC_MAX_BGX)
+#define NIC_TL4_PER_LMAC (NIC_MAX_TL4 / NIC_CHANS_PER_INF)
+
+/* NIC VF Interrupts */
+#define NICVF_INTR_CQ 0
+#define NICVF_INTR_SQ 1
+#define NICVF_INTR_RBDR 2
+#define NICVF_INTR_PKT_DROP 3
+#define NICVF_INTR_TCP_TIMER 4
+#define NICVF_INTR_MBOX 5
+#define NICVF_INTR_QS_ERR 6
+
+#define NICVF_INTR_CQ_SHIFT 0
+#define NICVF_INTR_SQ_SHIFT 8
+#define NICVF_INTR_RBDR_SHIFT 16
+#define NICVF_INTR_PKT_DROP_SHIFT 20
+#define NICVF_INTR_TCP_TIMER_SHIFT 21
+#define NICVF_INTR_MBOX_SHIFT 22
+#define NICVF_INTR_QS_ERR_SHIFT 23
+
+#define NICVF_INTR_CQ_MASK (0xFF << NICVF_INTR_CQ_SHIFT)
+#define NICVF_INTR_SQ_MASK (0xFF << NICVF_INTR_SQ_SHIFT)
+#define NICVF_INTR_RBDR_MASK (0x03 << NICVF_INTR_RBDR_SHIFT)
+#define NICVF_INTR_PKT_DROP_MASK BIT(NICVF_INTR_PKT_DROP_SHIFT)
+#define NICVF_INTR_TCP_TIMER_MASK BIT(NICVF_INTR_TCP_TIMER_SHIFT)
+#define NICVF_INTR_MBOX_MASK BIT(NICVF_INTR_MBOX_SHIFT)
+#define NICVF_INTR_QS_ERR_MASK BIT(NICVF_INTR_QS_ERR_SHIFT)
+
+/* MSI-X interrupts */
+#define NIC_PF_MSIX_VECTORS 10
+#define NIC_VF_MSIX_VECTORS 20
+
+#define NIC_PF_INTR_ID_ECC0_SBE 0
+#define NIC_PF_INTR_ID_ECC0_DBE 1
+#define NIC_PF_INTR_ID_ECC1_SBE 2
+#define NIC_PF_INTR_ID_ECC1_DBE 3
+#define NIC_PF_INTR_ID_ECC2_SBE 4
+#define NIC_PF_INTR_ID_ECC2_DBE 5
+#define NIC_PF_INTR_ID_ECC3_SBE 6
+#define NIC_PF_INTR_ID_ECC3_DBE 7
+#define NIC_PF_INTR_ID_MBOX0 8
+#define NIC_PF_INTR_ID_MBOX1 9
+
+/* Global timer for CQ timer thresh interrupts
+ * Calculated for SCLK of 700Mhz
+ * value written should be a 1/16th of what is expected
+ *
+ * 1 tick per 0.05usec = value of 2.2
+ * This 10% would be covered in CQ timer thresh value
+ */
+#define NICPF_CLK_PER_INT_TICK 2
+
+/* Time to wait before we decide that a SQ is stuck.
+ *
+ * Since both pkt rx and tx notifications are done with same CQ,
+ * when packets are being received at very high rate (eg: L2 forwarding)
+ * then freeing transmitted skbs will be delayed and watchdog
+ * will kick in, resetting interface. Hence keeping this value high.
+ */
+#define NICVF_TX_TIMEOUT (50 * HZ)
+
+struct nicvf_cq_poll {
+ struct nicvf *nicvf;
+ u8 cq_idx; /* Completion queue index */
+ struct napi_struct napi;
+};
+
+#define NIC_RSSI_COUNT 4096 /* Total no of RSS indices */
+#define NIC_MAX_RSS_HASH_BITS 8
+#define NIC_MAX_RSS_IDR_TBL_SIZE (1 << NIC_MAX_RSS_HASH_BITS)
+#define RSS_HASH_KEY_SIZE 5 /* 320 bit key */
+
+#ifdef VNIC_RSS_SUPPORT
+struct nicvf_rss_info {
+ bool enable;
+#define RSS_L2_EXTENDED_HASH_ENA BIT(0)
+#define RSS_IP_HASH_ENA BIT(1)
+#define RSS_TCP_HASH_ENA BIT(2)
+#define RSS_TCP_SYN_DIS BIT(3)
+#define RSS_UDP_HASH_ENA BIT(4)
+#define RSS_L4_EXTENDED_HASH_ENA BIT(5)
+#define RSS_ROCE_ENA BIT(6)
+#define RSS_L3_BI_DIRECTION_ENA BIT(7)
+#define RSS_L4_BI_DIRECTION_ENA BIT(8)
+ u64 cfg; /* NOTE(review): presumably an OR of the RSS_* bits above - confirm */
+ u8 hash_bits; /* NOTE(review): presumably at most NIC_MAX_RSS_HASH_BITS - confirm */
+ u16 rss_size;
+ u8 ind_tbl[NIC_MAX_RSS_IDR_TBL_SIZE]; /* room for 1 << NIC_MAX_RSS_HASH_BITS entries */
+ u64 key[RSS_HASH_KEY_SIZE]; /* RSS_HASH_KEY_SIZE 64-bit words, i.e. the 320 bit key */
+} ____cacheline_aligned_in_smp;
+#endif
+
+enum rx_stats_reg_offset {
+ RX_OCTS = 0x0,
+ RX_UCAST = 0x1,
+ RX_BCAST = 0x2,
+ RX_MCAST = 0x3,
+ RX_RED = 0x4,
+ RX_RED_OCTS = 0x5,
+ RX_ORUN = 0x6,
+ RX_ORUN_OCTS = 0x7,
+ RX_FCS = 0x8,
+ RX_L2ERR = 0x9,
+ RX_DRP_BCAST = 0xa,
+ RX_DRP_MCAST = 0xb,
+ RX_DRP_L3BCAST = 0xc,
+ RX_DRP_L3MCAST = 0xd,
+ RX_STATS_ENUM_LAST,
+};
+
+enum tx_stats_reg_offset {
+ TX_OCTS = 0x0,
+ TX_UCAST = 0x1,
+ TX_BCAST = 0x2,
+ TX_MCAST = 0x3,
+ TX_DROP = 0x4,
+ TX_STATS_ENUM_LAST,
+};
+
+struct nicvf_hw_stats {
+ u64 rx_bytes;
+ u64 rx_ucast_frames;
+ u64 rx_bcast_frames;
+ u64 rx_mcast_frames;
+ u64 rx_fcs_errors;
+ u64 rx_l2_errors;
+ u64 rx_drop_red;
+ u64 rx_drop_red_bytes;
+ u64 rx_drop_overrun;
+ u64 rx_drop_overrun_bytes;
+ u64 rx_drop_bcast;
+ u64 rx_drop_mcast;
+ u64 rx_drop_l3_bcast;
+ u64 rx_drop_l3_mcast;
+ u64 rx_bgx_truncated_pkts;
+ u64 rx_jabber_errs;
+ u64 rx_fcs_errs;
+ u64 rx_bgx_errs;
+ u64 rx_prel2_errs;
+ u64 rx_l2_hdr_malformed;
+ u64 rx_oversize;
+ u64 rx_undersize;
+ u64 rx_l2_len_mismatch;
+ u64 rx_l2_pclp;
+ u64 rx_ip_ver_errs;
+ u64 rx_ip_csum_errs;
+ u64 rx_ip_hdr_malformed;
+ u64 rx_ip_payload_malformed;
+ u64 rx_ip_ttl_errs;
+ u64 rx_l3_pclp;
+ u64 rx_l4_malformed;
+ u64 rx_l4_csum_errs;
+ u64 rx_udp_len_errs;
+ u64 rx_l4_port_errs;
+ u64 rx_tcp_flag_errs;
+ u64 rx_tcp_offset_errs;
+ u64 rx_l4_pclp;
+ u64 rx_truncated_pkts;
+
+ u64 tx_bytes_ok;
+ u64 tx_ucast_frames_ok;
+ u64 tx_bcast_frames_ok;
+ u64 tx_mcast_frames_ok;
+ u64 tx_drops;
+};
+
+struct nicvf_drv_stats {
+ /* Rx */
+ u64 rx_frames_ok;
+ u64 rx_frames_64;
+ u64 rx_frames_127;
+ u64 rx_frames_255;
+ u64 rx_frames_511;
+ u64 rx_frames_1023;
+ u64 rx_frames_1518;
+ u64 rx_frames_jumbo;
+ u64 rx_drops;
+
+ /* Tx */
+ u64 tx_frames_ok;
+ u64 tx_drops;
+ u64 tx_tso;
+ u64 txq_stop;
+ u64 txq_wake;
+};
+
+struct nicvf {
+ struct nicvf *pnicvf;
+ struct net_device *netdev;
+ struct pci_dev *pdev;
+ u8 vf_id;
+ u8 node;
+ bool tns_mode:1;
+ bool sqs_mode:1;
+ bool loopback_supported:1;
+ u16 mtu;
+ struct queue_set *qs;
+#ifdef VNIC_MULTI_QSET_SUPPORT
+#define MAX_SQS_PER_VF_SINGLE_NODE 5
+#define MAX_SQS_PER_VF 11
+ u8 sqs_id;
+ u8 sqs_count; /* Secondary Qset count */
+ struct nicvf *snicvf[MAX_SQS_PER_VF];
+#endif
+ u8 rx_queues;
+ u8 tx_queues;
+ u8 max_queues;
+ void __iomem *reg_base;
+ bool link_up;
+ u8 duplex;
+ u32 speed;
+ struct page *rb_page;
+ u32 rb_page_offset;
+ bool rb_alloc_fail;
+ bool rb_work_scheduled;
+ struct delayed_work rbdr_work;
+ struct tasklet_struct rbdr_task;
+ struct tasklet_struct qs_err_task;
+ struct tasklet_struct cq_task;
+ struct nicvf_cq_poll *napi[8];
+#ifdef VNIC_RSS_SUPPORT
+ struct nicvf_rss_info rss_info;
+#endif
+ u8 cpi_alg;
+ /* Interrupt coalescing settings */
+ u32 cq_coalesce_usecs;
+
+ u32 msg_enable;
+ struct nicvf_hw_stats hw_stats;
+ struct nicvf_drv_stats drv_stats;
+ struct bgx_stats bgx_stats;
+ struct work_struct reset_task;
+
+ /* MSI-X */
+ bool msix_enabled;
+ u8 num_vec;
+ struct msix_entry msix_entries[NIC_VF_MSIX_VECTORS];
+ char irq_name[NIC_VF_MSIX_VECTORS][20];
+ bool irq_allocated[NIC_VF_MSIX_VECTORS];
+
+ /* VF <-> PF mailbox communication */
+ bool pf_acked;
+ bool pf_nacked;
+} ____cacheline_aligned_in_smp;
+
+/* PF <--> VF Mailbox communication
+ * Eight 64bit registers are shared between PF and VF.
+ * Separate set for each VF.
+ * Writing '1' into last register mbx7 means end of message.
+ */
+
+/* PF <--> VF mailbox communication */
+#define NIC_PF_VF_MAILBOX_SIZE 2
+#define NIC_MBOX_MSG_TIMEOUT 2000 /* ms */
+
+/* Mailbox message types */
+#define NIC_MBOX_MSG_READY 0x01 /* Is PF ready to rcv msgs */
+#define NIC_MBOX_MSG_ACK 0x02 /* ACK the message received */
+#define NIC_MBOX_MSG_NACK 0x03 /* NACK the message received */
+#define NIC_MBOX_MSG_QS_CFG 0x04 /* Configure Qset */
+#define NIC_MBOX_MSG_RQ_CFG 0x05 /* Configure receive queue */
+#define NIC_MBOX_MSG_SQ_CFG 0x06 /* Configure Send queue */
+#define NIC_MBOX_MSG_RQ_DROP_CFG 0x07 /* Configure receive queue drop */
+#define NIC_MBOX_MSG_SET_MAC 0x08 /* Add MAC ID to DMAC filter */
+#define NIC_MBOX_MSG_SET_MAX_FRS 0x09 /* Set max frame size */
+#define NIC_MBOX_MSG_CPI_CFG 0x0A /* Config CPI, RSSI */
+#define NIC_MBOX_MSG_RSS_SIZE 0x0B /* Get RSS indir_tbl size */
+#define NIC_MBOX_MSG_RSS_CFG 0x0C /* Config RSS table */
+#define NIC_MBOX_MSG_RSS_CFG_CONT 0x0D /* RSS config continuation */
+#define NIC_MBOX_MSG_RQ_BP_CFG 0x0E /* RQ backpressure config */
+#define NIC_MBOX_MSG_RQ_SW_SYNC 0x0F /* Flush inflight pkts to RQ */
+#define NIC_MBOX_MSG_BGX_STATS 0x10 /* Get stats from BGX */
+#define NIC_MBOX_MSG_BGX_LINK_CHANGE 0x11 /* BGX:LMAC link status */
+#define NIC_MBOX_MSG_ALLOC_SQS 0x12 /* Allocate secondary Qset */
+#define NIC_MBOX_MSG_NICVF_PTR 0x13 /* Send nicvf ptr to PF */
+#define NIC_MBOX_MSG_PNICVF_PTR 0x14 /* Get primary qset nicvf ptr */
+#define NIC_MBOX_MSG_SNICVF_PTR 0x15 /* Send secondary qset nicvf ptr to PVF */
+#define NIC_MBOX_MSG_LOOPBACK 0x16 /* Set interface in loopback */
+#define NIC_MBOX_MSG_CFG_DONE 0xF0 /* VF configuration done */
+#define NIC_MBOX_MSG_SHUTDOWN 0xF1 /* VF is being shutdown */
+
+struct nic_cfg_msg {
+ u8 msg;
+ u8 vf_id;
+ u8 node_id;
+ bool tns_mode:1;
+ bool sqs_mode:1;
+ bool loopback_supported:1;
+ u8 mac_addr[ETH_ALEN];
+};
+
+/* Qset configuration */
+struct qs_cfg_msg {
+ u8 msg;
+ u8 num;
+ u8 sqs_count;
+ u64 cfg;
+};
+
+/* Receive queue configuration */
+struct rq_cfg_msg {
+ u8 msg;
+ u8 qs_num;
+ u8 rq_num;
+ u64 cfg;
+};
+
+/* Send queue configuration */
+struct sq_cfg_msg {
+ u8 msg;
+ u8 qs_num;
+ u8 sq_num;
+ bool sqs_mode;
+ u64 cfg;
+};
+
+/* Set VF's MAC address */
+struct set_mac_msg {
+ u8 msg;
+ u8 vf_id;
+ u8 mac_addr[ETH_ALEN];
+};
+
+/* Set Maximum frame size */
+struct set_frs_msg {
+ u8 msg;
+ u8 vf_id;
+ u16 max_frs;
+};
+
+/* Set CPI algorithm type */
+struct cpi_cfg_msg {
+ u8 msg;
+ u8 vf_id;
+ u8 rq_cnt;
+ u8 cpi_alg;
+};
+
+/* Get RSS table size */
+struct rss_sz_msg {
+ u8 msg;
+ u8 vf_id;
+ u16 ind_tbl_size;
+};
+
+/* Set RSS configuration */
+struct rss_cfg_msg {
+ u8 msg;
+ u8 vf_id;
+ u8 hash_bits;
+ u8 tbl_len;
+ u8 tbl_offset;
+#define RSS_IND_TBL_LEN_PER_MBX_MSG 8
+ u8 ind_tbl[RSS_IND_TBL_LEN_PER_MBX_MSG];
+};
+
+struct bgx_stats_msg {
+ u8 msg;
+ u8 vf_id;
+ u8 rx;
+ u8 idx;
+ u64 stats;
+};
+
+/* Physical interface link status */
+struct bgx_link_status {
+ u8 msg;
+ u8 link_up;
+ u8 duplex;
+ u32 speed;
+};
+
+#ifdef VNIC_MULTI_QSET_SUPPORT
+/* Get Extra Qset IDs */
+struct sqs_alloc {
+ u8 msg;
+ u8 vf_id;
+ u8 qs_count;
+};
+
+struct nicvf_ptr {
+ u8 msg;
+ u8 vf_id;
+ bool sqs_mode;
+ u8 sqs_id;
+ u64 nicvf;
+};
+#endif
+
+/* Set interface in loopback mode */
+struct set_loopback {
+ u8 msg;
+ u8 vf_id;
+ bool enable;
+};
+
+/* 128 bit shared memory between PF and each VF */
+union nic_mbx {
+ struct { u8 msg; } msg;
+ struct nic_cfg_msg nic_cfg;
+ struct qs_cfg_msg qs;
+ struct rq_cfg_msg rq;
+ struct sq_cfg_msg sq;
+ struct set_mac_msg mac;
+ struct set_frs_msg frs;
+ struct cpi_cfg_msg cpi_cfg;
+ struct rss_sz_msg rss_size;
+ struct rss_cfg_msg rss_cfg;
+ struct bgx_stats_msg bgx_stats;
+ struct bgx_link_status link_status;
+#ifdef VNIC_MULTI_QSET_SUPPORT
+ struct sqs_alloc sqs_alloc;
+ struct nicvf_ptr nicvf;
+#endif
+ struct set_loopback lbk;
+};
+
+#define NIC_NODE_ID_MASK 0x03
+#define NIC_NODE_ID_SHIFT 44
+
+static inline int nic_get_node_id(struct pci_dev *pdev)
+{
+	u64 cfg_bar_start = pci_resource_start(pdev, PCI_CFG_REG_BAR_NUM); /* start of the config register BAR */
+	return ((cfg_bar_start >> NIC_NODE_ID_SHIFT) & NIC_NODE_ID_MASK); /* node id = address bits 45:44 */
+}
+
+int nicvf_set_real_num_queues(struct net_device *netdev,
+ int tx_queues, int rx_queues);
+int nicvf_open(struct net_device *netdev);
+int nicvf_stop(struct net_device *netdev);
+int nicvf_send_msg_to_pf(struct nicvf *vf, union nic_mbx *mbx);
+void nicvf_config_rss(struct nicvf *nic);
+void nicvf_set_rss_key(struct nicvf *nic);
+void nicvf_set_ethtool_ops(struct net_device *netdev);
+void nicvf_update_stats(struct nicvf *nic);
+void nicvf_update_lmac_stats(struct nicvf *nic);
+
+#endif /* NIC_H */
Added: head/sys/dev/vnic/nic_main.c
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/sys/dev/vnic/nic_main.c Sun Oct 18 21:39:15 2015 (r289550)
@@ -0,0 +1,1192 @@
+/*
+ * Copyright (C) 2015 Cavium Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/etherdevice.h>
+#include <linux/of.h>
+
+#include "nic_reg.h"
+#include "nic.h"
+#include "q_struct.h"
+#include "thunder_bgx.h"
+
+#define DRV_NAME "thunder-nic"
+#define DRV_VERSION "1.0"
+
+/* Private state of the NIC physical function (PF) driver instance */
+struct nicpf {
+	struct pci_dev *pdev;
+	u8 rev_id;
+	u8 node;
+	unsigned int flags;
+	u8 num_vf_en; /* No of VF enabled */
+	bool vf_enabled[MAX_NUM_VFS_SUPPORTED];
+	void __iomem *reg_base; /* Register start address */
+#ifdef VNIC_MULTI_QSET_SUPPORT
+	u8 num_sqs_en; /* Secondary qsets enabled */
+	u64 nicvf[MAX_NUM_VFS_SUPPORTED];
+	u8 vf_sqs[MAX_NUM_VFS_SUPPORTED][MAX_SQS_PER_VF];
+	u8 pqs_vf[MAX_NUM_VFS_SUPPORTED];
+	bool sqs_used[MAX_NUM_VFS_SUPPORTED];
+#endif
+	struct pkind_cfg pkind;
+/* vf_lmac_map entry layout: BGX index in bits 7:4, LMAC index in bits 3:0.
+ * Every macro argument is parenthesized so that compound expressions
+ * (e.g. "a | b" or "x + 1") expand with the intended precedence.
+ */
+#define NIC_SET_VF_LMAC_MAP(bgx, lmac) ((((bgx) & 0xF) << 4) | ((lmac) & 0xF))
+#define NIC_GET_BGX_FROM_VF_LMAC_MAP(map) (((map) >> 4) & 0xF)
+#define NIC_GET_LMAC_FROM_VF_LMAC_MAP(map) ((map) & 0xF)
+	u8 vf_lmac_map[MAX_LMAC];
+	struct delayed_work dwork;
+	struct workqueue_struct *check_link;
+	u8 link[MAX_LMAC];
+	u8 duplex[MAX_LMAC];
+	u32 speed[MAX_LMAC];
+	u16 cpi_base[MAX_NUM_VFS_SUPPORTED];
+	u16 rss_ind_tbl_size;
+	bool mbx_lock[MAX_NUM_VFS_SUPPORTED];
+
+	/* MSI-X */
+	bool msix_enabled;
+	u8 num_vec;
+	struct msix_entry msix_entries[NIC_PF_MSIX_VECTORS];
+	bool irq_allocated[NIC_PF_MSIX_VECTORS];
+};
+
+/* Supported devices */
+static const struct pci_device_id nic_id_table[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_NIC_PF) },
+ { 0, } /* end of table */
+};
+
+MODULE_AUTHOR("Sunil Goutham");
+MODULE_DESCRIPTION("Cavium Thunder NIC Physical Function Driver");
+MODULE_VERSION(DRV_VERSION);
+MODULE_DEVICE_TABLE(pci, nic_id_table);
+
+/* The Cavium ThunderX network controller can *only* be found in SoCs
+ * containing the ThunderX ARM64 CPU implementation. All accesses to the device
+ * registers on this platform are implicitly strongly ordered with respect
+ * to memory accesses. So writeq_relaxed() and readq_relaxed() are safe to use
+ * with no memory barriers in this driver. The readq()/writeq() functions add
+ * explicit ordering operations, which in this case are redundant and only
+ * add overhead.
+ */
+
+/* Register read/write APIs */
+/* Store the 64-bit @val into the PF register located @offset bytes past
+ * nic->reg_base, using the relaxed (barrier-free) accessor.
+ */
+static void nic_reg_write(struct nicpf *nic, u64 offset, u64 val)
+{
+	void __iomem *reg = nic->reg_base + offset;
+
+	writeq_relaxed(val, reg);
+}
+
+/* Load and return the 64-bit PF register located @offset bytes past
+ * nic->reg_base, using the relaxed (barrier-free) accessor.
+ */
+static u64 nic_reg_read(struct nicpf *nic, u64 offset)
+{
+	void __iomem *reg = nic->reg_base + offset;
+
+	return readq_relaxed(reg);
+}
+
+/* PF -> VF mailbox communication APIs */
+/* Enable mailbox interrupt for all 128 VFs: set every bit in the two
+ * consecutive 64-bit NIC_PF_MAILBOX_ENA_W1S registers.
+ */
+static void nic_enable_mbx_intr(struct nicpf *nic)
+{
+	int reg_idx;
+
+	for (reg_idx = 0; reg_idx < 2; reg_idx++)
+		nic_reg_write(nic,
+			      NIC_PF_MAILBOX_ENA_W1S + reg_idx * sizeof(u64),
+			      ~0ull);
+}
+
+/* Write bit @vf to the NIC_PF_MAILBOX_INT register selected by @mbx_reg
+ * (the registers are 8 bytes apart).
+ */
+static void nic_clear_mbx_intr(struct nicpf *nic, int vf, int mbx_reg)
+{
+	int reg_offset = mbx_reg << 3;
+
+	nic_reg_write(nic, NIC_PF_MAILBOX_INT + reg_offset, BIT_ULL(vf));
+}
+
+/* Return the register offset of the first mailbox register of VF @vf:
+ * NIC_PF_VF_0_127_MAILBOX_0_1 plus the VF number shifted by
+ * NIC_VF_NUM_SHIFT.
+ */
+static u64 nic_get_mbx_addr(int vf)
+{
+	int vf_offset = vf << NIC_VF_NUM_SHIFT;
+
+	return NIC_PF_VF_0_127_MAILBOX_0_1 + vf_offset;
+}
+
+/* Send a mailbox message to VF
+ * @nic: PF state; supplies the register base and the HW revision
+ * @vf: vf to which this message is to be sent
+ * @mbx: Message to be sent, written out as two 64-bit words
+ */
+static void nic_send_msg_to_vf(struct nicpf *nic, int vf, union nic_mbx *mbx)
+{
+	u64 *words = (u64 *)mbx;
+	void __iomem *mbox0 = nic->reg_base + nic_get_mbx_addr(vf);
+	void __iomem *mbox1 = mbox0 + 8;
+
+	/* In first revision HW, mbox interrupt is triggered when PF writes
+	 * to MBOX(1), in next revisions when PF writes to MBOX(0). Either
+	 * way the triggering register is written last. Relaxed accessors
+	 * are used for the same reason as in nic_reg_write()/nic_reg_read().
+	 */
+	if (nic->rev_id != 0) {
+		writeq_relaxed(words[1], mbox1);
+		writeq_relaxed(words[0], mbox0);
+		return;
+	}
+
+	writeq_relaxed(words[0], mbox0);
+	writeq_relaxed(words[1], mbox1);
+}
+
+/* Responds to VF's READY message with VF's
+ * ID, node, MAC address etc.
+ * @vf: VF which sent READY message
+ */
+static void nic_mbx_send_ready(struct nicpf *nic, int vf)
+{
+	union nic_mbx reply = {};
+	bool has_lmac = vf < MAX_LMAC;
+
+	reply.nic_cfg.msg = NIC_MBOX_MSG_READY;
+	reply.nic_cfg.vf_id = vf;
+	reply.nic_cfg.tns_mode = (nic->flags & NIC_TNS_ENABLED) ?
+				 NIC_TNS_MODE : NIC_TNS_BYPASS_MODE;
+
+	/* Only VFs with an entry in vf_lmac_map are given a MAC address */
+	if (has_lmac) {
+		u8 map = nic->vf_lmac_map[vf];
+		const char *mac;
+
+		mac = bgx_get_lmac_mac(nic->node,
+				       NIC_GET_BGX_FROM_VF_LMAC_MAP(map),
+				       NIC_GET_LMAC_FROM_VF_LMAC_MAP(map));
+		if (mac)
+			ether_addr_copy((u8 *)&reply.nic_cfg.mac_addr, mac);
+	}
+#ifdef VNIC_MULTI_QSET_SUPPORT
+	/* VF numbers at or above num_vf_en are flagged as sqs_mode */
+	reply.nic_cfg.sqs_mode = (vf >= nic->num_vf_en);
+#endif
+	reply.nic_cfg.node_id = nic->node;
+	reply.nic_cfg.loopback_supported = has_lmac;
+
+	nic_send_msg_to_vf(nic, vf, &reply);
+}
+
+/* ACKs VF's mailbox message by sending NIC_MBOX_MSG_ACK back
+ * @vf: VF to which the ACK is to be sent
+ */
+static void nic_mbx_send_ack(struct nicpf *nic, int vf)
+{
+	union nic_mbx reply = {};
+
+	reply.msg.msg = NIC_MBOX_MSG_ACK;
+
+	nic_send_msg_to_vf(nic, vf, &reply);
+}
+
+/* NACKs VF's mailbox message, i.e. tells the VF that the PF is not
+ * able to complete the action, by sending NIC_MBOX_MSG_NACK back
+ * @vf: VF to which the NACK is to be sent
+ */
+static void nic_mbx_send_nack(struct nicpf *nic, int vf)
+{
+	union nic_mbx reply = {};
+
+	reply.msg.msg = NIC_MBOX_MSG_NACK;
+
+	nic_send_msg_to_vf(nic, vf, &reply);
+}
+
+/* Flush all in flight receive packets to memory and
+ * bring down an active RQ
+ *
+ * Starts a sync cycle by writing 1 to NIC_PF_SW_SYNC_RX, polls bit 0 of
+ * NIC_PF_SW_SYNC_RX_DONE for at most 65535 reads and then writes 0 back
+ * to NIC_PF_SW_SYNC_RX.
+ *
+ * Returns 0 when the done bit was observed, 1 when the poll budget ran out.
+ */
+static int nic_rcv_queue_sw_sync(struct nicpf *nic)
+{
+	u16 timeout = ~0x00;
+
+	nic_reg_write(nic, NIC_PF_SW_SYNC_RX, 0x01);
+	/* Wait till sync cycle is finished */
+	while (timeout) {
+		if (nic_reg_read(nic, NIC_PF_SW_SYNC_RX_DONE) & 0x1)
+			break;
+		timeout--;
+	}
+	nic_reg_write(nic, NIC_PF_SW_SYNC_RX, 0x00);
+	if (!timeout) {
+		/* Terminate the message with a newline so it is not merged
+		 * with whatever is logged next.
+		 */
+		dev_err(&nic->pdev->dev,
+			"Receive queue software sync failed\n");
+		return 1;
+	}
+	return 0;
+}
+
+/* Get BGX Rx/Tx stats and respond to VF's request
+ * @nic: PF state
+ * @bgx: request received from the VF; vf_id, rx and idx are echoed back
+ *
+ * The VF number comes from the mailbox message, so it is range checked
+ * before it is used as an index into vf_lmac_map[] (the same bound that
+ * nic_mbx_send_ready() applies). A VF outside the map still gets a reply,
+ * with the stats value left at zero, so the requester is never left
+ * waiting for an answer.
+ */
+static void nic_get_bgx_stats(struct nicpf *nic, struct bgx_stats_msg *bgx)
+{
+	int bgx_idx, lmac;
+	union nic_mbx mbx = {};
+
+	mbx.bgx_stats.msg = NIC_MBOX_MSG_BGX_STATS;
+	mbx.bgx_stats.vf_id = bgx->vf_id;
+	mbx.bgx_stats.rx = bgx->rx;
+	mbx.bgx_stats.idx = bgx->idx;
+	mbx.bgx_stats.stats = 0;
+
+	if (bgx->vf_id < MAX_LMAC) {
+		bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(
+				nic->vf_lmac_map[bgx->vf_id]);
+		lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(
+				nic->vf_lmac_map[bgx->vf_id]);
+
+		if (bgx->rx)
+			mbx.bgx_stats.stats = bgx_get_rx_stats(nic->node,
+							       bgx_idx, lmac,
+							       bgx->idx);
+		else
+			mbx.bgx_stats.stats = bgx_get_tx_stats(nic->node,
+							       bgx_idx, lmac,
+							       bgx->idx);
+	}
+	nic_send_msg_to_vf(nic, bgx->vf_id, &mbx);
+}
+
+/* Update hardware min/max frame size
+ * @new_frs: frame size requested by the VF
+ * @vf: requesting VF, used only in the error message
+ *
+ * Returns 1 when @new_frs lies outside NIC_HW_MIN_FRS..NIC_HW_MAX_FRS,
+ * 0 otherwise. The programmed maximum is only ever raised, never lowered.
+ */
+static int nic_update_hw_frs(struct nicpf *nic, int new_frs, int vf)
+{
+	if ((new_frs < NIC_HW_MIN_FRS) || (new_frs > NIC_HW_MAX_FRS)) {
+		dev_err(&nic->pdev->dev,
+			"Invalid MTU setting from VF%d rejected, should be between %d and %d\n",
+			vf, NIC_HW_MIN_FRS, NIC_HW_MAX_FRS);
+		return 1;
+	}
+
+	/* ETH_HLEN is added on top of the requested size */
+	new_frs += ETH_HLEN;
+
+	if (new_frs > nic->pkind.maxlen) {
+		nic->pkind.maxlen = new_frs;
+		nic_reg_write(nic, NIC_PF_PKIND_0_15_CFG,
+			      *(u64 *)&nic->pkind);
+	}
+	return 0;
+}
+
+/* Set minimum transmit packet size
+ * @size: requested minimum size in bytes; values above 60 are clamped
+ *
+ * Rewrites the 4-bit field at bits 5:2 of every NIC_PF_LMAC_0_7_CFG
+ * register with size / 4.
+ */
+static void nic_set_tx_pkt_pad(struct nicpf *nic, int size)
+{
+	int idx;
+
+	/* Max value that can be set is 60 */
+	size = (size > 60) ? 60 : size;
+
+	for (idx = 0; idx < (MAX_BGX_PER_CN88XX * MAX_LMAC_PER_BGX); idx++) {
+		u64 cfg = nic_reg_read(nic, NIC_PF_LMAC_0_7_CFG | (idx << 3));
+
+		cfg &= ~(0xF << 2);
+		cfg |= ((size / 4) << 2);
+		nic_reg_write(nic, NIC_PF_LMAC_0_7_CFG | (idx << 3), cfg);
+	}
+}
+
+/* Function to check number of LMACs present and set VF::LMAC mapping.
+ * Mapping will be used while initializing channels.
+ *
+ * In TNS mode no mapping is built and DEFAULT_NUM_VF_ENABLED VFs are
+ * enabled. Otherwise one VF is enabled per LMAC of every BGX present in
+ * the map returned by bgx_get_map(), and the LMAC credit registers of
+ * that BGX are programmed.
+ */
+static void nic_set_lmac_vf_mapping(struct nicpf *nic)
+{
+	unsigned bgx_map = bgx_get_map(nic->node);
+	int bgx, next_bgx_lmac = 0;
+	int lmac, lmac_cnt = 0;
+	u64 lmac_credit;
+
+	nic->num_vf_en = 0;
+	if (nic->flags & NIC_TNS_ENABLED) {
+		nic->num_vf_en = DEFAULT_NUM_VF_ENABLED;
+		return;
+	}
+
+	for (bgx = 0; bgx < NIC_MAX_BGX; bgx++) {
+		if (!(bgx_map & (1 << bgx)))
+			continue;
+		lmac_cnt = bgx_get_lmac_count(nic->node, bgx);
+		/* A BGX reporting no LMACs has nothing to map, and the
+		 * credit computation below divides by the LMAC count.
+		 */
+		if (lmac_cnt <= 0)
+			continue;
+		for (lmac = 0; lmac < lmac_cnt; lmac++)
+			nic->vf_lmac_map[next_bgx_lmac++] =
+				NIC_SET_VF_LMAC_MAP(bgx, lmac);
+		nic->num_vf_en += lmac_cnt;
+
+		/* Program LMAC credits */
+		lmac_credit = (1ull << 1); /* channel credit enable */
+		lmac_credit |= (0x1ff << 2); /* Max outstanding pkt count */
+		/* 48KB BGX Tx buffer size, each unit is of size 16bytes */
+		lmac_credit |= (((((48 * 1024) / lmac_cnt) -
+				NIC_HW_MAX_FRS) / 16) << 12);
+		lmac = bgx * MAX_LMAC_PER_BGX;
+		for (; lmac < lmac_cnt + (bgx * MAX_LMAC_PER_BGX); lmac++)
+			nic_reg_write(nic,
+				      NIC_PF_LMAC_0_7_CREDIT + (lmac * 8),
+				      lmac_credit);
+	}
+}
+
+#define TNS_PORT0_BLOCK 6
+#define TNS_PORT1_BLOCK 7
+#define BGX0_BLOCK 8
+#define BGX1_BLOCK 9
+
+/* Bring the NIC block into its initial state: soft reset, block enable,
+ * backpressure, per-interface send/backpressure configuration for either
+ * TNS or TNS-bypass mode, PKIND defaults, Tx padding, interrupt timer and
+ * VLAN ethertype matching. Register writes are issued in this order.
+ */
+static void nic_init_hw(struct nicpf *nic)
+{
+	int pkind_idx;
+	int intf_mode, intf0_block, intf1_block;
+
+	/* Reset NIC, in case the driver is repeatedly inserted and removed */
+	nic_reg_write(nic, NIC_PF_SOFT_RESET, 1);
+
+	/* Enable NIC HW block */
+	nic_reg_write(nic, NIC_PF_CFG, 0x3);
+
+	/* Enable backpressure */
+	nic_reg_write(nic, NIC_PF_BP_CFG, (1ULL << 6) | 0x03);
+
+	if (nic->flags & NIC_TNS_ENABLED) {
+		intf_mode = NIC_TNS_MODE;
+		intf0_block = TNS_PORT0_BLOCK;
+		intf1_block = TNS_PORT1_BLOCK;
+	} else {
+		/* Disable TNS mode on both interfaces */
+		intf_mode = NIC_TNS_BYPASS_MODE;
+		intf0_block = BGX0_BLOCK;
+		intf1_block = BGX1_BLOCK;
+	}
+
+	/* Interface 0 and 1 registers are (1 << 8) apart */
+	nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG,
+		      (intf_mode << 7) | intf0_block);
+	nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG | (1 << 8),
+		      (intf_mode << 7) | intf1_block);
+	nic_reg_write(nic, NIC_PF_INTF_0_1_BP_CFG,
+		      (1ULL << 63) | intf0_block);
+	nic_reg_write(nic, NIC_PF_INTF_0_1_BP_CFG + (1 << 8),
+		      (1ULL << 63) | intf1_block);
+
+	/* PKIND configuration */
+	nic->pkind.minlen = 0;
+	nic->pkind.maxlen = NIC_HW_MAX_FRS + ETH_HLEN;
+	nic->pkind.lenerr_en = 1;
+	nic->pkind.rx_hdr = 0;
+	nic->pkind.hdr_sl = 0;
+
+	/* The same PKIND value goes into all NIC_MAX_PKIND registers */
+	for (pkind_idx = 0; pkind_idx < NIC_MAX_PKIND; pkind_idx++)
+		nic_reg_write(nic, NIC_PF_PKIND_0_15_CFG | (pkind_idx << 3),
+			      *(u64 *)&nic->pkind);
+
+	nic_set_tx_pkt_pad(nic, NIC_HW_MIN_FRS);
+
+	/* Timer config */
+	nic_reg_write(nic, NIC_PF_INTR_TIMER_CFG, NICPF_CLK_PER_INT_TICK);
+
+	/* Enable VLAN ethertype matching and stripping */
+	nic_reg_write(nic, NIC_PF_RX_ETYPE_0_7,
+		      (2 << 19) | (ETYPE_ALG_VLAN_STRIP << 16) | ETH_P_8021Q);
+}
+
+/* Channel parse index configuration */
+static void nic_config_cpi(struct nicpf *nic, struct cpi_cfg_msg *cfg)
+{
+ u32 vnic, bgx, lmac, chan;
+ u32 padd, cpi_count = 0;
+ u64 cpi_base, cpi, rssi_base, rssi;
+ u8 qset, rq_idx = 0;
+
+ vnic = cfg->vf_id;
+ bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]);
+ lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]);
+
+ chan = (lmac * MAX_BGX_CHANS_PER_LMAC) + (bgx * NIC_CHANS_PER_INF);
+ cpi_base = (lmac * NIC_MAX_CPI_PER_LMAC) + (bgx * NIC_CPI_PER_BGX);
+ rssi_base = (lmac * nic->rss_ind_tbl_size) + (bgx * NIC_RSSI_PER_BGX);
+
+ /* Rx channel configuration */
+ nic_reg_write(nic, NIC_PF_CHAN_0_255_RX_BP_CFG | (chan << 3),
+ (1ull << 63) | (vnic << 0));
+ nic_reg_write(nic, NIC_PF_CHAN_0_255_RX_CFG | (chan << 3),
+ ((u64)cfg->cpi_alg << 62) | (cpi_base << 48));
+
+ if (cfg->cpi_alg == CPI_ALG_NONE)
+ cpi_count = 1;
+ else if (cfg->cpi_alg == CPI_ALG_VLAN) /* 3 bits of PCP */
+ cpi_count = 8;
+ else if (cfg->cpi_alg == CPI_ALG_VLAN16) /* 3 bits PCP + DEI */
+ cpi_count = 16;
+ else if (cfg->cpi_alg == CPI_ALG_DIFF) /* 6bits DSCP */
+ cpi_count = NIC_MAX_CPI_PER_LMAC;
+
+ /* RSS Qset, Qidx mapping */
+ qset = cfg->vf_id;
+ rssi = rssi_base;
+ for (; rssi < (rssi_base + cfg->rq_cnt); rssi++) {
+ nic_reg_write(nic, NIC_PF_RSSI_0_4097_RQ | (rssi << 3),
+ (qset << 3) | rq_idx);
+ rq_idx++;
+ }
+
+ rssi = 0;
+ cpi = cpi_base;
+ for (; cpi < (cpi_base + cpi_count); cpi++) {
+ /* Determine port to channel adder */
+ if (cfg->cpi_alg != CPI_ALG_DIFF)
+ padd = cpi % cpi_count;
+ else
+ padd = cpi % 8; /* 3 bits CS out of 6bits DSCP */
+
+ /* Leave RSS_SIZE as '0' to disable RSS */
+ nic_reg_write(nic, NIC_PF_CPI_0_2047_CFG | (cpi << 3),
+ (vnic << 24) | (padd << 16) | (rssi_base + rssi));
+
+ if ((rssi + 1) >= cfg->rq_cnt)
+ continue;
+
+ if (cfg->cpi_alg == CPI_ALG_VLAN)
+ rssi++;
+ else if (cfg->cpi_alg == CPI_ALG_VLAN16)
+ rssi = ((cpi - cpi_base) & 0xe) >> 1;
+ else if (cfg->cpi_alg == CPI_ALG_DIFF)
+ rssi = ((cpi - cpi_base) & 0x38) >> 3;
+ }
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-all
mailing list