1479 lines
43 KiB
C
1479 lines
43 KiB
C
/*******************************************************************************
|
|
Barefoot Networks Switch ASIC Linux driver
|
|
Copyright(c) 2015 - 2019 Barefoot Networks, Inc.
|
|
|
|
This program is free software; you can redistribute it and/or modify it
|
|
under the terms and conditions of the GNU General Public License,
|
|
version 2, as published by the Free Software Foundation.
|
|
|
|
This program is distributed in the hope it will be useful, but WITHOUT
|
|
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
|
more details.
|
|
|
|
You should have received a copy of the GNU General Public License along with
|
|
this program; if not, write to the Free Software Foundation, Inc.,
|
|
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
|
|
|
|
The full GNU General Public License is included in this distribution in
|
|
the file called "COPYING".
|
|
|
|
Contact Information:
|
|
info@barefootnetworks.com
|
|
Barefoot Networks, 4750 Patrick Henry Drive, Santa Clara CA 95054
|
|
|
|
*******************************************************************************/
|
|
/* bf_drv kernel module
|
|
*
|
|
* This is kernel mode driver for Tofino chip.
|
|
* Provides user space mmap service and user space "wait for interrupt"
|
|
* and "enable interrupt" services.
|
|
*/
|
|
|
|
#include <linux/device.h>
|
|
#include <linux/interrupt.h>
|
|
#include <linux/module.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/io.h>
|
|
#include <linux/wait.h>
|
|
#include <linux/poll.h>
|
|
#include <linux/dma-mapping.h>
|
|
#include "bf_ioctl.h"
|
|
#include "bf_kdrv.h"
|
|
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
|
|
#include <linux/sched/signal.h>
|
|
#else
|
|
#include <linux/sched.h>
|
|
#endif
|
|
#include <linux/cdev.h>
|
|
#include <linux/aer.h>
|
|
#include <linux/string.h>
|
|
|
|
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0)
|
|
//#error unsupported linux kernel version
|
|
#endif
|
|
|
|
#ifdef BF_INCLUDE_KPKT
|
|
/* kernel pkt driver entry/exit APIs */
|
|
extern int bf_kpkt_init(struct pci_dev *pdev,
|
|
u8 *bar0_vaddr,
|
|
void **adapter_ptr,
|
|
int dev_id,
|
|
int pci_use_highmem,
|
|
unsigned long head_room,
|
|
int kpkt_dr_int_en,
|
|
unsigned long rx_desc_countp);
|
|
extern void bf_kpkt_remove(void *adapter_ptr);
|
|
extern void bf_kpkt_irqhandler(int irq, void *adapter_ptr);
|
|
extern void bf_kpkt_set_pci_error(void *adapter_ptr, u8 pci_error);
|
|
#endif
|
|
|
|
/* Keep any global information here that must survive even after the
|
|
* bf_pci_dev is free-ed up.
|
|
*/
|
|
struct bf_global {
|
|
struct bf_pci_dev *bfdev;
|
|
struct cdev *bf_cdev;
|
|
struct fasync_struct *async_queue;
|
|
};
|
|
|
|
static int bf_major;
|
|
static int bf_minor[BF_MAX_DEVICE_CNT] = {0};
|
|
static struct class *bf_class = NULL;
|
|
static char *intr_mode = NULL;
|
|
static int kpkt_mode = 0;
|
|
static int kpkt_hd_room = 32;
|
|
static int kpkt_rx_count = 256;
|
|
static int kpkt_dr_int_en = 1;
|
|
|
|
static enum bf_intr_mode bf_intr_mode_default = BF_INTR_MODE_MSI;
|
|
static spinlock_t bf_nonisr_lock;
|
|
|
|
/* dev->minor should index into this array */
|
|
static struct bf_global bf_global[BF_MAX_DEVICE_CNT];
|
|
|
|
static void bf_add_listener(struct bf_pci_dev *bfdev,
|
|
struct bf_listener *listener) {
|
|
struct bf_listener **cur_listener = &bfdev->listener_head;
|
|
|
|
if (!listener) {
|
|
return;
|
|
}
|
|
spin_lock(&bf_nonisr_lock);
|
|
|
|
while (*cur_listener) {
|
|
cur_listener = &((*cur_listener)->next);
|
|
}
|
|
*cur_listener = listener;
|
|
listener->next = NULL;
|
|
|
|
spin_unlock(&bf_nonisr_lock);
|
|
}
|
|
|
|
static void bf_remove_listener(struct bf_pci_dev *bfdev,
|
|
struct bf_listener *listener) {
|
|
struct bf_listener **cur_listener = &bfdev->listener_head;
|
|
|
|
/* in case of certain error conditions, this function might be called after
|
|
* bf_pci_remove()
|
|
*/
|
|
if (!bfdev || !listener) {
|
|
return;
|
|
}
|
|
spin_lock(&bf_nonisr_lock);
|
|
|
|
if (*cur_listener == listener) {
|
|
*cur_listener = listener->next;
|
|
} else {
|
|
while (*cur_listener) {
|
|
if ((*cur_listener)->next == listener) {
|
|
(*cur_listener)->next = listener->next;
|
|
break;
|
|
}
|
|
cur_listener = &((*cur_listener)->next);
|
|
}
|
|
listener->next = NULL;
|
|
}
|
|
|
|
spin_unlock(&bf_nonisr_lock);
|
|
}
|
|
|
|
/* a pool of minor numbers is maintained */
|
|
/* return the first available minor number */
|
|
static int bf_get_next_minor_no(int *minor) {
|
|
int i;
|
|
|
|
spin_lock(&bf_nonisr_lock);
|
|
for (i = 0; i < BF_MAX_DEVICE_CNT; i++) {
|
|
if (bf_minor[i] == 0) {
|
|
*minor = i;
|
|
bf_minor[i] = 1; /* mark it as taken */
|
|
spin_unlock(&bf_nonisr_lock);
|
|
return 0;
|
|
}
|
|
}
|
|
*minor = -1;
|
|
spin_unlock(&bf_nonisr_lock);
|
|
return -1;
|
|
}
|
|
|
|
/* return a minor number back to the pool for recycling */
|
|
static int bf_return_minor_no(int minor) {
|
|
int err;
|
|
|
|
spin_lock(&bf_nonisr_lock);
|
|
if (bf_minor[minor] == 0) { /* was already returned */
|
|
err = -1; /* don't change anything, but return error */
|
|
} else {
|
|
bf_minor[minor] = 0; /* mark it as available */
|
|
err = 0;
|
|
}
|
|
spin_unlock(&bf_nonisr_lock);
|
|
return err;
|
|
}
|
|
|
|
static inline struct bf_pci_dev *bf_get_pci_dev(struct bf_dev_info *info) {
|
|
return container_of(info, struct bf_pci_dev, info);
|
|
}
|
|
|
|
/*
|
|
* It masks the msix on/off of generating MSI-X messages.
|
|
*/
|
|
static void bf_msix_mask_irq(struct msi_desc *desc, int32_t state) {
|
|
u32 mask_bits = desc->masked;
|
|
unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
|
|
PCI_MSIX_ENTRY_VECTOR_CTRL;
|
|
|
|
if (state != 0) {
|
|
mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
|
|
} else {
|
|
mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
|
|
}
|
|
|
|
if (mask_bits != desc->masked) {
|
|
writel(mask_bits, desc->mask_base + offset);
|
|
readl(desc->mask_base);
|
|
desc->masked = mask_bits;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* irqcontrol can be used to disable/enable interrupt from user space processes.
|
|
*
|
|
* @param bf_dev
|
|
* pointer to bf_pci_dev
|
|
* @param irq_state
|
|
* state value. 1 to enable interrupt, 0 to disable interrupt.
|
|
*
|
|
* @return
|
|
* - On success, 0.
|
|
* - On failure, a negative value.
|
|
*/
|
|
static int bf_pci_irqcontrol(struct bf_pci_dev *bfdev, s32 irq_state) {
|
|
struct pci_dev *pdev = bfdev->pdev;
|
|
|
|
pci_cfg_access_lock(pdev);
|
|
if (bfdev->mode == BF_INTR_MODE_LEGACY) {
|
|
pci_intx(pdev, !!irq_state);
|
|
} else if (bfdev->mode == BF_INTR_MODE_MSIX) {
|
|
struct msi_desc *desc;
|
|
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0)
|
|
list_for_each_entry(desc, &pdev->msi_list, list)
|
|
bf_msix_mask_irq(desc, irq_state);
|
|
#else
|
|
for_each_pci_msi_entry(desc, pdev) bf_msix_mask_irq(desc, irq_state);
|
|
#endif
|
|
}
|
|
pci_cfg_access_unlock(pdev);
|
|
|
|
return 0;
|
|
}
|
|
|
|
#ifdef BF_INCLUDE_KPKT
|
|
/* there are three TBUS MSIX vectors */
|
|
static int bf_irq_is_tbus_msix(struct bf_pci_dev *bfdev, int irq) {
|
|
struct bf_dev_info *info = &bfdev->info;
|
|
|
|
if (!info->tbus_msix_map_enable) {
|
|
return 0;
|
|
}
|
|
if (irq == info->msix_entries[info->tbus_msix_ind[0]].vector ||
|
|
irq == info->msix_entries[info->tbus_msix_ind[1]].vector) {
|
|
return 1;
|
|
} else if (irq == info->msix_entries[info->tbus_msix_ind[2]].vector) {
|
|
/* log error */
|
|
printk(KERN_ALERT "bf_tbus error msix\n");
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
/**
|
|
* interrupt handler which will check if the interrupt is from the right
|
|
* device. If so, disable it here and will be enabled later.
|
|
*/
|
|
static irqreturn_t bf_pci_irqhandler(int irq, struct bf_pci_dev *bfdev) {
|
|
/* Legacy mode need to mask in hardware */
|
|
if (bfdev->mode == BF_INTR_MODE_LEGACY &&
|
|
!pci_check_and_mask_intx(bfdev->pdev)) {
|
|
return IRQ_NONE;
|
|
}
|
|
|
|
/* NOTE : if bfdev->info.pci_error_state == 1, then do not access the
|
|
* device and return IRQ_NOTHANDLED.
|
|
*/
|
|
#ifdef BF_INCLUDE_KPKT
|
|
/* handle pkt DR interrpt (MSI vect-1) if it has to be in kernel */
|
|
if (kpkt_dr_int_en && bfdev->info.irq != 0) {
|
|
if (bfdev->mode == BF_INTR_MODE_LEGACY) {
|
|
bf_kpkt_irqhandler(irq, bfdev->adapter_ptr);
|
|
} else if (bfdev->mode == BF_INTR_MODE_MSI) {
|
|
/* do not process packet unless the MSI interrupt is from tbus */
|
|
/* all BF interrupts arrive on one single MSI if "1" MSI is configured */
|
|
if (bfdev->info.num_irq == 1 || (irq == (bfdev->info.irq + BF_MSI_INT_TBUS))) {
|
|
bf_kpkt_irqhandler(irq, bfdev->adapter_ptr);
|
|
}
|
|
} else if (bfdev->mode == BF_INTR_MODE_MSIX) {
|
|
if (bfdev->info.tof_type == BF_TOFINO_2 && bf_irq_is_tbus_msix(bfdev,irq)) {
|
|
bf_kpkt_irqhandler(irq, bfdev->adapter_ptr);
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
/* Message signal mode, no share IRQ and automasked */
|
|
return IRQ_HANDLED;
|
|
}
|
|
|
|
/* Remap pci resources described by bar #pci_bar */
|
|
static int bf_pci_setup_iomem(struct pci_dev *dev,
|
|
struct bf_dev_info *info,
|
|
int n,
|
|
int pci_bar,
|
|
const char *name) {
|
|
unsigned long addr, len;
|
|
void *internal_addr;
|
|
|
|
if (sizeof(info->mem) / sizeof(info->mem[0]) <= n) {
|
|
return -EINVAL;
|
|
}
|
|
|
|
addr = pci_resource_start(dev, pci_bar);
|
|
len = pci_resource_len(dev, pci_bar);
|
|
if (addr == 0 || len == 0) {
|
|
return -1;
|
|
}
|
|
internal_addr = pci_ioremap_bar(dev, pci_bar);
|
|
if (internal_addr == NULL) {
|
|
return -1;
|
|
}
|
|
info->mem[n].name = name;
|
|
info->mem[n].addr = addr;
|
|
info->mem[n].internal_addr = internal_addr;
|
|
info->mem[n].size = len;
|
|
return 0;
|
|
}
|
|
|
|
/* Unmap previously ioremap'd resources */
|
|
static void bf_pci_release_iomem(struct bf_dev_info *info) {
|
|
int i;
|
|
|
|
for (i = 0; i < BF_MAX_BAR_MAPS; i++) {
|
|
if (info->mem[i].internal_addr) {
|
|
iounmap(info->mem[i].internal_addr);
|
|
}
|
|
}
|
|
}
|
|
|
|
static int bf_setup_bars(struct pci_dev *dev, struct bf_dev_info *info) {
|
|
int i, iom, ret;
|
|
unsigned long flags;
|
|
static const char *bar_names[BF_MAX_BAR_MAPS] = {
|
|
"BAR0", "BAR1", "BAR2", "BAR3", "BAR4", "BAR5",
|
|
};
|
|
|
|
iom = 0;
|
|
|
|
for (i = 0; i < BF_MAX_BAR_MAPS; i++) {
|
|
if (pci_resource_len(dev, i) != 0 && pci_resource_start(dev, i) != 0) {
|
|
flags = pci_resource_flags(dev, i);
|
|
if (flags & IORESOURCE_MEM) {
|
|
ret = bf_pci_setup_iomem(dev, info, iom, i, bar_names[i]);
|
|
if (ret != 0) {
|
|
return ret;
|
|
}
|
|
iom++;
|
|
}
|
|
}
|
|
}
|
|
return (iom != 0) ? ret : -ENOENT;
|
|
}
|
|
|
|
static irqreturn_t bf_interrupt(int irq, void *bfdev_id) {
|
|
struct bf_pci_dev *bfdev = ((struct bf_int_vector *)bfdev_id)->bf_dev;
|
|
int vect_off = ((struct bf_int_vector *)bfdev_id)->int_vec_offset;
|
|
|
|
irqreturn_t ret = bf_pci_irqhandler(irq, bfdev);
|
|
|
|
if (ret == IRQ_HANDLED) {
|
|
atomic_inc(&(bfdev->info.event[vect_off]));
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
static unsigned int bf_poll(struct file *filep, poll_table *wait) {
|
|
struct bf_listener *listener = (struct bf_listener *)filep->private_data;
|
|
struct bf_pci_dev *bfdev = listener->bfdev;
|
|
int i;
|
|
|
|
if (!bfdev) {
|
|
return -ENODEV;
|
|
}
|
|
if (!bfdev->info.irq) {
|
|
return -EIO;
|
|
}
|
|
|
|
poll_wait(filep, &bfdev->info.wait, wait);
|
|
|
|
for (i = 0; i < BF_MSIX_ENTRY_CNT; i++) {
|
|
if (listener->event_count[i] != atomic_read(&bfdev->info.event[i])) {
|
|
return POLLIN | POLLRDNORM;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static int bf_find_mem_index(struct vm_area_struct *vma) {
|
|
struct bf_pci_dev *bfdev = vma->vm_private_data;
|
|
if (vma->vm_pgoff < BF_MAX_BAR_MAPS) {
|
|
if (bfdev->info.mem[vma->vm_pgoff].size == 0) {
|
|
return -1;
|
|
}
|
|
return (int)vma->vm_pgoff;
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
static const struct vm_operations_struct bf_physical_vm_ops = {
|
|
#ifdef CONFIG_HAVE_IOREMAP_PROT
|
|
.access = generic_access_phys,
|
|
#endif
|
|
};
|
|
|
|
static int bf_mmap_physical(struct vm_area_struct *vma) {
|
|
struct bf_pci_dev *bfdev = vma->vm_private_data;
|
|
int bar = bf_find_mem_index(vma);
|
|
struct bf_dev_mem *mem;
|
|
if (bar < 0) {
|
|
return -EINVAL;
|
|
}
|
|
|
|
mem = bfdev->info.mem + bar;
|
|
|
|
if (mem->addr & ~PAGE_MASK) {
|
|
return -ENODEV;
|
|
}
|
|
if (vma->vm_end - vma->vm_start > mem->size) {
|
|
return -EINVAL;
|
|
}
|
|
|
|
vma->vm_ops = &bf_physical_vm_ops;
|
|
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
|
|
|
|
/*
|
|
* We cannot use the vm_iomap_memory() helper here,
|
|
* because vma->vm_pgoff is the map index we looked
|
|
* up above in bf_find_mem_index(), rather than an
|
|
* actual page offset into the mmap.
|
|
*
|
|
* So we just do the physical mmap without a page
|
|
* offset.
|
|
*/
|
|
return remap_pfn_range(vma,
|
|
vma->vm_start,
|
|
mem->addr >> PAGE_SHIFT,
|
|
vma->vm_end - vma->vm_start,
|
|
vma->vm_page_prot);
|
|
}
|
|
|
|
static int bf_mmap(struct file *filep, struct vm_area_struct *vma) {
|
|
struct bf_listener *listener = filep->private_data;
|
|
struct bf_pci_dev *bfdev = listener->bfdev;
|
|
int bar;
|
|
unsigned long requested_pages, actual_pages;
|
|
|
|
if (!bfdev) {
|
|
return -ENODEV;
|
|
}
|
|
if (vma->vm_end < vma->vm_start) {
|
|
return -EINVAL;
|
|
}
|
|
|
|
vma->vm_private_data = bfdev;
|
|
|
|
bar = bf_find_mem_index(vma);
|
|
if (bar < 0) {
|
|
return -EINVAL;
|
|
}
|
|
|
|
requested_pages = vma_pages(vma);
|
|
actual_pages = ((bfdev->info.mem[bar].addr & ~PAGE_MASK) +
|
|
bfdev->info.mem[bar].size + PAGE_SIZE - 1) >>
|
|
PAGE_SHIFT;
|
|
if (requested_pages > actual_pages) {
|
|
return -EINVAL;
|
|
}
|
|
|
|
return bf_mmap_physical(vma);
|
|
}
|
|
|
|
static int bf_fasync(int fd, struct file *filep, int mode) {
|
|
int minor;
|
|
|
|
if (!filep->private_data) {
|
|
return (-EINVAL);
|
|
}
|
|
minor = ((struct bf_listener *)filep->private_data)->minor;
|
|
if (minor >= BF_MAX_DEVICE_CNT) {
|
|
return (-EINVAL);
|
|
}
|
|
if (mode == 0 && &bf_global[minor].async_queue == NULL) {
|
|
return 0; /* nothing to do */
|
|
}
|
|
return (fasync_helper(fd, filep, mode, &bf_global[minor].async_queue));
|
|
}
|
|
|
|
static int bf_open(struct inode *inode, struct file *filep) {
|
|
struct bf_pci_dev *bfdev;
|
|
struct bf_listener *listener;
|
|
int i;
|
|
|
|
bfdev = bf_global[iminor(inode)].bfdev;
|
|
listener = kmalloc(sizeof(*listener), GFP_KERNEL);
|
|
if (listener) {
|
|
listener->bfdev = bfdev;
|
|
listener->minor = bfdev->info.minor;
|
|
listener->next = NULL;
|
|
bf_add_listener(bfdev, listener);
|
|
for (i = 0; i < BF_MSIX_ENTRY_CNT; i++) {
|
|
listener->event_count[i] = atomic_read(&bfdev->info.event[i]);
|
|
}
|
|
filep->private_data = listener;
|
|
return 0;
|
|
} else {
|
|
return (-ENOMEM);
|
|
}
|
|
}
|
|
|
|
static int bf_release(struct inode *inode, struct file *filep) {
|
|
struct bf_listener *listener = filep->private_data;
|
|
|
|
bf_fasync(-1, filep, 0); /* empty any process id in the notification list */
|
|
if (listener->bfdev) {
|
|
bf_remove_listener(listener->bfdev, listener);
|
|
}
|
|
kfree(listener);
|
|
return 0;
|
|
}
|
|
|
|
/* user space support: make read() system call after poll() of select() */
|
|
static ssize_t bf_read(struct file *filep,
|
|
char __user *buf,
|
|
size_t count,
|
|
loff_t *ppos) {
|
|
struct bf_listener *listener = filep->private_data;
|
|
struct bf_pci_dev *bfdev = listener->bfdev;
|
|
int retval, event_count[BF_MSIX_ENTRY_CNT];
|
|
int i, mismatch_found = 0; /* OR of per vector mismatch */
|
|
unsigned char cnt_match[BF_MSIX_ENTRY_CNT]; /* per vector mismatch */
|
|
|
|
if (!bfdev) {
|
|
return -ENODEV;
|
|
}
|
|
/* irq must be setup for read() to work */
|
|
if (!bfdev->info.irq) {
|
|
return -EIO;
|
|
}
|
|
|
|
/* ensure that there is enough space on user buffer for the given interrupt
|
|
* mode */
|
|
if (bfdev->mode == BF_INTR_MODE_MSIX) {
|
|
if (count < sizeof(s32) * BF_MSIX_ENTRY_CNT) {
|
|
return -EINVAL;
|
|
}
|
|
count = sizeof(s32) * BF_MSIX_ENTRY_CNT;
|
|
} else if (bfdev->mode == BF_INTR_MODE_MSI) {
|
|
if (count < sizeof(s32) * BF_MSI_ENTRY_CNT) {
|
|
return -EINVAL;
|
|
}
|
|
count = sizeof(s32) * BF_MSI_ENTRY_CNT;
|
|
} else {
|
|
if (count < sizeof(s32)) {
|
|
return -EINVAL;
|
|
}
|
|
count = sizeof(s32);
|
|
}
|
|
|
|
do {
|
|
set_current_state(TASK_INTERRUPTIBLE);
|
|
|
|
for (i = 0; i < (count / sizeof(s32)); i++) {
|
|
event_count[i] = atomic_read(&(bfdev->info.event[i]));
|
|
if (event_count[i] != listener->event_count[i]) {
|
|
mismatch_found |= 1;
|
|
cnt_match[i] = 1;
|
|
} else {
|
|
event_count[i] = 0;
|
|
cnt_match[i] = 0;
|
|
}
|
|
}
|
|
if (mismatch_found) {
|
|
__set_current_state(TASK_RUNNING);
|
|
if (copy_to_user(buf, &event_count, count)) {
|
|
retval = -EFAULT;
|
|
} else { /* adjust the listener->event_count; */
|
|
for (i = 0; i < (count / sizeof(s32)); i++) {
|
|
if (cnt_match[i]) {
|
|
listener->event_count[i] = event_count[i];
|
|
}
|
|
}
|
|
retval = count;
|
|
}
|
|
break;
|
|
}
|
|
|
|
if (filep->f_flags & O_NONBLOCK) {
|
|
retval = -EAGAIN;
|
|
break;
|
|
}
|
|
|
|
if (signal_pending(current)) {
|
|
retval = -ERESTARTSYS;
|
|
break;
|
|
}
|
|
schedule();
|
|
} while (1);
|
|
|
|
__set_current_state(TASK_RUNNING);
|
|
|
|
return retval;
|
|
}
|
|
|
|
/* user space is supposed to call this after it is done with interrupt
|
|
* processing
|
|
*/
|
|
static ssize_t bf_write(struct file *filep,
|
|
const char __user *buf,
|
|
size_t count,
|
|
loff_t *ppos) {
|
|
struct bf_listener *listener = filep->private_data;
|
|
struct bf_pci_dev *bfdev = listener->bfdev;
|
|
ssize_t ret;
|
|
s32 int_en;
|
|
|
|
if (!bfdev || !bfdev->info.irq) {
|
|
return -EIO;
|
|
}
|
|
|
|
if (count != sizeof(s32)) {
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (copy_from_user(&int_en, buf, count)) {
|
|
return -EFAULT;
|
|
}
|
|
|
|
/* clear pci_error_state */
|
|
bfdev->info.pci_error_state = 0;
|
|
|
|
ret = bf_pci_irqcontrol(bfdev, int_en);
|
|
|
|
return ret ? ret : sizeof(s32);
|
|
}
|
|
|
|
static long bf_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
|
|
{
|
|
struct bf_listener *listener = filep->private_data;
|
|
struct bf_pci_dev *bfdev = listener->bfdev;
|
|
bf_dma_bus_map_t dma_map;
|
|
void *addr = (void __user *)arg;
|
|
dma_addr_t dma_hndl;
|
|
|
|
if (!bfdev || !addr) {
|
|
return EFAULT;
|
|
}
|
|
switch(cmd) {
|
|
case BF_IOCMAPDMAADDR:
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0)
|
|
if (access_ok(addr, sizeof(bf_dma_bus_map_t))) {
|
|
#else
|
|
if (access_ok(VERIFY_WRITE, addr, sizeof(bf_dma_bus_map_t))) {
|
|
#endif
|
|
if (copy_from_user(&dma_map, addr, sizeof(bf_dma_bus_map_t))) {
|
|
return EFAULT;
|
|
}
|
|
if (!dma_map.phy_addr || !dma_map.size) {
|
|
return EFAULT;
|
|
}
|
|
dma_hndl = dma_map_single(&bfdev->pdev->dev, phys_to_virt(dma_map.phy_addr), dma_map.size, DMA_BIDIRECTIONAL);
|
|
if (dma_mapping_error(&bfdev->pdev->dev, dma_hndl)) {
|
|
return EFAULT;
|
|
}
|
|
dma_map.dma_addr = (void *)(uintptr_t)dma_hndl;
|
|
if (copy_to_user(addr, &dma_map, sizeof(bf_dma_bus_map_t))) {
|
|
return EFAULT;
|
|
}
|
|
} else {
|
|
return EFAULT;
|
|
}
|
|
break;
|
|
case BF_IOCUNMAPDMAADDR:
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0)
|
|
if (access_ok(addr, sizeof(bf_dma_bus_map_t))) {
|
|
#else
|
|
if (access_ok(VERIFY_READ, addr, sizeof(bf_dma_bus_map_t))) {
|
|
#endif
|
|
if (copy_from_user(&dma_map, addr, sizeof(bf_dma_bus_map_t))) {
|
|
return EFAULT;
|
|
}
|
|
if (!dma_map.dma_addr || !dma_map.size) {
|
|
return EFAULT;
|
|
}
|
|
dma_unmap_single(&bfdev->pdev->dev, (dma_addr_t)(uintptr_t)(dma_map.dma_addr), dma_map.size, DMA_BIDIRECTIONAL);
|
|
} else {
|
|
return EFAULT;
|
|
}
|
|
break;
|
|
case BF_TBUS_MSIX_INDEX:
|
|
/* not supported for Tofino-1 */
|
|
if (bfdev->info.tof_type == BF_TOFINO_1) {
|
|
return EINVAL;
|
|
} else {
|
|
int i;
|
|
bf_tbus_msix_indices_t msix_ind;
|
|
if (copy_from_user(&msix_ind, addr, sizeof(bf_tbus_msix_indices_t))) {
|
|
return EFAULT;
|
|
}
|
|
if (msix_ind.cnt > BF_TBUS_MSIX_INDICES_MAX) {
|
|
return EINVAL;
|
|
}
|
|
for (i = 0; i < msix_ind.cnt; i++) {
|
|
if (msix_ind.indices[i] >= BF_MSIX_ENTRY_CNT) {
|
|
return EINVAL;
|
|
}
|
|
}
|
|
for (i = 0; i < msix_ind.cnt; i++) {
|
|
bfdev->info.tbus_msix_ind[i] = msix_ind.indices[i];
|
|
}
|
|
bfdev->info.tbus_msix_map_enable = 1;
|
|
}
|
|
break;
|
|
case BF_GET_INTR_MODE:
|
|
{
|
|
bf_intr_mode_t i_mode;
|
|
i_mode.intr_mode = bfdev->mode;
|
|
if (copy_to_user(addr, &i_mode, sizeof(bf_intr_mode_t))) {
|
|
return EFAULT;
|
|
}
|
|
}
|
|
break;
|
|
default:
|
|
return EINVAL;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static const struct file_operations bf_fops = {
|
|
.owner = THIS_MODULE,
|
|
.open = bf_open,
|
|
.release = bf_release,
|
|
.unlocked_ioctl = bf_ioctl,
|
|
.read = bf_read,
|
|
.write = bf_write,
|
|
.mmap = bf_mmap,
|
|
.poll = bf_poll,
|
|
.fasync = bf_fasync,
|
|
};
|
|
|
|
static int bf_major_init(struct bf_pci_dev *bfdev, int minor) {
|
|
struct cdev *cdev;
|
|
static const char name[] = "bf";
|
|
dev_t bf_dev = 0;
|
|
int result;
|
|
|
|
result = alloc_chrdev_region(&bf_dev, 0, BF_MAX_DEVICE_CNT, name);
|
|
if (result) {
|
|
return result;
|
|
}
|
|
|
|
result = -ENOMEM;
|
|
cdev = cdev_alloc();
|
|
if (!cdev) {
|
|
goto fail_dev_add;
|
|
}
|
|
cdev->ops = &bf_fops;
|
|
cdev->owner = THIS_MODULE;
|
|
kobject_set_name(&cdev->kobj, "%s", name);
|
|
result = cdev_add(cdev, bf_dev, BF_MAX_DEVICE_CNT);
|
|
|
|
if (result) {
|
|
goto fail_dev_add;
|
|
}
|
|
|
|
bf_major = MAJOR(bf_dev);
|
|
bf_global[minor].bf_cdev = cdev;
|
|
return 0;
|
|
|
|
fail_dev_add:
|
|
unregister_chrdev_region(bf_dev, BF_MAX_DEVICE_CNT);
|
|
return result;
|
|
}
|
|
|
|
static void bf_major_cleanup(struct bf_pci_dev *bfdev, int minor) {
|
|
unregister_chrdev_region(MKDEV(bf_major, 0), BF_MAX_DEVICE_CNT);
|
|
cdev_del(bf_global[minor].bf_cdev);
|
|
}
|
|
|
|
static int bf_init_cdev(struct bf_pci_dev *bfdev, int minor) {
|
|
int ret;
|
|
ret = bf_major_init(bfdev, minor);
|
|
if (ret) return ret;
|
|
|
|
bf_class = class_create(THIS_MODULE, BF_CLASS_NAME);
|
|
if (!bf_class) {
|
|
printk(KERN_ERR "create_class failed for bf_dev\n");
|
|
ret = -ENODEV;
|
|
goto err_class_register;
|
|
}
|
|
return 0;
|
|
|
|
err_class_register:
|
|
bf_major_cleanup(bfdev, minor);
|
|
return ret;
|
|
}
|
|
|
|
static void bf_remove_cdev(struct bf_pci_dev *bfdev) {
|
|
class_destroy(bf_class);
|
|
bf_major_cleanup(bfdev, bfdev->info.minor);
|
|
}
|
|
|
|
/**
|
|
* bf_register_device - register a new userspace mem device
|
|
* @parent: parent device
|
|
* @bfdev: bf pci device
|
|
*
|
|
* returns zero on success or a negative error code.
|
|
*/
|
|
int bf_register_device(struct device *parent, struct bf_pci_dev *bfdev) {
|
|
struct bf_dev_info *info = &bfdev->info;
|
|
int i, j, ret = 0;
|
|
int minor;
|
|
|
|
if (!parent || !info || !info->version) {
|
|
return -EINVAL;
|
|
}
|
|
|
|
init_waitqueue_head(&info->wait);
|
|
|
|
for (i = 0; i < BF_MSIX_ENTRY_CNT; i++) {
|
|
atomic_set(&info->event[i], 0);
|
|
}
|
|
|
|
if (bf_get_next_minor_no(&minor)) {
|
|
return -EINVAL;
|
|
}
|
|
|
|
ret = bf_init_cdev(bfdev, minor);
|
|
if (ret) {
|
|
printk(KERN_ERR "BF: device cdev creation failed\n");
|
|
return ret;
|
|
}
|
|
|
|
info->dev = device_create(
|
|
bf_class, parent, MKDEV(bf_major, minor), bfdev, "bf%d", minor);
|
|
if (!info->dev) {
|
|
printk(KERN_ERR "BF: device creation failed\n");
|
|
return -ENODEV;
|
|
}
|
|
|
|
info->minor = minor;
|
|
|
|
/* bind ISRs and request interrupts */
|
|
if (info->irq && (bfdev->mode != BF_INTR_MODE_NONE)) {
|
|
/*
|
|
* Note that we deliberately don't use devm_request_irq
|
|
* here. The parent module can unregister the UIO device
|
|
* and call pci_disable_msi, which requires that this
|
|
* irq has been freed. However, the device may have open
|
|
* FDs at the time of unregister and therefore may not be
|
|
* freed until they are released.
|
|
*/
|
|
if (bfdev->mode == BF_INTR_MODE_LEGACY) {
|
|
ret = request_irq(info->irq,
|
|
bf_interrupt,
|
|
info->irq_flags,
|
|
bfdev->name,
|
|
(void *)&(bfdev->bf_int_vec[0]));
|
|
if (ret) {
|
|
printk(KERN_ERR "bf failed to request legacy irq %ld error %d\n",
|
|
info->irq,
|
|
ret);
|
|
return ret;
|
|
}
|
|
printk(KERN_NOTICE "BF allocating legacy int vector %ld\n", info->irq);
|
|
} else if (bfdev->mode == BF_INTR_MODE_MSIX) {
|
|
for (i = 0; i < info->num_irq; i++) {
|
|
ret = request_irq(info->msix_entries[i].vector,
|
|
bf_interrupt,
|
|
info->irq_flags,
|
|
bfdev->name,
|
|
(void *)&(bfdev->bf_int_vec[i]));
|
|
if (ret) {
|
|
/* undo all other previous bindings */
|
|
printk(KERN_ERR "bf failed to request MSIX ret %d itr %d\n", ret, i);
|
|
for (j = i - 1; j >= 0; j--) {
|
|
free_irq(info->msix_entries[j].vector,
|
|
(void *)&(bfdev->bf_int_vec[j]));
|
|
}
|
|
return ret;
|
|
}
|
|
}
|
|
printk(KERN_NOTICE "BF allocating %d MSIx vectors from %ld\n",
|
|
info->num_irq,
|
|
info->irq);
|
|
} else if (bfdev->mode == BF_INTR_MODE_MSI) {
|
|
for (i = 0; i < info->num_irq; i++) {
|
|
ret = request_irq(info->irq + i,
|
|
bf_interrupt,
|
|
info->irq_flags,
|
|
bfdev->name,
|
|
(void *)&(bfdev->bf_int_vec[i]));
|
|
if (ret) {
|
|
/* undo all other previous bindings */
|
|
printk(KERN_ERR "bf failed to request MSI ret %d itr %d\n", ret, i);
|
|
for (j = i - 1; j >= 0; j--) {
|
|
free_irq(info->irq + j, (void *)&(bfdev->bf_int_vec[j]));
|
|
}
|
|
return ret;
|
|
}
|
|
}
|
|
printk(KERN_NOTICE "BF allocating %d MSI vectors from %ld\n",
|
|
info->num_irq,
|
|
info->irq);
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* bf_unregister_device - register a new userspace mem device
|
|
* @bfdev: bf pci device
|
|
*
|
|
* returns none
|
|
*/
|
|
void bf_unregister_device(struct bf_pci_dev *bfdev) {
|
|
struct bf_dev_info *info = &bfdev->info;
|
|
int i;
|
|
|
|
if (info->irq) {
|
|
if (bfdev->mode == BF_INTR_MODE_LEGACY) {
|
|
free_irq(info->irq, (void *)&(bfdev->bf_int_vec[0]));
|
|
} else if (bfdev->mode == BF_INTR_MODE_MSIX) {
|
|
for (i = 0; i < info->num_irq; i++) {
|
|
free_irq(info->msix_entries[i].vector, (void *)&(bfdev->bf_int_vec[i]));
|
|
}
|
|
} else if (bfdev->mode == BF_INTR_MODE_MSI) {
|
|
for (i = 0; i < info->num_irq; i++) {
|
|
free_irq(info->irq + i, (void *)&(bfdev->bf_int_vec[i]));
|
|
}
|
|
}
|
|
}
|
|
device_destroy(bf_class, MKDEV(bf_major, info->minor));
|
|
bf_remove_cdev(bfdev);
|
|
bf_return_minor_no(info->minor);
|
|
return;
|
|
}
|
|
|
|
static inline struct device *pci_dev_to_dev(struct pci_dev *pdev) {
|
|
return &pdev->dev;
|
|
}
|
|
|
|
static void bf_disable_int_dma(struct bf_pci_dev *bfdev) {
|
|
u8 *bf_base_addr, i;
|
|
u32 *bf_addr;
|
|
volatile u32 val;
|
|
|
|
/* maskinterrupts and DMA */
|
|
bf_base_addr = (bfdev->info.mem[0].internal_addr);
|
|
/* return if called before mmap */
|
|
if (!bf_base_addr) {
|
|
return;
|
|
}
|
|
/* mask interrupt at shadow level */
|
|
bf_addr = (u32 *)((u8 *)bf_base_addr + 0xc0);
|
|
for (i = 0; i < 16; i++) {
|
|
*bf_addr = 0xffffffffUL;
|
|
bf_addr++;
|
|
}
|
|
/* mask DMA */
|
|
bf_addr = (u32 *)((u8 *)bf_base_addr + 0x14);
|
|
val = *bf_addr;
|
|
val &= 0xfffffffeUL;
|
|
*bf_addr = val;
|
|
}
|
|
|
|
static int bf_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) {
|
|
struct bf_pci_dev *bfdev;
|
|
int err, pci_use_highmem;
|
|
int i, num_irq;
|
|
|
|
memset(bf_global, 0, sizeof(bf_global));
|
|
|
|
bfdev = kzalloc(sizeof(struct bf_pci_dev), GFP_KERNEL);
|
|
if (!bfdev) {
|
|
return -ENOMEM;
|
|
}
|
|
|
|
/* init the cookies to be passed to ISRs */
|
|
for (i = 0; i < BF_MSIX_ENTRY_CNT; i++) {
|
|
bfdev->bf_int_vec[i].int_vec_offset = i;
|
|
bfdev->bf_int_vec[i].bf_dev = bfdev;
|
|
}
|
|
|
|
/* initialize intr_mode to none */
|
|
bfdev->mode = BF_INTR_MODE_NONE;
|
|
|
|
/* clear pci_error_state */
|
|
bfdev->info.pci_error_state = 0;
|
|
|
|
/* mark pci device_id type */
|
|
bfdev->info.pci_dev_id = pdev->device;
|
|
switch (pdev->device) {
|
|
case TOFINO2_DEV_ID_A0:
|
|
case TOFINO2_DEV_ID_A00:
|
|
case TOFINO2_DEV_ID_B0:
|
|
bfdev->info.tof_type = BF_TOFINO_2;
|
|
break;
|
|
default:
|
|
bfdev->info.tof_type = BF_TOFINO_1;
|
|
break;
|
|
}
|
|
/* intialize TBUS MSIX indices */
|
|
for (i = 0; i < BF_TBUS_MSIX_INDICES_MAX; i++) {
|
|
if (bfdev->info.tof_type == BF_TOFINO_1) {
|
|
bfdev->info.tbus_msix_ind[i] = BF_TBUS_MSIX_BASE_INDEX_TOF1 + i;
|
|
} else if (bfdev->info.tof_type == BF_TOFINO_2) {
|
|
bfdev->info.tbus_msix_ind[i] = BF_TBUS_MSIX_INDEX_INVALID;
|
|
}
|
|
}
|
|
/*
|
|
* enable device
|
|
*/
|
|
err = pci_enable_device(pdev);
|
|
if (err != 0) {
|
|
printk(KERN_ERR "bf cannot enable PCI device\n");
|
|
goto fail_free;
|
|
}
|
|
|
|
/*
|
|
* reserve device's PCI memory regions for use by this
|
|
* module
|
|
*/
|
|
err = pci_request_regions(pdev, "bf_umem");
|
|
if (err != 0) {
|
|
printk(KERN_ERR "bf Cannot request regions\n");
|
|
goto fail_pci_disable;
|
|
}
|
|
/* remap IO memory */
|
|
err = bf_setup_bars(pdev, &bfdev->info);
|
|
if (err != 0) {
|
|
printk(KERN_ERR "bf Cannot setup BARs\n");
|
|
goto fail_release_iomem;
|
|
}
|
|
|
|
if (!dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64)) &&
|
|
!dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64))) {
|
|
pci_use_highmem = 1;
|
|
} else {
|
|
err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32));
|
|
if (err) {
|
|
err = dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32));
|
|
if (err) {
|
|
printk(KERN_ERR "bf no usable DMA configuration, aborting\n");
|
|
goto fail_release_iomem;
|
|
}
|
|
}
|
|
pci_use_highmem = 0;
|
|
}
|
|
|
|
/* enable pci error reporting */
|
|
/* for the current kernel version, kernel config must have set the followings:
|
|
* CONFIG_PCIEPORTBUS=y and CONFIG_PCIEAER = y
|
|
* we have pci_error_handlers defined that gets invoked by kernel AER module
|
|
* upon detecting the pcie error on this device's addresses.
|
|
* However, there seems no way that AER would pass the offending addresses
|
|
* to the callback functions. AER logs the error messages on the console.
|
|
* This driver's calback function send the SIGIO signal to the user space
|
|
* to indicate the error condition.
|
|
*/
|
|
pci_enable_pcie_error_reporting(pdev);
|
|
|
|
bf_disable_int_dma(bfdev);
|
|
|
|
/* enable bus mastering on the device */
|
|
pci_set_master(pdev);
|
|
|
|
/* fill in bfdev info */
|
|
bfdev->info.version = "0.2";
|
|
bfdev->info.owner = THIS_MODULE;
|
|
bfdev->pdev = pdev;
|
|
|
|
switch (bf_intr_mode_default) {
|
|
#ifdef CONFIG_PCI_MSI
|
|
case BF_INTR_MODE_MSIX:
|
|
/* Only 1 msi-x vector needed */
|
|
bfdev->info.msix_entries =
|
|
kcalloc(BF_MSIX_ENTRY_CNT, sizeof(struct msix_entry), GFP_KERNEL);
|
|
if (!bfdev->info.msix_entries) {
|
|
err = -ENOMEM;
|
|
goto fail_clear_pci_master;
|
|
}
|
|
for (i = 0; i < BF_MSIX_ENTRY_CNT; i++) {
|
|
bfdev->info.msix_entries[i].entry = i;
|
|
}
|
|
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
|
|
num_irq = pci_enable_msix(pdev, bfdev->info.msix_entries,
|
|
BF_MSIX_ENTRY_CNT);
|
|
if (num_irq == 0) {
|
|
bfdev->info.num_irq = BF_MSIX_ENTRY_CNT;
|
|
bfdev->info.irq = bfdev->info.msix_entries[0].vector;
|
|
bfdev->mode = BF_INTR_MODE_MSIX;
|
|
printk(KERN_DEBUG "bf using %d MSIX irq from %ld\n", num_irq,
|
|
bfdev->info.irq);
|
|
break;
|
|
}
|
|
#else
|
|
num_irq = pci_enable_msix_range(
|
|
pdev, bfdev->info.msix_entries, BF_MSIX_ENTRY_CNT, BF_MSIX_ENTRY_CNT);
|
|
if (num_irq == BF_MSIX_ENTRY_CNT) {
|
|
bfdev->info.num_irq = num_irq;
|
|
bfdev->info.irq = bfdev->info.msix_entries[0].vector;
|
|
bfdev->mode = BF_INTR_MODE_MSIX;
|
|
printk(KERN_DEBUG "bf using %d MSIX irq from %ld\n",
|
|
num_irq,
|
|
bfdev->info.irq);
|
|
break;
|
|
} else {
|
|
if (num_irq) pci_disable_msix(pdev);
|
|
kfree(bfdev->info.msix_entries);
|
|
bfdev->info.msix_entries = NULL;
|
|
printk(KERN_ERR "bf error allocating MSIX vectors. Trying MSI...\n");
|
|
/* and, fall back to MSI */
|
|
}
|
|
#endif /* LINUX_VERSION_CODE */
|
|
/* ** intentional no-break */
|
|
case BF_INTR_MODE_MSI:
|
|
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
|
|
num_irq = pci_enable_msi_block(pdev, BF_MSI_ENTRY_CNT);
|
|
/* we must get requested number of MSI vectors enabled */
|
|
if (num_irq == 0) {
|
|
bfdev->info.num_irq = BF_MSI_ENTRY_CNT;
|
|
bfdev->info.irq = pdev->irq;
|
|
bfdev->mode = BF_INTR_MODE_MSI;
|
|
printk(KERN_DEBUG "bf using %d MSI irq from %ld\n", bfdev->info.num_irq,
|
|
bfdev->info.irq);
|
|
break;
|
|
}
|
|
#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
|
|
num_irq = pci_enable_msi_range(pdev, BF_MSI_ENTRY_CNT, BF_MSI_ENTRY_CNT);
|
|
if (num_irq > 0) {
|
|
bfdev->info.num_irq = num_irq;
|
|
bfdev->info.irq = pdev->irq;
|
|
bfdev->mode = BF_INTR_MODE_MSI;
|
|
printk(KERN_DEBUG "bf using %d MSI irq from %ld\n",
|
|
bfdev->info.num_irq,
|
|
bfdev->info.irq);
|
|
break;
|
|
}
|
|
#else
|
|
num_irq = pci_alloc_irq_vectors_affinity(pdev, BF_MSI_ENTRY_CNT,
|
|
BF_MSI_ENTRY_CNT, PCI_IRQ_MSI | PCI_IRQ_AFFINITY, NULL);
|
|
if (num_irq > 0) {
|
|
bfdev->info.num_irq = num_irq;
|
|
bfdev->info.irq = pci_irq_vector(pdev, 0);
|
|
bfdev->mode = BF_INTR_MODE_MSI;
|
|
printk(KERN_DEBUG "bf using %d MSI irq from %ld\n", bfdev->info.num_irq,
|
|
bfdev->info.irq);
|
|
break;
|
|
}
|
|
#endif /* LINUX_VERSION_CODE */
|
|
#endif /* CONFIG_PCI_MSI */
|
|
/* fall back to Legacy Interrupt, intentional no-break */
|
|
|
|
case BF_INTR_MODE_LEGACY:
|
|
if (pci_intx_mask_supported(pdev)) {
|
|
bfdev->info.irq_flags = IRQF_SHARED;
|
|
bfdev->info.irq = pdev->irq;
|
|
bfdev->mode = BF_INTR_MODE_LEGACY;
|
|
printk(KERN_DEBUG "bf using LEGACY irq %ld\n", bfdev->info.irq);
|
|
break;
|
|
}
|
|
printk(KERN_NOTICE " bf PCI INTx mask not supported\n");
|
|
/* fall back to no Interrupt, intentional no-break */
|
|
case BF_INTR_MODE_NONE:
|
|
bfdev->info.irq = 0;
|
|
bfdev->info.num_irq = 0;
|
|
bfdev->mode = BF_INTR_MODE_NONE;
|
|
break;
|
|
|
|
default:
|
|
printk(KERN_DEBUG "bf invalid IRQ mode %u", bf_intr_mode_default);
|
|
err = -EINVAL;
|
|
goto fail_clear_pci_master;
|
|
}
|
|
|
|
pci_set_drvdata(pdev, bfdev);
|
|
sprintf(bfdev->name, "bf_%d", bfdev->info.minor);
|
|
/* register bf driver */
|
|
err = bf_register_device(&pdev->dev, bfdev);
|
|
if (err != 0) {
|
|
goto fail_release_irq;
|
|
}
|
|
|
|
bf_global[bfdev->info.minor].async_queue = NULL;
|
|
bf_global[bfdev->info.minor].bfdev = bfdev;
|
|
|
|
dev_info(&pdev->dev,
|
|
"bf device %d registered with irq %ld\n",
|
|
bfdev->instance,
|
|
bfdev->info.irq);
|
|
printk(KERN_ALERT "bf probe ok\n");
|
|
#ifdef BF_INCLUDE_KPKT
|
|
if (kpkt_mode) {
|
|
err = bf_kpkt_init(pdev,
|
|
bfdev->info.mem[0].internal_addr,
|
|
&bfdev->adapter_ptr,
|
|
bfdev->info.minor,
|
|
pci_use_highmem,
|
|
kpkt_hd_room,
|
|
kpkt_dr_int_en,
|
|
kpkt_rx_count);
|
|
if (err == 0) {
|
|
printk(KERN_ALERT "bf_kpkt kernel processing enabled\n");
|
|
} else {
|
|
printk(KERN_ALERT "error starting bf_kpkt kernel processing\n");
|
|
bfdev->adapter_ptr = NULL;
|
|
}
|
|
}
|
|
#endif
|
|
return 0;
|
|
|
|
fail_release_irq:
|
|
pci_set_drvdata(pdev, NULL);
|
|
if (bfdev->mode == BF_INTR_MODE_MSIX) {
|
|
pci_disable_msix(bfdev->pdev);
|
|
kfree(bfdev->info.msix_entries);
|
|
bfdev->info.msix_entries = NULL;
|
|
} else if (bfdev->mode == BF_INTR_MODE_MSI) {
|
|
pci_disable_msi(bfdev->pdev);
|
|
}
|
|
fail_clear_pci_master:
|
|
pci_clear_master(pdev);
|
|
fail_release_iomem:
|
|
bf_pci_release_iomem(&bfdev->info);
|
|
pci_release_regions(pdev);
|
|
fail_pci_disable:
|
|
pci_disable_device(pdev);
|
|
fail_free:
|
|
kfree(bfdev);
|
|
|
|
printk(KERN_ERR "bf probe not ok\n");
|
|
return err;
|
|
}
|
|
|
|
static void bf_pci_remove(struct pci_dev *pdev) {
|
|
struct bf_pci_dev *bfdev = pci_get_drvdata(pdev);
|
|
struct bf_listener *cur_listener;
|
|
|
|
#ifdef BF_INCLUDE_KPKT
|
|
if (kpkt_mode) {
|
|
bf_kpkt_remove(bfdev->adapter_ptr);
|
|
}
|
|
#endif
|
|
bf_disable_int_dma(bfdev);
|
|
bf_unregister_device(bfdev);
|
|
if (bfdev->mode == BF_INTR_MODE_MSIX) {
|
|
pci_disable_msix(pdev);
|
|
kfree(bfdev->info.msix_entries);
|
|
bfdev->info.msix_entries = NULL;
|
|
} else if (bfdev->mode == BF_INTR_MODE_MSI) {
|
|
pci_disable_msi(pdev);
|
|
}
|
|
pci_clear_master(pdev);
|
|
bf_pci_release_iomem(&bfdev->info);
|
|
pci_release_regions(pdev);
|
|
pci_disable_pcie_error_reporting(pdev);
|
|
pci_disable_device(pdev);
|
|
pci_set_drvdata(pdev, NULL);
|
|
bf_global[bfdev->info.minor].bfdev = NULL;
|
|
/* existing filep structures in open file(s) must be informed that
|
|
* bf_pci_dev is no longer valid */
|
|
spin_lock(&bf_nonisr_lock);
|
|
cur_listener = bfdev->listener_head;
|
|
while (cur_listener) {
|
|
cur_listener->bfdev = NULL;
|
|
cur_listener = cur_listener->next;
|
|
}
|
|
spin_unlock(&bf_nonisr_lock);
|
|
kfree(bfdev);
|
|
}
|
|
|
|
/* AER support callbacks. Refer to:
|
|
* https://www.kernel.org/doc/Documentation/PCI/pcieaer-howto.txt
|
|
* and
|
|
* https://www.kernel.org/doc/Documentation/PCI/pci-error-recovery.txt
|
|
*
|
|
* from bf_kdrv point of view, AER uncorrected errors (fatal and non-fatal)
|
|
* should not cause pci link reset (upstream port AER callbacks must also
|
|
* support this requirements of bf_kdrv)
|
|
* Device, however, is not expected to function after uncorrected errors
|
|
* but, application has chance to perform diags without resetting pci link
|
|
*/
|
|
/**
|
|
* bf_pci_error_detected - called when PCI error is detected
|
|
* @pdev: Pointer to PCI device
|
|
* @state: The current pci connection state
|
|
*
|
|
* called when root complex detects pci error associated with the device
|
|
*/
|
|
static pci_ers_result_t bf_pci_error_detected(struct pci_dev *pdev,
|
|
pci_channel_state_t state) {
|
|
struct bf_pci_dev *bfdev = pci_get_drvdata(pdev);
|
|
|
|
if (!bfdev) {
|
|
return PCI_ERS_RESULT_NONE;
|
|
}
|
|
printk(KERN_ERR "pci_err_detected state %d\n", state);
|
|
if (state == pci_channel_io_perm_failure || state == pci_channel_io_frozen) {
|
|
bfdev->info.pci_error_state = 1;
|
|
#ifdef BF_INCLUDE_KPKT
|
|
if (kpkt_mode) {
|
|
bf_kpkt_set_pci_error(bfdev->adapter_ptr, 1);
|
|
}
|
|
#endif
|
|
/* we do not want pci link to go down. The user space application
|
|
* should collect the diag info, terminate the application and unload the
|
|
* kernel module
|
|
*/
|
|
return PCI_ERS_RESULT_CAN_RECOVER; /* to prevent pci link down */
|
|
} else {
|
|
return PCI_ERS_RESULT_CAN_RECOVER;
|
|
}
|
|
}
|
|
|
|
static pci_ers_result_t bf_pci_mmio_enabled(struct pci_dev *dev) {
|
|
struct bf_pci_dev *bfdev = pci_get_drvdata(dev);
|
|
|
|
printk(KERN_ERR "BF pci_mmio_enabled invoked after pci error\n");
|
|
pci_cleanup_aer_uncorrect_error_status(dev);
|
|
|
|
if (bfdev) {
|
|
/* send a signal to the user space program of the error */
|
|
int minor = bfdev->info.minor;
|
|
if (minor < BF_MAX_DEVICE_CNT && bf_global[minor].async_queue) {
|
|
kill_fasync(&bf_global[minor].async_queue, SIGIO, POLL_ERR);
|
|
}
|
|
}
|
|
return PCI_ERS_RESULT_RECOVERED;
|
|
}
|
|
|
|
/**
|
|
* bf_pci_slot_reset - called after the pci bus has been reset.
|
|
* @pdev: Pointer to PCI device
|
|
*
|
|
* Restart the card from scratch, as if from a cold-boot.
|
|
*/
|
|
static pci_ers_result_t bf_pci_slot_reset(struct pci_dev *pdev) {
|
|
/* nothing to do for now as we do not expect to get backto normal after
|
|
* a pcie link reset. Not expected to be invoked.
|
|
* TBD: fill in this function if tofino can recover after an error
|
|
*/
|
|
printk(KERN_ERR "BF pci_slot_reset invoked after pci error\n");
|
|
return PCI_ERS_RESULT_RECOVERED;
|
|
}
|
|
|
|
/**
|
|
* bf_pci_resume - called when kernel thinks the device is up on PCIe.
|
|
* @pdev: Pointer to PCI device
|
|
*
|
|
* This callback is called when the error recovery driver tells us that
|
|
* its OK to resume normal operation.
|
|
*/
|
|
static void bf_pci_resume(struct pci_dev *pdev) {
|
|
printk(KERN_ERR "BF io_resume invoked after pci error\n");
|
|
}
|
|
|
|
static int bf_config_intr_mode(char *intr_str) {
|
|
if (!intr_str) {
|
|
pr_info("Use MSI interrupt by default\n");
|
|
return 0;
|
|
}
|
|
|
|
if (!strcmp(intr_str, BF_INTR_MODE_MSIX_NAME)) {
|
|
bf_intr_mode_default = BF_INTR_MODE_MSIX;
|
|
pr_info("Use MSIX interrupt\n");
|
|
} else if (!strcmp(intr_str, BF_INTR_MODE_MSI_NAME)) {
|
|
bf_intr_mode_default = BF_INTR_MODE_MSI;
|
|
pr_info("Use MSI interrupt\n");
|
|
} else if (!strcmp(intr_str, BF_INTR_MODE_LEGACY_NAME)) {
|
|
bf_intr_mode_default = BF_INTR_MODE_LEGACY;
|
|
pr_info("Use legacy interrupt\n");
|
|
} else if (!strcmp(intr_str, BF_INTR_MODE_NONE_NAME)) {
|
|
bf_intr_mode_default = BF_INTR_MODE_NONE;
|
|
pr_info("BF interrupt disabled\n");
|
|
} else {
|
|
pr_info("Error: bad intr_mode parameter - %s\n", intr_str);
|
|
return -EINVAL;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static const struct pci_device_id bf_pci_tbl[] = {
|
|
{PCI_VDEVICE(BF, TOFINO_DEV_ID_A0), 0},
|
|
{PCI_VDEVICE(BF, TOFINO_DEV_ID_B0), 0},
|
|
{PCI_VDEVICE(BF, TOFINO2_DEV_ID_A0), 0},
|
|
{PCI_VDEVICE(BF, TOFINO2_DEV_ID_A00), 0},
|
|
{PCI_VDEVICE(BF, TOFINO2_DEV_ID_B0), 0},
|
|
/* required last entry */
|
|
{.device = 0}};
|
|
|
|
/* PCI bus error handlers */
|
|
static struct pci_error_handlers bf_pci_err_handler = {
|
|
.error_detected = bf_pci_error_detected,
|
|
.mmio_enabled = bf_pci_mmio_enabled,
|
|
.slot_reset = bf_pci_slot_reset,
|
|
.resume = bf_pci_resume,
|
|
};
|
|
|
|
static struct pci_driver bf_pci_driver = {.name = "bf",
|
|
.id_table = bf_pci_tbl,
|
|
.probe = bf_pci_probe,
|
|
.remove = bf_pci_remove,
|
|
.err_handler = &bf_pci_err_handler};
|
|
|
|
static int __init bfdrv_init(void) {
|
|
int ret;
|
|
|
|
ret = bf_config_intr_mode(intr_mode);
|
|
/* do not enable DR interrupt if not using MSI or not in kpkt mode */
|
|
if ((bf_intr_mode_default != BF_INTR_MODE_MSI &&
|
|
bf_intr_mode_default != BF_INTR_MODE_LEGACY) || kpkt_mode == 0) {
|
|
kpkt_dr_int_en = 0;
|
|
}
|
|
if (kpkt_mode) {
|
|
printk(KERN_NOTICE "kpkt_mode %d hd_room %d dr_int_en %d rx_count %d\n",
|
|
kpkt_mode,
|
|
kpkt_hd_room,
|
|
kpkt_dr_int_en,
|
|
kpkt_rx_count);
|
|
}
|
|
if (ret < 0) {
|
|
return ret;
|
|
}
|
|
spin_lock_init(&bf_nonisr_lock);
|
|
return pci_register_driver(&bf_pci_driver);
|
|
}
|
|
|
|
static void __exit bfdrv_exit(void) {
|
|
pci_unregister_driver(&bf_pci_driver);
|
|
intr_mode = NULL;
|
|
kpkt_mode = 0;
|
|
}
|
|
|
|
module_init(bfdrv_init);
|
|
module_exit(bfdrv_exit);
|
|
|
|
module_param(kpkt_mode, int, S_IRUGO);
|
|
MODULE_PARM_DESC(kpkt_mode,
|
|
"bf kernel mode pkt processing (default=off):\n"
|
|
" 1 Use kernel mode bf_pkt processing\n"
|
|
" 0 Do not use kernel mode bf_pkt processing\n"
|
|
"\n");
|
|
|
|
module_param(kpkt_hd_room, int, S_IRUGO);
|
|
MODULE_PARM_DESC(kpkt_hd_room,
|
|
"head room to reserve when receiving packets (default=32):\n"
|
|
"\n");
|
|
|
|
module_param(kpkt_rx_count, int, S_IRUGO);
|
|
MODULE_PARM_DESC(kpkt_rx_count,
|
|
"number of buffers per rx pkt ring (default=256):\n"
|
|
"\n");
|
|
/* dr_int_en is applicable only if MSI interrupt mode is selected */
|
|
module_param(kpkt_dr_int_en, int, S_IRUGO);
|
|
MODULE_PARM_DESC(kpkt_dr_int_en,
|
|
"bf pkt Interrupt enable (default=1):\n"
|
|
" 1 use interrupt\n"
|
|
" 0 Do not use interrupt\n"
|
|
"\n");
|
|
|
|
module_param(intr_mode, charp, S_IRUGO);
|
|
MODULE_PARM_DESC(intr_mode,
|
|
"bf interrupt mode (default=msix):\n"
|
|
" " BF_INTR_MODE_MSIX_NAME
|
|
" Use MSIX interrupt\n"
|
|
" " BF_INTR_MODE_MSI_NAME
|
|
" Use MSI interrupt\n"
|
|
" " BF_INTR_MODE_LEGACY_NAME
|
|
" Use Legacy interrupt\n"
|
|
" " BF_INTR_MODE_NONE_NAME
|
|
" Use no interrupt\n"
|
|
"\n");
|
|
|
|
MODULE_DEVICE_TABLE(pci, bf_pci_tbl);
|
|
MODULE_DESCRIPTION("Barefoot Tofino PCI device");
|
|
MODULE_LICENSE("GPL");
|
|
MODULE_AUTHOR("Barefoot Networks");
|