/*******************************************************************************
 * BAREFOOT NETWORKS CONFIDENTIAL & PROPRIETARY
 *
 * Copyright (c) 2015-2016 Barefoot Networks, Inc.
 * All Rights Reserved.
 *
 * NOTICE: All information contained herein is, and remains the property of
 * Barefoot Networks, Inc. and its suppliers, if any. The intellectual and
 * technical concepts contained herein are proprietary to Barefoot Networks,
 * Inc. and its suppliers and may be covered by U.S. and Foreign Patents,
 * patents in process, and are protected by trade secret or copyright law.
 * Dissemination of this information or reproduction of this material is
 * strictly forbidden unless prior written permission is obtained from
 * Barefoot Networks, Inc.
 *
 * No warranty, explicit or implicit, is provided, unless granted under a
 * written agreement with Barefoot Networks, Inc.
 *
 * $Id: $
 ******************************************************************************/
/**
 * GPL LICENSE SUMMARY
 *
 * Copyright (c) 2015 Barefoot Networks. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the...
 */

/* bf_drv kernel module
 *
 * This is the kernel-mode driver for the Tofino chip.
 * It provides a user-space mmap service and user-space "wait for interrupt"
 * and "enable interrupt" services.
 */
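/*
 * Typical user-space interaction, as a rough sketch (illustrative only: the
 * "/dev/bf0" node name assumes a standard udev setup for the "bf" class, and
 * the buffer size assumes the MSI-X interrupt mode):
 *
 *   int fd = open("/dev/bf0", O_RDWR);
 *   // map the first memory BAR; the mmap offset selects the BAR map index
 *   volatile uint32_t *regs = mmap(NULL, map_len, PROT_READ | PROT_WRITE,
 *                                  MAP_SHARED, fd, 0);
 *   struct pollfd pfd = {.fd = fd, .events = POLLIN};
 *   poll(&pfd, 1, timeout_ms);        // wait for an interrupt
 *   int32_t cnt[128];                 // one count per MSI-X vector
 *   read(fd, cnt, sizeof(cnt));       // fetch per-vector event counts
 *   int32_t en = 1;
 *   write(fd, &en, sizeof(en));       // re-enable interrupts when done
 */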
#include <linux/module.h>
#include <linux/device.h>
#include <linux/pci.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/msi.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/uaccess.h>
#include <linux/version.h>
#include "bf_ioctl.h"
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
#include <linux/sched/signal.h>
#else
#include <linux/sched.h>
#endif
#include <linux/aer.h>
#include <linux/wait.h>
#include <linux/dma-mapping.h>

#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0)
//#error unsupported linux kernel version
#endif

/* TBD: Need to build with CONFIG_PCI_MSI */
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
extern int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec);
extern int pci_enable_msix(struct pci_dev *dev,
                           struct msix_entry *entries,
                           int nvec);
#else
extern int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec);
extern int pci_enable_msix_range(struct pci_dev *dev,
                                 struct msix_entry *entries,
                                 int minvec,
                                 int maxvec);
#endif

#define PCI_VENDOR_ID_BF 0x1d1c
#define TOFINO_DEV_ID_A0 0x01
#define TOFINO_DEV_ID_B0 0x10
#define TOFINO2_DEV_ID_A0 0x0100

#ifndef PCI_MSIX_ENTRY_SIZE
#define PCI_MSIX_ENTRY_SIZE 16
#define PCI_MSIX_ENTRY_LOWER_ADDR 0
#define PCI_MSIX_ENTRY_UPPER_ADDR 4
#define PCI_MSIX_ENTRY_DATA 8
#define PCI_MSIX_ENTRY_VECTOR_CTRL 12
#define PCI_MSIX_ENTRY_CTRL_MASKBIT 1
#endif

#define BF_CLASS_NAME "bf"
#define BF_MAX_DEVICE_CNT 256
#define BF_INTR_MODE_NONE_NAME "none"
#define BF_INTR_MODE_LEGACY_NAME "legacy"
#define BF_INTR_MODE_MSI_NAME "msi"
#define BF_INTR_MODE_MSIX_NAME "msix"
#define BF_MAX_BAR_MAPS 6
#define BF_MSIX_ENTRY_CNT 128 /* TBD: make it 512 */
#define BF_MSI_ENTRY_CNT 2

/* interrupt mode */
enum bf_intr_mode {
  BF_INTR_MODE_NONE = 0,
  BF_INTR_MODE_LEGACY,
  BF_INTR_MODE_MSI,
  BF_INTR_MODE_MSIX
};

/* device memory */
struct bf_dev_mem {
  const char *name;
  phys_addr_t addr;
  resource_size_t size;
  void __iomem *internal_addr;
};

struct bf_listener {
  struct bf_pci_dev *bfdev;
  s32 event_count[BF_MSIX_ENTRY_CNT];
  int minor;
  struct bf_listener *next;
};

/* device information */
struct bf_dev_info {
  struct module *owner;
  struct device *dev;
  int minor;
  atomic_t event[BF_MSIX_ENTRY_CNT];
  wait_queue_head_t wait;
  const char *version;
  struct bf_dev_mem mem[BF_MAX_BAR_MAPS];
  struct msix_entry *msix_entries;
  long irq;                /* first irq vector */
  int num_irq;             /* number of irq vectors */
  unsigned long irq_flags; /* sharable? */
  int pci_error_state;     /* was there a pci bus error */
};

/* cookie to be passed to the IRQ handler, useful especially with MSI-X */
struct bf_int_vector {
  struct bf_pci_dev *bf_dev;
  int int_vec_offset;
};

/**
 * A structure describing the private information for a BF PCIe device.
 */
struct bf_pci_dev {
  struct bf_dev_info info;
  struct pci_dev *pdev;
  enum bf_intr_mode mode;
  u8 instance;
  char name[16];
  struct bf_int_vector bf_int_vec[BF_MSIX_ENTRY_CNT];
  struct bf_listener *listener_head; /* head of a singly linked list of
                                      * listeners */
};
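/*
 * Event accounting sketch (descriptive note, not additional code): each ISR
 * increments the per-vector counter in bf_dev_info,
 *
 *   atomic_inc(&info->event[vec]);
 *
 * while every open file descriptor (bf_listener) keeps the last count it
 * reported to user space in event_count[vec]. poll() and read() treat a
 * vector as "pending" whenever the two values differ, and read() re-syncs
 * the listener's copy.
 */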
/* Keep any global information here that must survive even after the
 * bf_pci_dev is freed up.
 */
struct bf_global {
  struct bf_pci_dev *bfdev;
  struct cdev *bf_cdev;
  struct fasync_struct *async_queue;
};

static int bf_major;
static int bf_minor[BF_MAX_DEVICE_CNT] = {0};
static struct class *bf_class = NULL;
static char *intr_mode = NULL;
static enum bf_intr_mode bf_intr_mode_default = BF_INTR_MODE_MSI;
static spinlock_t bf_nonisr_lock;
/* dev->minor should index into this array */
static struct bf_global bf_global[BF_MAX_DEVICE_CNT];

static void bf_add_listener(struct bf_pci_dev *bfdev,
                            struct bf_listener *listener) {
  struct bf_listener **cur_listener = &bfdev->listener_head;

  if (!listener) {
    return;
  }
  spin_lock(&bf_nonisr_lock);
  while (*cur_listener) {
    cur_listener = &((*cur_listener)->next);
  }
  *cur_listener = listener;
  listener->next = NULL;
  spin_unlock(&bf_nonisr_lock);
}

static void bf_remove_listener(struct bf_pci_dev *bfdev,
                               struct bf_listener *listener) {
  struct bf_listener **cur_listener;

  /* in case of certain error conditions, this function might be called after
   * bf_pci_remove() */
  if (!bfdev || !listener) {
    return;
  }
  cur_listener = &bfdev->listener_head;
  spin_lock(&bf_nonisr_lock);
  if (*cur_listener == listener) {
    *cur_listener = listener->next;
  } else {
    while (*cur_listener) {
      if ((*cur_listener)->next == listener) {
        (*cur_listener)->next = listener->next;
        break;
      }
      cur_listener = &((*cur_listener)->next);
    }
    listener->next = NULL;
  }
  spin_unlock(&bf_nonisr_lock);
}

/* a pool of minor numbers is maintained */
/* return the first available minor number */
static int bf_get_next_minor_no(int *minor) {
  int i;

  spin_lock(&bf_nonisr_lock);
  for (i = 0; i < BF_MAX_DEVICE_CNT; i++) {
    if (bf_minor[i] == 0) {
      *minor = i;
      bf_minor[i] = 1; /* mark it as taken */
      spin_unlock(&bf_nonisr_lock);
      return 0;
    }
  }
  *minor = -1;
  spin_unlock(&bf_nonisr_lock);
  return -1;
}

/* return a minor number back to the pool for recycling */
static int bf_return_minor_no(int minor) {
  int err;

  spin_lock(&bf_nonisr_lock);
  if (bf_minor[minor] == 0) { /* was already returned */
    err = -1;                 /* don't change anything, but return an error */
  } else {
    bf_minor[minor] = 0; /* mark it as available */
    err = 0;
  }
  spin_unlock(&bf_nonisr_lock);
  return err;
}

static inline struct bf_pci_dev *bf_get_pci_dev(struct bf_dev_info *info) {
  return container_of(info, struct bf_pci_dev, info);
}

/*
 * Mask or unmask generation of MSI-X messages for the given vector
 * (state != 0 unmasks, state == 0 masks).
 */
static void bf_msix_mask_irq(struct msi_desc *desc, int32_t state) {
  u32 mask_bits = desc->masked;
  unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
                    PCI_MSIX_ENTRY_VECTOR_CTRL;

  if (state != 0) {
    mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
  } else {
    mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
  }

  if (mask_bits != desc->masked) {
    writel(mask_bits, desc->mask_base + offset);
    readl(desc->mask_base);
    desc->masked = mask_bits;
  }
}
/**
 * irqcontrol can be used to enable/disable interrupts from user-space
 * processes.
 *
 * @param bfdev
 *   pointer to bf_pci_dev
 * @param irq_state
 *   state value: 1 to enable interrupts, 0 to disable interrupts.
 *
 * @return
 *   - On success, 0.
 *   - On failure, a negative value.
 */
static int bf_pci_irqcontrol(struct bf_pci_dev *bfdev, s32 irq_state) {
  struct pci_dev *pdev = bfdev->pdev;

  pci_cfg_access_lock(pdev);

  if (bfdev->mode == BF_INTR_MODE_LEGACY) {
    pci_intx(pdev, !!irq_state);
  } else if (bfdev->mode == BF_INTR_MODE_MSIX) {
    struct msi_desc *desc;
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0)
    list_for_each_entry(desc, &pdev->msi_list, list)
        bf_msix_mask_irq(desc, irq_state);
#else
    for_each_pci_msi_entry(desc, pdev) bf_msix_mask_irq(desc, irq_state);
#endif
  }

  pci_cfg_access_unlock(pdev);

  return 0;
}

/**
 * Interrupt handler that checks whether the interrupt is from the right
 * device. If so, the interrupt is disabled here and will be enabled later
 * (from user space).
 */
static irqreturn_t bf_pci_irqhandler(int irq, struct bf_pci_dev *bfdev) {
  /* Legacy mode needs to mask in hardware */
  if (bfdev->mode == BF_INTR_MODE_LEGACY &&
      !pci_check_and_mask_intx(bfdev->pdev)) {
    return IRQ_NONE;
  }

  /* NOTE: if bfdev->info.pci_error_state == 1, then do not access the
   * device and return IRQ_NONE. */

  /* Message-signaled mode: not a shared IRQ, and auto-masked */
  return IRQ_HANDLED;
}

/* Remap PCI resources described by BAR #pci_bar */
static int bf_pci_setup_iomem(struct pci_dev *dev,
                              struct bf_dev_info *info,
                              int n,
                              int pci_bar,
                              const char *name) {
  unsigned long addr, len;
  void *internal_addr;

  if (sizeof(info->mem) / sizeof(info->mem[0]) <= n) {
    return -EINVAL;
  }

  addr = pci_resource_start(dev, pci_bar);
  len = pci_resource_len(dev, pci_bar);
  if (addr == 0 || len == 0) {
    return -1;
  }
  internal_addr = pci_ioremap_bar(dev, pci_bar);
  if (internal_addr == NULL) {
    return -1;
  }
  info->mem[n].name = name;
  info->mem[n].addr = addr;
  info->mem[n].internal_addr = internal_addr;
  info->mem[n].size = len;
  return 0;
}

/* Unmap previously ioremap'd resources */
static void bf_pci_release_iomem(struct bf_dev_info *info) {
  int i;

  for (i = 0; i < BF_MAX_BAR_MAPS; i++) {
    if (info->mem[i].internal_addr) {
      iounmap(info->mem[i].internal_addr);
    }
  }
}

static int bf_setup_bars(struct pci_dev *dev, struct bf_dev_info *info) {
  int i, iom, ret;
  unsigned long flags;
  static const char *bar_names[BF_MAX_BAR_MAPS] = {
      "BAR0", "BAR1", "BAR2", "BAR3", "BAR4", "BAR5",
  };

  iom = 0;
  for (i = 0; i < BF_MAX_BAR_MAPS; i++) {
    if (pci_resource_len(dev, i) != 0 && pci_resource_start(dev, i) != 0) {
      flags = pci_resource_flags(dev, i);
      if (flags & IORESOURCE_MEM) {
        ret = bf_pci_setup_iomem(dev, info, iom, i, bar_names[i]);
        if (ret != 0) {
          return ret;
        }
        iom++;
      }
    }
  }
  /* if at least one BAR was mapped, ret holds 0 from the last mapping */
  return (iom != 0) ? ret : -ENOENT;
}
static irqreturn_t bf_interrupt(int irq, void *bfdev_id) {
  struct bf_pci_dev *bfdev = ((struct bf_int_vector *)bfdev_id)->bf_dev;
  int vect_off = ((struct bf_int_vector *)bfdev_id)->int_vec_offset;

  irqreturn_t ret = bf_pci_irqhandler(irq, bfdev);

  if (ret == IRQ_HANDLED) {
    atomic_inc(&(bfdev->info.event[vect_off]));
  }
  return ret;
}

static unsigned int bf_poll(struct file *filep, poll_table *wait) {
  struct bf_listener *listener = (struct bf_listener *)filep->private_data;
  struct bf_pci_dev *bfdev = listener->bfdev;
  int i;

  if (!bfdev) {
    return -ENODEV;
  }
  if (!bfdev->info.irq) {
    return -EIO;
  }

  poll_wait(filep, &bfdev->info.wait, wait);
  for (i = 0; i < BF_MSIX_ENTRY_CNT; i++) {
    if (listener->event_count[i] != atomic_read(&bfdev->info.event[i])) {
      return POLLIN | POLLRDNORM;
    }
  }
  return 0;
}

static int bf_find_mem_index(struct vm_area_struct *vma) {
  struct bf_pci_dev *bfdev = vma->vm_private_data;

  if (vma->vm_pgoff < BF_MAX_BAR_MAPS) {
    if (bfdev->info.mem[vma->vm_pgoff].size == 0) {
      return -1;
    }
    return (int)vma->vm_pgoff;
  }
  return -1;
}

static const struct vm_operations_struct bf_physical_vm_ops = {
#ifdef CONFIG_HAVE_IOREMAP_PROT
    .access = generic_access_phys,
#endif
};

static int bf_mmap_physical(struct vm_area_struct *vma) {
  struct bf_pci_dev *bfdev = vma->vm_private_data;
  int bar = bf_find_mem_index(vma);
  struct bf_dev_mem *mem;

  if (bar < 0) {
    return -EINVAL;
  }
  mem = bfdev->info.mem + bar;

  if (mem->addr & ~PAGE_MASK) {
    return -ENODEV;
  }
  if (vma->vm_end - vma->vm_start > mem->size) {
    return -EINVAL;
  }

  vma->vm_ops = &bf_physical_vm_ops;
  vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

  /*
   * We cannot use the vm_iomap_memory() helper here,
   * because vma->vm_pgoff is the map index we looked
   * up above in bf_find_mem_index(), rather than an
   * actual page offset into the mmap.
   *
   * So we just do the physical mmap without a page
   * offset.
   */
  return remap_pfn_range(vma,
                         vma->vm_start,
                         mem->addr >> PAGE_SHIFT,
                         vma->vm_end - vma->vm_start,
                         vma->vm_page_prot);
}
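/*
 * User-space mapping sketch (an assumption about intended usage, not part of
 * this file): because vm_pgoff is interpreted as a BAR map index, BAR map n
 * is selected by passing an mmap offset of n pages:
 *
 *   void *bar_n = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
 *                      (off_t)n * sysconf(_SC_PAGESIZE));
 */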
static int bf_mmap(struct file *filep, struct vm_area_struct *vma) {
  struct bf_listener *listener = filep->private_data;
  struct bf_pci_dev *bfdev = listener->bfdev;
  int bar;
  unsigned long requested_pages, actual_pages;

  if (!bfdev) {
    return -ENODEV;
  }
  if (vma->vm_end < vma->vm_start) {
    return -EINVAL;
  }

  vma->vm_private_data = bfdev;

  bar = bf_find_mem_index(vma);
  if (bar < 0) {
    return -EINVAL;
  }

  requested_pages = vma_pages(vma);
  actual_pages = ((bfdev->info.mem[bar].addr & ~PAGE_MASK) +
                  bfdev->info.mem[bar].size + PAGE_SIZE - 1) >>
                 PAGE_SHIFT;
  if (requested_pages > actual_pages) {
    return -EINVAL;
  }

  return bf_mmap_physical(vma);
}

static int bf_fasync(int fd, struct file *filep, int mode) {
  int minor;

  if (!filep->private_data) {
    return -EINVAL;
  }
  minor = ((struct bf_listener *)filep->private_data)->minor;
  if (minor >= BF_MAX_DEVICE_CNT) {
    return -EINVAL;
  }
  if (mode == 0 && bf_global[minor].async_queue == NULL) {
    return 0; /* nothing to do */
  }
  return fasync_helper(fd, filep, mode, &bf_global[minor].async_queue);
}
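/*
 * Async-notification sketch (assumed usage; standard fcntl(2) calls): a user
 * process that wants SIGIO on PCI bus errors (see bf_pci_error_detected()
 * below, which calls kill_fasync()) registers itself with:
 *
 *   signal(SIGIO, sigio_handler);   // sigio_handler is the app's own handler
 *   fcntl(fd, F_SETOWN, getpid());
 *   fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | FASYNC);
 */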
static int bf_open(struct inode *inode, struct file *filep) {
  struct bf_pci_dev *bfdev;
  struct bf_listener *listener;
  int i;

  bfdev = bf_global[iminor(inode)].bfdev;
  listener = kmalloc(sizeof(*listener), GFP_KERNEL);
  if (listener) {
    listener->bfdev = bfdev;
    listener->minor = bfdev->info.minor;
    listener->next = NULL;
    bf_add_listener(bfdev, listener);
    for (i = 0; i < BF_MSIX_ENTRY_CNT; i++) {
      listener->event_count[i] = atomic_read(&bfdev->info.event[i]);
    }
    filep->private_data = listener;
    return 0;
  } else {
    return -ENOMEM;
  }
}

static int bf_release(struct inode *inode, struct file *filep) {
  struct bf_listener *listener = filep->private_data;

  bf_fasync(-1, filep, 0); /* empty any process id in the notification list */
  if (listener->bfdev) {
    bf_remove_listener(listener->bfdev, listener);
  }
  kfree(listener);
  return 0;
}

/* user-space support: make the read() system call after poll() or select() */
static ssize_t bf_read(struct file *filep,
                       char __user *buf,
                       size_t count,
                       loff_t *ppos) {
  struct bf_listener *listener = filep->private_data;
  struct bf_pci_dev *bfdev = listener->bfdev;
  int retval, event_count[BF_MSIX_ENTRY_CNT];
  int i, mismatch_found = 0;                  /* OR of per-vector mismatches */
  unsigned char cnt_match[BF_MSIX_ENTRY_CNT]; /* per-vector mismatch */

  if (!bfdev) {
    return -ENODEV;
  }
  /* irq must be set up for read() to work */
  if (!bfdev->info.irq) {
    return -EIO;
  }

  /* ensure that there is enough space in the user buffer for the given
   * interrupt mode */
  if (bfdev->mode == BF_INTR_MODE_MSIX) {
    if (count < sizeof(s32) * BF_MSIX_ENTRY_CNT) {
      return -EINVAL;
    }
    count = sizeof(s32) * BF_MSIX_ENTRY_CNT;
  } else if (bfdev->mode == BF_INTR_MODE_MSI) {
    if (count < sizeof(s32) * BF_MSI_ENTRY_CNT) {
      return -EINVAL;
    }
    count = sizeof(s32) * BF_MSI_ENTRY_CNT;
  } else {
    if (count < sizeof(s32)) {
      return -EINVAL;
    }
    count = sizeof(s32);
  }

  do {
    set_current_state(TASK_INTERRUPTIBLE);

    for (i = 0; i < (count / sizeof(s32)); i++) {
      event_count[i] = atomic_read(&(bfdev->info.event[i]));
      if (event_count[i] != listener->event_count[i]) {
        mismatch_found |= 1;
        cnt_match[i] = 1;
      } else {
        event_count[i] = 0;
        cnt_match[i] = 0;
      }
    }
    if (mismatch_found) {
      __set_current_state(TASK_RUNNING);
      if (copy_to_user(buf, &event_count, count)) {
        retval = -EFAULT;
      } else { /* adjust the listener->event_count */
        for (i = 0; i < (count / sizeof(s32)); i++) {
          if (cnt_match[i]) {
            listener->event_count[i] = event_count[i];
          }
        }
        retval = count;
      }
      break;
    }
    if (filep->f_flags & O_NONBLOCK) {
      retval = -EAGAIN;
      break;
    }
    if (signal_pending(current)) {
      retval = -ERESTARTSYS;
      break;
    }
    schedule();
  } while (1);

  __set_current_state(TASK_RUNNING);

  return retval;
}

/* user space is supposed to call this after it is done with interrupt
 * processing */
static ssize_t bf_write(struct file *filep,
                        const char __user *buf,
                        size_t count,
                        loff_t *ppos) {
  struct bf_listener *listener = filep->private_data;
  struct bf_pci_dev *bfdev = listener->bfdev;
  ssize_t ret;
  s32 int_en;

  if (!bfdev || !bfdev->info.irq) {
    return -EIO;
  }

  if (count != sizeof(s32)) {
    return -EINVAL;
  }

  if (copy_from_user(&int_en, buf, count)) {
    return -EFAULT;
  }

  /* clear pci_error_state */
  bfdev->info.pci_error_state = 0;

  ret = bf_pci_irqcontrol(bfdev, int_en);

  return ret ? ret : sizeof(s32);
}
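/*
 * Interrupt-service loop sketch from user space (illustrative; the buffer
 * size depends on the interrupt mode: BF_MSIX_ENTRY_CNT s32 values for
 * MSI-X, BF_MSI_ENTRY_CNT for MSI, one for legacy):
 *
 *   int32_t cnt[BF_MSIX_ENTRY_CNT], en = 1;
 *   for (;;) {
 *     if (read(fd, cnt, sizeof(cnt)) < 0) continue;  // blocks until events
 *     // cnt[i] != 0 => vector i fired; service it, then re-enable:
 *     write(fd, &en, sizeof(en));
 *   }
 *
 * Note that write() also clears pci_error_state before re-enabling.
 */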
static long bf_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) {
  struct bf_listener *listener = filep->private_data;
  struct bf_pci_dev *bfdev = listener->bfdev;
  bf_dma_bus_map_t dma_map;
  void __user *addr = (void __user *)arg;
  dma_addr_t dma_hndl;

  if (!bfdev || !addr) {
    return -EFAULT;
  }

  switch (cmd) {
    case BF_IOCMAPDMAADDR:
      if (access_ok(VERIFY_WRITE, addr, sizeof(bf_dma_bus_map_t))) {
        if (copy_from_user(&dma_map, addr, sizeof(bf_dma_bus_map_t))) {
          return -EFAULT;
        }
        if (!dma_map.phy_addr || !dma_map.size) {
          return -EFAULT;
        }
        dma_hndl = dma_map_single(&bfdev->pdev->dev,
                                  phys_to_virt(dma_map.phy_addr),
                                  dma_map.size,
                                  DMA_BIDIRECTIONAL);
        if (dma_mapping_error(&bfdev->pdev->dev, dma_hndl)) {
          return -EFAULT;
        }
        dma_map.dma_addr = (void *)dma_hndl;
        if (copy_to_user(addr, &dma_map, sizeof(bf_dma_bus_map_t))) {
          return -EFAULT;
        }
      } else {
        return -EFAULT;
      }
      break;
    case BF_IOCUNMAPDMAADDR:
      if (access_ok(VERIFY_READ, addr, sizeof(bf_dma_bus_map_t))) {
        if (copy_from_user(&dma_map, addr, sizeof(bf_dma_bus_map_t))) {
          return -EFAULT;
        }
        if (!dma_map.dma_addr || !dma_map.size) {
          return -EFAULT;
        }
        dma_unmap_single(&bfdev->pdev->dev,
                         (dma_addr_t)dma_map.dma_addr,
                         dma_map.size,
                         DMA_BIDIRECTIONAL);
      } else {
        return -EFAULT;
      }
      break;
    default:
      return -EINVAL;
  }
  return 0;
}
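/*
 * DMA-mapping ioctl sketch from user space (illustrative; bf_dma_bus_map_t
 * and the BF_IOC* commands come from bf_ioctl.h):
 *
 *   bf_dma_bus_map_t m = {.phy_addr = phys, .size = len};
 *   ioctl(fd, BF_IOCMAPDMAADDR, &m);    // on return, m.dma_addr holds the
 *                                       // bus address usable by the device
 *   ...
 *   ioctl(fd, BF_IOCUNMAPDMAADDR, &m);  // release the mapping
 */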
static const struct file_operations bf_fops = {
    .owner = THIS_MODULE,
    .open = bf_open,
    .release = bf_release,
    .unlocked_ioctl = bf_ioctl,
    .read = bf_read,
    .write = bf_write,
    .mmap = bf_mmap,
    .poll = bf_poll,
    .fasync = bf_fasync,
};

static int bf_major_init(struct bf_pci_dev *bfdev, int minor) {
  struct cdev *cdev;
  static const char name[] = "bf";
  dev_t bf_dev = 0;
  int result;

  result = alloc_chrdev_region(&bf_dev, 0, BF_MAX_DEVICE_CNT, name);
  if (result) {
    return result;
  }

  result = -ENOMEM;
  cdev = cdev_alloc();
  if (!cdev) {
    goto fail_dev_add;
  }
  cdev->ops = &bf_fops;
  cdev->owner = THIS_MODULE;
  kobject_set_name(&cdev->kobj, "%s", name);
  result = cdev_add(cdev, bf_dev, BF_MAX_DEVICE_CNT);
  if (result) {
    goto fail_dev_add;
  }

  bf_major = MAJOR(bf_dev);
  bf_global[minor].bf_cdev = cdev;
  return 0;

fail_dev_add:
  unregister_chrdev_region(bf_dev, BF_MAX_DEVICE_CNT);
  return result;
}

static void bf_major_cleanup(struct bf_pci_dev *bfdev, int minor) {
  unregister_chrdev_region(MKDEV(bf_major, 0), BF_MAX_DEVICE_CNT);
  cdev_del(bf_global[minor].bf_cdev);
}

static int bf_init_cdev(struct bf_pci_dev *bfdev, int minor) {
  int ret;

  ret = bf_major_init(bfdev, minor);
  if (ret) {
    return ret;
  }

  bf_class = class_create(THIS_MODULE, BF_CLASS_NAME);
  if (IS_ERR(bf_class)) {
    printk(KERN_ERR "create_class failed for bf_dev\n");
    ret = -ENODEV;
    goto err_class_register;
  }
  return 0;

err_class_register:
  bf_major_cleanup(bfdev, minor);
  return ret;
}

static void bf_remove_cdev(struct bf_pci_dev *bfdev) {
  class_destroy(bf_class);
  bf_major_cleanup(bfdev, bfdev->info.minor);
}

/**
 * bf_register_device - register a new userspace mem device
 * @parent: parent device
 * @bfdev:  bf pci device
 *
 * returns zero on success or a negative error code.
 */
int bf_register_device(struct device *parent, struct bf_pci_dev *bfdev) {
  struct bf_dev_info *info = &bfdev->info;
  int i, j, ret = 0;
  int minor;

  if (!parent || !info || !info->version) {
    return -EINVAL;
  }

  init_waitqueue_head(&info->wait);
  for (i = 0; i < BF_MSIX_ENTRY_CNT; i++) {
    atomic_set(&info->event[i], 0);
  }

  if (bf_get_next_minor_no(&minor)) {
    return -EINVAL;
  }

  ret = bf_init_cdev(bfdev, minor);
  if (ret) {
    printk(KERN_ERR "BF: device cdev creation failed\n");
    return ret;
  }

  info->dev = device_create(
      bf_class, parent, MKDEV(bf_major, minor), bfdev, "bf%d", minor);
  if (IS_ERR(info->dev)) {
    printk(KERN_ERR "BF: device creation failed\n");
    return -ENODEV;
  }
  info->minor = minor;

  /* bind ISRs and request interrupts */
  if (info->irq && (bfdev->mode != BF_INTR_MODE_NONE)) {
    /*
     * Note that we deliberately don't use devm_request_irq
     * here. The parent module can unregister the UIO device
     * and call pci_disable_msi, which requires that this
     * irq has been freed. However, the device may have open
     * FDs at the time of unregister and therefore may not be
     * freed until they are released.
     */
    if (bfdev->mode == BF_INTR_MODE_LEGACY) {
      ret = request_irq(info->irq,
                        bf_interrupt,
                        info->irq_flags,
                        bfdev->name,
                        (void *)&(bfdev->bf_int_vec[0]));
      if (ret) {
        printk(KERN_ERR "bf failed to request legacy irq %ld error %d\n",
               info->irq,
               ret);
        return ret;
      }
      printk(KERN_NOTICE "BF allocating legacy int vector %ld\n", info->irq);
    } else if (bfdev->mode == BF_INTR_MODE_MSIX) {
      for (i = 0; i < info->num_irq; i++) {
        ret = request_irq(info->msix_entries[i].vector,
                          bf_interrupt,
                          info->irq_flags,
                          bfdev->name,
                          (void *)&(bfdev->bf_int_vec[i]));
        if (ret) {
          /* undo all other previous bindings */
          printk(KERN_ERR "bf failed to request MSIX ret %d itr %d\n", ret, i);
          for (j = i - 1; j >= 0; j--) {
            free_irq(info->msix_entries[j].vector,
                     (void *)&(bfdev->bf_int_vec[j]));
          }
          return ret;
        }
      }
      printk(KERN_NOTICE "BF allocating %d MSIx vectors from %ld\n",
             info->num_irq,
             info->irq);
    } else if (bfdev->mode == BF_INTR_MODE_MSI) {
      for (i = 0; i < info->num_irq; i++) {
        ret = request_irq(info->irq + i,
                          bf_interrupt,
                          info->irq_flags,
                          bfdev->name,
                          (void *)&(bfdev->bf_int_vec[i]));
        if (ret) {
          /* undo all other previous bindings */
          printk(KERN_ERR "bf failed to request MSI ret %d itr %d\n", ret, i);
          for (j = i - 1; j >= 0; j--) {
            free_irq(info->irq + j, (void *)&(bfdev->bf_int_vec[j]));
          }
          return ret;
        }
      }
      printk(KERN_NOTICE "BF allocating %d MSI vectors from %ld\n",
             info->num_irq,
             info->irq);
    }
  }
  return 0;
}

/**
 * bf_unregister_device - unregister a userspace mem device
 * @bfdev: bf pci device
 *
 * returns none
 */
void bf_unregister_device(struct bf_pci_dev *bfdev) {
  struct bf_dev_info *info = &bfdev->info;
  int i;

  if (info->irq) {
    if (bfdev->mode == BF_INTR_MODE_LEGACY) {
      free_irq(info->irq, (void *)&(bfdev->bf_int_vec[0]));
    } else if (bfdev->mode == BF_INTR_MODE_MSIX) {
      for (i = 0; i < info->num_irq; i++) {
        free_irq(info->msix_entries[i].vector,
                 (void *)&(bfdev->bf_int_vec[i]));
      }
    } else if (bfdev->mode == BF_INTR_MODE_MSI) {
      for (i = 0; i < info->num_irq; i++) {
        free_irq(info->irq + i, (void *)&(bfdev->bf_int_vec[i]));
      }
    }
  }
  device_destroy(bf_class, MKDEV(bf_major, info->minor));
  bf_remove_cdev(bfdev);
  bf_return_minor_no(info->minor);
}

static inline struct device *pci_dev_to_dev(struct pci_dev *pdev) {
  return &pdev->dev;
}

static void bf_disable_int_dma(struct bf_pci_dev *bfdev) {
  u8 *bf_base_addr, i;
  u32 *bf_addr;
  volatile u32 val;

  /* mask interrupts and DMA */
  bf_base_addr = (bfdev->info.mem[0].internal_addr);
  /* return if called before the BARs have been mapped */
  if (!bf_base_addr) {
    return;
  }
  /* mask interrupts at the shadow level */
  bf_addr = (u32 *)((u8 *)bf_base_addr + 0xc0);
  for (i = 0; i < 16; i++) {
    *bf_addr = 0xffffffff;
    bf_addr++;
  }
  /* mask DMA */
  bf_addr = (u32 *)((u8 *)bf_base_addr + 0x14);
  val = *bf_addr;
  val &= 0xfffffffeUL;
  *bf_addr = val;
}
static int bf_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) {
  struct bf_pci_dev *bfdev;
  int err;
  int i, num_irq;

  memset(bf_global, 0, sizeof(bf_global));

  bfdev = kzalloc(sizeof(struct bf_pci_dev), GFP_KERNEL);
  if (!bfdev) {
    return -ENOMEM;
  }

  /* init the cookies to be passed to ISRs */
  for (i = 0; i < BF_MSIX_ENTRY_CNT; i++) {
    bfdev->bf_int_vec[i].int_vec_offset = i;
    bfdev->bf_int_vec[i].bf_dev = bfdev;
  }

  /* initialize intr_mode to none */
  bfdev->mode = BF_INTR_MODE_NONE;
  /* clear pci_error_state */
  bfdev->info.pci_error_state = 0;

  /* enable device */
  err = pci_enable_device(pdev);
  if (err != 0) {
    dev_err(&pdev->dev, "Cannot enable PCI device\n");
    goto fail_free;
  }

  /* reserve the device's PCI memory regions for use by this module */
  err = pci_request_regions(pdev, "bf_umem");
  if (err != 0) {
    dev_err(&pdev->dev, "Cannot request regions\n");
    goto fail_pci_disable;
  }

  /* remap IO memory */
  err = bf_setup_bars(pdev, &bfdev->info);
  if (err != 0) {
    goto fail_release_iomem;
  }

  /* set a 64-bit DMA mask, falling back to 32-bit */
  if (!dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64)) &&
      !dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64))) {
    /* 64-bit DMA is usable */
  } else {
    err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32));
    if (err) {
      err = dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32));
      if (err) {
        dev_err(pci_dev_to_dev(pdev),
                "No usable DMA configuration, aborting\n");
        goto fail_release_iomem;
      }
    }
  }

  /* enable pci error reporting.
   * For the current kernel version, the kernel config must have set
   * CONFIG_PCIEPORTBUS=y and CONFIG_PCIEAER=y.
   * We have pci_error_handlers defined that get invoked by the kernel AER
   * module upon detecting a pcie error on this device's addresses.
   * However, there seems to be no way for AER to pass the offending
   * addresses to the callback functions. AER logs the error messages on the
   * console. This driver's callback function sends the SIGIO signal to the
   * user space process to indicate the error condition.
   */
  pci_enable_pcie_error_reporting(pdev);

  bf_disable_int_dma(bfdev);

  /* enable bus mastering on the device */
  pci_set_master(pdev);

  /* fill in bfdev info */
  bfdev->info.version = "0.2";
  bfdev->info.owner = THIS_MODULE;
  bfdev->pdev = pdev;

  switch (bf_intr_mode_default) {
#ifdef CONFIG_PCI_MSI
    case BF_INTR_MODE_MSIX:
      /* allocate and enable the full set of MSI-X vectors */
      bfdev->info.msix_entries =
          kcalloc(BF_MSIX_ENTRY_CNT, sizeof(struct msix_entry), GFP_KERNEL);
      if (!bfdev->info.msix_entries) {
        err = -ENOMEM;
        goto fail_clear_pci_master;
      }
      for (i = 0; i < BF_MSIX_ENTRY_CNT; i++) {
        bfdev->info.msix_entries[i].entry = i;
      }
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
      num_irq =
          pci_enable_msix(pdev, bfdev->info.msix_entries, BF_MSIX_ENTRY_CNT);
      if (num_irq == 0) {
        dev_dbg(&pdev->dev, "using MSI-X");
        bfdev->info.num_irq = BF_MSIX_ENTRY_CNT;
        bfdev->info.irq = bfdev->info.msix_entries[0].vector;
        bfdev->mode = BF_INTR_MODE_MSIX;
        printk(KERN_DEBUG "bf using %d MSIX irq from %ld\n",
               num_irq,
               bfdev->info.irq);
        break;
      }
#else
      num_irq = pci_enable_msix_range(pdev,
                                      bfdev->info.msix_entries,
                                      BF_MSIX_ENTRY_CNT,
                                      BF_MSIX_ENTRY_CNT);
      if (num_irq == BF_MSIX_ENTRY_CNT) {
        dev_dbg(&pdev->dev, "using MSI-X");
        bfdev->info.num_irq = num_irq;
        bfdev->info.irq = bfdev->info.msix_entries[0].vector;
        bfdev->mode = BF_INTR_MODE_MSIX;
        printk(KERN_DEBUG "bf using %d MSIX irq from %ld\n",
               num_irq,
               bfdev->info.irq);
        break;
      } else {
        if (num_irq) {
          pci_disable_msix(pdev);
        }
        kfree(bfdev->info.msix_entries);
        bfdev->info.msix_entries = NULL;
        printk(KERN_ERR "bf error allocating MSIX vectors. Trying MSI...\n");
        /* and fall back to MSI */
      }
#endif /* LINUX_VERSION_CODE */
      /* intentional no-break */
    case BF_INTR_MODE_MSI:
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
      num_irq = pci_enable_msi_block(pdev, BF_MSI_ENTRY_CNT);
      /* we must get the requested number of MSI vectors enabled */
      if (num_irq == 0) {
        dev_dbg(&pdev->dev, "using MSI");
        bfdev->info.num_irq = BF_MSI_ENTRY_CNT;
        bfdev->info.irq = pdev->irq;
        bfdev->mode = BF_INTR_MODE_MSI;
        printk(KERN_DEBUG "bf using %d MSI irq from %ld\n",
               bfdev->info.num_irq,
               bfdev->info.irq);
        break;
      }
#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
      num_irq = pci_enable_msi_range(pdev, BF_MSI_ENTRY_CNT, BF_MSI_ENTRY_CNT);
      if (num_irq > 0) {
        dev_dbg(&pdev->dev, "using MSI");
        bfdev->info.num_irq = num_irq;
        bfdev->info.irq = pdev->irq;
        bfdev->mode = BF_INTR_MODE_MSI;
        printk(KERN_DEBUG "bf using %d MSI irq from %ld\n",
               bfdev->info.num_irq,
               bfdev->info.irq);
        break;
      }
#else
      num_irq = pci_alloc_irq_vectors_affinity(pdev,
                                               BF_MSI_ENTRY_CNT,
                                               BF_MSI_ENTRY_CNT,
                                               PCI_IRQ_MSI | PCI_IRQ_AFFINITY,
                                               NULL);
      if (num_irq > 0) {
        dev_dbg(&pdev->dev, "using MSI");
        bfdev->info.num_irq = num_irq;
        bfdev->info.irq = pci_irq_vector(pdev, 0);
        bfdev->mode = BF_INTR_MODE_MSI;
        printk(KERN_DEBUG "bf using %d MSI irq from %ld\n",
               bfdev->info.num_irq,
               bfdev->info.irq);
        break;
      }
#endif /* LINUX_VERSION_CODE */
#endif /* CONFIG_PCI_MSI */
      /* fall back to legacy interrupt; intentional no-break */
    case BF_INTR_MODE_LEGACY:
      if (pci_intx_mask_supported(pdev)) {
        dev_dbg(&pdev->dev, "using INTX");
        bfdev->info.irq_flags = IRQF_SHARED;
        bfdev->info.irq = pdev->irq;
        bfdev->mode = BF_INTR_MODE_LEGACY;
        printk(KERN_DEBUG "bf using LEGACY irq %ld\n", bfdev->info.irq);
        break;
      }
      dev_notice(&pdev->dev, "PCI INTx mask not supported\n");
      /* fall back to no interrupt; intentional no-break */
    case BF_INTR_MODE_NONE:
      bfdev->info.irq = 0;
      bfdev->info.num_irq = 0;
      bfdev->mode = BF_INTR_MODE_NONE;
      break;
    default:
      dev_err(&pdev->dev, "invalid IRQ mode %u", bf_intr_mode_default);
      err = -EINVAL;
      goto fail_clear_pci_master;
  }
  pci_set_drvdata(pdev, bfdev);
  sprintf(bfdev->name, "bf_%d", bfdev->info.minor);

  /* register the bf driver */
  err = bf_register_device(&pdev->dev, bfdev);
  if (err != 0) {
    goto fail_release_irq;
  }

  bf_global[bfdev->info.minor].async_queue = NULL;
  bf_global[bfdev->info.minor].bfdev = bfdev;

  dev_info(&pdev->dev,
           "bf device %d registered with irq %ld\n",
           bfdev->instance,
           bfdev->info.irq);
  printk(KERN_ALERT "bf probe ok\n");
  return 0;

fail_release_irq:
  pci_set_drvdata(pdev, NULL);
  if (bfdev->mode == BF_INTR_MODE_MSIX) {
    pci_disable_msix(bfdev->pdev);
    kfree(bfdev->info.msix_entries);
    bfdev->info.msix_entries = NULL;
  } else if (bfdev->mode == BF_INTR_MODE_MSI) {
    pci_disable_msi(bfdev->pdev);
  }
fail_clear_pci_master:
  pci_clear_master(pdev);
fail_release_iomem:
  bf_pci_release_iomem(&bfdev->info);
  pci_release_regions(pdev);
fail_pci_disable:
  pci_disable_device(pdev);
fail_free:
  kfree(bfdev);

  printk(KERN_ERR "bf probe not ok\n");
  return err;
}

static void bf_pci_remove(struct pci_dev *pdev) {
  struct bf_pci_dev *bfdev = pci_get_drvdata(pdev);
  struct bf_listener *cur_listener;

  bf_disable_int_dma(bfdev);
  bf_unregister_device(bfdev);
  if (bfdev->mode == BF_INTR_MODE_MSIX) {
    pci_disable_msix(pdev);
    kfree(bfdev->info.msix_entries);
    bfdev->info.msix_entries = NULL;
  } else if (bfdev->mode == BF_INTR_MODE_MSI) {
    pci_disable_msi(pdev);
  }
  pci_clear_master(pdev);
  bf_pci_release_iomem(&bfdev->info);
  pci_release_regions(pdev);
  pci_disable_pcie_error_reporting(pdev);
  pci_disable_device(pdev);
  pci_set_drvdata(pdev, NULL);
  bf_global[bfdev->info.minor].bfdev = NULL;

  /* existing filep structures in open file(s) must be informed that
   * bf_pci_dev is no longer valid */
  spin_lock(&bf_nonisr_lock);
  cur_listener = bfdev->listener_head;
  while (cur_listener) {
    cur_listener->bfdev = NULL;
    cur_listener = cur_listener->next;
  }
  spin_unlock(&bf_nonisr_lock);

  kfree(bfdev);
}

/**
 * bf_pci_error_detected - called when a PCI error is detected
 * @pdev:  Pointer to PCI device
 * @state: The current pci connection state
 *
 * called when the root complex detects a pci error associated with the device
 */
static pci_ers_result_t bf_pci_error_detected(struct pci_dev *pdev,
                                              pci_channel_state_t state) {
  struct bf_pci_dev *bfdev = pci_get_drvdata(pdev);
  int minor;

  if (!bfdev) {
    return PCI_ERS_RESULT_NONE;
  }
  printk(KERN_ERR "pci_err_detected state %d\n", state);
  if (state == pci_channel_io_perm_failure || state == pci_channel_io_frozen) {
    bfdev->info.pci_error_state = 1;
    /* send a signal to the user space program about the error */
    minor = bfdev->info.minor;
    if (minor < BF_MAX_DEVICE_CNT && bf_global[minor].async_queue) {
      kill_fasync(&bf_global[minor].async_queue, SIGIO, POLL_ERR);
    }
    return PCI_ERS_RESULT_DISCONNECT;
  } else {
    return PCI_ERS_RESULT_NONE;
  }
}

/**
 * bf_pci_slot_reset - called after the pci bus has been reset.
 * @pdev: Pointer to PCI device
 *
 * Restart the card from scratch, as if from a cold boot.
 */
static pci_ers_result_t bf_pci_slot_reset(struct pci_dev *pdev) {
  /* nothing to do for now, as we do not expect to get back to normal after
   * a pcie link reset.
   * TBD: fill in this function if tofino can recover after an error
   */
  return PCI_ERS_RESULT_DISCONNECT;
}
/**
 * bf_pci_resume - called when the kernel thinks the device is up on PCIe.
 * @pdev: Pointer to PCI device
 *
 * This callback is called when the error recovery driver tells us that
 * it is OK to resume normal operation.
 */
static void bf_pci_resume(struct pci_dev *pdev) {
  /* this function should never be called for Tofino */
  struct bf_pci_dev *bfdev = pci_get_drvdata(pdev);

  printk(KERN_ERR "BF io_resume invoked after pci error\n");
  if (bfdev) {
    bfdev->info.pci_error_state = 0;
  }
}

static int bf_config_intr_mode(char *intr_str) {
  if (!intr_str) {
    pr_info("Use MSI interrupt by default\n");
    return 0;
  }

  if (!strcmp(intr_str, BF_INTR_MODE_MSIX_NAME)) {
    bf_intr_mode_default = BF_INTR_MODE_MSIX;
    pr_info("Use MSIX interrupt\n");
  } else if (!strcmp(intr_str, BF_INTR_MODE_MSI_NAME)) {
    bf_intr_mode_default = BF_INTR_MODE_MSI;
    pr_info("Use MSI interrupt\n");
  } else if (!strcmp(intr_str, BF_INTR_MODE_LEGACY_NAME)) {
    bf_intr_mode_default = BF_INTR_MODE_LEGACY;
    pr_info("Use legacy interrupt\n");
  } else {
    bf_intr_mode_default = BF_INTR_MODE_NONE;
    pr_info("Use no interrupt\n");
  }
  return 0;
}

static const struct pci_device_id bf_pci_tbl[] = {
    {PCI_VDEVICE(BF, TOFINO_DEV_ID_A0), 0},
    {PCI_VDEVICE(BF, TOFINO_DEV_ID_B0), 0},
    {PCI_VDEVICE(BF, TOFINO2_DEV_ID_A0), 0},
    /* required last entry */
    {.device = 0}};

/* PCI bus error handlers */
static struct pci_error_handlers bf_pci_err_handler = {
    .error_detected = bf_pci_error_detected,
    .slot_reset = bf_pci_slot_reset,
    .resume = bf_pci_resume,
};

static struct pci_driver bf_pci_driver = {.name = "bf",
                                          .id_table = bf_pci_tbl,
                                          .probe = bf_pci_probe,
                                          .remove = bf_pci_remove,
                                          .err_handler = &bf_pci_err_handler};

static int __init bfdrv_init(void) {
  int ret;

  ret = bf_config_intr_mode(intr_mode);
  if (ret < 0) {
    return ret;
  }
  spin_lock_init(&bf_nonisr_lock);
  return pci_register_driver(&bf_pci_driver);
}

static void __exit bfdrv_exit(void) { pci_unregister_driver(&bf_pci_driver); }

module_init(bfdrv_init);
module_exit(bfdrv_exit);

module_param(intr_mode, charp, S_IRUGO);
MODULE_PARM_DESC(intr_mode,
                 "bf interrupt mode (default=msi):\n"
                 "    " BF_INTR_MODE_MSIX_NAME "    Use MSIX interrupt\n"
                 "    " BF_INTR_MODE_MSI_NAME "     Use MSI interrupt\n"
                 "    " BF_INTR_MODE_LEGACY_NAME "  Use Legacy interrupt\n"
                 "\n");
MODULE_DEVICE_TABLE(pci, bf_pci_tbl);
MODULE_DESCRIPTION("Barefoot Tofino PCI device");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Barefoot Networks");
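/*
 * Load-time example (the module file name is an assumption of the build):
 *
 *   insmod bf_kdrv.ko intr_mode="msi"
 */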