From 86189ca78645d67880e67565b7b71acbd39ef77e Mon Sep 17 00:00:00 2001 From: Sagar Balani Date: Wed, 24 Oct 2018 06:27:04 -0700 Subject: [PATCH] [barefoot]: Move bfn asic drivers out of platform package (#30) (#2183) * Move bfn asic drivers out of platform package (#30) * Move bfn asic drivers out of platform package * Change debian pkg name for bfn modules * Change license file * Fix build breakage due to dependency issue * Minor changes to debian bld files (#31) --- platform/barefoot/bfn-modules.mk | 10 + platform/barefoot/bfn-modules/LICENSE | 37 + platform/barefoot/bfn-modules/MAINTAINERS | 4 + platform/barefoot/bfn-modules/README.md | 2 + .../barefoot/bfn-modules/debian/changelog | 5 + platform/barefoot/bfn-modules/debian/compat | 1 + platform/barefoot/bfn-modules/debian/control | 12 + .../barefoot/bfn-modules/debian/copyright | 15 + platform/barefoot/bfn-modules/debian/files | 1 + platform/barefoot/bfn-modules/debian/rules | 30 + .../modules/Makefile | 0 .../barefoot/bfn-modules/modules/bf_ioctl.h | 49 + .../modules/bf_kdrv.c | 138 +- .../modules/bf_tun.c | 0 platform/barefoot/one-image.mk | 3 +- .../barefoot/platform-modules-bfn-montara.mk | 4 +- platform/barefoot/platform-modules-bfn.mk | 4 +- platform/barefoot/rules.mk | 1 + .../MAINTAINERS | 2 +- .../README.md | 2 +- .../debian/changelog | 8 +- .../debian/control | 4 +- .../debian/rules | 15 +- .../modules/Makefile | 1 - .../modules/bf_kdrv.c | 1 - .../modules/bf_tun.c | 1 - .../debian/changelog | 8 +- .../sonic-platform-modules-bfn/debian/control | 4 +- .../sonic-platform-modules-bfn/debian/rules | 15 +- .../modules/bf_kdrv.c | 1254 -------- .../modules/Makefile | 2 - .../modules/bf_tun.c | 2616 ----------------- 32 files changed, 328 insertions(+), 3921 deletions(-) create mode 100644 platform/barefoot/bfn-modules.mk create mode 100644 platform/barefoot/bfn-modules/LICENSE create mode 100644 platform/barefoot/bfn-modules/MAINTAINERS create mode 100644 platform/barefoot/bfn-modules/README.md create mode 
100644 platform/barefoot/bfn-modules/debian/changelog create mode 100644 platform/barefoot/bfn-modules/debian/compat create mode 100644 platform/barefoot/bfn-modules/debian/control create mode 100644 platform/barefoot/bfn-modules/debian/copyright create mode 100644 platform/barefoot/bfn-modules/debian/files create mode 100755 platform/barefoot/bfn-modules/debian/rules rename platform/barefoot/{sonic-platform-modules-bfn => bfn-modules}/modules/Makefile (100%) create mode 100644 platform/barefoot/bfn-modules/modules/bf_ioctl.h rename platform/barefoot/{sonic-platform-modules-wnc-osw1800 => bfn-modules}/modules/bf_kdrv.c (89%) rename platform/barefoot/{sonic-platform-modules-bfn => bfn-modules}/modules/bf_tun.c (100%) delete mode 120000 platform/barefoot/sonic-platform-modules-bfn-montara/modules/Makefile delete mode 120000 platform/barefoot/sonic-platform-modules-bfn-montara/modules/bf_kdrv.c delete mode 120000 platform/barefoot/sonic-platform-modules-bfn-montara/modules/bf_tun.c delete mode 100644 platform/barefoot/sonic-platform-modules-bfn/modules/bf_kdrv.c delete mode 100644 platform/barefoot/sonic-platform-modules-wnc-osw1800/modules/bf_tun.c diff --git a/platform/barefoot/bfn-modules.mk b/platform/barefoot/bfn-modules.mk new file mode 100644 index 0000000000..67b7fa924d --- /dev/null +++ b/platform/barefoot/bfn-modules.mk @@ -0,0 +1,10 @@ +# BFN Platform modules + +VERSION = 1.0 + +BFN_MODULE = bfn-modules_$(VERSION)_amd64.deb +$(BFN_MODULE)_SRC_PATH = $(PLATFORM_PATH)/bfn-modules +$(BFN_MODULE)_DEPENDS += $(LINUX_HEADERS) $(LINUX_HEADERS_COMMON) +SONIC_DPKG_DEBS += $(BFN_MODULE) + +SONIC_STRETCH_DEBS += $(BFN_MODULE) diff --git a/platform/barefoot/bfn-modules/LICENSE b/platform/barefoot/bfn-modules/LICENSE new file mode 100644 index 0000000000..e9d73a1c63 --- /dev/null +++ b/platform/barefoot/bfn-modules/LICENSE @@ -0,0 +1,37 @@ +BAREFOOT NETWORKS CONFIDENTIAL & PROPRIETARY + + Copyright (c) 2015-2016 Barefoot Networks, Inc. + + All Rights Reserved. 
+ + NOTICE: All information contained herein is, and remains the property of + Barefoot Networks, Inc. and its suppliers, if any. The intellectual and + technical concepts contained herein are proprietary to Barefoot Networks, + Inc. + and its suppliers and may be covered by U.S. and Foreign Patents, patents in + process, and are protected by trade secret or copyright law. + Dissemination of this information or reproduction of this material is + strictly forbidden unless prior written permission is obtained from + Barefoot Networks, Inc. + + No warranty, explicit or implicit is provided, unless granted under a + written agreement with Barefoot Networks, Inc. + + +GPL LICENSE SUMMARY + + Copyright(c) 2015 Barefoot Networks. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the... 
+ + diff --git a/platform/barefoot/bfn-modules/MAINTAINERS b/platform/barefoot/bfn-modules/MAINTAINERS new file mode 100644 index 0000000000..b9c62ce015 --- /dev/null +++ b/platform/barefoot/bfn-modules/MAINTAINERS @@ -0,0 +1,4 @@ +# This file describes the maintainers for bfn-modules +# See the SONiC project governance document for more information + +Mailinglist = sonicproject@googlegroups.com diff --git a/platform/barefoot/bfn-modules/README.md b/platform/barefoot/bfn-modules/README.md new file mode 100644 index 0000000000..6c47cbed2a --- /dev/null +++ b/platform/barefoot/bfn-modules/README.md @@ -0,0 +1,2 @@ +# bfn-modules +Device drivers for support of BFN platform for the SONiC project diff --git a/platform/barefoot/bfn-modules/debian/changelog b/platform/barefoot/bfn-modules/debian/changelog new file mode 100644 index 0000000000..85ab933da3 --- /dev/null +++ b/platform/barefoot/bfn-modules/debian/changelog @@ -0,0 +1,5 @@ +bfn-modules (1.0) unstable; urgency=low + + * Initial release + + -- Support Mon, 22 Oct 2018 11:11:11 -0800 diff --git a/platform/barefoot/bfn-modules/debian/compat b/platform/barefoot/bfn-modules/debian/compat new file mode 100644 index 0000000000..45a4fb75db --- /dev/null +++ b/platform/barefoot/bfn-modules/debian/compat @@ -0,0 +1 @@ +8 diff --git a/platform/barefoot/bfn-modules/debian/control b/platform/barefoot/bfn-modules/debian/control new file mode 100644 index 0000000000..7e0ae5cf50 --- /dev/null +++ b/platform/barefoot/bfn-modules/debian/control @@ -0,0 +1,12 @@ +Source: bfn-modules +Section: main +Priority: extra +Maintainer: support +Build-Depends: debhelper (>= 8.0.0), bzip2 +Standards-Version: 3.9.3 + +Package: bfn-modules +Architecture: amd64 +Depends: linux-image-4.9.0-7-amd64 +Description: kernel modules for bfn asic for mmap + diff --git a/platform/barefoot/bfn-modules/debian/copyright b/platform/barefoot/bfn-modules/debian/copyright new file mode 100644 index 0000000000..ade42b7aa7 --- /dev/null +++ 
b/platform/barefoot/bfn-modules/debian/copyright @@ -0,0 +1,15 @@ +Provides linux kernel driver for BF PCIe devices + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/platform/barefoot/bfn-modules/debian/files b/platform/barefoot/bfn-modules/debian/files new file mode 100644 index 0000000000..cffeb8a7d9 --- /dev/null +++ b/platform/barefoot/bfn-modules/debian/files @@ -0,0 +1 @@ +bfn-modules_1.0_amd64.deb main extra diff --git a/platform/barefoot/bfn-modules/debian/rules b/platform/barefoot/bfn-modules/debian/rules new file mode 100755 index 0000000000..c588b075e5 --- /dev/null +++ b/platform/barefoot/bfn-modules/debian/rules @@ -0,0 +1,30 @@ +#!/usr/bin/make -f + +export INSTALL_MOD_DIR:=extra + +PACKAGE_NAME := bfn-modules +KVERSION ?= $(shell uname -r) +KERNEL_SRC := /lib/modules/$(KVERSION) +MODULE_SRC := $(shell pwd)/modules + +%: + dh $@ + +override_dh_auto_build: + make -C $(KERNEL_SRC)/build M=$(MODULE_SRC) + +override_dh_auto_install: + dh_installdirs -p$(PACKAGE_NAME) $(KERNEL_SRC)/$(INSTALL_MOD_DIR) + cp $(MODULE_SRC)/*.ko debian/$(PACKAGE_NAME)/$(KERNEL_SRC)/$(INSTALL_MOD_DIR) + dh_installdirs -p$(PACKAGE_NAME) usr/local/bin + +override_dh_usrlocal: + +override_dh_pysupport: + +override_dh_clean: + dh_clean + rm -f $(MODULE_SRC)/*.o $(MODULE_SRC)/*.ko $(MODULE_SRC)/*.mod.c $(MODULE_SRC)/.*.cmd + rm -f 
$(MODULE_SRC)/Module.markers $(MODULE_SRC)/Module.symvers $(MODULE_SRC)/modules.order + rm -rf $(MODULE_SRC)/.tmp_versions + diff --git a/platform/barefoot/sonic-platform-modules-bfn/modules/Makefile b/platform/barefoot/bfn-modules/modules/Makefile similarity index 100% rename from platform/barefoot/sonic-platform-modules-bfn/modules/Makefile rename to platform/barefoot/bfn-modules/modules/Makefile diff --git a/platform/barefoot/bfn-modules/modules/bf_ioctl.h b/platform/barefoot/bfn-modules/modules/bf_ioctl.h new file mode 100644 index 0000000000..e14716f50f --- /dev/null +++ b/platform/barefoot/bfn-modules/modules/bf_ioctl.h @@ -0,0 +1,49 @@ +/******************************************************************************* + * BAREFOOT NETWORKS CONFIDENTIAL & PROPRIETARY + * + * Copyright (c) 2018-2018 Barefoot Networks, Inc. + * + * NOTICE: All information contained herein is, and remains the property of + * Barefoot Networks, Inc. and its suppliers, if any. The intellectual and + * technical concepts contained herein are proprietary to Barefoot Networks, + * Inc. + * and its suppliers and may be covered by U.S. and Foreign Patents, patents in + * process, and are protected by trade secret or copyright law. + * Dissemination of this information or reproduction of this material is + * strictly forbidden unless prior written permission is obtained from + * Barefoot Networks, Inc. + * + * No warranty, explicit or implicit is provided, unless granted under a + * written agreement with Barefoot Networks, Inc. 
+ * + * $Id: $ + * + ******************************************************************************/ + +#ifndef _BF_IOCTL_H_ +#define _BF_IOCTL_H_ + +#ifdef __KERNEL__ +#include +#else +#include + +#ifndef phys_addr_t +typedef uint64_t phys_addr_t; +#endif + +#endif /* __KERNEL__ */ + +#define BF_IOC_MAGIC 'b' + +typedef struct bf_dma_bus_map_s +{ + phys_addr_t phy_addr; + void *dma_addr; + size_t size; +} bf_dma_bus_map_t; + +#define BF_IOCMAPDMAADDR _IOWR(BF_IOC_MAGIC, 0, bf_dma_bus_map_t) +#define BF_IOCUNMAPDMAADDR _IOW(BF_IOC_MAGIC, 0, bf_dma_bus_map_t) + +#endif /* _BF_IOCTL_H_ */ diff --git a/platform/barefoot/sonic-platform-modules-wnc-osw1800/modules/bf_kdrv.c b/platform/barefoot/bfn-modules/modules/bf_kdrv.c similarity index 89% rename from platform/barefoot/sonic-platform-modules-wnc-osw1800/modules/bf_kdrv.c rename to platform/barefoot/bfn-modules/modules/bf_kdrv.c index fd66ad0949..a9e8e65f96 100644 --- a/platform/barefoot/sonic-platform-modules-wnc-osw1800/modules/bf_kdrv.c +++ b/platform/barefoot/bfn-modules/modules/bf_kdrv.c @@ -58,6 +58,8 @@ #include #include #include +#include +#include "bf_ioctl.h" #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) #include @@ -74,12 +76,18 @@ #endif /* TBD: Need to build with CONFIG_PCI_MSI */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) +extern int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec); +extern int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec); +#else extern int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec); extern int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int minvec, int maxvec); +#endif #define PCI_VENDOR_ID_BF 0x1d1c #define TOFINO_DEV_ID_A0 0x01 #define TOFINO_DEV_ID_B0 0x10 +#define TOFINO2_DEV_ID_A0 0x0100 #ifndef PCI_MSIX_ENTRY_SIZE #define PCI_MSIX_ENTRY_SIZE 16 @@ -671,10 +679,62 @@ static ssize_t bf_write(struct file *filep, const char __user *buf, return ret ? 
ret : sizeof(s32);
 }
 
+/* ioctl entry point. BF_IOCMAPDMAADDR maps the buffer described by the user's
+ * bf_dma_bus_map_t for DMA and writes the DMA address back; BF_IOCUNMAPDMAADDR
+ * releases such a mapping. Returns 0 on success or a negative errno. */
+static long bf_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
+{
+	struct bf_listener *listener = filep->private_data;
+	struct bf_pci_dev *bfdev = listener->bfdev;
+	void __user *addr = (void __user *)arg;
+	bf_dma_bus_map_t dma_map;
+	dma_addr_t dma_hndl;
+
+	if (!bfdev || !addr) {
+		return -EFAULT;
+	}
+	switch (cmd) {
+	case BF_IOCMAPDMAADDR:
+		/* the request is read from, and the result written back to, addr */
+		if (!access_ok(VERIFY_WRITE, addr, sizeof(dma_map)) ||
+		    copy_from_user(&dma_map, addr, sizeof(dma_map))) {
+			return -EFAULT;
+		}
+		if (!dma_map.phy_addr || !dma_map.size) {
+			return -EFAULT;
+		}
+		dma_hndl = dma_map_single(&bfdev->pdev->dev, phys_to_virt(dma_map.phy_addr), dma_map.size, DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(&bfdev->pdev->dev, dma_hndl)) {
+			return -EFAULT;
+		}
+		dma_map.dma_addr = (void *)dma_hndl;
+		if (copy_to_user(addr, &dma_map, sizeof(dma_map))) {
+			/* user space never received the handle, so undo the mapping */
+			dma_unmap_single(&bfdev->pdev->dev, dma_hndl, dma_map.size, DMA_BIDIRECTIONAL);
+			return -EFAULT;
+		}
+		break;
+	case BF_IOCUNMAPDMAADDR:
+		if (!access_ok(VERIFY_READ, addr, sizeof(dma_map)) ||
+		    copy_from_user(&dma_map, addr, sizeof(dma_map))) {
+			return -EFAULT;
+		}
+		if (!dma_map.dma_addr || !dma_map.size) {
+			return -EFAULT;
+		}
+		dma_unmap_single(&bfdev->pdev->dev, (dma_addr_t)dma_map.dma_addr, dma_map.size, DMA_BIDIRECTIONAL);
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
 static const struct file_operations bf_fops = {
 	.owner = THIS_MODULE,
 	.open = bf_open,
 	.release = bf_release,
+	.unlocked_ioctl = bf_ioctl,
 	.read = bf_read,
 	.write = bf_write,
 	.mmap = bf_mmap,
@@ -883,11 +943,35 @@ static inline struct device *pci_dev_to_dev(struct pci_dev *pdev)
 	return &pdev->dev;
 }
 
+static void bf_disable_int_dma(struct bf_pci_dev *bfdev) {
+	u8 *bf_base_addr, i;
+	u32 *bf_addr;
+	volatile u32 val;
+
+	/* maskinterrupts and DMA */
+	bf_base_addr = (bfdev->info.mem[0].internal_addr);
+	/* return if called before mmap */
+	if (!bf_base_addr) {
+		return;
+	}
+	/* mask interrupt at shadow
level */ + bf_addr = (u32 *)((u8 *)bf_base_addr + 0xc0); + for (i = 0; i < 16; i++) { + *bf_addr = 0xffffffff; + bf_addr++; + } + /* mask DMA */ + bf_addr = (u32 *)((u8 *)bf_base_addr + 0x14); + val = *bf_addr; + val &= 0xfffffffeUL; + *bf_addr = val; +} + static int bf_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct bf_pci_dev *bfdev; - int err, pci_use_highmem; + int err; int i, num_irq; memset(bf_global, 0, sizeof(bf_global)); @@ -933,7 +1017,6 @@ bf_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (!dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64)) && !dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64))) { - pci_use_highmem = 1; } else { err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32)); if (err) { @@ -945,7 +1028,6 @@ bf_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto fail_release_iomem; } } - pci_use_highmem = 0; } /* enable pci error reporting */ @@ -960,6 +1042,8 @@ bf_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) */ pci_enable_pcie_error_reporting(pdev); + bf_disable_int_dma(bfdev); + /* enable bus mastering on the device */ pci_set_master(pdev); @@ -981,6 +1065,19 @@ bf_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) for (i = 0; i < BF_MSIX_ENTRY_CNT; i++) { bfdev->info.msix_entries[i].entry= i; } +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) + num_irq = pci_enable_msix(pdev, bfdev->info.msix_entries, + BF_MSIX_ENTRY_CNT); + if (num_irq == 0) { + dev_dbg(&pdev->dev, "using MSI-X"); + bfdev->info.num_irq = BF_MSIX_ENTRY_CNT; + bfdev->info.irq = bfdev->info.msix_entries[0].vector; + bfdev->mode = BF_INTR_MODE_MSIX; + printk(KERN_DEBUG "bf using %d MSIX irq from %ld\n", num_irq, + bfdev->info.irq); + break; + } +#else num_irq = pci_enable_msix_range(pdev, bfdev->info.msix_entries, BF_MSIX_ENTRY_CNT, BF_MSIX_ENTRY_CNT); if (num_irq == BF_MSIX_ENTRY_CNT) { @@ -999,8 +1096,22 @@ bf_pci_probe(struct pci_dev *pdev, const struct 
pci_device_id *id) printk(KERN_ERR "bf error allocating MSIX vectors. Trying MSI...\n"); /* and, fall back to MSI */ } +#endif /* LINUX_VERSION_CODE */ /* ** intentional no-break */ case BF_INTR_MODE_MSI: +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) + num_irq = pci_enable_msi_block(pdev, BF_MSI_ENTRY_CNT); + /* we must get requested number of MSI vectors enabled */ + if (num_irq == 0) { + dev_dbg(&pdev->dev, "using MSI"); + bfdev->info.num_irq = BF_MSI_ENTRY_CNT; + bfdev->info.irq = pdev->irq; + bfdev->mode = BF_INTR_MODE_MSI; + printk(KERN_DEBUG "bf using %d MSI irq from %ld\n", bfdev->info.num_irq, + bfdev->info.irq); + break; + } +#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) num_irq = pci_enable_msi_range(pdev, BF_MSI_ENTRY_CNT, BF_MSI_ENTRY_CNT); if (num_irq > 0) { dev_dbg(&pdev->dev, "using MSI"); @@ -1011,6 +1122,19 @@ bf_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) bfdev->info.irq); break; } +#else + num_irq = pci_alloc_irq_vectors_affinity(pdev, BF_MSI_ENTRY_CNT, + BF_MSI_ENTRY_CNT, PCI_IRQ_MSI | PCI_IRQ_AFFINITY, NULL); + if (num_irq > 0) { + dev_dbg(&pdev->dev, "using MSI"); + bfdev->info.num_irq = num_irq; + bfdev->info.irq = pci_irq_vector(pdev, 0); + bfdev->mode = BF_INTR_MODE_MSI; + printk(KERN_DEBUG "bf using %d MSI irq from %ld\n", bfdev->info.num_irq, + bfdev->info.irq); + break; + } +#endif /* LINUX_VERSION_CODE */ #endif /* CONFIG_PCI_MSI */ /* fall back to Legacy Interrupt, intentional no-break */ @@ -1075,12 +1199,14 @@ fail_free: return err; } + static void bf_pci_remove(struct pci_dev *pdev) { struct bf_pci_dev *bfdev = pci_get_drvdata(pdev); struct bf_listener *cur_listener; + bf_disable_int_dma(bfdev); bf_unregister_device(bfdev); if (bfdev->mode == BF_INTR_MODE_MSIX) { pci_disable_msix(pdev); @@ -1189,9 +1315,10 @@ bf_config_intr_mode(char *intr_str) bf_intr_mode_default = BF_INTR_MODE_LEGACY; pr_info("Use legacy interrupt\n"); } else { - pr_info("Error: bad parameter - %s\n", intr_str); - return -EINVAL; + 
bf_intr_mode_default = BF_INTR_MODE_NONE; + pr_info(" No Interrupt \n"); } + return 0; } @@ -1199,6 +1326,7 @@ bf_config_intr_mode(char *intr_str) static const struct pci_device_id bf_pci_tbl[] = { {PCI_VDEVICE(BF, TOFINO_DEV_ID_A0), 0}, {PCI_VDEVICE(BF, TOFINO_DEV_ID_B0), 0}, + {PCI_VDEVICE(BF, TOFINO2_DEV_ID_A0), 0}, /* required last entry */ { .device = 0 } }; diff --git a/platform/barefoot/sonic-platform-modules-bfn/modules/bf_tun.c b/platform/barefoot/bfn-modules/modules/bf_tun.c similarity index 100% rename from platform/barefoot/sonic-platform-modules-bfn/modules/bf_tun.c rename to platform/barefoot/bfn-modules/modules/bf_tun.c diff --git a/platform/barefoot/one-image.mk b/platform/barefoot/one-image.mk index 490712f681..fc4e197cd5 100644 --- a/platform/barefoot/one-image.mk +++ b/platform/barefoot/one-image.mk @@ -3,7 +3,8 @@ SONIC_ONE_IMAGE = sonic-barefoot.bin $(SONIC_ONE_IMAGE)_MACHINE = barefoot $(SONIC_ONE_IMAGE)_IMAGE_TYPE = onie -$(SONIC_ONE_IMAGE)_INSTALLS += $(BFN_PLATFORM_MODULE) +$(SONIC_ONE_IMAGE)_INSTALLS += $(BFN_MODULE) +$(SONIC_ONE_IMAGE)_LAZY_INSTALLS += $(BFN_PLATFORM_MODULE) $(SONIC_ONE_IMAGE)_LAZY_INSTALLS += $(BFN_MONTARA_PLATFORM_MODULE) $(SONIC_ONE_IMAGE)_LAZY_INSTALLS += $(WNC_OSW1800_PLATFORM_MODULE) $(SONIC_ONE_IMAGE)_LAZY_INSTALLS += $(INGRASYS_S9180_32X_PLATFORM_MODULE) diff --git a/platform/barefoot/platform-modules-bfn-montara.mk b/platform/barefoot/platform-modules-bfn-montara.mk index c1b401ad3f..d089218c94 100644 --- a/platform/barefoot/platform-modules-bfn-montara.mk +++ b/platform/barefoot/platform-modules-bfn-montara.mk @@ -1,10 +1,10 @@ # BFN Platform modules -BFN_MONTARA_PLATFORM_MODULE_VERSION = 1.0 +BFN_MONTARA_PLATFORM_MODULE_VERSION = 1.1 export BFN_MONTARA_PLATFORM_MODULE_VERSION -BFN_MONTARA_PLATFORM_MODULE = platform-modules-bfn-montara_$(BFN_MONTARA_PLATFORM_MODULE_VERSION)_amd64.deb +BFN_MONTARA_PLATFORM_MODULE = sonic-platform-modules-bfn-montara_$(BFN_MONTARA_PLATFORM_MODULE_VERSION)_amd64.deb 
$(BFN_MONTARA_PLATFORM_MODULE)_SRC_PATH = $(PLATFORM_PATH)/sonic-platform-modules-bfn-montara $(BFN_MONTARA_PLATFORM_MODULE)_DEPENDS += $(LINUX_HEADERS) $(LINUX_HEADERS_COMMON) $(BFN_MONTARA_PLATFORM_MODULE)_PLATFORM = x86_64-accton_wedge100bf_32x-r0 diff --git a/platform/barefoot/platform-modules-bfn.mk b/platform/barefoot/platform-modules-bfn.mk index 5c3df75c1b..1caa92c8f2 100644 --- a/platform/barefoot/platform-modules-bfn.mk +++ b/platform/barefoot/platform-modules-bfn.mk @@ -1,10 +1,10 @@ # BFN Platform modules -BFN_PLATFORM_MODULE_VERSION = 1.0 +BFN_PLATFORM_MODULE_VERSION = 1.1 export BFN_PLATFORM_MODULE_VERSION -BFN_PLATFORM_MODULE = platform-modules-bfn_$(BFN_PLATFORM_MODULE_VERSION)_amd64.deb +BFN_PLATFORM_MODULE = sonic-platform-modules-bfn_$(BFN_PLATFORM_MODULE_VERSION)_amd64.deb $(BFN_PLATFORM_MODULE)_SRC_PATH = $(PLATFORM_PATH)/sonic-platform-modules-bfn $(BFN_PLATFORM_MODULE)_DEPENDS += $(LINUX_HEADERS) $(LINUX_HEADERS_COMMON) $(BFN_PLATFORM_MODULE)_PLATFORM = x86_64-accton_wedge100bf_65x-r0 diff --git a/platform/barefoot/rules.mk b/platform/barefoot/rules.mk index 2ee49adb8a..4937b747c2 100644 --- a/platform/barefoot/rules.mk +++ b/platform/barefoot/rules.mk @@ -15,6 +15,7 @@ include $(PLATFORM_PATH)/docker-ptf-bfn.mk include $(PLATFORM_PATH)/bfn-platform.mk include $(PLATFORM_PATH)/bfn-platform-wnc.mk include $(PLATFORM_PATH)/bfn-platform-ingrasys.mk +include $(PLATFORM_PATH)/bfn-modules.mk SONIC_ALL += $(SONIC_ONE_IMAGE) $(SONIC_ONE_ABOOT) \ $(DOCKER_FPM) diff --git a/platform/barefoot/sonic-platform-modules-bfn-montara/MAINTAINERS b/platform/barefoot/sonic-platform-modules-bfn-montara/MAINTAINERS index ed64c78242..6396065f4a 100644 --- a/platform/barefoot/sonic-platform-modules-bfn-montara/MAINTAINERS +++ b/platform/barefoot/sonic-platform-modules-bfn-montara/MAINTAINERS @@ -1,3 +1,3 @@ -# This file describes the maintainers for sonic-platform-modules-bfn +# This file describes the maintainers for sonic-platform-modules-bfn-montara # See the 
SONiC project governance document for more information Mailinglist = sonicproject@googlegroups.com diff --git a/platform/barefoot/sonic-platform-modules-bfn-montara/README.md b/platform/barefoot/sonic-platform-modules-bfn-montara/README.md index 5dc055a1d9..ac1fffb7dc 100644 --- a/platform/barefoot/sonic-platform-modules-bfn-montara/README.md +++ b/platform/barefoot/sonic-platform-modules-bfn-montara/README.md @@ -1,2 +1,2 @@ -# sonic-platform-modules-bfn +# sonic-platform-modules-bfn-montara Device drivers for support of BFN platform for the SONiC project diff --git a/platform/barefoot/sonic-platform-modules-bfn-montara/debian/changelog b/platform/barefoot/sonic-platform-modules-bfn-montara/debian/changelog index 80c1d96d34..07fbc70814 100644 --- a/platform/barefoot/sonic-platform-modules-bfn-montara/debian/changelog +++ b/platform/barefoot/sonic-platform-modules-bfn-montara/debian/changelog @@ -1,4 +1,10 @@ -platform-modules-bfn-montara (1.0) unstable; urgency=low +sonic-platform-modules-bfn-montara (1.1) unstable; urgency=low + + * Remove bfn asic kernel modules from platform package + + -- Support Mon, 22 Oct 2018 15:40:00 -0800 + +sonic-platform-modules-bfn-montara (1.0) unstable; urgency=low * Initial release diff --git a/platform/barefoot/sonic-platform-modules-bfn-montara/debian/control b/platform/barefoot/sonic-platform-modules-bfn-montara/debian/control index 589f03d2f4..d3a1f02495 100644 --- a/platform/barefoot/sonic-platform-modules-bfn-montara/debian/control +++ b/platform/barefoot/sonic-platform-modules-bfn-montara/debian/control @@ -1,11 +1,11 @@ -Source: platform-modules-bfn-montara +Source: sonic-platform-modules-bfn-montara Section: main Priority: extra Maintainer: Support Build-Depends: debhelper (>= 8.0.0), bzip2 Standards-Version: 3.9.3 -Package: platform-modules-bfn-montara +Package: sonic-platform-modules-bfn-montara Architecture: amd64 Depends: linux-image-4.9.0-7-amd64 Description: kernel modules for platform devices such as fan, led, sfp 
diff --git a/platform/barefoot/sonic-platform-modules-bfn-montara/debian/rules b/platform/barefoot/sonic-platform-modules-bfn-montara/debian/rules index e48edc3049..479f7f4896 100755 --- a/platform/barefoot/sonic-platform-modules-bfn-montara/debian/rules +++ b/platform/barefoot/sonic-platform-modules-bfn-montara/debian/rules @@ -1,22 +1,12 @@ #!/usr/bin/make -f -export INSTALL_MOD_DIR:=extra - -PACKAGE_NAME := platform-modules-bfn-montara -KVERSION ?= $(shell uname -r) -KERNEL_SRC := /lib/modules/$(KVERSION) -MODULE_SRC := $(shell pwd)/modules +PACKAGE_NAME := sonic-platform-modules-bfn-montara SCRIPT_SRC := $(shell pwd)/scripts %: dh $@ -override_dh_auto_build: - make -C $(KERNEL_SRC)/build M=$(MODULE_SRC) - override_dh_auto_install: - dh_installdirs -p$(PACKAGE_NAME) $(KERNEL_SRC)/$(INSTALL_MOD_DIR) - cp $(MODULE_SRC)/*.ko debian/$(PACKAGE_NAME)/$(KERNEL_SRC)/$(INSTALL_MOD_DIR) dh_installdirs -p$(PACKAGE_NAME) usr/local/bin cp -r $(SCRIPT_SRC)/* debian/$(PACKAGE_NAME)/usr/local/bin @@ -26,7 +16,4 @@ override_dh_pysupport: override_dh_clean: dh_clean - rm -f $(MODULE_SRC)/*.o $(MODULE_SRC)/*.ko $(MODULE_SRC)/*.mod.c $(MODULE_SRC)/.*.cmd - rm -f $(MODULE_SRC)/Module.markers $(MODULE_SRC)/Module.symvers $(MODULE_SRC)/modules.order - rm -rf $(MODULE_SRC)/.tmp_versions diff --git a/platform/barefoot/sonic-platform-modules-bfn-montara/modules/Makefile b/platform/barefoot/sonic-platform-modules-bfn-montara/modules/Makefile deleted file mode 120000 index d394585dd2..0000000000 --- a/platform/barefoot/sonic-platform-modules-bfn-montara/modules/Makefile +++ /dev/null @@ -1 +0,0 @@ -../../sonic-platform-modules-bfn/modules/Makefile \ No newline at end of file diff --git a/platform/barefoot/sonic-platform-modules-bfn-montara/modules/bf_kdrv.c b/platform/barefoot/sonic-platform-modules-bfn-montara/modules/bf_kdrv.c deleted file mode 120000 index 959d811823..0000000000 --- a/platform/barefoot/sonic-platform-modules-bfn-montara/modules/bf_kdrv.c +++ /dev/null @@ -1 +0,0 @@ 
-../../sonic-platform-modules-bfn/modules/bf_kdrv.c \ No newline at end of file diff --git a/platform/barefoot/sonic-platform-modules-bfn-montara/modules/bf_tun.c b/platform/barefoot/sonic-platform-modules-bfn-montara/modules/bf_tun.c deleted file mode 120000 index ea380c3075..0000000000 --- a/platform/barefoot/sonic-platform-modules-bfn-montara/modules/bf_tun.c +++ /dev/null @@ -1 +0,0 @@ -../../sonic-platform-modules-bfn/modules/bf_tun.c \ No newline at end of file diff --git a/platform/barefoot/sonic-platform-modules-bfn/debian/changelog b/platform/barefoot/sonic-platform-modules-bfn/debian/changelog index d5df341126..984cc0dbfb 100644 --- a/platform/barefoot/sonic-platform-modules-bfn/debian/changelog +++ b/platform/barefoot/sonic-platform-modules-bfn/debian/changelog @@ -1,4 +1,10 @@ -platform-modules-bfn (1.0) unstable; urgency=low +sonic-platform-modules-bfn (1.1) unstable; urgency=low + + * Remove bfn asic kernel modules from platform package + + -- Support Mon, 22 Oct 2018 15:40:00 -0800 + +sonic-platform-modules-bfn (1.0) unstable; urgency=low * Initial release diff --git a/platform/barefoot/sonic-platform-modules-bfn/debian/control b/platform/barefoot/sonic-platform-modules-bfn/debian/control index d4fd702298..fe82a2336d 100644 --- a/platform/barefoot/sonic-platform-modules-bfn/debian/control +++ b/platform/barefoot/sonic-platform-modules-bfn/debian/control @@ -1,11 +1,11 @@ -Source: platform-modules-bfn +Source: sonic-platform-modules-bfn Section: main Priority: extra Maintainer: support Build-Depends: debhelper (>= 8.0.0), bzip2 Standards-Version: 3.9.3 -Package: platform-modules-bfn +Package: sonic-platform-modules-bfn Architecture: amd64 Depends: linux-image-4.9.0-7-amd64 Description: kernel modules for platform devices such as fan, led, sfp diff --git a/platform/barefoot/sonic-platform-modules-bfn/debian/rules b/platform/barefoot/sonic-platform-modules-bfn/debian/rules index 73aa5d84da..69946c3357 100755 --- 
a/platform/barefoot/sonic-platform-modules-bfn/debian/rules +++ b/platform/barefoot/sonic-platform-modules-bfn/debian/rules @@ -1,22 +1,12 @@ #!/usr/bin/make -f -export INSTALL_MOD_DIR:=extra - -PACKAGE_NAME := platform-modules-bfn -KVERSION ?= $(shell uname -r) -KERNEL_SRC := /lib/modules/$(KVERSION) -MODULE_SRC := $(shell pwd)/modules +PACKAGE_NAME := sonic-platform-modules-bfn SCRIPT_SRC := $(shell pwd)/scripts %: dh $@ -override_dh_auto_build: - make -C $(KERNEL_SRC)/build M=$(MODULE_SRC) - override_dh_auto_install: - dh_installdirs -p$(PACKAGE_NAME) $(KERNEL_SRC)/$(INSTALL_MOD_DIR) - cp $(MODULE_SRC)/*.ko debian/$(PACKAGE_NAME)/$(KERNEL_SRC)/$(INSTALL_MOD_DIR) dh_installdirs -p$(PACKAGE_NAME) usr/local/bin cp -r $(SCRIPT_SRC)/* debian/$(PACKAGE_NAME)/usr/local/bin @@ -26,7 +16,4 @@ override_dh_pysupport: override_dh_clean: dh_clean - rm -f $(MODULE_SRC)/*.o $(MODULE_SRC)/*.ko $(MODULE_SRC)/*.mod.c $(MODULE_SRC)/.*.cmd - rm -f $(MODULE_SRC)/Module.markers $(MODULE_SRC)/Module.symvers $(MODULE_SRC)/modules.order - rm -rf $(MODULE_SRC)/.tmp_versions diff --git a/platform/barefoot/sonic-platform-modules-bfn/modules/bf_kdrv.c b/platform/barefoot/sonic-platform-modules-bfn/modules/bf_kdrv.c deleted file mode 100644 index fd66ad0949..0000000000 --- a/platform/barefoot/sonic-platform-modules-bfn/modules/bf_kdrv.c +++ /dev/null @@ -1,1254 +0,0 @@ -/******************************************************************************* - * BAREFOOT NETWORKS CONFIDENTIAL & PROPRIETARY - * - * Copyright (c) 2015-2016 Barefoot Networks, Inc. - - * All Rights Reserved. - * - * NOTICE: All information contained herein is, and remains the property of - * Barefoot Networks, Inc. and its suppliers, if any. The intellectual and - * technical concepts contained herein are proprietary to Barefoot Networks, - * Inc. - * and its suppliers and may be covered by U.S. and Foreign Patents, patents in - * process, and are protected by trade secret or copyright law. 
- * Dissemination of this information or reproduction of this material is - * strictly forbidden unless prior written permission is obtained from - * Barefoot Networks, Inc. - * - * No warranty, explicit or implicit is provided, unless granted under a - * written agreement with Barefoot Networks, Inc. - * - * $Id: $ - * - ******************************************************************************/ -/** - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2015 Barefoot Networks. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the... - * - **/ - -/* bf_drv kernel module - * - * This is kernel mode driver for Tofino chip. - * Provides user space mmap service and user space "wait for interrupt" - * and "enable interrupt" services. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) - #include -#else - #include -#endif - -#include -#include -#include - -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0) -//#error unsupported linux kernel version -#endif - -/* TBD: Need to build with CONFIG_PCI_MSI */ -extern int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec); -extern int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int minvec, int maxvec); - -#define PCI_VENDOR_ID_BF 0x1d1c -#define TOFINO_DEV_ID_A0 0x01 -#define TOFINO_DEV_ID_B0 0x10 - -#ifndef PCI_MSIX_ENTRY_SIZE -#define PCI_MSIX_ENTRY_SIZE 16 -#define PCI_MSIX_ENTRY_LOWER_ADDR 0 -#define PCI_MSIX_ENTRY_UPPER_ADDR 4 -#define PCI_MSIX_ENTRY_DATA 8 -#define PCI_MSIX_ENTRY_VECTOR_CTRL 12 -#define PCI_MSIX_ENTRY_CTRL_MASKBIT 1 -#endif - -#define BF_CLASS_NAME "bf" -#define BF_MAX_DEVICE_CNT 256 -#define BF_INTR_MODE_NONE_NAME "none" -#define BF_INTR_MODE_LEGACY_NAME "legacy" -#define BF_INTR_MODE_MSI_NAME "msi" -#define BF_INTR_MODE_MSIX_NAME "msix" -#define BF_MAX_BAR_MAPS 6 -#define BF_MSIX_ENTRY_CNT 128 /* TBD make it 512 */ -#define BF_MSI_ENTRY_CNT 2 - -/* interrupt mode */ -enum bf_intr_mode { - BF_INTR_MODE_NONE = 0, - BF_INTR_MODE_LEGACY, - BF_INTR_MODE_MSI, - BF_INTR_MODE_MSIX -}; - -/* device memory */ -struct bf_dev_mem { - const char *name; - phys_addr_t addr; - resource_size_t size; - void __iomem *internal_addr; -}; - -struct bf_listener { - struct bf_pci_dev *bfdev; - s32 event_count[BF_MSIX_ENTRY_CNT]; - int minor; - struct bf_listener *next; -}; - -/* device information */ -struct bf_dev_info { - struct module *owner; - struct device *dev; - int minor; - atomic_t event[BF_MSIX_ENTRY_CNT]; - wait_queue_head_t wait; - const char *version; - struct bf_dev_mem mem[BF_MAX_BAR_MAPS]; - struct msix_entry *msix_entries; - long irq; /* first irq vector */ - int num_irq; /* number of 
irq vectors */ - unsigned long irq_flags;/* sharable ?? */ - int pci_error_state; /* was there a pci bus error */ -}; - -/* cookie to be passed to IRQ handler, useful especially with MSIX */ -struct bf_int_vector { - struct bf_pci_dev *bf_dev; - int int_vec_offset; -}; - - -/** - * A structure describing the private information for a BF pcie device. - */ -struct bf_pci_dev { - struct bf_dev_info info; - struct pci_dev *pdev; - enum bf_intr_mode mode; - u8 instance; - char name[16]; - struct bf_int_vector bf_int_vec[BF_MSIX_ENTRY_CNT]; - struct bf_listener *listener_head; /* head of a singly linked list of - listeners */ -}; - -/* Keep any global information here that must survive even after the - * bf_pci_dev is free-ed up. - */ -struct bf_global { - struct bf_pci_dev *bfdev ; - struct cdev *bf_cdev; - struct fasync_struct *async_queue; -}; - -static int bf_major; -static int bf_minor[BF_MAX_DEVICE_CNT] = {0}; -static struct class *bf_class = NULL; -static char *intr_mode = NULL; -static enum bf_intr_mode bf_intr_mode_default = BF_INTR_MODE_MSI; -static spinlock_t bf_nonisr_lock; -/* dev->minor should index into this array */ -static struct bf_global bf_global[BF_MAX_DEVICE_CNT]; - -static void bf_add_listener(struct bf_pci_dev *bfdev, - struct bf_listener *listener) -{ - struct bf_listener **cur_listener = &bfdev->listener_head; - - if (!listener) { - return; - } - spin_lock(&bf_nonisr_lock); - - while (*cur_listener) { - cur_listener = &((*cur_listener)->next); - } - *cur_listener = listener; - listener->next = NULL; - - spin_unlock(&bf_nonisr_lock); -} - -static void bf_remove_listener(struct bf_pci_dev *bfdev, - struct bf_listener *listener) -{ - struct bf_listener **cur_listener = &bfdev->listener_head; - - /* in case of certain error conditions, this function might be called after bf_pci_remove() - */ - if (!bfdev || !listener) { - return; - } - spin_lock(&bf_nonisr_lock); - - if (*cur_listener == listener) { - *cur_listener = listener->next; - } else { - 
while (*cur_listener) { - if ((*cur_listener)->next == listener) { - (*cur_listener)->next = listener->next; - break; - } - cur_listener = &((*cur_listener)->next); - } - listener->next = NULL; - } - - spin_unlock(&bf_nonisr_lock); -} - -/* a pool of minor numbers is maintained */ -/* return the first available minor number */ -static int bf_get_next_minor_no(int *minor) -{ - int i; - - spin_lock(&bf_nonisr_lock); - for(i = 0; i < BF_MAX_DEVICE_CNT; i++) { - if (bf_minor[i] == 0) { - *minor = i; - bf_minor[i] = 1; /* mark it as taken */ - spin_unlock(&bf_nonisr_lock); - return 0; - } - } - *minor = -1; - spin_unlock(&bf_nonisr_lock); - return -1; -} - -/* return a minor number back to the pool for recycling */ -static int bf_return_minor_no(int minor) -{ - int err; - - spin_lock(&bf_nonisr_lock); - if (bf_minor[minor] == 0) { /* was already returned */ - err = -1; /* don't change anything, but return error */ - } else { - bf_minor[minor] = 0; /* mark it as available */ - err = 0; - } - spin_unlock(&bf_nonisr_lock); - return err; -} - -static inline struct bf_pci_dev *bf_get_pci_dev(struct bf_dev_info *info) -{ - return container_of(info, struct bf_pci_dev, info); -} - -/* - * It masks the msix on/off of generating MSI-X messages. - */ -static void -bf_msix_mask_irq(struct msi_desc *desc, int32_t state) -{ - u32 mask_bits = desc->masked; - unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_VECTOR_CTRL; - - if (state != 0) - mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT; - else - mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT; - - if (mask_bits != desc->masked) { - writel(mask_bits, desc->mask_base + offset); - readl(desc->mask_base); - desc->masked = mask_bits; - } -} - -/** - * irqcontrol can be used to disable/enable interrupt from user space processes. - * - * @param bf_dev - * pointer to bf_pci_dev - * @param irq_state - * state value. 1 to enable interrupt, 0 to disable interrupt. - * - * @return - * - On success, 0. 
- * - On failure, a negative value. - */ -static int -bf_pci_irqcontrol(struct bf_pci_dev *bfdev, s32 irq_state) -{ - struct pci_dev *pdev = bfdev->pdev; - - pci_cfg_access_lock(pdev); - if (bfdev->mode == BF_INTR_MODE_LEGACY) - pci_intx(pdev, !!irq_state); - - else if (bfdev->mode == BF_INTR_MODE_MSIX) { - struct msi_desc *desc; -#if LINUX_VERSION_CODE < KERNEL_VERSION(4,2,0) - list_for_each_entry(desc, &pdev->msi_list, list) - bf_msix_mask_irq(desc, irq_state); -#else - for_each_pci_msi_entry(desc, pdev) - bf_msix_mask_irq(desc, irq_state); -#endif - } - pci_cfg_access_unlock(pdev); - - return 0; -} - -/** - * interrupt handler which will check if the interrupt is from the right - * device. If so, disable it here and will be enabled later. - */ -static irqreturn_t bf_pci_irqhandler(int irq, struct bf_pci_dev *bfdev) -{ - /* Legacy mode need to mask in hardware */ - if (bfdev->mode == BF_INTR_MODE_LEGACY && - !pci_check_and_mask_intx(bfdev->pdev)) - return IRQ_NONE; - - /* NOTE : if bfdev->info.pci_error_state == 1, then do not access the - * device and return IRQ_NOTHANDLED. 
- */ - /* Message signal mode, no share IRQ and automasked */ - return IRQ_HANDLED; -} - -/* Remap pci resources described by bar #pci_bar */ -static int -bf_pci_setup_iomem(struct pci_dev *dev, struct bf_dev_info *info, - int n, int pci_bar, const char *name) -{ - unsigned long addr, len; - void *internal_addr; - - if (sizeof(info->mem) / sizeof(info->mem[0]) <= n) - return -EINVAL; - - addr = pci_resource_start(dev, pci_bar); - len = pci_resource_len(dev, pci_bar); - if (addr == 0 || len == 0) - return -1; - internal_addr = pci_ioremap_bar(dev, pci_bar); - if (internal_addr == NULL) - return -1; - info->mem[n].name = name; - info->mem[n].addr = addr; - info->mem[n].internal_addr = internal_addr; - info->mem[n].size = len; - return 0; -} - -/* Unmap previously ioremap'd resources */ -static void -bf_pci_release_iomem(struct bf_dev_info *info) -{ - int i; - - for (i = 0; i < BF_MAX_BAR_MAPS; i++) { - if (info->mem[i].internal_addr) - iounmap(info->mem[i].internal_addr); - } -} - -static int -bf_setup_bars(struct pci_dev *dev, struct bf_dev_info *info) -{ - int i, iom, ret; - unsigned long flags; - static const char *bar_names[BF_MAX_BAR_MAPS] = { - "BAR0", "BAR1", "BAR2", "BAR3", "BAR4", "BAR5", - }; - - iom = 0; - - for (i = 0; i < BF_MAX_BAR_MAPS; i++) { - if (pci_resource_len(dev, i) != 0 && - pci_resource_start(dev, i) != 0) { - flags = pci_resource_flags(dev, i); - if (flags & IORESOURCE_MEM) { - ret = bf_pci_setup_iomem(dev, info, iom, i, bar_names[i]); - if (ret != 0) - return ret; - iom++; - } - } - } - return (iom != 0) ? 
ret : -ENOENT; -} - -static irqreturn_t bf_interrupt(int irq, void *bfdev_id) -{ - struct bf_pci_dev *bfdev = ((struct bf_int_vector *)bfdev_id)->bf_dev; - int vect_off = ((struct bf_int_vector *)bfdev_id)->int_vec_offset; - - irqreturn_t ret = bf_pci_irqhandler(irq, bfdev); - - if (ret == IRQ_HANDLED) - atomic_inc(&(bfdev->info.event[vect_off])); - - return ret; -} - -static unsigned int bf_poll(struct file *filep, poll_table *wait) -{ - struct bf_listener *listener = (struct bf_listener *)filep->private_data; - struct bf_pci_dev *bfdev = listener->bfdev; - int i; - - if (!bfdev) { - return -ENODEV; - } - if (!bfdev->info.irq) - return -EIO; - - poll_wait(filep, &bfdev->info.wait, wait); - - for (i = 0; i < BF_MSIX_ENTRY_CNT; i++) - if (listener->event_count[i] != atomic_read(&bfdev->info.event[i])) - return POLLIN | POLLRDNORM; - return 0; -} - -static int bf_find_mem_index(struct vm_area_struct *vma) -{ - struct bf_pci_dev *bfdev = vma->vm_private_data; - if (vma->vm_pgoff < BF_MAX_BAR_MAPS) { - if (bfdev->info.mem[vma->vm_pgoff].size == 0) - return -1; - return (int)vma->vm_pgoff; - } - return -1; -} - -static const struct vm_operations_struct bf_physical_vm_ops = { -#ifdef CONFIG_HAVE_IOREMAP_PROT - .access = generic_access_phys, -#endif -}; - -static int bf_mmap_physical(struct vm_area_struct *vma) -{ - struct bf_pci_dev *bfdev = vma->vm_private_data; - int bar = bf_find_mem_index(vma); - struct bf_dev_mem *mem; - if (bar < 0) - return -EINVAL; - - mem = bfdev->info.mem + bar; - - if (mem->addr & ~PAGE_MASK) - return -ENODEV; - if (vma->vm_end - vma->vm_start > mem->size) - return -EINVAL; - - vma->vm_ops = &bf_physical_vm_ops; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - - /* - * We cannot use the vm_iomap_memory() helper here, - * because vma->vm_pgoff is the map index we looked - * up above in bf_find_mem_index(), rather than an - * actual page offset into the mmap. - * - * So we just do the physical mmap without a page - * offset. 
- */ - return remap_pfn_range(vma, vma->vm_start, mem->addr >> PAGE_SHIFT, - vma->vm_end - vma->vm_start, vma->vm_page_prot); -} - -static int bf_mmap(struct file *filep, struct vm_area_struct *vma) -{ - struct bf_listener *listener = filep->private_data; - struct bf_pci_dev *bfdev = listener->bfdev; - int bar; - unsigned long requested_pages, actual_pages; - - if (!bfdev) { - return -ENODEV; - } - if (vma->vm_end < vma->vm_start) - return -EINVAL; - - vma->vm_private_data = bfdev; - - bar = bf_find_mem_index(vma); - if (bar < 0) - return -EINVAL; - - requested_pages = vma_pages(vma); - actual_pages = ((bfdev->info.mem[bar].addr & ~PAGE_MASK) - + bfdev->info.mem[bar].size + PAGE_SIZE -1) >> PAGE_SHIFT; - if (requested_pages > actual_pages) - return -EINVAL; - - return bf_mmap_physical(vma); -} - -static int bf_fasync(int fd, struct file *filep, int mode) -{ - int minor; - - if (!filep->private_data) { - return (-EINVAL); - } - minor = ((struct bf_listener *)filep->private_data)->minor; - if (minor >= BF_MAX_DEVICE_CNT) { - return (-EINVAL); - } - if (mode == 0 && &bf_global[minor].async_queue == NULL) { - return 0; /* nothing to do */ - } - return (fasync_helper(fd, filep, mode, &bf_global[minor].async_queue)); -} - -static int bf_open(struct inode *inode, struct file *filep) -{ - struct bf_pci_dev *bfdev; - struct bf_listener *listener; - int i; - - bfdev = bf_global[iminor(inode)].bfdev; - listener = kmalloc(sizeof(*listener), GFP_KERNEL); - if (listener) { - listener->bfdev = bfdev; - listener->minor = bfdev->info.minor; - listener->next = NULL; - bf_add_listener(bfdev, listener); - for (i = 0; i < BF_MSIX_ENTRY_CNT; i++) - listener->event_count[i] = atomic_read(&bfdev->info.event[i]); - filep->private_data = listener; - return 0; - } else { - return(-ENOMEM); - } -} - -static int bf_release(struct inode *inode, struct file *filep) -{ - struct bf_listener *listener = filep->private_data; - - bf_fasync(-1, filep, 0); /* empty any process id in the notification 
list */ - if (listener->bfdev) { - bf_remove_listener(listener->bfdev, listener); - } - kfree(listener); - return 0; -} - -/* user space support: make read() system call after poll() of select() */ -static ssize_t bf_read(struct file *filep, char __user *buf, - size_t count, loff_t *ppos) -{ - struct bf_listener *listener = filep->private_data; - struct bf_pci_dev *bfdev = listener->bfdev; - int retval, event_count[BF_MSIX_ENTRY_CNT]; - int i, mismatch_found = 0; /* OR of per vector mismatch */ - unsigned char cnt_match[BF_MSIX_ENTRY_CNT]; /* per vector mismatch */ - - if (!bfdev) { - return -ENODEV; - } - /* irq must be setup for read() to work */ - if (!bfdev->info.irq) - return -EIO; - - /* ensure that there is enough space on user buffer for the given interrupt - * mode */ - if (bfdev->mode == BF_INTR_MODE_MSIX) { - if (count < sizeof(s32)*BF_MSIX_ENTRY_CNT) - return -EINVAL; - count = sizeof(s32)*BF_MSIX_ENTRY_CNT; - } else if (bfdev->mode == BF_INTR_MODE_MSI) { - if (count < sizeof(s32)*BF_MSI_ENTRY_CNT) - return -EINVAL; - count = sizeof(s32)*BF_MSI_ENTRY_CNT; - } else { - if (count < sizeof(s32)) - return -EINVAL; - count = sizeof(s32); - } - - do { - set_current_state(TASK_INTERRUPTIBLE); - - for (i = 0; i < (count/sizeof(s32)); i++) { - event_count[i] = atomic_read(&(bfdev->info.event[i])); - if (event_count[i] != listener->event_count[i]) { - mismatch_found |= 1; - cnt_match[i] = 1; - } else { - event_count[i] = 0; - cnt_match[i] = 0; - } - } - if (mismatch_found) { - __set_current_state(TASK_RUNNING); - if (copy_to_user(buf, &event_count, count)) - retval = -EFAULT; - else { /* adjust the listener->event_count; */ - for (i = 0 ; i < (count/sizeof(s32)); i++) { - if (cnt_match[i]) { - listener->event_count[i] = event_count[i]; - } - } - retval = count; - } - break; - } - - if (filep->f_flags & O_NONBLOCK) { - retval = -EAGAIN; - break; - } - - if (signal_pending(current)) { - retval = -ERESTARTSYS; - break; - } - schedule(); - } while (1); - - 
__set_current_state(TASK_RUNNING); - - return retval; -} - -/* user space is supposed to call this after it is done with interrupt - * processing - */ -static ssize_t bf_write(struct file *filep, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct bf_listener *listener = filep->private_data; - struct bf_pci_dev *bfdev = listener->bfdev; - ssize_t ret; - s32 int_en; - - if (!bfdev || !bfdev->info.irq) - return -EIO; - - if (count != sizeof(s32)) - return -EINVAL; - - if (copy_from_user(&int_en, buf, count)) - return -EFAULT; - - /* clear pci_error_state */ - bfdev->info.pci_error_state = 0; - - ret = bf_pci_irqcontrol(bfdev, int_en); - - return ret ? ret : sizeof(s32); -} - -static const struct file_operations bf_fops = { - .owner = THIS_MODULE, - .open = bf_open, - .release = bf_release, - .read = bf_read, - .write = bf_write, - .mmap = bf_mmap, - .poll = bf_poll, - .fasync = bf_fasync, -}; - -static int bf_major_init(struct bf_pci_dev *bfdev, int minor) -{ - struct cdev *cdev; - static const char name[] = "bf"; - dev_t bf_dev = 0; - int result; - - result = alloc_chrdev_region(&bf_dev, 0, BF_MAX_DEVICE_CNT, name); - if (result) - return result; - - result = -ENOMEM; - cdev = cdev_alloc(); - if (!cdev) { - goto fail_dev_add; - } - cdev->ops = &bf_fops; - cdev->owner = THIS_MODULE; - kobject_set_name(&cdev->kobj, "%s", name); - result = cdev_add(cdev, bf_dev, BF_MAX_DEVICE_CNT); - - if (result) - goto fail_dev_add; - - bf_major = MAJOR(bf_dev); - bf_global[minor].bf_cdev = cdev; - return 0; - -fail_dev_add: - unregister_chrdev_region(bf_dev, BF_MAX_DEVICE_CNT); - return result; -} - -static void bf_major_cleanup(struct bf_pci_dev *bfdev, int minor) -{ - unregister_chrdev_region(MKDEV(bf_major, 0), BF_MAX_DEVICE_CNT); - cdev_del(bf_global[minor].bf_cdev); -} - -static int bf_init_cdev(struct bf_pci_dev *bfdev, int minor) -{ - int ret; - ret = bf_major_init(bfdev, minor); - if (ret) - return ret; - - bf_class = class_create(THIS_MODULE, BF_CLASS_NAME); - 
if (!bf_class) { - printk(KERN_ERR "create_class failed for bf_dev\n"); - ret = -ENODEV; - goto err_class_register; - } - return 0; - -err_class_register: - bf_major_cleanup(bfdev, minor); - return ret; -} - -static void bf_remove_cdev(struct bf_pci_dev *bfdev) -{ - class_destroy(bf_class); - bf_major_cleanup(bfdev, bfdev->info.minor); -} - - -/** - * bf_register_device - register a new userspace mem device - * @parent: parent device - * @bfdev: bf pci device - * - * returns zero on success or a negative error code. - */ -int bf_register_device(struct device *parent, struct bf_pci_dev *bfdev) -{ - struct bf_dev_info *info = &bfdev->info; - int i, j, ret = 0; - int minor; - - if (!parent || !info || !info->version) - return -EINVAL; - - init_waitqueue_head(&info->wait); - - for (i = 0; i < BF_MSIX_ENTRY_CNT; i++) { - atomic_set(&info->event[i], 0); - } - - if (bf_get_next_minor_no(&minor)) { - return -EINVAL; - } - - ret = bf_init_cdev(bfdev, minor); - if (ret) { - printk(KERN_ERR "BF: device cdev creation failed\n"); - return ret; - } - - info->dev = device_create(bf_class, parent, - MKDEV(bf_major, minor), bfdev, - "bf%d", minor); - if (!info->dev) { - printk(KERN_ERR "BF: device creation failed\n"); - return -ENODEV; - } - - info->minor = minor; - - /* bind ISRs and request interrupts */ - if (info->irq && (bfdev->mode != BF_INTR_MODE_NONE)) { - /* - * Note that we deliberately don't use devm_request_irq - * here. The parent module can unregister the UIO device - * and call pci_disable_msi, which requires that this - * irq has been freed. However, the device may have open - * FDs at the time of unregister and therefore may not be - * freed until they are released. 
- */ - if (bfdev->mode == BF_INTR_MODE_LEGACY) { - ret = request_irq(info->irq, bf_interrupt, - info->irq_flags, bfdev->name, - (void *)&(bfdev->bf_int_vec[0])); - if (ret) { - printk(KERN_ERR "bf failed to request legacy irq %ld error %d\n", - info->irq, ret); - return ret; - } - printk(KERN_NOTICE "BF allocating legacy int vector %ld\n", info->irq); - } else if (bfdev->mode == BF_INTR_MODE_MSIX) { - for (i = 0; i < info->num_irq; i++) { - ret = request_irq(info->msix_entries[i].vector, bf_interrupt, - info->irq_flags, bfdev->name, - (void *)&(bfdev->bf_int_vec[i])); - if (ret) { - /* undo all other previous bindings */ - printk(KERN_ERR "bf failed to request MSIX ret %d itr %d\n", ret, i); - for (j = i - 1; j >= 0; j--) { - free_irq(info->msix_entries[j].vector, - (void *)&(bfdev->bf_int_vec[j])); - } - return ret; - } - } - printk(KERN_NOTICE "BF allocating %d MSIx vectors from %ld\n", - info->num_irq, info->irq); - } else if (bfdev->mode == BF_INTR_MODE_MSI) { - for (i = 0; i < info->num_irq; i++) { - ret = request_irq(info->irq + i, bf_interrupt, - info->irq_flags, bfdev->name, - (void *)&(bfdev->bf_int_vec[i])); - if (ret) { - /* undo all other previous bindings */ - printk(KERN_ERR "bf failed to request MSI ret %d itr %d\n", ret, i); - for (j = i - 1; j >= 0; j--) { - free_irq(info->irq + j, (void *)&(bfdev->bf_int_vec[j])); - } - return ret; - } - } - printk(KERN_NOTICE "BF allocating %d MSI vectors from %ld\n", - info->num_irq, info->irq); - } - } - return 0; -} - -/** - * bf_unregister_device - register a new userspace mem device - * @bfdev: bf pci device - * - * returns none - */ -void bf_unregister_device(struct bf_pci_dev *bfdev) -{ - struct bf_dev_info *info = &bfdev->info; - int i; - - if (info->irq) { - if (bfdev->mode == BF_INTR_MODE_LEGACY) { - free_irq(info->irq, (void *)&(bfdev->bf_int_vec[0])); - } else if (bfdev->mode == BF_INTR_MODE_MSIX) { - for (i = 0; i < info->num_irq; i++) { - free_irq(info->msix_entries[i].vector, (void 
*)&(bfdev->bf_int_vec[i])); - } - } else if (bfdev->mode == BF_INTR_MODE_MSI) { - for (i = 0; i < info->num_irq; i++) { - free_irq(info->irq + i, (void *)&(bfdev->bf_int_vec[i])); - } - } - } - device_destroy(bf_class, MKDEV(bf_major, info->minor)); - bf_remove_cdev(bfdev); - bf_return_minor_no(info->minor); - return; -} - -static inline struct device *pci_dev_to_dev(struct pci_dev *pdev) -{ - return &pdev->dev; -} - -static int -bf_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) -{ - struct bf_pci_dev *bfdev; - int err, pci_use_highmem; - int i, num_irq; - - memset(bf_global, 0, sizeof(bf_global)); - - bfdev = kzalloc(sizeof(struct bf_pci_dev), GFP_KERNEL); - if (!bfdev) - return -ENOMEM; - - /* init the cookies to be passed to ISRs */ - for (i = 0; i < BF_MSIX_ENTRY_CNT; i++) { - bfdev->bf_int_vec[i].int_vec_offset = i; - bfdev->bf_int_vec[i].bf_dev = bfdev; - } - - /* initialize intr_mode to none */ - bfdev->mode = BF_INTR_MODE_NONE; - - /* clear pci_error_state */ - bfdev->info.pci_error_state = 0; - - /* - * enable device - */ - err = pci_enable_device(pdev); - if (err != 0) { - dev_err(&pdev->dev, "Cannot enable PCI device\n"); - goto fail_free; - } - - /* - * reserve device's PCI memory regions for use by this - * module - */ - err = pci_request_regions(pdev, "bf_umem"); - if (err != 0) { - dev_err(&pdev->dev, "Cannot request regions\n"); - goto fail_pci_disable; - } - /* remap IO memory */ - err = bf_setup_bars(pdev, &bfdev->info); - if (err != 0) - goto fail_release_iomem; - - if (!dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64)) && - !dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64))) { - pci_use_highmem = 1; - } else { - err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32)); - if (err) { - err = dma_set_coherent_mask(pci_dev_to_dev(pdev), - DMA_BIT_MASK(32)); - if (err) { - dev_err(pci_dev_to_dev(pdev), "No usable DMA " - "configuration, aborting\n"); - goto fail_release_iomem; - } - } - pci_use_highmem = 0; - } - - 
/* enable pci error reporting */ - /* for the current kernel version, kernel config must have set the followings: - * CONFIG_PCIEPORTBUS=y and CONFIG_PCIEAER = y - * we have pci_error_handlers defined that gets invoked by kernel AER module - * upon detecting the pcie error on this device's addresses. - * However, there seems no way that AER would pass the offending addresses - * to the callback functions. AER logs the error messages on the console. - * This driver's calback function send the SIGIO signal to the user space - * to indicate the error condition. - */ - pci_enable_pcie_error_reporting(pdev); - - /* enable bus mastering on the device */ - pci_set_master(pdev); - - /* fill in bfdev info */ - bfdev->info.version = "0.2"; - bfdev->info.owner = THIS_MODULE; - bfdev->pdev = pdev; - - switch (bf_intr_mode_default) { -#ifdef CONFIG_PCI_MSI - case BF_INTR_MODE_MSIX: - /* Only 1 msi-x vector needed */ - bfdev->info.msix_entries = kcalloc(BF_MSIX_ENTRY_CNT, - sizeof(struct msix_entry), GFP_KERNEL); - if (!bfdev->info.msix_entries) { - err = -ENOMEM; - goto fail_clear_pci_master; - } - for (i = 0; i < BF_MSIX_ENTRY_CNT; i++) { - bfdev->info.msix_entries[i].entry= i; - } - num_irq = pci_enable_msix_range(pdev, bfdev->info.msix_entries, - BF_MSIX_ENTRY_CNT, BF_MSIX_ENTRY_CNT); - if (num_irq == BF_MSIX_ENTRY_CNT) { - dev_dbg(&pdev->dev, "using MSI-X"); - bfdev->info.num_irq = num_irq; - bfdev->info.irq = bfdev->info.msix_entries[0].vector; - bfdev->mode = BF_INTR_MODE_MSIX; - printk(KERN_DEBUG "bf using %d MSIX irq from %ld\n", num_irq, - bfdev->info.irq); - break; - } else { - if (num_irq) - pci_disable_msix(pdev); - kfree(bfdev->info.msix_entries); - bfdev->info.msix_entries = NULL; - printk(KERN_ERR "bf error allocating MSIX vectors. 
Trying MSI...\n"); - /* and, fall back to MSI */ - } - /* ** intentional no-break */ - case BF_INTR_MODE_MSI: - num_irq = pci_enable_msi_range(pdev, BF_MSI_ENTRY_CNT, BF_MSI_ENTRY_CNT); - if (num_irq > 0) { - dev_dbg(&pdev->dev, "using MSI"); - bfdev->info.num_irq = num_irq; - bfdev->info.irq = pdev->irq; - bfdev->mode = BF_INTR_MODE_MSI; - printk(KERN_DEBUG "bf using %d MSI irq from %ld\n", bfdev->info.num_irq, - bfdev->info.irq); - break; - } -#endif /* CONFIG_PCI_MSI */ - /* fall back to Legacy Interrupt, intentional no-break */ - - case BF_INTR_MODE_LEGACY: - if (pci_intx_mask_supported(pdev)) { - dev_dbg(&pdev->dev, "using INTX"); - bfdev->info.irq_flags = IRQF_SHARED; - bfdev->info.irq = pdev->irq; - bfdev->mode = BF_INTR_MODE_LEGACY; - printk(KERN_DEBUG "bf using LEGACY irq %ld\n", bfdev->info.irq); - break; - } - dev_notice(&pdev->dev, "PCI INTx mask not supported\n"); - /* fall back to no Interrupt, intentional no-break */ - case BF_INTR_MODE_NONE: - bfdev->info.irq = 0; - bfdev->info.num_irq = 0; - bfdev->mode = BF_INTR_MODE_NONE; - break; - - default: - dev_err(&pdev->dev, "invalid IRQ mode %u", bf_intr_mode_default); - err = -EINVAL; - goto fail_clear_pci_master; - } - - pci_set_drvdata(pdev, bfdev); - sprintf(bfdev->name, "bf_%d", bfdev->info.minor); - /* register bf driver */ - err = bf_register_device(&pdev->dev, bfdev); - if (err != 0) - goto fail_release_irq; - - bf_global[bfdev->info.minor].async_queue = NULL; - bf_global[bfdev->info.minor].bfdev = bfdev; - - dev_info(&pdev->dev, "bf device %d registered with irq %ld\n", - bfdev->instance, bfdev->info.irq); - printk(KERN_ALERT "bf probe ok\n"); - return 0; - -fail_release_irq: - pci_set_drvdata(pdev, NULL); - if (bfdev->mode == BF_INTR_MODE_MSIX) { - pci_disable_msix(bfdev->pdev); - kfree(bfdev->info.msix_entries); - bfdev->info.msix_entries = NULL; - } - else if (bfdev->mode == BF_INTR_MODE_MSI) - pci_disable_msi(bfdev->pdev); -fail_clear_pci_master: - pci_clear_master(pdev); -fail_release_iomem: 
- bf_pci_release_iomem(&bfdev->info); - pci_release_regions(pdev); -fail_pci_disable: - pci_disable_device(pdev); -fail_free: - kfree(bfdev); - - printk(KERN_ERR "bf probe not ok\n"); - return err; -} - -static void -bf_pci_remove(struct pci_dev *pdev) -{ - struct bf_pci_dev *bfdev = pci_get_drvdata(pdev); - struct bf_listener *cur_listener; - - bf_unregister_device(bfdev); - if (bfdev->mode == BF_INTR_MODE_MSIX) { - pci_disable_msix(pdev); - kfree(bfdev->info.msix_entries); - bfdev->info.msix_entries = NULL; - } - else if (bfdev->mode == BF_INTR_MODE_MSI) - pci_disable_msi(pdev); - pci_clear_master(pdev); - bf_pci_release_iomem(&bfdev->info); - pci_release_regions(pdev); - pci_disable_pcie_error_reporting(pdev); - pci_disable_device(pdev); - pci_set_drvdata(pdev, NULL); - bf_global[bfdev->info.minor].bfdev = NULL; - /* existing filep structures in open file(s) must be informed that - * bf_pci_dev is no longer valid */ - spin_lock(&bf_nonisr_lock); - cur_listener = bfdev->listener_head; - while (cur_listener) { - cur_listener->bfdev = NULL; - cur_listener = cur_listener->next; - } - spin_unlock(&bf_nonisr_lock); - kfree(bfdev); -} - -/** - * bf_pci_error_detected - called when PCI error is detected - * @pdev: Pointer to PCI device - * @state: The current pci connection state - * - * called when root complex detects pci error associated with the device - */ -static pci_ers_result_t bf_pci_error_detected(struct pci_dev *pdev, - pci_channel_state_t state) -{ - struct bf_pci_dev *bfdev = pci_get_drvdata(pdev); - int minor; - - if (!bfdev) { - return PCI_ERS_RESULT_NONE; - } - printk(KERN_ERR "pci_err_detected state %d\n", state); - if (state == pci_channel_io_perm_failure || state == pci_channel_io_frozen) { - bfdev->info.pci_error_state = 1; - /* send a signal to the user space program of the error */ - minor = bfdev->info.minor; - if (minor < BF_MAX_DEVICE_CNT && bf_global[minor].async_queue) { - kill_fasync(&bf_global[minor].async_queue, SIGIO, POLL_ERR); - } - 
return PCI_ERS_RESULT_DISCONNECT; - } else { - return PCI_ERS_RESULT_NONE; - } -} - -/** - * bf_pci_slot_reset - called after the pci bus has been reset. - * @pdev: Pointer to PCI device - * - * Restart the card from scratch, as if from a cold-boot. - */ -static pci_ers_result_t bf_pci_slot_reset(struct pci_dev *pdev) -{ - /* nothing to do for now as we do not expect to get backto normal after - * a pcie link reset - * TBD: fill in this function if tofino can recover after an error - */ - return PCI_ERS_RESULT_DISCONNECT; -} - -/** - * bf_pci_resume - called when kernel thinks the device is up on PCIe. - * @pdev: Pointer to PCI device - * - * This callback is called when the error recovery driver tells us that - * its OK to resume normal operation. - */ -static void bf_pci_resume(struct pci_dev *pdev) -{ - /* this function should never be called for Tofinoi */ - struct bf_pci_dev *bfdev = pci_get_drvdata(pdev); - - printk(KERN_ERR "BF io_resume invoked after pci error\n"); - if (bfdev) { - bfdev->info.pci_error_state = 0; - } -} - -static int -bf_config_intr_mode(char *intr_str) -{ - if (!intr_str) { - pr_info("Use MSIX interrupt by default\n"); - return 0; - } - - if (!strcmp(intr_str, BF_INTR_MODE_MSIX_NAME)) { - bf_intr_mode_default = BF_INTR_MODE_MSIX; - pr_info("Use MSIX interrupt\n"); - } else if (!strcmp(intr_str, BF_INTR_MODE_MSI_NAME)) { - bf_intr_mode_default = BF_INTR_MODE_MSI; - pr_info("Use MSI interrupt\n"); - } else if (!strcmp(intr_str, BF_INTR_MODE_LEGACY_NAME)) { - bf_intr_mode_default = BF_INTR_MODE_LEGACY; - pr_info("Use legacy interrupt\n"); - } else { - pr_info("Error: bad parameter - %s\n", intr_str); - return -EINVAL; - } - - return 0; -} - -static const struct pci_device_id bf_pci_tbl[] = { - {PCI_VDEVICE(BF, TOFINO_DEV_ID_A0), 0}, - {PCI_VDEVICE(BF, TOFINO_DEV_ID_B0), 0}, - /* required last entry */ - { .device = 0 } -}; - -/* PCI bus error handlers */ -static struct pci_error_handlers bf_pci_err_handler = { - .error_detected = 
bf_pci_error_detected, - .slot_reset = bf_pci_slot_reset, - .resume = bf_pci_resume, -}; - -static struct pci_driver bf_pci_driver = { - .name = "bf", - .id_table = bf_pci_tbl, - .probe = bf_pci_probe, - .remove = bf_pci_remove, - .err_handler = &bf_pci_err_handler -}; - -static int __init -bfdrv_init(void) -{ - int ret; - - ret = bf_config_intr_mode(intr_mode); - if (ret < 0) - return ret; - - spin_lock_init(&bf_nonisr_lock); - return pci_register_driver(&bf_pci_driver); -} - -static void __exit -bfdrv_exit(void) -{ - pci_unregister_driver(&bf_pci_driver); -} - -module_init(bfdrv_init); -module_exit(bfdrv_exit); - -module_param(intr_mode, charp, S_IRUGO); -MODULE_PARM_DESC(intr_mode, -"bf interrupt mode (default=msix):\n" -" " BF_INTR_MODE_MSIX_NAME " Use MSIX interrupt\n" -" " BF_INTR_MODE_MSI_NAME " Use MSI interrupt\n" -" " BF_INTR_MODE_LEGACY_NAME " Use Legacy interrupt\n" -"\n"); - -MODULE_DEVICE_TABLE(pci, bf_pci_tbl); -MODULE_DESCRIPTION("Barefoot Tofino PCI device"); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Barefoot Networks"); diff --git a/platform/barefoot/sonic-platform-modules-wnc-osw1800/modules/Makefile b/platform/barefoot/sonic-platform-modules-wnc-osw1800/modules/Makefile index 29b904dd32..f01694d73e 100644 --- a/platform/barefoot/sonic-platform-modules-wnc-osw1800/modules/Makefile +++ b/platform/barefoot/sonic-platform-modules-wnc-osw1800/modules/Makefile @@ -1,5 +1,3 @@ -obj-m := bf_kdrv.o -obj-m += bf_tun.o obj-m += i2c-mcp2221.o obj-m += wnc_cpld.o obj-m += wnc_cpld3.o diff --git a/platform/barefoot/sonic-platform-modules-wnc-osw1800/modules/bf_tun.c b/platform/barefoot/sonic-platform-modules-wnc-osw1800/modules/bf_tun.c deleted file mode 100644 index abb906a246..0000000000 --- a/platform/barefoot/sonic-platform-modules-wnc-osw1800/modules/bf_tun.c +++ /dev/null @@ -1,2616 +0,0 @@ -/* - * TUN - Universal TUN/TAP device driver. 
- * Copyright (C) 1999-2002 Maxim Krasnyansky - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * $Id: tun.c,v 1.15 2002/03/01 02:44:24 maxk Exp $ - */ - -/* - * Changes: - * - * Mike Kershaw 2005/08/14 - * Add TUNSETLINK ioctl to set the link encapsulation - * - * Mark Smith - * Use eth_random_addr() for tap MAC address. - * - * Harald Roelle 2004/04/20 - * Fixes in packet dropping, queue length setting and queue wakeup. - * Increased default tx queue length. - * Added ethtool API. - * Minor cleanups - * - * Daniel Podlejski - * Modifications for 2.3.99-pre5 kernel. - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#define DRV_NAME "bf_tun" -#define DRV_VERSION "1.6" -#define DRV_DESCRIPTION "Universal TUN/TAP device driver" -#define DRV_COPYRIGHT "(C) 1999-2004 Max Krasnyansky " - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#define TUN_MINOR1 201 - -/* Uncomment to enable debugging */ -/* #define TUN_DEBUG 1 */ - -#ifdef TUN_DEBUG -static int debug; - -#define tun_debug(level, tun, fmt, args...) \ -do { \ - if (tun->debug) \ - netdev_printk(level, tun->dev, fmt, ##args); \ -} while (0) -#define DBG1(level, fmt, args...) 
\ -do { \ - if (debug == 2) \ - printk(level fmt, ##args); \ -} while (0) -#else -#define tun_debug(level, tun, fmt, args...) \ -do { \ - if (0) \ - netdev_printk(level, tun->dev, fmt, ##args); \ -} while (0) -#define DBG1(level, fmt, args...) \ -do { \ - if (0) \ - printk(level fmt, ##args); \ -} while (0) -#endif - -/* TUN device flags */ - -/* IFF_ATTACH_QUEUE is never stored in device flags, - * overload it to mean fasync when stored there. - */ -#define TUN_FASYNC IFF_ATTACH_QUEUE -/* High bits in flags field are unused. */ -#define TUN_VNET_LE 0x80000000 -#define TUN_VNET_BE 0x40000000 - -#define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \ - IFF_MULTI_QUEUE) -#define GOODCOPY_LEN 128 - -#define FLT_EXACT_COUNT 8 -struct tap_filter { - unsigned int count; /* Number of addrs. Zero means disabled */ - u32 mask[2]; /* Mask of the hashed addrs */ - unsigned char addr[FLT_EXACT_COUNT][ETH_ALEN]; -}; - -/* MAX_TAP_QUEUES 256 is chosen to allow rx/tx queues to be equal - * to max number of VCPUs in guest. */ -#define MAX_TAP_QUEUES 256 -#define MAX_TAP_FLOWS 4096 - -#define TUN_FLOW_EXPIRE (3 * HZ) - -struct tun_pcpu_stats { - u64 rx_packets; - u64 rx_bytes; - u64 tx_packets; - u64 tx_bytes; - struct u64_stats_sync syncp; - u32 rx_dropped; - u32 tx_dropped; - u32 rx_frame_errors; -}; - -/* A tun_file connects an open character device to a tuntap netdevice. It - * also contains all socket related structures (except sock_fprog and tap_filter) - * to serve as one transmit queue for tuntap device. The sock_fprog and - * tap_filter were kept in tun_struct since they were used for filtering for the - * netdevice not for a specific queue (at least I didn't see the requirement for - * this). - * - * RCU usage: - * The tun_file and tun_struct are loosely coupled, the pointer from one to the - * other can only be read while rcu_read_lock or rtnl_lock is held. 
- */ -struct tun_file { - struct sock sk; - struct socket socket; - struct socket_wq wq; - struct tun_struct __rcu *tun; - struct fasync_struct *fasync; - /* only used for fasnyc */ - unsigned int flags; - union { - u16 queue_index; - unsigned int ifindex; - }; - struct list_head next; - struct tun_struct *detached; - struct skb_array tx_array; -}; - -struct tun_flow_entry { - struct hlist_node hash_link; - struct rcu_head rcu; - struct tun_struct *tun; - - u32 rxhash; - u32 rps_rxhash; - int queue_index; - unsigned long updated; -}; - -#define TUN_NUM_FLOW_ENTRIES 1024 - -/* Since the socket were moved to tun_file, to preserve the behavior of persist - * device, socket filter, sndbuf and vnet header size were restore when the - * file were attached to a persist device. - */ -struct tun_struct { - struct tun_file __rcu *tfiles[MAX_TAP_QUEUES]; - unsigned int numqueues; - unsigned int flags; - kuid_t owner; - kgid_t group; - - struct net_device *dev; - netdev_features_t set_features; -#define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \ - NETIF_F_TSO6|NETIF_F_UFO) - - int align; - int vnet_hdr_sz; - int sndbuf; - struct tap_filter txflt; - struct sock_fprog fprog; - /* protected by rtnl lock */ - bool filter_attached; -#ifdef TUN_DEBUG - int debug; -#endif - spinlock_t lock; - struct hlist_head flows[TUN_NUM_FLOW_ENTRIES]; - struct timer_list flow_gc_timer; - unsigned long ageing_time; - unsigned int numdisabled; - struct list_head disabled; - void *security; - u32 flow_count; - struct tun_pcpu_stats __percpu *pcpu_stats; -}; - -#ifdef CONFIG_TUN_VNET_CROSS_LE -static inline bool tun_legacy_is_little_endian(struct tun_struct *tun) -{ - return tun->flags & TUN_VNET_BE ? 
false : - virtio_legacy_is_little_endian(); -} - -static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp) -{ - int be = !!(tun->flags & TUN_VNET_BE); - - if (put_user(be, argp)) - return -EFAULT; - - return 0; -} - -static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp) -{ - int be; - - if (get_user(be, argp)) - return -EFAULT; - - if (be) - tun->flags |= TUN_VNET_BE; - else - tun->flags &= ~TUN_VNET_BE; - - return 0; -} -#else -static inline bool tun_legacy_is_little_endian(struct tun_struct *tun) -{ - return virtio_legacy_is_little_endian(); -} - -static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp) -{ - return -EINVAL; -} - -static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp) -{ - return -EINVAL; -} -#endif /* CONFIG_TUN_VNET_CROSS_LE */ - -static inline bool tun_is_little_endian(struct tun_struct *tun) -{ - return tun->flags & TUN_VNET_LE || - tun_legacy_is_little_endian(tun); -} - -static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val) -{ - return __virtio16_to_cpu(tun_is_little_endian(tun), val); -} - -static inline __virtio16 cpu_to_tun16(struct tun_struct *tun, u16 val) -{ - return __cpu_to_virtio16(tun_is_little_endian(tun), val); -} - -static inline u32 tun_hashfn(u32 rxhash) -{ - return rxhash & 0x3ff; -} - -static struct tun_flow_entry *tun_flow_find(struct hlist_head *head, u32 rxhash) -{ - struct tun_flow_entry *e; - - hlist_for_each_entry_rcu(e, head, hash_link) { - if (e->rxhash == rxhash) - return e; - } - return NULL; -} - -static struct tun_flow_entry *tun_flow_create(struct tun_struct *tun, - struct hlist_head *head, - u32 rxhash, u16 queue_index) -{ - struct tun_flow_entry *e = kmalloc(sizeof(*e), GFP_ATOMIC); - - if (e) { - tun_debug(KERN_INFO, tun, "create flow: hash %u index %u\n", - rxhash, queue_index); - e->updated = jiffies; - e->rxhash = rxhash; - e->rps_rxhash = 0; - e->queue_index = queue_index; - e->tun = tun; - hlist_add_head_rcu(&e->hash_link, head); - 
++tun->flow_count; - } - return e; -} - -static void tun_flow_delete(struct tun_struct *tun, struct tun_flow_entry *e) -{ - tun_debug(KERN_INFO, tun, "delete flow: hash %u index %u\n", - e->rxhash, e->queue_index); - hlist_del_rcu(&e->hash_link); - kfree_rcu(e, rcu); - --tun->flow_count; -} - -static void tun_flow_flush(struct tun_struct *tun) -{ - int i; - - spin_lock_bh(&tun->lock); - for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) { - struct tun_flow_entry *e; - struct hlist_node *n; - - hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) - tun_flow_delete(tun, e); - } - spin_unlock_bh(&tun->lock); -} - -static void tun_flow_delete_by_queue(struct tun_struct *tun, u16 queue_index) -{ - int i; - - spin_lock_bh(&tun->lock); - for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) { - struct tun_flow_entry *e; - struct hlist_node *n; - - hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) { - if (e->queue_index == queue_index) - tun_flow_delete(tun, e); - } - } - spin_unlock_bh(&tun->lock); -} - -static void tun_flow_cleanup(unsigned long data) -{ - struct tun_struct *tun = (struct tun_struct *)data; - unsigned long delay = tun->ageing_time; - unsigned long next_timer = jiffies + delay; - unsigned long count = 0; - int i; - - tun_debug(KERN_INFO, tun, "tun_flow_cleanup\n"); - - spin_lock_bh(&tun->lock); - for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) { - struct tun_flow_entry *e; - struct hlist_node *n; - - hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) { - unsigned long this_timer; - count++; - this_timer = e->updated + delay; - if (time_before_eq(this_timer, jiffies)) - tun_flow_delete(tun, e); - else if (time_before(this_timer, next_timer)) - next_timer = this_timer; - } - } - - if (count) - mod_timer(&tun->flow_gc_timer, round_jiffies_up(next_timer)); - spin_unlock_bh(&tun->lock); -} - -static void tun_flow_update(struct tun_struct *tun, u32 rxhash, - struct tun_file *tfile) -{ - struct hlist_head *head; - struct tun_flow_entry *e; - unsigned long delay = 
tun->ageing_time; - u16 queue_index = tfile->queue_index; - - if (!rxhash) - return; - else - head = &tun->flows[tun_hashfn(rxhash)]; - - rcu_read_lock(); - - /* We may get a very small possibility of OOO during switching, not - * worth to optimize.*/ - if (tun->numqueues == 1 || tfile->detached) - goto unlock; - - e = tun_flow_find(head, rxhash); - if (likely(e)) { - /* TODO: keep queueing to old queue until it's empty? */ - e->queue_index = queue_index; - e->updated = jiffies; - sock_rps_record_flow_hash(e->rps_rxhash); - } else { - spin_lock_bh(&tun->lock); - if (!tun_flow_find(head, rxhash) && - tun->flow_count < MAX_TAP_FLOWS) - tun_flow_create(tun, head, rxhash, queue_index); - - if (!timer_pending(&tun->flow_gc_timer)) - mod_timer(&tun->flow_gc_timer, - round_jiffies_up(jiffies + delay)); - spin_unlock_bh(&tun->lock); - } - -unlock: - rcu_read_unlock(); -} - -/** - * Save the hash received in the stack receive path and update the - * flow_hash table accordingly. - */ -static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash) -{ - if (unlikely(e->rps_rxhash != hash)) - e->rps_rxhash = hash; -} - -/* We try to identify a flow through its rxhash first. The reason that - * we do not check rxq no. is because some cards(e.g 82599), chooses - * the rxq based on the txq where the last packet of the flow comes. As - * the userspace application move between processors, we may get a - * different rxq no. here. If we could not get rxhash, then we would - * hope the rxq no. may help here. 
- */ -static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb, - void *accel_priv, select_queue_fallback_t fallback) -{ - struct tun_struct *tun = netdev_priv(dev); - struct tun_flow_entry *e; - u32 txq = 0; - u32 numqueues = 0; - - rcu_read_lock(); - numqueues = ACCESS_ONCE(tun->numqueues); - - txq = skb_get_hash(skb); - if (txq) { - e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq); - if (e) { - tun_flow_save_rps_rxhash(e, txq); - txq = e->queue_index; - } else - /* use multiply and shift instead of expensive divide */ - txq = ((u64)txq * numqueues) >> 32; - } else if (likely(skb_rx_queue_recorded(skb))) { - txq = skb_get_rx_queue(skb); - while (unlikely(txq >= numqueues)) - txq -= numqueues; - } - - rcu_read_unlock(); - return txq; -} - -static inline bool tun_not_capable(struct tun_struct *tun) -{ - const struct cred *cred = current_cred(); - struct net *net = dev_net(tun->dev); - - return ((uid_valid(tun->owner) && !uid_eq(cred->euid, tun->owner)) || - (gid_valid(tun->group) && !in_egroup_p(tun->group))) && - !ns_capable(net->user_ns, CAP_NET_ADMIN); -} - -static void tun_set_real_num_queues(struct tun_struct *tun) -{ - netif_set_real_num_tx_queues(tun->dev, tun->numqueues); - netif_set_real_num_rx_queues(tun->dev, tun->numqueues); -} - -static void tun_disable_queue(struct tun_struct *tun, struct tun_file *tfile) -{ - tfile->detached = tun; - list_add_tail(&tfile->next, &tun->disabled); - ++tun->numdisabled; -} - -static struct tun_struct *tun_enable_queue(struct tun_file *tfile) -{ - struct tun_struct *tun = tfile->detached; - - tfile->detached = NULL; - list_del_init(&tfile->next); - --tun->numdisabled; - return tun; -} - -static void tun_queue_purge(struct tun_file *tfile) -{ - struct sk_buff *skb; - - while ((skb = skb_array_consume(&tfile->tx_array)) != NULL) - kfree_skb(skb); - - skb_queue_purge(&tfile->sk.sk_error_queue); -} - -static void tun_cleanup_tx_array(struct tun_file *tfile) -{ - if (tfile->tx_array.ring.queue) { - 
skb_array_cleanup(&tfile->tx_array); - memset(&tfile->tx_array, 0, sizeof(tfile->tx_array)); - } -} - -static void __tun_detach(struct tun_file *tfile, bool clean) -{ - struct tun_file *ntfile; - struct tun_struct *tun; - - tun = rtnl_dereference(tfile->tun); - - if (tun && !tfile->detached) { - u16 index = tfile->queue_index; - BUG_ON(index >= tun->numqueues); - - rcu_assign_pointer(tun->tfiles[index], - tun->tfiles[tun->numqueues - 1]); - ntfile = rtnl_dereference(tun->tfiles[index]); - ntfile->queue_index = index; - - --tun->numqueues; - if (clean) { - RCU_INIT_POINTER(tfile->tun, NULL); - sock_put(&tfile->sk); - } else - tun_disable_queue(tun, tfile); - - synchronize_net(); - tun_flow_delete_by_queue(tun, tun->numqueues + 1); - /* Drop read queue */ - tun_queue_purge(tfile); - tun_set_real_num_queues(tun); - } else if (tfile->detached && clean) { - tun = tun_enable_queue(tfile); - sock_put(&tfile->sk); - } - - if (clean) { - if (tun && tun->numqueues == 0 && tun->numdisabled == 0) { - netif_carrier_off(tun->dev); - - if (!(tun->flags & IFF_PERSIST) && - tun->dev->reg_state == NETREG_REGISTERED) - unregister_netdevice(tun->dev); - } - tun_cleanup_tx_array(tfile); - sock_put(&tfile->sk); - } -} - -static void tun_detach(struct tun_file *tfile, bool clean) -{ - rtnl_lock(); - __tun_detach(tfile, clean); - rtnl_unlock(); -} - -static void tun_detach_all(struct net_device *dev) -{ - struct tun_struct *tun = netdev_priv(dev); - struct tun_file *tfile, *tmp; - int i, n = tun->numqueues; - - for (i = 0; i < n; i++) { - tfile = rtnl_dereference(tun->tfiles[i]); - BUG_ON(!tfile); - tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN; - tfile->socket.sk->sk_data_ready(tfile->socket.sk); - RCU_INIT_POINTER(tfile->tun, NULL); - --tun->numqueues; - } - list_for_each_entry(tfile, &tun->disabled, next) { - tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN; - tfile->socket.sk->sk_data_ready(tfile->socket.sk); - RCU_INIT_POINTER(tfile->tun, NULL); - } - BUG_ON(tun->numqueues != 0); - - 
synchronize_net(); - for (i = 0; i < n; i++) { - tfile = rtnl_dereference(tun->tfiles[i]); - /* Drop read queue */ - tun_queue_purge(tfile); - sock_put(&tfile->sk); - tun_cleanup_tx_array(tfile); - } - list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) { - tun_enable_queue(tfile); - tun_queue_purge(tfile); - sock_put(&tfile->sk); - tun_cleanup_tx_array(tfile); - } - BUG_ON(tun->numdisabled != 0); - - if (tun->flags & IFF_PERSIST) - module_put(THIS_MODULE); -} - -static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filter) -{ - struct tun_file *tfile = file->private_data; - struct net_device *dev = tun->dev; - int err; - - err = security_tun_dev_attach(tfile->socket.sk, tun->security); - if (err < 0) - goto out; - - err = -EINVAL; - if (rtnl_dereference(tfile->tun) && !tfile->detached) - goto out; - - err = -EBUSY; - if (!(tun->flags & IFF_MULTI_QUEUE) && tun->numqueues == 1) - goto out; - - err = -E2BIG; - if (!tfile->detached && - tun->numqueues + tun->numdisabled == MAX_TAP_QUEUES) - goto out; - - err = 0; - - /* Re-attach the filter to persist device */ - if (!skip_filter && (tun->filter_attached == true)) { - lock_sock(tfile->socket.sk); - err = sk_attach_filter(&tun->fprog, tfile->socket.sk); - release_sock(tfile->socket.sk); - if (!err) - goto out; - } - - if (!tfile->detached && - skb_array_init(&tfile->tx_array, dev->tx_queue_len, GFP_KERNEL)) { - err = -ENOMEM; - goto out; - } - - tfile->queue_index = tun->numqueues; - tfile->socket.sk->sk_shutdown &= ~RCV_SHUTDOWN; - rcu_assign_pointer(tfile->tun, tun); - rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile); - tun->numqueues++; - - if (tfile->detached) - tun_enable_queue(tfile); - else - sock_hold(&tfile->sk); - - tun_set_real_num_queues(tun); - - /* device is allowed to go away first, so no need to hold extra - * refcnt. 
- */ - -out: - return err; -} - -static struct tun_struct *__tun_get(struct tun_file *tfile) -{ - struct tun_struct *tun; - - rcu_read_lock(); - tun = rcu_dereference(tfile->tun); - if (tun) - dev_hold(tun->dev); - rcu_read_unlock(); - - return tun; -} - -static struct tun_struct *tun_get(struct file *file) -{ - return __tun_get(file->private_data); -} - -static void tun_put(struct tun_struct *tun) -{ - dev_put(tun->dev); -} - -/* TAP filtering */ -static void addr_hash_set(u32 *mask, const u8 *addr) -{ - int n = ether_crc(ETH_ALEN, addr) >> 26; - mask[n >> 5] |= (1 << (n & 31)); -} - -static unsigned int addr_hash_test(const u32 *mask, const u8 *addr) -{ - int n = ether_crc(ETH_ALEN, addr) >> 26; - return mask[n >> 5] & (1 << (n & 31)); -} - -static int update_filter(struct tap_filter *filter, void __user *arg) -{ - struct { u8 u[ETH_ALEN]; } *addr; - struct tun_filter uf; - int err, alen, n, nexact; - - if (copy_from_user(&uf, arg, sizeof(uf))) - return -EFAULT; - - if (!uf.count) { - /* Disabled */ - filter->count = 0; - return 0; - } - - alen = ETH_ALEN * uf.count; - addr = memdup_user(arg + sizeof(uf), alen); - if (IS_ERR(addr)) - return PTR_ERR(addr); - - /* The filter is updated without holding any locks. Which is - * perfectly safe. We disable it first and in the worst - * case we'll accept a few undesired packets. */ - filter->count = 0; - wmb(); - - /* Use first set of addresses as an exact filter */ - for (n = 0; n < uf.count && n < FLT_EXACT_COUNT; n++) - memcpy(filter->addr[n], addr[n].u, ETH_ALEN); - - nexact = n; - - /* Remaining multicast addresses are hashed, - * unicast will leave the filter disabled. */ - memset(filter->mask, 0, sizeof(filter->mask)); - for (; n < uf.count; n++) { - if (!is_multicast_ether_addr(addr[n].u)) { - err = 0; /* no filter */ - goto free_addr; - } - addr_hash_set(filter->mask, addr[n].u); - } - - /* For ALLMULTI just set the mask to all ones. - * This overrides the mask populated above. 
*/ - if ((uf.flags & TUN_FLT_ALLMULTI)) - memset(filter->mask, ~0, sizeof(filter->mask)); - - /* Now enable the filter */ - wmb(); - filter->count = nexact; - - /* Return the number of exact filters */ - err = nexact; -free_addr: - kfree(addr); - return err; -} - -/* Returns: 0 - drop, !=0 - accept */ -static int run_filter(struct tap_filter *filter, const struct sk_buff *skb) -{ - /* Cannot use eth_hdr(skb) here because skb_mac_hdr() is incorrect - * at this point. */ - struct ethhdr *eh = (struct ethhdr *) skb->data; - int i; - - /* Exact match */ - for (i = 0; i < filter->count; i++) - if (ether_addr_equal(eh->h_dest, filter->addr[i])) - return 1; - - /* Inexact match (multicast only) */ - if (is_multicast_ether_addr(eh->h_dest)) - return addr_hash_test(filter->mask, eh->h_dest); - - return 0; -} - -/* - * Checks whether the packet is accepted or not. - * Returns: 0 - drop, !=0 - accept - */ -static int check_filter(struct tap_filter *filter, const struct sk_buff *skb) -{ - if (!filter->count) - return 1; - - return run_filter(filter, skb); -} - -/* Network device part of the driver */ - -static const struct ethtool_ops tun_ethtool_ops; - -/* Net device detach from fd. */ -static void tun_net_uninit(struct net_device *dev) -{ - tun_detach_all(dev); -} - -/* Net device open. */ -static int tun_net_open(struct net_device *dev) -{ - struct tun_struct *tun = netdev_priv(dev); - int i; - - netif_tx_start_all_queues(dev); - - for (i = 0; i < tun->numqueues; i++) { - struct tun_file *tfile; - - tfile = rtnl_dereference(tun->tfiles[i]); - tfile->socket.sk->sk_write_space(tfile->socket.sk); - } - - return 0; -} - -/* Net device close. 
*/ -static int tun_net_close(struct net_device *dev) -{ - netif_tx_stop_all_queues(dev); - return 0; -} - -/* Net device start xmit */ -static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct tun_struct *tun = netdev_priv(dev); - int txq = skb->queue_mapping; - struct tun_file *tfile; - u32 numqueues = 0; - - rcu_read_lock(); - tfile = rcu_dereference(tun->tfiles[txq]); - numqueues = ACCESS_ONCE(tun->numqueues); - - /* Drop packet if interface is not attached */ - if (txq >= numqueues) - goto drop; - -#ifdef CONFIG_RPS - if (numqueues == 1 && static_key_false(&rps_needed)) { - /* Select queue was not called for the skbuff, so we extract the - * RPS hash and save it into the flow_table here. - */ - __u32 rxhash; - - rxhash = skb_get_hash(skb); - if (rxhash) { - struct tun_flow_entry *e; - e = tun_flow_find(&tun->flows[tun_hashfn(rxhash)], - rxhash); - if (e) - tun_flow_save_rps_rxhash(e, rxhash); - } - } -#endif - - tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len); - - BUG_ON(!tfile); - - /* Drop if the filter does not like it. - * This is a noop if the filter is disabled. - * Filter can be enabled only for the TAP devices. */ - if (!check_filter(&tun->txflt, skb)) - goto drop; - - if (tfile->socket.sk->sk_filter && - sk_filter(tfile->socket.sk, skb)) - goto drop; - - /* Limit the number of packets queued by dividing txq length with the - * number of queues. - */ - if (skb_queue_len(&tfile->socket.sk->sk_receive_queue) * numqueues - >= dev->tx_queue_len) - goto drop; - - if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) - goto drop; - - skb_tx_timestamp(skb); - - /* Orphan the skb - required as we might hang on to it - * for indefinite time. 
- */ - skb_orphan(skb); - - nf_reset(skb); - - if (skb_array_produce(&tfile->tx_array, skb)) - goto drop; - - /* Notify and wake up reader process */ - if (tfile->flags & TUN_FASYNC) - kill_fasync(&tfile->fasync, SIGIO, POLL_IN); - tfile->socket.sk->sk_data_ready(tfile->socket.sk); - - rcu_read_unlock(); - return NETDEV_TX_OK; - -drop: - this_cpu_inc(tun->pcpu_stats->tx_dropped); - skb_tx_error(skb); - kfree_skb(skb); - rcu_read_unlock(); - return NET_XMIT_DROP; -} - -static void tun_net_mclist(struct net_device *dev) -{ - /* - * This callback is supposed to deal with mc filter in - * _rx_ path and has nothing to do with the _tx_ path. - * In rx path we always accept everything userspace gives us. - */ -} - -#define MIN_MTU 68 -#define MAX_MTU 65535 - -static int -tun_net_change_mtu(struct net_device *dev, int new_mtu) -{ - if (new_mtu < MIN_MTU || new_mtu + dev->hard_header_len > MAX_MTU) - return -EINVAL; - dev->mtu = new_mtu; - return 0; -} - -static netdev_features_t tun_net_fix_features(struct net_device *dev, - netdev_features_t features) -{ - struct tun_struct *tun = netdev_priv(dev); - - return (features & tun->set_features) | (features & ~TUN_USER_FEATURES); -} -#ifdef CONFIG_NET_POLL_CONTROLLER -static void tun_poll_controller(struct net_device *dev) -{ - /* - * Tun only receives frames when: - * 1) the char device endpoint gets data from user space - * 2) the tun socket gets a sendmsg call from user space - * Since both of those are synchronous operations, we are guaranteed - * never to have pending data when we poll for it - * so there is nothing to do here but return. 
- * We need this though so netpoll recognizes us as an interface that - * supports polling, which enables bridge devices in virt setups to - * still use netconsole - */ - return; -} -#endif - -static void tun_set_headroom(struct net_device *dev, int new_hr) -{ - struct tun_struct *tun = netdev_priv(dev); - - if (new_hr < NET_SKB_PAD) - new_hr = NET_SKB_PAD; - - tun->align = new_hr; -} - -static struct rtnl_link_stats64 * -tun_net_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) -{ - u32 rx_dropped = 0, tx_dropped = 0, rx_frame_errors = 0; - struct tun_struct *tun = netdev_priv(dev); - struct tun_pcpu_stats *p; - int i; - - for_each_possible_cpu(i) { - u64 rxpackets, rxbytes, txpackets, txbytes; - unsigned int start; - - p = per_cpu_ptr(tun->pcpu_stats, i); - do { - start = u64_stats_fetch_begin(&p->syncp); - rxpackets = p->rx_packets; - rxbytes = p->rx_bytes; - txpackets = p->tx_packets; - txbytes = p->tx_bytes; - } while (u64_stats_fetch_retry(&p->syncp, start)); - - stats->rx_packets += rxpackets; - stats->rx_bytes += rxbytes; - stats->tx_packets += txpackets; - stats->tx_bytes += txbytes; - - /* u32 counters */ - rx_dropped += p->rx_dropped; - rx_frame_errors += p->rx_frame_errors; - tx_dropped += p->tx_dropped; - } - stats->rx_dropped = rx_dropped; - stats->rx_frame_errors = rx_frame_errors; - stats->tx_dropped = tx_dropped; - return stats; -} - -static int -tun_change_carrier(struct net_device *dev, bool new_carrier) { - if (new_carrier) - netif_carrier_on(dev); - else - netif_carrier_off(dev); - return 0; -} - -static const struct net_device_ops tun_netdev_ops = { - .ndo_uninit = tun_net_uninit, - .ndo_open = tun_net_open, - .ndo_stop = tun_net_close, - .ndo_start_xmit = tun_net_xmit, - .ndo_change_mtu = tun_net_change_mtu, - .ndo_fix_features = tun_net_fix_features, - .ndo_select_queue = tun_select_queue, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = tun_poll_controller, -#endif - .ndo_set_rx_headroom = tun_set_headroom, - 
.ndo_get_stats64 = tun_net_get_stats64, - .ndo_change_carrier = tun_change_carrier, -}; - -static const struct net_device_ops tap_netdev_ops = { - .ndo_uninit = tun_net_uninit, - .ndo_open = tun_net_open, - .ndo_stop = tun_net_close, - .ndo_start_xmit = tun_net_xmit, - .ndo_change_mtu = tun_net_change_mtu, - .ndo_fix_features = tun_net_fix_features, - .ndo_set_rx_mode = tun_net_mclist, - .ndo_set_mac_address = eth_mac_addr, - .ndo_validate_addr = eth_validate_addr, - .ndo_select_queue = tun_select_queue, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = tun_poll_controller, -#endif - .ndo_features_check = passthru_features_check, - .ndo_set_rx_headroom = tun_set_headroom, - .ndo_get_stats64 = tun_net_get_stats64, -}; - -static void tun_flow_init(struct tun_struct *tun) -{ - int i; - - for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) - INIT_HLIST_HEAD(&tun->flows[i]); - - tun->ageing_time = TUN_FLOW_EXPIRE; - setup_timer(&tun->flow_gc_timer, tun_flow_cleanup, (unsigned long)tun); - mod_timer(&tun->flow_gc_timer, - round_jiffies_up(jiffies + tun->ageing_time)); -} - -static void tun_flow_uninit(struct tun_struct *tun) -{ - del_timer_sync(&tun->flow_gc_timer); - tun_flow_flush(tun); -} - -/* Initialize net device. 
*/ -static void tun_net_init(struct net_device *dev) -{ - struct tun_struct *tun = netdev_priv(dev); - - switch (tun->flags & TUN_TYPE_MASK) { - case IFF_TUN: - dev->netdev_ops = &tun_netdev_ops; - - /* Point-to-Point TUN Device */ - dev->hard_header_len = 0; - dev->addr_len = 0; - dev->mtu = 1500; - - /* Zero header length */ - dev->type = ARPHRD_NONE; - dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; - break; - - case IFF_TAP: - dev->netdev_ops = &tap_netdev_ops; - /* Ethernet TAP Device */ - ether_setup(dev); - dev->priv_flags &= ~IFF_TX_SKB_SHARING; - dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; - - eth_hw_addr_random(dev); - - break; - } -} - -/* Character device part */ - -/* Poll */ -static unsigned int tun_chr_poll(struct file *file, poll_table *wait) -{ - struct tun_file *tfile = file->private_data; - struct tun_struct *tun = __tun_get(tfile); - struct sock *sk; - unsigned int mask = 0; - - if (!tun) - return POLLERR; - - sk = tfile->socket.sk; - - tun_debug(KERN_INFO, tun, "tun_chr_poll\n"); - - poll_wait(file, sk_sleep(sk), wait); - - if (!skb_array_empty(&tfile->tx_array)) - mask |= POLLIN | POLLRDNORM; - - if (tun->dev->flags & IFF_UP && - (sock_writeable(sk) || - (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) && - sock_writeable(sk)))) - mask |= POLLOUT | POLLWRNORM; - - if (tun->dev->reg_state != NETREG_REGISTERED) - mask = POLLERR; - - tun_put(tun); - return mask; -} - -/* prepad is the amount to reserve at front. len is length after that. - * linear is a hint as to how much to copy (usually headers). */ -static struct sk_buff *tun_alloc_skb(struct tun_file *tfile, - size_t prepad, size_t len, - size_t linear, int noblock) -{ - struct sock *sk = tfile->socket.sk; - struct sk_buff *skb; - int err; - - /* Under a page? Don't bother with paged skb. 
*/ - if (prepad + len < PAGE_SIZE || !linear) - linear = len; - - skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock, - &err, 0); - if (!skb) - return ERR_PTR(err); - - skb_reserve(skb, prepad); - skb_put(skb, linear); - skb->data_len = len - linear; - skb->len += len - linear; - - return skb; -} - -/* Get packet from user space buffer */ -static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, - void *msg_control, struct iov_iter *from, - int noblock) -{ - struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) }; - struct sk_buff *skb; - size_t total_len = iov_iter_count(from); - size_t len = total_len, align = tun->align, linear; - struct virtio_net_hdr gso = { 0 }; - struct tun_pcpu_stats *stats; - int good_linear; - int copylen; - bool zerocopy = false; - int err; - u32 rxhash; - ssize_t n; - - if (!(tun->dev->flags & IFF_UP)) - return -EIO; - - if (!(tun->flags & IFF_NO_PI)) { - if (len < sizeof(pi)) - return -EINVAL; - len -= sizeof(pi); - - n = copy_from_iter(&pi, sizeof(pi), from); - if (n != sizeof(pi)) - return -EFAULT; - } - - if (tun->flags & IFF_VNET_HDR) { - int vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz); - - if (len < vnet_hdr_sz) - return -EINVAL; - len -= vnet_hdr_sz; - - n = copy_from_iter(&gso, sizeof(gso), from); - if (n != sizeof(gso)) - return -EFAULT; - - if ((gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && - tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2 > tun16_to_cpu(tun, gso.hdr_len)) - gso.hdr_len = cpu_to_tun16(tun, tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2); - - if (tun16_to_cpu(tun, gso.hdr_len) > len) - return -EINVAL; - iov_iter_advance(from, vnet_hdr_sz - sizeof(gso)); - } - - if ((tun->flags & TUN_TYPE_MASK) == IFF_TAP) { - align += NET_IP_ALIGN; - if (unlikely(len < ETH_HLEN || - (gso.hdr_len && tun16_to_cpu(tun, gso.hdr_len) < ETH_HLEN))) - return -EINVAL; - } - - good_linear = SKB_MAX_HEAD(align); - - if (msg_control) { - struct iov_iter i = 
*from; - - /* There are 256 bytes to be copied in skb, so there is - * enough room for skb expand head in case it is used. - * The rest of the buffer is mapped from userspace. - */ - copylen = gso.hdr_len ? tun16_to_cpu(tun, gso.hdr_len) : GOODCOPY_LEN; - if (copylen > good_linear) - copylen = good_linear; - linear = copylen; - iov_iter_advance(&i, copylen); - if (iov_iter_npages(&i, INT_MAX) <= MAX_SKB_FRAGS) - zerocopy = true; - } - - if (!zerocopy) { - copylen = len; - if (tun16_to_cpu(tun, gso.hdr_len) > good_linear) - linear = good_linear; - else - linear = tun16_to_cpu(tun, gso.hdr_len); - } - - skb = tun_alloc_skb(tfile, align, copylen, linear, noblock); - if (IS_ERR(skb)) { - if (PTR_ERR(skb) != -EAGAIN) - this_cpu_inc(tun->pcpu_stats->rx_dropped); - return PTR_ERR(skb); - } - - if (zerocopy) - err = zerocopy_sg_from_iter(skb, from); - else - err = skb_copy_datagram_from_iter(skb, 0, from, len); - - if (err) { - this_cpu_inc(tun->pcpu_stats->rx_dropped); - kfree_skb(skb); - return -EFAULT; - } - - err = virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun)); - if (err) { - this_cpu_inc(tun->pcpu_stats->rx_frame_errors); - kfree_skb(skb); - return -EINVAL; - } - - switch (tun->flags & TUN_TYPE_MASK) { - case IFF_TUN: - if (tun->flags & IFF_NO_PI) { - u8 ip_version = skb->len ? 
(skb->data[0] >> 4) : 0; - - switch (ip_version) { - case 4: - pi.proto = htons(ETH_P_IP); - break; - case 6: - pi.proto = htons(ETH_P_IPV6); - break; - default: - this_cpu_inc(tun->pcpu_stats->rx_dropped); - kfree_skb(skb); - return -EINVAL; - } - } - - skb_reset_mac_header(skb); - skb->protocol = pi.proto; - skb->dev = tun->dev; - break; - case IFF_TAP: - skb->protocol = eth_type_trans(skb, tun->dev); - break; - } - - /* copy skb_ubuf_info for callback when skb has no error */ - if (zerocopy) { - skb_shinfo(skb)->destructor_arg = msg_control; - skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; - skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; - } else if (msg_control) { - struct ubuf_info *uarg = msg_control; - uarg->callback(uarg, false); - } - - skb_reset_network_header(skb); - skb_probe_transport_header(skb, 0); - - rxhash = skb_get_hash(skb); - netif_rx_ni(skb); - - stats = get_cpu_ptr(tun->pcpu_stats); - u64_stats_update_begin(&stats->syncp); - stats->rx_packets++; - stats->rx_bytes += len; - u64_stats_update_end(&stats->syncp); - put_cpu_ptr(stats); - - tun_flow_update(tun, rxhash, tfile); - return total_len; -} - -static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from) -{ - struct file *file = iocb->ki_filp; - struct tun_struct *tun = tun_get(file); - struct tun_file *tfile = file->private_data; - ssize_t result; - - if (!tun) - return -EBADFD; - - result = tun_get_user(tun, tfile, NULL, from, file->f_flags & O_NONBLOCK); - - tun_put(tun); - return result; -} - -/* Put packet to the user space buffer */ -static ssize_t tun_put_user(struct tun_struct *tun, - struct tun_file *tfile, - struct sk_buff *skb, - struct iov_iter *iter) -{ - struct tun_pi pi = { 0, skb->protocol }; - struct tun_pcpu_stats *stats; - ssize_t total; - int vlan_offset = 0; - int vlan_hlen = 0; - int vnet_hdr_sz = 0; - - if (skb_vlan_tag_present(skb)) - vlan_hlen = VLAN_HLEN; - - if (tun->flags & IFF_VNET_HDR) - vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz); - - total = 
skb->len + vlan_hlen + vnet_hdr_sz; - - if (!(tun->flags & IFF_NO_PI)) { - if (iov_iter_count(iter) < sizeof(pi)) - return -EINVAL; - - total += sizeof(pi); - if (iov_iter_count(iter) < total) { - /* Packet will be striped */ - pi.flags |= TUN_PKT_STRIP; - } - - if (copy_to_iter(&pi, sizeof(pi), iter) != sizeof(pi)) - return -EFAULT; - } - - if (vnet_hdr_sz) { - struct virtio_net_hdr gso = { 0 }; /* no info leak */ - int ret; - - if (iov_iter_count(iter) < vnet_hdr_sz) - return -EINVAL; - - ret = virtio_net_hdr_from_skb(skb, &gso, - tun_is_little_endian(tun), true); - if (ret) { - struct skb_shared_info *sinfo = skb_shinfo(skb); - pr_err("unexpected GSO type: " - "0x%x, gso_size %d, hdr_len %d\n", - sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size), - tun16_to_cpu(tun, gso.hdr_len)); - print_hex_dump(KERN_ERR, "tun: ", - DUMP_PREFIX_NONE, - 16, 1, skb->head, - min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true); - WARN_ON_ONCE(1); - return -EINVAL; - } - - if (copy_to_iter(&gso, sizeof(gso), iter) != sizeof(gso)) - return -EFAULT; - - iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso)); - } - - if (vlan_hlen) { - int ret; - struct { - __be16 h_vlan_proto; - __be16 h_vlan_TCI; - } veth; - - veth.h_vlan_proto = skb->vlan_proto; - veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb)); - - vlan_offset = offsetof(struct vlan_ethhdr, h_vlan_proto); - - ret = skb_copy_datagram_iter(skb, 0, iter, vlan_offset); - if (ret || !iov_iter_count(iter)) - goto done; - - ret = copy_to_iter(&veth, sizeof(veth), iter); - if (ret != sizeof(veth) || !iov_iter_count(iter)) - goto done; - } - - skb_copy_datagram_iter(skb, vlan_offset, iter, skb->len - vlan_offset); - -done: - /* caller is in process context, */ - stats = get_cpu_ptr(tun->pcpu_stats); - u64_stats_update_begin(&stats->syncp); - stats->tx_packets++; - stats->tx_bytes += skb->len + vlan_hlen; - u64_stats_update_end(&stats->syncp); - put_cpu_ptr(tun->pcpu_stats); - - return total; -} - -static struct sk_buff *tun_ring_recv(struct 
tun_file *tfile, int noblock, - int *err) -{ - DECLARE_WAITQUEUE(wait, current); - struct sk_buff *skb = NULL; - int error = 0; - - skb = skb_array_consume(&tfile->tx_array); - if (skb) - goto out; - if (noblock) { - error = -EAGAIN; - goto out; - } - - add_wait_queue(&tfile->wq.wait, &wait); - current->state = TASK_INTERRUPTIBLE; - - while (1) { - skb = skb_array_consume(&tfile->tx_array); - if (skb) - break; - if (signal_pending(current)) { - error = -ERESTARTSYS; - break; - } - if (tfile->socket.sk->sk_shutdown & RCV_SHUTDOWN) { - error = -EFAULT; - break; - } - - schedule(); - } - - current->state = TASK_RUNNING; - remove_wait_queue(&tfile->wq.wait, &wait); - -out: - *err = error; - return skb; -} - -static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile, - struct iov_iter *to, - int noblock) -{ - struct sk_buff *skb; - ssize_t ret; - int err; - - tun_debug(KERN_INFO, tun, "tun_do_read\n"); - - if (!iov_iter_count(to)) - return 0; - - /* Read frames from ring */ - skb = tun_ring_recv(tfile, noblock, &err); - if (!skb) - return err; - - ret = tun_put_user(tun, tfile, skb, to); - if (unlikely(ret < 0)) - kfree_skb(skb); - else - consume_skb(skb); - - return ret; -} - -static ssize_t tun_chr_read_iter(struct kiocb *iocb, struct iov_iter *to) -{ - struct file *file = iocb->ki_filp; - struct tun_file *tfile = file->private_data; - struct tun_struct *tun = __tun_get(tfile); - ssize_t len = iov_iter_count(to), ret; - - if (!tun) - return -EBADFD; - ret = tun_do_read(tun, tfile, to, file->f_flags & O_NONBLOCK); - ret = min_t(ssize_t, ret, len); - if (ret > 0) - iocb->ki_pos = ret; - tun_put(tun); - return ret; -} - -static void tun_free_netdev(struct net_device *dev) -{ - struct tun_struct *tun = netdev_priv(dev); - - BUG_ON(!(list_empty(&tun->disabled))); - free_percpu(tun->pcpu_stats); - tun_flow_uninit(tun); - security_tun_dev_free_security(tun->security); - free_netdev(dev); -} - -static void tun_setup(struct net_device *dev) -{ - struct 
tun_struct *tun = netdev_priv(dev); - - tun->owner = INVALID_UID; - tun->group = INVALID_GID; - - dev->ethtool_ops = &tun_ethtool_ops; - dev->destructor = tun_free_netdev; - /* We prefer our own queue length */ - dev->tx_queue_len = TUN_READQ_SIZE; -} - -/* Trivial set of netlink ops to allow deleting tun or tap - * device with netlink. - */ -static int tun_validate(struct nlattr *tb[], struct nlattr *data[]) -{ - return -EINVAL; -} - -static struct rtnl_link_ops tun_link_ops __read_mostly = { - .kind = DRV_NAME, - .priv_size = sizeof(struct tun_struct), - .setup = tun_setup, - .validate = tun_validate, -}; - -static void tun_sock_write_space(struct sock *sk) -{ - struct tun_file *tfile; - wait_queue_head_t *wqueue; - - if (!sock_writeable(sk)) - return; - - if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags)) - return; - - wqueue = sk_sleep(sk); - if (wqueue && waitqueue_active(wqueue)) - wake_up_interruptible_sync_poll(wqueue, POLLOUT | - POLLWRNORM | POLLWRBAND); - - tfile = container_of(sk, struct tun_file, sk); - kill_fasync(&tfile->fasync, SIGIO, POLL_OUT); -} - -static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) -{ - int ret; - struct tun_file *tfile = container_of(sock, struct tun_file, socket); - struct tun_struct *tun = __tun_get(tfile); - - if (!tun) - return -EBADFD; - - ret = tun_get_user(tun, tfile, m->msg_control, &m->msg_iter, - m->msg_flags & MSG_DONTWAIT); - tun_put(tun); - return ret; -} - -static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len, - int flags) -{ - struct tun_file *tfile = container_of(sock, struct tun_file, socket); - struct tun_struct *tun = __tun_get(tfile); - int ret; - - if (!tun) - return -EBADFD; - - if (flags & ~(MSG_DONTWAIT|MSG_TRUNC|MSG_ERRQUEUE)) { - ret = -EINVAL; - goto out; - } - if (flags & MSG_ERRQUEUE) { - ret = sock_recv_errqueue(sock->sk, m, total_len, - SOL_PACKET, TUN_TX_TIMESTAMP); - goto out; - } - ret = tun_do_read(tun, tfile, 
&m->msg_iter, flags & MSG_DONTWAIT); - if (ret > (ssize_t)total_len) { - m->msg_flags |= MSG_TRUNC; - ret = flags & MSG_TRUNC ? ret : total_len; - } -out: - tun_put(tun); - return ret; -} - -static int tun_peek_len(struct socket *sock) -{ - struct tun_file *tfile = container_of(sock, struct tun_file, socket); - struct tun_struct *tun; - int ret = 0; - - tun = __tun_get(tfile); - if (!tun) - return 0; - - ret = skb_array_peek_len(&tfile->tx_array); - tun_put(tun); - - return ret; -} - -/* Ops structure to mimic raw sockets with tun */ -static const struct proto_ops tun_socket_ops = { - .peek_len = tun_peek_len, - .sendmsg = tun_sendmsg, - .recvmsg = tun_recvmsg, -}; - -static struct proto tun_proto = { - .name = "tun", - .owner = THIS_MODULE, - .obj_size = sizeof(struct tun_file), -}; - -static int tun_flags(struct tun_struct *tun) -{ - return tun->flags & (TUN_FEATURES | IFF_PERSIST | IFF_TUN | IFF_TAP); -} - -static ssize_t tun_show_flags(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct tun_struct *tun = netdev_priv(to_net_dev(dev)); - return sprintf(buf, "0x%x\n", tun_flags(tun)); -} - -static ssize_t tun_show_owner(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct tun_struct *tun = netdev_priv(to_net_dev(dev)); - return uid_valid(tun->owner)? - sprintf(buf, "%u\n", - from_kuid_munged(current_user_ns(), tun->owner)): - sprintf(buf, "-1\n"); -} - -static ssize_t tun_show_group(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct tun_struct *tun = netdev_priv(to_net_dev(dev)); - return gid_valid(tun->group) ? 
- sprintf(buf, "%u\n", - from_kgid_munged(current_user_ns(), tun->group)): - sprintf(buf, "-1\n"); -} - -static DEVICE_ATTR(tun_flags, 0444, tun_show_flags, NULL); -static DEVICE_ATTR(owner, 0444, tun_show_owner, NULL); -static DEVICE_ATTR(group, 0444, tun_show_group, NULL); - -static struct attribute *tun_dev_attrs[] = { - &dev_attr_tun_flags.attr, - &dev_attr_owner.attr, - &dev_attr_group.attr, - NULL -}; - -static const struct attribute_group tun_attr_group = { - .attrs = tun_dev_attrs -}; - -static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) -{ - struct tun_struct *tun; - struct tun_file *tfile = file->private_data; - struct net_device *dev; - int err; - - if (tfile->detached) - return -EINVAL; - - dev = __dev_get_by_name(net, ifr->ifr_name); - if (dev) { - if (ifr->ifr_flags & IFF_TUN_EXCL) - return -EBUSY; - if ((ifr->ifr_flags & IFF_TUN) && dev->netdev_ops == &tun_netdev_ops) - tun = netdev_priv(dev); - else if ((ifr->ifr_flags & IFF_TAP) && dev->netdev_ops == &tap_netdev_ops) - tun = netdev_priv(dev); - else - return -EINVAL; - - if (!!(ifr->ifr_flags & IFF_MULTI_QUEUE) != - !!(tun->flags & IFF_MULTI_QUEUE)) - return -EINVAL; - - if (tun_not_capable(tun)) - return -EPERM; - err = security_tun_dev_open(tun->security); - if (err < 0) - return err; - - err = tun_attach(tun, file, ifr->ifr_flags & IFF_NOFILTER); - if (err < 0) - return err; - - if (tun->flags & IFF_MULTI_QUEUE && - (tun->numqueues + tun->numdisabled > 1)) { - /* One or more queue has already been attached, no need - * to initialize the device again. - */ - return 0; - } - } - else { - char *name; - unsigned long flags = 0; - int queues = ifr->ifr_flags & IFF_MULTI_QUEUE ? 
- MAX_TAP_QUEUES : 1; - - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - return -EPERM; - err = security_tun_dev_create(); - if (err < 0) - return err; - - /* Set dev type */ - if (ifr->ifr_flags & IFF_TUN) { - /* TUN device */ - flags |= IFF_TUN; - name = "tun%d"; - } else if (ifr->ifr_flags & IFF_TAP) { - /* TAP device */ - flags |= IFF_TAP; - name = "tap%d"; - } else - return -EINVAL; - - if (*ifr->ifr_name) - name = ifr->ifr_name; - - dev = alloc_netdev_mqs(sizeof(struct tun_struct), name, - NET_NAME_UNKNOWN, tun_setup, queues, - queues); - - if (!dev) - return -ENOMEM; -#if 0 - err = dev_get_valid_name(net, dev, name); - if (err < 0) - goto err_free_dev; -#endif - dev_net_set(dev, net); - dev->rtnl_link_ops = &tun_link_ops; - dev->ifindex = tfile->ifindex; - dev->sysfs_groups[0] = &tun_attr_group; - - tun = netdev_priv(dev); - tun->dev = dev; - tun->flags = flags; - tun->txflt.count = 0; - tun->vnet_hdr_sz = sizeof(struct virtio_net_hdr); - - tun->align = NET_SKB_PAD; - tun->filter_attached = false; - tun->sndbuf = tfile->socket.sk->sk_sndbuf; - - tun->pcpu_stats = netdev_alloc_pcpu_stats(struct tun_pcpu_stats); - if (!tun->pcpu_stats) { - err = -ENOMEM; - goto err_free_dev; - } - - spin_lock_init(&tun->lock); - - err = security_tun_dev_alloc_security(&tun->security); - if (err < 0) - goto err_free_stat; - - tun_net_init(dev); - tun_flow_init(tun); - - dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | - TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX; - dev->features = dev->hw_features | NETIF_F_LLTX; - dev->vlan_features = dev->features & - ~(NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX); - - INIT_LIST_HEAD(&tun->disabled); - err = tun_attach(tun, file, false); - if (err < 0) - goto err_free_flow; - - err = register_netdevice(tun->dev); - if (err < 0) - goto err_detach; - } - - netif_carrier_on(tun->dev); - - tun_debug(KERN_INFO, tun, "tun_set_iff\n"); - - tun->flags = (tun->flags & ~TUN_FEATURES) | - (ifr->ifr_flags & 
TUN_FEATURES); - - /* Make sure persistent devices do not get stuck in - * xoff state. - */ - if (netif_running(tun->dev)) - netif_tx_wake_all_queues(tun->dev); - - strcpy(ifr->ifr_name, tun->dev->name); - return 0; - -err_detach: - tun_detach_all(dev); -err_free_flow: - tun_flow_uninit(tun); - security_tun_dev_free_security(tun->security); -err_free_stat: - free_percpu(tun->pcpu_stats); -err_free_dev: - free_netdev(dev); - return err; -} - -static void tun_get_iff(struct net *net, struct tun_struct *tun, - struct ifreq *ifr) -{ - tun_debug(KERN_INFO, tun, "tun_get_iff\n"); - - strcpy(ifr->ifr_name, tun->dev->name); - - ifr->ifr_flags = tun_flags(tun); - -} - -/* This is like a cut-down ethtool ops, except done via tun fd so no - * privs required. */ -static int set_offload(struct tun_struct *tun, unsigned long arg) -{ - netdev_features_t features = 0; - - if (arg & TUN_F_CSUM) { - features |= NETIF_F_HW_CSUM; - arg &= ~TUN_F_CSUM; - - if (arg & (TUN_F_TSO4|TUN_F_TSO6)) { - if (arg & TUN_F_TSO_ECN) { - features |= NETIF_F_TSO_ECN; - arg &= ~TUN_F_TSO_ECN; - } - if (arg & TUN_F_TSO4) - features |= NETIF_F_TSO; - if (arg & TUN_F_TSO6) - features |= NETIF_F_TSO6; - arg &= ~(TUN_F_TSO4|TUN_F_TSO6); - } - - if (arg & TUN_F_UFO) { - features |= NETIF_F_UFO; - arg &= ~TUN_F_UFO; - } - } - - /* This gives the user a way to test for new features in future by - * trying to set them. 
*/ - if (arg) - return -EINVAL; - - tun->set_features = features; - netdev_update_features(tun->dev); - - return 0; -} - -static void tun_detach_filter(struct tun_struct *tun, int n) -{ - int i; - struct tun_file *tfile; - - for (i = 0; i < n; i++) { - tfile = rtnl_dereference(tun->tfiles[i]); - lock_sock(tfile->socket.sk); - sk_detach_filter(tfile->socket.sk); - release_sock(tfile->socket.sk); - } - - tun->filter_attached = false; -} - -static int tun_attach_filter(struct tun_struct *tun) -{ - int i, ret = 0; - struct tun_file *tfile; - - for (i = 0; i < tun->numqueues; i++) { - tfile = rtnl_dereference(tun->tfiles[i]); - lock_sock(tfile->socket.sk); - ret = sk_attach_filter(&tun->fprog, tfile->socket.sk); - release_sock(tfile->socket.sk); - if (ret) { - tun_detach_filter(tun, i); - return ret; - } - } - - tun->filter_attached = true; - return ret; -} - -static void tun_set_sndbuf(struct tun_struct *tun) -{ - struct tun_file *tfile; - int i; - - for (i = 0; i < tun->numqueues; i++) { - tfile = rtnl_dereference(tun->tfiles[i]); - tfile->socket.sk->sk_sndbuf = tun->sndbuf; - } -} - -static int tun_set_queue(struct file *file, struct ifreq *ifr) -{ - struct tun_file *tfile = file->private_data; - struct tun_struct *tun; - int ret = 0; - - rtnl_lock(); - - if (ifr->ifr_flags & IFF_ATTACH_QUEUE) { - tun = tfile->detached; - if (!tun) { - ret = -EINVAL; - goto unlock; - } - ret = security_tun_dev_attach_queue(tun->security); - if (ret < 0) - goto unlock; - ret = tun_attach(tun, file, false); - } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) { - tun = rtnl_dereference(tfile->tun); - if (!tun || !(tun->flags & IFF_MULTI_QUEUE) || tfile->detached) - ret = -EINVAL; - else - __tun_detach(tfile, false); - } else - ret = -EINVAL; - -unlock: - rtnl_unlock(); - return ret; -} - -static long __tun_chr_ioctl(struct file *file, unsigned int cmd, - unsigned long arg, int ifreq_len) -{ - struct tun_file *tfile = file->private_data; - struct tun_struct *tun; - void __user* argp = (void 
__user*)arg; - struct ifreq ifr; - kuid_t owner; - kgid_t group; - int sndbuf; - int vnet_hdr_sz; - unsigned int ifindex; - int le; - int ret; - - if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || _IOC_TYPE(cmd) == 0x89) { - if (copy_from_user(&ifr, argp, ifreq_len)) - return -EFAULT; - } else { - memset(&ifr, 0, sizeof(ifr)); - } - if (cmd == TUNGETFEATURES) { - /* Currently this just means: "what IFF flags are valid?". - * This is needed because we never checked for invalid flags on - * TUNSETIFF. - */ - return put_user(IFF_TUN | IFF_TAP | TUN_FEATURES, - (unsigned int __user*)argp); - } else if (cmd == TUNSETQUEUE) - return tun_set_queue(file, &ifr); - - ret = 0; - rtnl_lock(); - - tun = __tun_get(tfile); - if (cmd == TUNSETIFF && !tun) { - ifr.ifr_name[IFNAMSIZ-1] = '\0'; - - ret = tun_set_iff(sock_net(&tfile->sk), file, &ifr); - - if (ret) - goto unlock; - - if (copy_to_user(argp, &ifr, ifreq_len)) - ret = -EFAULT; - goto unlock; - } - if (cmd == TUNSETIFINDEX) { - ret = -EPERM; - if (tun) - goto unlock; - - ret = -EFAULT; - if (copy_from_user(&ifindex, argp, sizeof(ifindex))) - goto unlock; - - ret = 0; - tfile->ifindex = ifindex; - goto unlock; - } - - ret = -EBADFD; - if (!tun) - goto unlock; - - tun_debug(KERN_INFO, tun, "tun_chr_ioctl cmd %u\n", cmd); - - ret = 0; - switch (cmd) { - case TUNGETIFF: - tun_get_iff(current->nsproxy->net_ns, tun, &ifr); - - if (tfile->detached) - ifr.ifr_flags |= IFF_DETACH_QUEUE; - if (!tfile->socket.sk->sk_filter) - ifr.ifr_flags |= IFF_NOFILTER; - - if (copy_to_user(argp, &ifr, ifreq_len)) - ret = -EFAULT; - break; - - case TUNSETNOCSUM: - /* Disable/Enable checksum */ - - /* [unimplemented] */ - tun_debug(KERN_INFO, tun, "ignored: set checksum %s\n", - arg ? "disabled" : "enabled"); - break; - - case TUNSETPERSIST: - /* Disable/Enable persist mode. Keep an extra reference to the - * module to prevent the module being unprobed. 
- */ - if (arg && !(tun->flags & IFF_PERSIST)) { - tun->flags |= IFF_PERSIST; - __module_get(THIS_MODULE); - } - if (!arg && (tun->flags & IFF_PERSIST)) { - tun->flags &= ~IFF_PERSIST; - module_put(THIS_MODULE); - } - - tun_debug(KERN_INFO, tun, "persist %s\n", - arg ? "enabled" : "disabled"); - break; - - case TUNSETOWNER: - /* Set owner of the device */ - owner = make_kuid(current_user_ns(), arg); - if (!uid_valid(owner)) { - ret = -EINVAL; - break; - } - tun->owner = owner; - tun_debug(KERN_INFO, tun, "owner set to %u\n", - from_kuid(&init_user_ns, tun->owner)); - break; - - case TUNSETGROUP: - /* Set group of the device */ - group = make_kgid(current_user_ns(), arg); - if (!gid_valid(group)) { - ret = -EINVAL; - break; - } - tun->group = group; - tun_debug(KERN_INFO, tun, "group set to %u\n", - from_kgid(&init_user_ns, tun->group)); - break; - - case TUNSETLINK: - /* Only allow setting the type when the interface is down */ - if (tun->dev->flags & IFF_UP) { - tun_debug(KERN_INFO, tun, - "Linktype set failed because interface is up\n"); - ret = -EBUSY; - } else { - tun->dev->type = (int) arg; - tun_debug(KERN_INFO, tun, "linktype set to %d\n", - tun->dev->type); - ret = 0; - } - break; - -#ifdef TUN_DEBUG - case TUNSETDEBUG: - tun->debug = arg; - break; -#endif - case TUNSETOFFLOAD: - ret = set_offload(tun, arg); - break; - - case TUNSETTXFILTER: - /* Can be set only for TAPs */ - ret = -EINVAL; - if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP) - break; - ret = update_filter(&tun->txflt, (void __user *)arg); - break; - - case SIOCGIFHWADDR: - /* Get hw address */ - memcpy(ifr.ifr_hwaddr.sa_data, tun->dev->dev_addr, ETH_ALEN); - ifr.ifr_hwaddr.sa_family = tun->dev->type; - if (copy_to_user(argp, &ifr, ifreq_len)) - ret = -EFAULT; - break; - - case SIOCSIFHWADDR: - /* Set hw address */ - tun_debug(KERN_DEBUG, tun, "set hw address: %pM\n", - ifr.ifr_hwaddr.sa_data); - - ret = dev_set_mac_address(tun->dev, &ifr.ifr_hwaddr); - break; - - case TUNGETSNDBUF: - sndbuf = 
tfile->socket.sk->sk_sndbuf; - if (copy_to_user(argp, &sndbuf, sizeof(sndbuf))) - ret = -EFAULT; - break; - - case TUNSETSNDBUF: - if (copy_from_user(&sndbuf, argp, sizeof(sndbuf))) { - ret = -EFAULT; - break; - } - if (sndbuf <= 0) { - ret = -EINVAL; - break; - } - - tun->sndbuf = sndbuf; - tun_set_sndbuf(tun); - break; - - case TUNGETVNETHDRSZ: - vnet_hdr_sz = tun->vnet_hdr_sz; - if (copy_to_user(argp, &vnet_hdr_sz, sizeof(vnet_hdr_sz))) - ret = -EFAULT; - break; - - case TUNSETVNETHDRSZ: - if (copy_from_user(&vnet_hdr_sz, argp, sizeof(vnet_hdr_sz))) { - ret = -EFAULT; - break; - } - if (vnet_hdr_sz < (int)sizeof(struct virtio_net_hdr)) { - ret = -EINVAL; - break; - } - - tun->vnet_hdr_sz = vnet_hdr_sz; - break; - - case TUNGETVNETLE: - le = !!(tun->flags & TUN_VNET_LE); - if (put_user(le, (int __user *)argp)) - ret = -EFAULT; - break; - - case TUNSETVNETLE: - if (get_user(le, (int __user *)argp)) { - ret = -EFAULT; - break; - } - if (le) - tun->flags |= TUN_VNET_LE; - else - tun->flags &= ~TUN_VNET_LE; - break; - - case TUNGETVNETBE: - ret = tun_get_vnet_be(tun, argp); - break; - - case TUNSETVNETBE: - ret = tun_set_vnet_be(tun, argp); - break; - - case TUNATTACHFILTER: - /* Can be set only for TAPs */ - ret = -EINVAL; - if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP) - break; - ret = -EFAULT; - if (copy_from_user(&tun->fprog, argp, sizeof(tun->fprog))) - break; - - ret = tun_attach_filter(tun); - break; - - case TUNDETACHFILTER: - /* Can be set only for TAPs */ - ret = -EINVAL; - if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP) - break; - ret = 0; - tun_detach_filter(tun, tun->numqueues); - break; - - case TUNGETFILTER: - ret = -EINVAL; - if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP) - break; - ret = -EFAULT; - if (copy_to_user(argp, &tun->fprog, sizeof(tun->fprog))) - break; - ret = 0; - break; - - default: - ret = -EINVAL; - break; - } - -unlock: - rtnl_unlock(); - if (tun) - tun_put(tun); - return ret; -} - -static long tun_chr_ioctl(struct file *file, - unsigned 
int cmd, unsigned long arg) -{ - return __tun_chr_ioctl(file, cmd, arg, sizeof (struct ifreq)); -} - -#ifdef CONFIG_COMPAT -static long tun_chr_compat_ioctl(struct file *file, - unsigned int cmd, unsigned long arg) -{ - switch (cmd) { - case TUNSETIFF: - case TUNGETIFF: - case TUNSETTXFILTER: - case TUNGETSNDBUF: - case TUNSETSNDBUF: - case SIOCGIFHWADDR: - case SIOCSIFHWADDR: - arg = (unsigned long)compat_ptr(arg); - break; - default: - arg = (compat_ulong_t)arg; - break; - } - - /* - * compat_ifreq is shorter than ifreq, so we must not access beyond - * the end of that structure. All fields that are used in this - * driver are compatible though, we don't need to convert the - * contents. - */ - return __tun_chr_ioctl(file, cmd, arg, sizeof(struct compat_ifreq)); -} -#endif /* CONFIG_COMPAT */ - -static int tun_chr_fasync(int fd, struct file *file, int on) -{ - struct tun_file *tfile = file->private_data; - int ret; - - if ((ret = fasync_helper(fd, file, on, &tfile->fasync)) < 0) - goto out; - - if (on) { - __f_setown(file, task_pid(current), PIDTYPE_PID, 0); - tfile->flags |= TUN_FASYNC; - } else - tfile->flags &= ~TUN_FASYNC; - ret = 0; -out: - return ret; -} - -static int tun_chr_open(struct inode *inode, struct file * file) -{ - struct net *net = current->nsproxy->net_ns; - struct tun_file *tfile; - - DBG1(KERN_INFO, "tunX: tun_chr_open\n"); - - tfile = (struct tun_file *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, - &tun_proto, 0); - if (!tfile) - return -ENOMEM; - RCU_INIT_POINTER(tfile->tun, NULL); - tfile->flags = 0; - tfile->ifindex = 0; - - init_waitqueue_head(&tfile->wq.wait); - RCU_INIT_POINTER(tfile->socket.wq, &tfile->wq); - - tfile->socket.file = file; - tfile->socket.ops = &tun_socket_ops; - - sock_init_data(&tfile->socket, &tfile->sk); - - tfile->sk.sk_write_space = tun_sock_write_space; - tfile->sk.sk_sndbuf = INT_MAX; - - file->private_data = tfile; - INIT_LIST_HEAD(&tfile->next); - - sock_set_flag(&tfile->sk, SOCK_ZEROCOPY); - - 
memset(&tfile->tx_array, 0, sizeof(tfile->tx_array)); - - return 0; -} - -static int tun_chr_close(struct inode *inode, struct file *file) -{ - struct tun_file *tfile = file->private_data; - - tun_detach(tfile, true); - - return 0; -} - -#ifdef CONFIG_PROC_FS -static void tun_chr_show_fdinfo(struct seq_file *m, struct file *f) -{ - struct tun_struct *tun; - struct ifreq ifr; - - memset(&ifr, 0, sizeof(ifr)); - - rtnl_lock(); - tun = tun_get(f); - if (tun) - tun_get_iff(current->nsproxy->net_ns, tun, &ifr); - rtnl_unlock(); - - if (tun) - tun_put(tun); - - seq_printf(m, "iff:\t%s\n", ifr.ifr_name); -} -#endif - -static const struct file_operations tun_fops = { - .owner = THIS_MODULE, - .llseek = no_llseek, - .read_iter = tun_chr_read_iter, - .write_iter = tun_chr_write_iter, - .poll = tun_chr_poll, - .unlocked_ioctl = tun_chr_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = tun_chr_compat_ioctl, -#endif - .open = tun_chr_open, - .release = tun_chr_close, - .fasync = tun_chr_fasync, -#ifdef CONFIG_PROC_FS - .show_fdinfo = tun_chr_show_fdinfo, -#endif -}; - -static struct miscdevice tun_miscdev = { - .minor = TUN_MINOR1, - .name = "bf_tun", - .nodename = "net/bf_tun", - .fops = &tun_fops, -}; - -/* ethtool interface */ - -static int tun_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) -{ - cmd->supported = 0; - cmd->advertising = 0; - ethtool_cmd_speed_set(cmd, SPEED_10); - cmd->duplex = DUPLEX_FULL; - cmd->port = PORT_TP; - cmd->phy_address = 0; - cmd->transceiver = XCVR_INTERNAL; - cmd->autoneg = AUTONEG_DISABLE; - cmd->maxtxpkt = 0; - cmd->maxrxpkt = 0; - return 0; -} - -static void tun_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) -{ - struct tun_struct *tun = netdev_priv(dev); - - strlcpy(info->driver, DRV_NAME, sizeof(info->driver)); - strlcpy(info->version, DRV_VERSION, sizeof(info->version)); - - switch (tun->flags & TUN_TYPE_MASK) { - case IFF_TUN: - strlcpy(info->bus_info, "tun", sizeof(info->bus_info)); - break; - case 
IFF_TAP: - strlcpy(info->bus_info, "tap", sizeof(info->bus_info)); - break; - } -} - -static u32 tun_get_msglevel(struct net_device *dev) -{ -#ifdef TUN_DEBUG - struct tun_struct *tun = netdev_priv(dev); - return tun->debug; -#else - return -EOPNOTSUPP; -#endif -} - -static void tun_set_msglevel(struct net_device *dev, u32 value) -{ -#ifdef TUN_DEBUG - struct tun_struct *tun = netdev_priv(dev); - tun->debug = value; -#endif -} - -static const struct ethtool_ops tun_ethtool_ops = { - .get_settings = tun_get_settings, - .get_drvinfo = tun_get_drvinfo, - .get_msglevel = tun_get_msglevel, - .set_msglevel = tun_set_msglevel, - .get_link = ethtool_op_get_link, - .get_ts_info = ethtool_op_get_ts_info, -}; - -static int tun_queue_resize(struct tun_struct *tun) -{ - struct net_device *dev = tun->dev; - struct tun_file *tfile; - struct skb_array **arrays; - int n = tun->numqueues + tun->numdisabled; - int ret, i; - - arrays = kmalloc(sizeof *arrays * n, GFP_KERNEL); - if (!arrays) - return -ENOMEM; - - for (i = 0; i < tun->numqueues; i++) { - tfile = rtnl_dereference(tun->tfiles[i]); - arrays[i] = &tfile->tx_array; - } - list_for_each_entry(tfile, &tun->disabled, next) - arrays[i++] = &tfile->tx_array; - - ret = skb_array_resize_multiple(arrays, n, - dev->tx_queue_len, GFP_KERNEL); - - kfree(arrays); - return ret; -} - -static int tun_device_event(struct notifier_block *unused, - unsigned long event, void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct tun_struct *tun = netdev_priv(dev); - - if (dev->rtnl_link_ops != &tun_link_ops) - return NOTIFY_DONE; - - switch (event) { - case NETDEV_CHANGE_TX_QUEUE_LEN: - if (tun_queue_resize(tun)) - return NOTIFY_BAD; - break; - default: - break; - } - - return NOTIFY_DONE; -} - -static struct notifier_block tun_notifier_block __read_mostly = { - .notifier_call = tun_device_event, -}; - -static int __init tun_init(void) -{ - int ret = 0; - - pr_info("%s, %s\n", DRV_DESCRIPTION, DRV_VERSION); - 
pr_info("%s\n", DRV_COPYRIGHT); - - ret = rtnl_link_register(&tun_link_ops); - if (ret) { - pr_err("Can't register link_ops\n"); - goto err_linkops; - } - - ret = misc_register(&tun_miscdev); - if (ret) { - pr_err("Can't register misc device %d\n", TUN_MINOR1); - goto err_misc; - } - - register_netdevice_notifier(&tun_notifier_block); - return 0; -err_misc: - rtnl_link_unregister(&tun_link_ops); -err_linkops: - return ret; -} - -static void tun_cleanup(void) -{ - misc_deregister(&tun_miscdev); - rtnl_link_unregister(&tun_link_ops); - unregister_netdevice_notifier(&tun_notifier_block); -} - -/* Get an underlying socket object from tun file. Returns error unless file is - * attached to a device. The returned object works like a packet socket, it - * can be used for sock_sendmsg/sock_recvmsg. The caller is responsible for - * holding a reference to the file for as long as the socket is in use. */ -struct socket *tun_get_socket(struct file *file) -{ - struct tun_file *tfile; - if (file->f_op != &tun_fops) - return ERR_PTR(-EINVAL); - tfile = file->private_data; - if (!tfile) - return ERR_PTR(-EBADFD); - return &tfile->socket; -} -EXPORT_SYMBOL_GPL(tun_get_socket); - -module_init(tun_init); -module_exit(tun_cleanup); -MODULE_DESCRIPTION(DRV_DESCRIPTION); -MODULE_AUTHOR(DRV_COPYRIGHT); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_MISCDEV(TUN_MINOR1); -MODULE_ALIAS("devname:net/bf_tun");