/* **********************************************************
 * Copyright 1999 VMware, Inc.  All rights reserved. -- VMware Confidential
 * **********************************************************/

/* 
 * vmxnet.c: A virtual network driver for VMware.
 */

#include "driver-config.h"

#include <linux/config.h>
#include <linux/module.h>

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 9)
#include <linux/moduleparam.h>
#endif
   
#include "compat_slab.h"
#include "compat_spinlock.h"
#include "compat_pci.h"
#include "compat_init.h"
#include <asm/dma.h>

#include "compat_netdevice.h"
#include <linux/etherdevice.h>
#include "compat_ioport.h"
#ifndef KERNEL_2_1
#   include <linux/delay.h>
#endif
#include "compat_interrupt.h"

#ifndef VMX86_TOOLS
#define VMX86_TOOLS
#endif
#include "vm_version.h"
#include "vm_basic_types.h"
#include "vmnet_def.h"
#include "vmxnet_def.h"
#include "vmxnet2_def.h"
#include "vm_device_version.h"
#include "vmxnetInt.h"
#include "net.h"

#ifdef DEBUG_OFFLOADING
#include <net/checksum.h>
/*
 * Debug-build switch consulted by vmxnet_tx(): when non-zero, outgoing TCP
 * packets get a pseudo-header checksum and are marked CHECKSUM_HW.
 * vmxnet_init() may replace the value with the forceTcpCsumOffload module
 * parameter.
 */
static int vmxnetForceTcpCsumOffload = 1;
#endif

/*
 * Driver verbosity.  vmxnet_init() may replace the value with the "debug"
 * module parameter; values above 1 enable the extra KERN_DEBUG messages in
 * vmxnet_open() and vmxnet_close().
 */
static int vmxnet_debug = 1;

/* Value stored in dev->watchdog_timeo by vmxnet_probe_device(), in jiffies. */
#define VMXNET_WATCHDOG_TIMEOUT (5 * HZ) 

/* Either symbol being defined means the poll_controller hook gets installed. */
#if defined(CONFIG_NET_POLL_CONTROLLER) || defined(HAVE_POLL_CONTROLLER)
#define VMW_HAVE_POLL_CONTROLLER
#endif

/* net_device entry points, installed on the device by vmxnet_probe_device(). */
static int vmxnet_open(struct net_device *dev);
static int vmxnet_start_tx(struct sk_buff *skb, struct net_device *dev);
/* Interrupt handler; registered with request_irq() in vmxnet_open(). */
static compat_irqreturn_t vmxnet_interrupt(int irq, void *dev_id, 
					   struct pt_regs * regs);
#ifdef VMW_HAVE_POLL_CONTROLLER
static void vmxnet_netpoll(struct net_device *dev);
#endif
static int vmxnet_close(struct net_device *dev);
static void vmxnet_set_multicast_list(struct net_device *dev);
static int vmxnet_set_mac_address(struct net_device *dev, void *addr);
static struct net_device_stats *vmxnet_get_stats(struct net_device *dev);

/* PCI driver entry points, referenced from vmxnet_driver below. */
static int vmxnet_probe_device(struct pci_dev *pdev, const struct pci_device_id *id);
static void vmxnet_remove_device(struct pci_dev *pdev);

#ifdef MODULE
/*
 * "debug" module parameter (read-only, mode 0444).  vmxnet_init() copies it
 * into vmxnet_debug.  Kernels from 2.6.9 on use module_param(); older ones
 * use MODULE_PARM().
 */
static int debug = -1;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 9)
   module_param(debug, int, 0444);
#else
   MODULE_PARM(debug, "i");
#endif

#ifdef DEBUG_OFFLOADING
/*
 * "forceTcpCsumOffload" module parameter (debug builds only).  vmxnet_init()
 * copies it into vmxnetForceTcpCsumOffload.
 */
static int forceTcpCsumOffload = 0;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 9)
   module_param(forceTcpCsumOffload, int, 0444);
#else
   MODULE_PARM(forceTcpCsumOffload, "i");
#endif
#endif
#endif

/*
 * PCI IDs this driver binds to.  driver_data records which kind of chip
 * matched, so vmxnet_probe_device() can tell a native vmxnet adapter from a
 * vlance adapter that has to be morphed.  The zero entry ends the table.
 */

static const struct pci_device_id vmxnet_chips[] = {
   { PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_NET),
     .driver_data = VMXNET_CHIP, },
   { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_LANCE),
     .driver_data = LANCE_CHIP, },
   { 0, },
};

/* Ties the ID table and the probe/remove entry points together for the PCI core. */
static struct pci_driver vmxnet_driver = {
   .name     = "vmxnet",
   .id_table = vmxnet_chips,
   .probe    = vmxnet_probe_device,
   .remove   = vmxnet_remove_device,
};

/*
 *-----------------------------------------------------------------------------
 *
 * vmxnet_init --
 *
 *      Module entry point.  Applies the module parameters to the driver's
 *      internal settings, prints the version banner and registers the PCI
 *      driver.
 *
 * Results:
 *      0 on success, otherwise the negative value returned by
 *      pci_register_driver().
 *
 * Side effects:
 *      Registers vmxnet_driver; the per-device work is done by
 *      vmxnet_probe_device.
 *
 *-----------------------------------------------------------------------------
 */

static int
vmxnet_init(void)
{
   int rc;

   /* The built-in default is positive, so the parameter normally takes over. */
   if (vmxnet_debug > 0) {
      vmxnet_debug = debug;
   }

   printk(KERN_INFO 
          "VMware vmxnet virtual NIC driver release %d.%d.%d build-%d\n",
          PRODUCT_VERSION);

#ifdef DEBUG_OFFLOADING
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0)
   /* Same scheme as above: take the parameter, then report if it is on. */
   if (vmxnetForceTcpCsumOffload > 0) {
      vmxnetForceTcpCsumOffload = forceTcpCsumOffload;
   }
   if (vmxnetForceTcpCsumOffload > 0) {
      printk("Forcing TCP Checksum Offloading\n");
   }
#else
   printk("Cannot force TCP Checksum Offloading\n");
   vmxnetForceTcpCsumOffload = 0;
#endif
#endif

   rc = pci_register_driver(&vmxnet_driver);
   return (rc < 0) ? rc : 0;
}


/*
 *-----------------------------------------------------------------------------
 *
 * vmxnet_exit --
 *
 *      Cleanup, called by Linux when the module is unloaded.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      Unregisters the PCI driver.  The per-device teardown (unregistering
 *      the interface and freeing memory) is in vmxnet_remove_device, the
 *      driver's .remove hook.
 *
 *-----------------------------------------------------------------------------
 */

static void
vmxnet_exit(void)
{
   /* Counterpart of the pci_register_driver() call in vmxnet_init(). */
   pci_unregister_driver(&vmxnet_driver);
}


#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,43)
/*
 *-----------------------------------------------------------------------------
 *
 * vmxnet_tx_timeout --
 *
 *      Network device tx_timeout routine.  Called by Linux when the tx
 *      queue has been stopped for more than dev->watchdog_timeo jiffies
 *      (set to VMXNET_WATCHDOG_TIMEOUT in vmxnet_probe_device).
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      Tries to restart the transmit queue.
 *
 *-----------------------------------------------------------------------------
 */
static void
vmxnet_tx_timeout(struct net_device *dev)
{
   /* No device reset is attempted; the queue is simply woken up again. */
   netif_wake_queue(dev);
}
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,43) */


/*
 *-----------------------------------------------------------------------------
 *
 * vmxnet_probe_device --
 *
 *      Most of the initialization at module load time is done here.
 *
 * Results:
 *      Returns 0 for success, an error otherwise.
 *
 * Side effects:
 *      Switches device from vlance to vmxnet mode, creates ethernet
 *      structure for device, and registers device with network stack.
 *
 *-----------------------------------------------------------------------------
 */

static int
vmxnet_probe_device(struct pci_dev             *pdev, // IN: vmxnet PCI device
                    const struct pci_device_id *id)   // IN: matching device ID
{
   struct Vmxnet_Private *lp;
   struct net_device *dev;
   unsigned int ioaddr, reqIOAddr, reqIOSize;
   unsigned int irq_line;
   /* VMware's version of the magic number */
   unsigned int low_vmware_version;
   unsigned int numRxBuffers;
   unsigned int numTxBuffers;
   Bool morphed = FALSE;
   int i;
   unsigned int driverDataSize;

   i = compat_pci_enable_device(pdev);
   if (i) {
      printk(KERN_ERR "Cannot enable vmxnet adapter %s: error %d\n",
             compat_pci_name(pdev), i);
      return i;
   }
   irq_line = pdev->irq;
   ioaddr = compat_pci_resource_start(pdev, 0);

   reqIOAddr = ioaddr;
   /* Found adapter, adjust ioaddr to match the adapter we found. */
   if (id->driver_data == VMXNET_CHIP) {
      reqIOSize = VMXNET_CHIP_IO_RESV_SIZE;
   } else {
      /*
       * Since this is a vlance adapter we can only use it if
       * its I/0 space is big enough for the adapter to be
       * capable of morphing. This is the first requirement
       * for this adapter to potentially be morphable. The
       * layout of a morphable LANCE adapter is
       *
       * I/O space:
       *
       * |------------------|
       * | LANCE IO PORTS   |
       * |------------------|
       * | MORPH PORT       |
       * |------------------|
       * | VMXNET IO PORTS  |
       * |------------------|
       *
       * VLance has 8 ports of size 4 bytes, the morph port is 4 bytes, and
       * Vmxnet has 10 ports of size 4 bytes.
       *
       * We shift up the ioaddr with the size of the LANCE I/O space since
       * we want to access the vmxnet ports. We also shift the ioaddr up by
       * the MORPH_PORT_SIZE so other port access can be independent of
       * whether we are Vmxnet or a morphed VLance. This means that when
       * we want to access the MORPH port we need to subtract the size
       * from ioaddr to get to it.
       */

      ioaddr += LANCE_CHIP_IO_RESV_SIZE + MORPH_PORT_SIZE;
      reqIOSize = LANCE_CHIP_IO_RESV_SIZE + MORPH_PORT_SIZE +
                  VMXNET_CHIP_IO_RESV_SIZE;
   }
   /* Do not attempt to morph non-morphable AMD PCnet */
   if (reqIOSize > compat_pci_resource_len(pdev, 0)) {
      printk(KERN_INFO "vmxnet: Device in slot %s is not supported by this driver.\n",
             compat_pci_name(pdev));
      goto pci_disable;
   }

   /*
    * Request I/O region with adjusted base address and size. The adjusted
    * values are needed and used if we release the region in case of failure.
    */

   if (!compat_request_region(reqIOAddr, reqIOSize, VMXNET_CHIP_NAME)) {
      printk(KERN_INFO "vmxnet: Another driver already loaded for device in slot %s.\n",
             compat_pci_name(pdev));
      goto pci_disable;
   }

   /* Morph the underlying hardware if we found a VLance adapter. */
   if (id->driver_data == LANCE_CHIP) {
      uint16 magic;

      /* Read morph port to verify that we can morph the adapter. */

      magic = inw(ioaddr - MORPH_PORT_SIZE);
      if (magic != LANCE_CHIP &&
          magic != VMXNET_CHIP) {
         printk(KERN_ERR "Invalid magic, read: 0x%08X\n", magic);
         goto release_reg;
      }

      /* Morph adapter. */

      outw(VMXNET_CHIP, ioaddr - MORPH_PORT_SIZE);
      morphed = TRUE;

      /* Verify that we morphed correctly. */

      magic = inw(ioaddr - MORPH_PORT_SIZE);
      if (magic != VMXNET_CHIP) {
         printk(KERN_ERR "Couldn't morph adapter. Invalid magic, read: 0x%08X\n",
                magic);
         goto morph_back;
      }
   }

   printk(KERN_INFO "Found vmxnet/PCI at %#x, irq %u.\n", ioaddr, irq_line);

   low_vmware_version = inl(ioaddr + VMXNET_LOW_VERSION);
   if ((low_vmware_version & 0xffff0000) != (VMXNET2_MAGIC & 0xffff0000)) {
      printk(KERN_ERR "Driver version 0x%08X doesn't match %s version 0x%08X\n",
             VMXNET2_MAGIC, PRODUCT_GENERIC_NAME, low_vmware_version);
      goto morph_back;
   } else {
      /*
       * The low version looked OK so get the high version and make sure that
       * our version is supported.
       */
      unsigned int high_vmware_version = inl(ioaddr + VMXNET_HIGH_VERSION);
      if ((VMXNET2_MAGIC < low_vmware_version) ||
          (VMXNET2_MAGIC > high_vmware_version)) {
         printk(KERN_ERR
                "Driver version 0x%08X doesn't match %s version 0x%08X, 0x%08X\n",
                VMXNET2_MAGIC, PRODUCT_GENERIC_NAME,
                low_vmware_version, high_vmware_version);
         goto morph_back;
      }
   }

   dev = compat_alloc_etherdev(sizeof *lp);
   if (!dev) {
      printk(KERN_ERR "Unable to allocate ethernet device\n");
      goto morph_back;
   }

   dev->base_addr = ioaddr;

   outl(VMXNET_CMD_GET_NUM_RX_BUFFERS, dev->base_addr + VMXNET_COMMAND_ADDR);
   numRxBuffers = inl(dev->base_addr + VMXNET_COMMAND_ADDR);
   if (numRxBuffers == 0 || numRxBuffers > VMXNET2_MAX_NUM_RX_BUFFERS) {
      numRxBuffers = VMXNET2_DEFAULT_NUM_RX_BUFFERS;
   }

   outl(VMXNET_CMD_GET_NUM_TX_BUFFERS, dev->base_addr + VMXNET_COMMAND_ADDR);
   numTxBuffers = inl(dev->base_addr + VMXNET_COMMAND_ADDR);
   if (numTxBuffers == 0 || numTxBuffers > VMXNET2_MAX_NUM_TX_BUFFERS) {
      numTxBuffers = VMXNET2_DEFAULT_NUM_TX_BUFFERS;
   }

   driverDataSize =
            sizeof(Vmxnet2_DriverData) +
            (numRxBuffers + 1) * sizeof(Vmxnet2_RxRingEntry) + /* +1 for dummy rxRing2 */
            numTxBuffers * sizeof(Vmxnet2_TxRingEntry);
   printk(KERN_DEBUG
          "vmxnet: numRxBuffers=(%d*%d) numTxBuffers=(%d*%d) driverDataSize=%d\n",
          numRxBuffers, (uint32)sizeof(Vmxnet2_RxRingEntry),
          numTxBuffers, (uint32)sizeof(Vmxnet2_TxRingEntry),
          driverDataSize);
   lp = dev->priv;
   lp->ddAllocated = kmalloc(driverDataSize + 15, GFP_DMA | GFP_KERNEL);

   if (!lp->ddAllocated) {
      printk(KERN_ERR "Unable to allocate memory for driver data\n");
      goto free_dev;
   }
   if ((uintptr_t)virt_to_bus(lp->ddAllocated) > SHARED_MEM_MAX) {
      printk(KERN_ERR
             "Unable to initialize driver data, address outside of shared area (0x%p)\n",
             (void*)virt_to_bus(lp->ddAllocated));
      goto free_dev_dd;
   }

   /* Align on paragraph boundary */
   lp->dd = (Vmxnet2_DriverData*)(((unsigned long)lp->ddAllocated + 15) & ~15UL);
   memset(lp->dd, 0, driverDataSize);
   spin_lock_init(&lp->txLock);
   lp->numRxBuffers = numRxBuffers;
   lp->numTxBuffers = numTxBuffers;
   /* So that the vmkernel can check it is compatible */
   lp->dd->magic = VMXNET2_MAGIC;
   lp->dd->length = driverDataSize;
   lp->name = VMXNET_CHIP_NAME;

   /*
    * Store whether we are morphed so we can figure out how to
    * clean up when we unload.
    */
   lp->morphed = morphed;

   outl(VMXNET_CMD_GET_FEATURES, dev->base_addr + VMXNET_COMMAND_ADDR);
   lp->features = inl(dev->base_addr + VMXNET_COMMAND_ADDR);

   outl(VMXNET_CMD_GET_CAPABILITIES, dev->base_addr + VMXNET_COMMAND_ADDR);
   lp->capabilities = inl(dev->base_addr + VMXNET_COMMAND_ADDR);

   if (lp->capabilities & VMNET_CAP_VMXNET_APROM) {
      for (i = 0; i < ETH_ALEN; i++) {
         dev->dev_addr[i] = inb(ioaddr + VMXNET_APROM_ADDR + i);
      }
      for (i = 0; i < ETH_ALEN; i++) {
         outb(dev->dev_addr[i], ioaddr + VMXNET_MAC_ADDR + i);
      }
   } else {
      /*
       * Be backwards compatible and use the MAC address register to
       * get MAC address.
       */
      for (i = 0; i < ETH_ALEN; i++) {
         dev->dev_addr[i] = inb(ioaddr + VMXNET_MAC_ADDR + i);
      }
   }

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0)
#if 0 /* vmkernel xsum offload is busted for something rhas30 does */
   if (lp->capabilities & VMXNET_CAP_IP_CSUM) {
      dev->features |= NETIF_F_IP_CSUM;
   }
   if (lp->capabilities & VMXNET_CAP_HW_CSUM) {
      dev->features |= NETIF_F_HW_CSUM;
   }
#endif /* vmkernel xsum offload is busted for something rhas30 does */
#endif

   dev->irq = irq_line;

   dev->open = &vmxnet_open;
   dev->hard_start_xmit = &vmxnet_start_tx;
   dev->stop = &vmxnet_close;
   dev->get_stats = &vmxnet_get_stats;
   dev->set_multicast_list = &vmxnet_set_multicast_list;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,43)
   dev->tx_timeout = &vmxnet_tx_timeout;
   dev->watchdog_timeo = VMXNET_WATCHDOG_TIMEOUT;
#endif
#ifdef VMW_HAVE_POLL_CONTROLLER
   dev->poll_controller = vmxnet_netpoll;
#endif

   /* Do this after ether_setup(), which sets the default value. */
   dev->set_mac_address = &vmxnet_set_mac_address;

   COMPAT_SET_MODULE_OWNER(dev);

   if (register_netdev(dev)) {
      printk(KERN_ERR "Unable to register %s device\n",
             PRODUCT_GENERIC_NAME);
      goto free_dev_dd;
   }

   /* Do this after register_netdev(), which sets device name */
   printk(KERN_DEBUG "%s: %s at %#3lx assigned IRQ %d.\n",
          dev->name, lp->name, dev->base_addr, dev->irq);

   pci_set_drvdata(pdev, dev);
   return 0;

free_dev_dd:;
   kfree(lp->ddAllocated);
free_dev:;
   compat_free_netdev(dev);
morph_back:;
   if (morphed) {
      /* Morph back to LANCE hw. */
      outw(LANCE_CHIP, ioaddr - MORPH_PORT_SIZE);
   }
release_reg:;
   release_region(reqIOAddr, reqIOSize);
pci_disable:;
   compat_pci_disable_device(pdev);
   return -EBUSY;
}


/*
 *-----------------------------------------------------------------------------
 *
 * vmxnet_remove_device --
 *
 *      PCI remove routine, called for each bound device on unload.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      Unregisters the interface, switches a morphed adapter back to vlance
 *      mode, releases the I/O region, frees the driver data area and the
 *      net_device, and disables the PCI device.
 *
 *-----------------------------------------------------------------------------
 */
static void
vmxnet_remove_device(struct pci_dev* pdev)
{
   struct net_device *netdev = pci_get_drvdata(pdev);
   struct Vmxnet_Private *priv = netdev->priv;

   unregister_netdev(netdev);

   if (!priv->morphed) {
      /* Native vmxnet adapter: only the vmxnet ports were claimed. */
      release_region(netdev->base_addr, VMXNET_CHIP_IO_RESV_SIZE);
   } else {
      uint16 magic;

      /* The adapter is expected to still report vmxnet mode at this point. */
      magic = inw(netdev->base_addr - MORPH_PORT_SIZE);
      if (magic != VMXNET_CHIP) {
         printk(KERN_ERR "Adapter not morphed. read magic: 0x%08X\n", magic);
      }

      /* Switch back to LANCE regardless, then read back to confirm. */
      outw(LANCE_CHIP, netdev->base_addr - MORPH_PORT_SIZE);

      magic = inw(netdev->base_addr - MORPH_PORT_SIZE);
      if (magic != LANCE_CHIP) {
         printk(KERN_ERR "Couldn't unmorph adapter. Invalid magic, read: 0x%08X\n",
                magic);
      }

      /*
       * base_addr was shifted past the LANCE and morph ports at probe time,
       * so step back to the start of the claimed region.
       */
      release_region(netdev->base_addr -
                     (LANCE_CHIP_IO_RESV_SIZE + MORPH_PORT_SIZE),
                     VMXNET_CHIP_IO_RESV_SIZE +
                     (LANCE_CHIP_IO_RESV_SIZE + MORPH_PORT_SIZE));
   }

   kfree(priv->ddAllocated);
   compat_free_netdev(netdev);
   compat_pci_disable_device(pdev);
}


/*
 *-----------------------------------------------------------------------------
 *
 * vmxnet_init_ring --
 *
 *      Initializes buffer rings in Vmxnet_Private structure.  Allocates skbs
 *      to receive into.  Called by vmxnet_open.
 *
 * Results:
 *      0 on success; -1 on failure to allocate skbs.
 *
 * Side effects:
 *
 *-----------------------------------------------------------------------------
 */
static int
vmxnet_init_ring(struct net_device *dev)
{
   struct Vmxnet_Private *lp = (Vmxnet_Private *)dev->priv;
   Vmxnet2_DriverData *dd = lp->dd;
   int i;
   size_t offset;

   offset = sizeof(*dd);

   dd->rxRingLength = lp->numRxBuffers;
   dd->rxRingOffset = offset;
   lp->rxRing = (Vmxnet2_RxRingEntry *)((uintptr_t)dd + offset);
   offset += lp->numRxBuffers * sizeof(Vmxnet2_RxRingEntry);
   
   // dummy rxRing2
   dd->rxRingLength2 = 1;
   dd->rxRingOffset2 = offset;
   offset += sizeof(Vmxnet2_RxRingEntry);

   dd->txRingLength = lp->numTxBuffers;
   dd->txRingOffset = offset;
   lp->txRing = (Vmxnet2_TxRingEntry *)((uintptr_t)dd + offset);
   offset += lp->numTxBuffers * sizeof(Vmxnet2_TxRingEntry);

   printk(KERN_DEBUG "vmxnet_init_ring: offset=%"FMT64"d length=%d\n", 
	  (uint64)offset, dd->length);

   for (i = 0; i < lp->numRxBuffers; i++) {
      lp->rxSkbuff[i] = dev_alloc_skb (PKT_BUF_SZ);
      if (lp->rxSkbuff[i] == NULL) {
	 // there is not much we can do at this point
	 printk (KERN_ERR "%s: vmxnet_init_ring dev_alloc_skb failed.\n", dev->name);
	 return -ENOMEM;
      }

      lp->rxRing[i].paddr = le32_to_cpu(virt_to_bus(lp->rxSkbuff[i]->tail));
      lp->rxRing[i].bufferLength = le16_to_cpu(PKT_BUF_SZ);
      lp->rxRing[i].actualLength = 0;
      lp->rxRingBuffPtr[i] = &lp->rxSkbuff[i];
      lp->rxRing[i].ownership = VMXNET2_OWNERSHIP_NIC;
   }
   
   // dummy rxRing2 tacked on to the end, with a single unusable entry
   lp->rxRing[i].paddr = 0;
   lp->rxRing[i].bufferLength = 0;
   lp->rxRing[i].actualLength = 0;
   lp->rxRingBuffPtr[i] = NULL;
   lp->rxRing[i].ownership = VMXNET2_OWNERSHIP_DRIVER;

   dd->rxDriverNext = 0;

   for (i = 0; i < lp->numTxBuffers; i++) {
      lp->txRing[i].ownership = VMXNET2_OWNERSHIP_DRIVER;
      lp->txRingBuffPtr[i] = NULL;
      lp->txRing[i].sg.sg[0].addrHi = 0;
   }

   dd->txDriverCur = dd->txDriverNext = 0;
   dd->savedRxNICNext = dd->savedRxNICNext2 = dd->savedTxNICNext = 0;
   dd->txStopped = FALSE;

   return 0;
}

/*
 *-----------------------------------------------------------------------------
 *
 * vmxnet_open --
 *
 *      Network device open routine.  Called by Linux when the interface is
 *      brought up.
 *
 * Results:
 *      0 on success; -EAGAIN if the device has no IRQ or the IRQ cannot be
 *      requested; -ENOMEM if the rings cannot be initialized.
 *
 * Side effects:
 *      Requests the (shared) IRQ, initializes the rings, tells the device
 *      where the driver data area lives and starts the transmit queue.
 *      The IRQ is released again if ring initialization fails.
 *
 *-----------------------------------------------------------------------------
 */
static int
vmxnet_open(struct net_device *dev)
{
   struct Vmxnet_Private *lp = (Vmxnet_Private *)dev->priv;
   unsigned int ioaddr = dev->base_addr;
   u32 paddr;

   if (dev->irq == 0 ||	request_irq(dev->irq, &vmxnet_interrupt,
			            SA_SHIRQ, lp->name, (void *)dev)) {
      return -EAGAIN;
   }

   if (vmxnet_debug > 1) {
      printk(KERN_DEBUG "%s: vmxnet_open() irq %d lp %#x.\n",
	     dev->name, dev->irq,
	     (u32) virt_to_bus(lp));
   }

   if (vmxnet_init_ring(dev)) {
      /*
       * vmxnet_close is not called after a failed open, so the IRQ
       * requested above has to be given back here.
       */
      free_irq(dev->irq, dev);
      return -ENOMEM;
   }

   /* Hand the address and length of the driver data area to the device. */
   paddr = le32_to_cpu(virt_to_bus(lp->dd));

   outl(paddr, ioaddr + VMXNET_INIT_ADDR);
   outl(lp->dd->length, ioaddr + VMXNET_INIT_LENGTH);

   lp->dd->txStopped = FALSE;
   netif_start_queue(dev);

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,3,43)
   dev->interrupt = 0;
   dev->start = 1;
#endif

   /* vmxnet_rx ignores the ring until this flag is set. */
   lp->devOpen = TRUE;

   COMPAT_NETDEV_MOD_INC_USE_COUNT;

   return 0;
}

/*
 *-----------------------------------------------------------------------------
 *
 * check_tx_queue --
 *
 *      Walk the tx ring from txDriverCur and reclaim every entry that the
 *      driver owns again and that still has an skb attached.  The callers
 *      in this file hold lp->txLock.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      Frees the skbs of completed transmits, decrements numTxPending,
 *      advances txDriverCur, clears txStopped and wakes the transmit queue
 *      if it was stopped.
 *
 *-----------------------------------------------------------------------------
 */
static void
check_tx_queue(struct net_device *dev)
{
   Vmxnet_Private *lp = (Vmxnet_Private *)dev->priv;
   Vmxnet2_DriverData *dd = lp->dd;

   /* Stop at the first entry that is still in flight or was never used. */
   while (lp->txRing[dd->txDriverCur].ownership == VMXNET2_OWNERSHIP_DRIVER &&
          lp->txRingBuffPtr[dd->txDriverCur] != NULL) {

      compat_dev_kfree_skb_irq(lp->txRingBuffPtr[dd->txDriverCur], FREE_WRITE);
      lp->txRingBuffPtr[dd->txDriverCur] = NULL;
      lp->numTxPending--;

      if (dd->debugLevel > 0) {
         printk(KERN_DEBUG 
                "check_tx_queue: returned packet, numTxPending %d next %d cur %d\n",
                lp->numTxPending, dd->txDriverNext, dd->txDriverCur);
      }

      VMXNET_INC(dd->txDriverCur, dd->txRingLength);

      /* A slot was freed, so transmission may resume. */
      if (netif_queue_stopped(dev)) {
         netif_wake_queue(dev);
      }
      dd->txStopped = FALSE;
   }
}

/*
 *-----------------------------------------------------------------------------
 *
 * vmxnet_tx --
 *
 *      Network device hard_start_xmit helper routine.  This is called by
 *      the driver's hard_start_xmit routine when it wants to send a packet.
 *      Takes lp->txLock for the duration of the call.
 *
 * Results:
 *      VMXNET_CALL_TRANSMIT:	The driver should ask the virtual NIC to
 *				transmit a packet.
 *      VMXNET_DEFER_TRANSMIT:	This transmit is deferred because of
 *				transmit clustering.
 *      VMXNET_STOP_TRANSMIT:	We ran out of queue space so the caller
 *				should stop transmitting.  The skb was not
 *				queued.
 *
 * Side effects:
 *      The driver's tx ring may get modified.  Completed transmits are
 *      reclaimed through check_tx_queue.  When the ring is full the queue
 *      is stopped and the busy descriptor is left untouched.
 *
 *-----------------------------------------------------------------------------
 */
static Vmxnet_TxStatus
vmxnet_tx(struct sk_buff *skb, struct net_device *dev, Bool isCOS)
{
   Vmxnet_TxStatus status = VMXNET_DEFER_TRANSMIT;
   struct Vmxnet_Private *lp = (struct Vmxnet_Private *)dev->priv;
   Vmxnet2_DriverData *dd = lp->dd;
   unsigned long flags;
   Vmxnet2_TxRingEntry *xre;

   spin_lock_irqsave(&lp->txLock, flags);

   xre = &lp->txRing[dd->txDriverNext];

#ifdef DEBUG_OFFLOADING
   if (vmxnetForceTcpCsumOffload && (skb->nh.iph->protocol == IPPROTO_TCP)) {
      uint16 ipHdrLen = (skb->nh.iph->ihl) << 2;
      uint16 tcpLen = ntohs(skb->nh.iph->tot_len) - ipHdrLen;
      if (tcpLen > 20) {
         skb->h.th->check = ~csum_tcpudp_magic(skb->nh.iph->saddr,
                                               skb->nh.iph->daddr,
                                               tcpLen,
                                               IPPROTO_TCP,
                                               0);
         skb->ip_summed = CHECKSUM_HW;
      } 
   } 
#endif

   if (lp->txRingBuffPtr[dd->txDriverNext] != NULL) {
      /*
       * The slot still carries an earlier skb, i.e. the ring is full and
       * the descriptor may be owned by the NIC.  Do not write to it.
       */
      dd->txStopped = TRUE;
      netif_stop_queue(dev);
      status = VMXNET_STOP_TRANSMIT;
      if (dd->debugLevel > 0) {
         printk(KERN_DEBUG "Stopping transmit\n");
      }
   } else {
      /* The slot is free: build the descriptor from scratch. */
      if (isCOS) {
         xre->sg.addrType = NET_SG_MACH_ADDR;
         xre->flags = 0;
      } else {
         xre->sg.addrType = NET_SG_PHYS_ADDR;
         xre->flags = VMXNET2_TX_CAN_KEEP;
         if (skb->ip_summed == CHECKSUM_HW) {
            xre->flags |= VMXNET2_TX_HW_XSUM;
         }
      }
      xre->sg.length = 1;
      xre->sg.sg[0].length = le16_to_cpu(skb->len);
      xre->sg.sg[0].addrLow = le32_to_cpu(virt_to_bus(skb->data));

      /* Nearly full: flag it and ask for an immediate transmit. */
      if (lp->numTxPending > dd->txRingLength - 5) {
         if (dd->debugLevel > 0) {
            printk(KERN_DEBUG "Queue low\n");
         }
         xre->flags |= VMXNET2_TX_RING_LOW;
         status = VMXNET_CALL_TRANSMIT;
      }
      /* Remember the skb for reclaiming, then hand the entry to the NIC. */
      lp->txRingBuffPtr[dd->txDriverNext] = skb;
      xre->ownership = VMXNET2_OWNERSHIP_NIC;

      VMXNET_INC(dd->txDriverNext, dd->txRingLength);

      dev->trans_start = jiffies;

      /* Transmit clustering: only kick the NIC every txClusterLength packets. */
      dd->txNumDeferred++;
      if (isCOS || (dd->txNumDeferred >= dd->txClusterLength)) {
         dd->txNumDeferred = 0;
         status = VMXNET_CALL_TRANSMIT;
      }

      lp->stats.tx_packets++;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,2,0)
      lp->stats.tx_bytes += skb->len;
#endif
      lp->numTxPending++;
   }

   check_tx_queue(dev);

   spin_unlock_irqrestore(&lp->txLock, flags);

   return status;
}

/*
 *-----------------------------------------------------------------------------
 *
 * vmxnet_start_tx --
 *
 *      Network device hard_start_xmit routine.  Called by Linux when it has
 *      a packet for us to transmit.  Queues the packet through vmxnet_tx and
 *      acts on the status it reports.
 *
 * Results:
 *      0 on success; 1 if no resources.
 *
 * Side effects:
 *      Reads the VMXNET_TX_ADDR port when vmxnet_tx asks for a transmit.
 *
 *-----------------------------------------------------------------------------
 */
static int
vmxnet_start_tx(struct sk_buff *skb, struct net_device *dev)
{
   Vmxnet_TxStatus txStatus = vmxnet_tx(skb, dev, FALSE);

   if (txStatus == VMXNET_STOP_TRANSMIT) {
      /* Ring full: the packet was not queued. */
      return 1;
   }

   if (txStatus == VMXNET_CALL_TRANSMIT) {
      /* Reading the tx port tells the virtual NIC to transmit. */
      inl(dev->base_addr + VMXNET_TX_ADDR);
   }

   /* VMXNET_DEFER_TRANSMIT needs no further action. */
   return 0;
}

/*
 *-----------------------------------------------------------------------------
 *
 * vmxnet_rx --
 *
 *      Receive a packet.
 *
 * Results:
 *      0
 *
 * Side effects:
 *      None.
 *
 *-----------------------------------------------------------------------------
 */
static int
vmxnet_rx(struct net_device *dev)
{
   Vmxnet_Private *lp = (Vmxnet_Private *)dev->priv;
   Vmxnet2_DriverData *dd = lp->dd;

   if (!lp->devOpen) {
      return 0;
   }

   while (1) {
      short pkt_len;
      struct sk_buff *skb;
      Vmxnet2_RxRingEntry *rre;
      rre = &lp->rxRing[dd->rxDriverNext];
      if (rre->ownership != VMXNET2_OWNERSHIP_DRIVER) {
	 break;
      }
      pkt_len = rre->actualLength;

      if (pkt_len < ETH_MIN_FRAME_LEN) {
         if (pkt_len != 0) {
	    printk(KERN_DEBUG "%s: Runt pkt (%d bytes)!\n", dev->name, pkt_len);
         }
	 lp->stats.rx_errors++;
      } else {
	 struct sk_buff *newskb = dev_alloc_skb (PKT_BUF_SZ);
	 if (newskb != NULL) {
	    struct sk_buff **rxSkbuff = lp->rxRingBuffPtr[dd->rxDriverNext];
	    skb = *rxSkbuff;
	    skb_put (skb, pkt_len);

	    newskb->dev = dev;
	    *rxSkbuff = newskb;	    
	    rre->paddr = le32_to_cpu(virt_to_bus(newskb->tail));
	 } else {
	    skb = NULL;
	 }

	 if (skb == NULL) {
	    printk(KERN_DEBUG "%s: Memory squeeze, dropping packet.\n", dev->name);
	 } else {
            if (rre->flags & VMXNET2_RX_HW_XSUM_OK) {
               // XXX this is heavy handed, should probably use CHECKSUM_HW
               skb->ip_summed = CHECKSUM_UNNECESSARY;
            }
	    skb->dev = dev;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,2,0)
	    lp->stats.rx_bytes += skb->len;
#endif
	    skb->protocol = eth_type_trans(skb, dev);
	    netif_rx(skb);
	    lp->stats.rx_packets++;
	 }
      }

      rre->ownership = VMXNET2_OWNERSHIP_NIC;
      VMXNET_INC(dd->rxDriverNext, dd->rxRingLength);
   }

   return 0;
}

/*
 *-----------------------------------------------------------------------------
 *
 * vmxnet_interrupt --
 *
 *      Interrupt handler.  Acknowledges the interrupt, calls vmxnet_rx to
 *      receive packets and reclaims completed transmits.  Also called
 *      directly by vmxnet_netpoll.
 *
 * Results:
 *      COMPAT_IRQ_NONE if dev_id is NULL, COMPAT_IRQ_HANDLED otherwise.
 *
 * Side effects:
 *      Writes VMXNET_CMD_INTR_ACK to the command port.  May pass packets to
 *      the stack, free transmitted skbs and wake the transmit queue.
 *
 *-----------------------------------------------------------------------------
 */
static compat_irqreturn_t
vmxnet_interrupt(int irq, void *dev_id, struct pt_regs * regs)
{
   struct net_device *dev = (struct net_device *)dev_id;
   struct Vmxnet_Private *lp;

   if (dev == NULL) {
      printk (KERN_DEBUG "vmxnet_interrupt(): irq %d for unknown device.\n", irq);
      return COMPAT_IRQ_NONE;
   }

   lp = (struct Vmxnet_Private *)dev->priv;
   /* Acknowledge first, before looking at the rings. */
   outl(VMXNET_CMD_INTR_ACK, dev->base_addr + VMXNET_COMMAND_ADDR);

   // printk(KERN_DEBUG "vmxnet_interrupt\n");

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,3,43)
   /* Old kernels: dev->interrupt marks that the handler is running. */
   if (dev->interrupt) {
      printk(KERN_DEBUG "%s: Re-entering the interrupt handler.\n", dev->name);
   }
   dev->interrupt = 1;
#endif

   vmxnet_rx(dev);

   /* numTxPending is only peeked at here; the reclaim runs under txLock. */
   if (lp->numTxPending > 0) {
      spin_lock(&lp->txLock);
      check_tx_queue(dev);
      spin_unlock(&lp->txLock);
   }

   /* Restart a stopped queue once the shared txStopped flag is clear. */
   if (netif_queue_stopped(dev) && !lp->dd->txStopped) {
      netif_wake_queue(dev);
   }

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,3,43)
   dev->interrupt = 0;
#endif
   return COMPAT_IRQ_HANDLED;
}


#ifdef VMW_HAVE_POLL_CONTROLLER
/*
 *-----------------------------------------------------------------------------
 *
 * vmxnet_netpoll --
 *
 *      poll_controller hook.  Runs the regular interrupt handler by hand
 *      with the device's IRQ line disabled.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      Whatever vmxnet_interrupt does: packets received, transmits
 *      reclaimed, queue woken.
 *
 *-----------------------------------------------------------------------------
 */
static void
vmxnet_netpoll(struct net_device *dev)
{
   unsigned int line = dev->irq;

   /* Keep the real interrupt out while the handler is driven manually. */
   disable_irq(line);
   vmxnet_interrupt(line, dev, NULL);
   enable_irq(line);
}
#endif /* VMW_HAVE_POLL_CONTROLLER */


/*
 *-----------------------------------------------------------------------------
 *
 * vmxnet_close --
 *
 *      Network device stop (close) routine.  Called by Linux when the
 *      interface is brought down.
 *
 * Results:
 *      0 for success (always).
 *
 * Side effects:
 *      Flushes pending transmits.  Frees IRQs and shared memory area.
 *
 *-----------------------------------------------------------------------------
 */
static int
vmxnet_close(struct net_device *dev)
{
   unsigned int ioaddr = dev->base_addr;
   Vmxnet_Private *lp = (Vmxnet_Private *)dev->priv;
   int i;
   unsigned long flags;

   if (vmxnet_debug > 1) {
      printk(KERN_DEBUG "%s: Shutting down ethercard\n", dev->name);
   }

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,3,43)
   dev->start = 0;
#endif

   netif_stop_queue(dev);

   lp->devOpen = FALSE;

   spin_lock_irqsave(&lp->txLock, flags);
   if (lp->numTxPending > 0) {
      //Wait absurdly long (2sec) for all the pending packets to be returned.
      printk(KERN_DEBUG "vmxnet_close: Pending tx = %d\n", lp->numTxPending); 
      for (i = 0; i < 200 && lp->numTxPending > 0; i++) {
	 outl(VMXNET_CMD_CHECK_TX_DONE, dev->base_addr + VMXNET_COMMAND_ADDR);
	 udelay(10000);
	 check_tx_queue(dev);
      }

      //This is possiblly caused by a faulty physical driver. 
      //Will go ahead and free these skb's anyways (possibly dangerous,
      //but seems to work in practice)
      if (lp->numTxPending > 0) {
         printk(KERN_EMERG "vmxnet_close: failed to finish all pending tx.\n"
                "This virtual machine may be in an inconsistent state.\n");
         lp->numTxPending = 0;
      }
   }
   spin_unlock_irqrestore(&lp->txLock, flags);
   
   outl(0, ioaddr + VMXNET_INIT_ADDR);

   free_irq(dev->irq, dev);

   for (i = 0; i < lp->dd->txRingLength; i++) {
      if (lp->txRingBuffPtr[i] != NULL) {
	 compat_dev_kfree_skb((struct sk_buff *)lp->txRingBuffPtr[i], FREE_WRITE);
	 lp->txRingBuffPtr[i] = NULL;
      }
   }

   for (i = 0; i < lp->numRxBuffers; i++) {
      if (lp->rxSkbuff[i] != NULL) {
	 compat_dev_kfree_skb(lp->rxSkbuff[i], FREE_WRITE);
	 lp->rxSkbuff[i] = NULL;
      }
   }

   COMPAT_NETDEV_MOD_DEC_USE_COUNT;

   return 0;
}

/*
 *-----------------------------------------------------------------------------
 *
 * vmxnet_load_multicast --
 *
 *      Rebuild the 64-bit multicast hash filter (LADRF) in the driver data
 *      area from the device's multicast list.  For each multicast address
 *      a little-endian CRC is computed over its 6 bytes; the top 6 bits of
 *      the CRC select the filter bit to set.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      Overwrites lp->dd->LADRF.
 *
 *-----------------------------------------------------------------------------
 */
static void 
vmxnet_load_multicast (struct net_device *dev)
{
   Vmxnet_Private *lp = (Vmxnet_Private *) dev->priv;
   volatile u16 *mcast_table = (u16 *)lp->dd->LADRF;
   struct dev_mc_list *entry = dev->mc_list;
   u32 poly = CRC_POLYNOMIAL_LE;
   int n;

   /* Start from an empty filter. */
   lp->dd->LADRF[0] = 0;
   lp->dd->LADRF[1] = 0;

   for (n = 0; n < dev->mc_count; n++) {
      char *addr = entry->dmi_addr;
      u32 crc = 0xffffffff;
      int byte;

      entry = entry->next;

      /* Skip anything that is not a multicast address. */
      if ((*addr & 1) == 0) {
         continue;
      }

      /* Bitwise CRC over the six address bytes, least significant bit first. */
      for (byte = 0; byte < 6; byte++) {
         int bits = *addr++;
         int j;

         for (j = 0; j < 8; j++) {
            int feedback = ((bits ^ crc) & 0x01);

            crc >>= 1;
            if (feedback) {
               crc = crc ^ poly;
            }
            bits >>= 1;
         }
      }

      /* The top 6 CRC bits pick one of the 64 filter bits (4 words x 16). */
      crc = crc >> 26;
      mcast_table [crc >> 4] |= 1 << (crc & 0xf);
   }
}

/*
 *-----------------------------------------------------------------------------
 *
 * vmxnet_set_multicast_list --
 *
 *      Network device set_multicast_list routine.  Called by Linux when the
 *      set of addresses to listen to changes, including both the multicast
 *      list and the broadcast, promiscuous, multicast, and allmulti flags.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      Updates ifflags (and, unless promiscuous, LADRF) in the driver data
 *      area and issues the matching update commands to the device.
 *
 *-----------------------------------------------------------------------------
 */
static void 
vmxnet_set_multicast_list(struct net_device *dev)
{
   Vmxnet_Private *lp = (Vmxnet_Private *)dev->priv;
   unsigned int ioaddr = dev->base_addr;

   if (!(dev->flags & IFF_PROMISC)) {
      lp->dd->ifflags = VMXNET_IFF_BROADCAST | VMXNET_IFF_MULTICAST;
      if (!(dev->flags & IFF_ALLMULTI)) {
         vmxnet_load_multicast(dev);
      } else {
         /* Accept every multicast address: all filter bits set. */
         lp->dd->LADRF[0] = 0xffffffff;
         lp->dd->LADRF[1] = 0xffffffff;
      }
      outl(VMXNET_CMD_UPDATE_LADRF, ioaddr + VMXNET_COMMAND_ADDR);
   } else {
      /* Promiscuous mode: the hash filter is left alone. */
      printk(KERN_DEBUG "%s: Promiscuous mode enabled.\n", dev->name);
      lp->dd->ifflags = VMXNET_IFF_PROMISC; 
   }

   outl(VMXNET_CMD_UPDATE_IFF, ioaddr + VMXNET_COMMAND_ADDR);
}

/*
 *-----------------------------------------------------------------------------
 *
 * vmxnet_set_mac_address --
 *
 *      Network device set_mac_address routine.  Called by Linux when someone
 *      asks to change the interface's MAC address.  p points to a
 *      struct sockaddr whose sa_data holds the new address.
 *
 * Results:
 *      0 for success; -EBUSY if interface is up.
 *
 * Side effects:
 *      Stores the address in dev->dev_addr and writes it byte by byte to
 *      the device's MAC address ports.
 *
 *-----------------------------------------------------------------------------
 */
static int
vmxnet_set_mac_address(struct net_device *dev, void *p)
{
   struct sockaddr *sa = p;
   unsigned int macPort = dev->base_addr;
   int idx;

   /* The address may only be changed while the interface is down. */
   if (netif_running(dev)) {
      return -EBUSY;
   }

   memcpy(dev->dev_addr, sa->sa_data, dev->addr_len);

   macPort += VMXNET_MAC_ADDR;
   for (idx = 0; idx < ETH_ALEN; idx++) {
      outb(sa->sa_data[idx], macPort + idx);
   }

   return 0;
}

/*
 *-----------------------------------------------------------------------------
 *
 * vmxnet_get_stats --
 *
 *      Network device get_stats routine.  Called by Linux when interface
 *      statistics are requested.
 *
 * Results:
 *      Pointer to the stats structure embedded in the device's
 *      Vmxnet_Private data.
 *
 * Side effects:
 *      None.
 *
 *-----------------------------------------------------------------------------
 */
static struct net_device_stats *
vmxnet_get_stats(struct net_device *dev)
{
   /* The counters are kept up to date by the tx and rx paths. */
   return &((Vmxnet_Private *)dev->priv)->stats;
}

/* Module load/unload entry points. */
module_init(vmxnet_init);
module_exit(vmxnet_exit);
/* Export the PCI ID table declared above as vmxnet_chips. */
MODULE_DEVICE_TABLE(pci, vmxnet_chips);
