/* **********************************************************
 * Copyright 1998 VMware, Inc.  All rights reserved. -- VMware Confidential
 * **********************************************************/

/*
 * vmx86.c --
 *
 *     Platform independent routines for creating/destroying/running
 *     virtual machine monitors.
 *
 */

#ifdef linux
/* Must come before any kernel header file --hpreg */
#   include "driver-config.h"

#   include <linux/string.h> /* memset() in the kernel */
#   include <linux/sched.h> /* jiffies from the kernel */
#elif defined(WINNT_DDK)
#   include <string.h>
#else
#   error "Unknown platform"
#endif

#include "vmware.h"
#include "machine.h"
#include "vmx86.h"
#include "task.h"
#include "initblock.h"
#include "vm_asm.h"
#include "iocontrols.h"
#ifdef USE_PERFCTRS_HOSTED
#   include "perfctr.h"
#endif
#include "hostif.h"
#include "memtrack.h"
#include "hash.h"
#if defined(_WIN64)
#include "vmmon-asm-x86-64.h"
#endif
#include "x86vt.h"
#if defined(linux)
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) && \
    (!defined(VM_X86_64) || LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 9))
#include <asm/timex.h>
#define VMW_HAS_CPUKHZ
#endif
#endif

/*
 * Keep track of the virtual machines that have been
 * created using the following structures.
 *
 */

/* Head of the singly linked list of registered VMs (linked via nextDriver). */
static VMDriver *vmDriverList = NULL;

/*
 * Limits on the number of pages that may be locked on this host; the
 * configured, dynamic and host members are combined by
 * Vmx86LockedPageLimit().
 */
static LockedPageLimit lockedPageLimit;

/* Percentage of guest "paged" memory that must fit within the hard limit. */
static unsigned minVmMemPct;

/* Number of pages actually locked by all virtual machines */
static unsigned numLockedPages;

/* Total virtual machines on this host */
static unsigned vmCount;

/*
 * Track the state of a shared page.  Entries are chained in
 * cowPageTable, bucketed by the low bits of key.
 */
typedef struct COWPage {
   struct COWPage *next;  // Next entry in the same hash bucket
   uint64 key;            // Hash of this MPN
   MPN sharedMPN;         // Anon MPN shared between vms
   uint32 ref[MAX_VMS];   // Per-vm ref count, indexed by the vm's cowID
   Bool locked;           // Keep entry even when refcount -> 0
} COWPage;

/*
 * A hint records a page that is not shared yet but is a candidate for
 * sharing.  Entries are chained in cowHintTable, bucketed by the low
 * bits of key.
 */
typedef struct COWHint {
   struct COWHint *next;  // Next entry in the same hash bucket
   uint64 key;            // Hash of origMPN
   BPN bpn;               // Guest BPN
   MPN origMPN;           // Guest MPN
   VMDriver *vm;          // VM containing BPN/MPN pair
} COWHint;

/*
 * Chaining hash tables to store pages and hashes.
 *
 * A bucket index is computed as (key & *_TABLE_MASK), so each table size
 * must remain a power of two.  COW_MAX_HINTS caps the total number of
 * hints that Vmx86COWAllocHint() will create.
 */
#define COW_HINT_TABLE_SIZE 0x1000
#define COW_MAX_HINTS       0x100000
#define COW_HINT_TABLE_MASK (COW_HINT_TABLE_SIZE - 1)

#define COW_PAGE_TABLE_SIZE 0x1000
#define COW_PAGE_TABLE_MASK (COW_PAGE_TABLE_SIZE - 1)

// cow page table walk flags: select what Vmx86COWPageTableWalk() matches
typedef enum {
   COWMatchKey,      // entry whose key equals the key being looked up
   COWMatchStale,    // entry with a zero total ref count that is not locked
   COWMatchLocked    // entry whose locked flag is set
} COWPageWalkArgs;

/* Hash bucket heads for shared pages and for sharing hints. */
static COWPage *cowPageTable[COW_PAGE_TABLE_SIZE];
static COWHint *cowHintTable[COW_HINT_TABLE_SIZE];

/* Global page-sharing counters, reported through Vmx86COWStats(). */
static struct {
   uint32 numHints;        // current number of entries in cowHintTable
   uint32 numBreaks;       // number of times COW was broken
   uint32 totalUniqueMPNs; // total MPNs ever used as shared pages
   uint32 uniqueMPNs;      // current count of unique shared pages
} cowState;

/*
 * Temp arrays to hold cow page content.  Both are file-wide scratch
 * buffers; the functions that use them assert or take the global lock.
 */
static char cowBuf[PAGE_SIZE];
static char cowContents[PAGE_SIZE];

/* 
 * Allocation of ref locations inside the COWPage struct.
 * We implement a list of allocated cowID's using an array: cowIDList[i]
 * holds the id that follows i on whichever list (allocated or unused)
 * i currently belongs to, and INVALID_COWID terminates a list.
 * The array is initialized with the values 1...MAX_VMS-1, INVALID_COWID.
 * cowIDsAllocated holds the last id given out (head of the allocated
 * list) and cowIDsUnused holds the next id to give out (head of the
 * unused list).
 */

#define INVALID_COWID (-1)
static int cowIDList[MAX_VMS];
static int cowIDsAllocated;
static int cowIDsUnused;

/* Max rate requested for fast clock by any virtual machine. */
static unsigned globalFastClockRate;


/*
 *----------------------------------------------------------------------
 *
 * Vmx86LockedPageLimit --
 *
 *       Compute how many pages may be locked on this host.  Three
 *       limits apply:
 *
 *       lockedPageLimit.configured is controlled by UI,
 *       lockedPageLimit.dynamic is controlled by authd's hardLimitMonitor,
 *       lockedPageLimit.host is re-estimated on every call via
 *       HostIF_EstimateLockedPageLimit().
 *
 *       The effective limit is the smallest of the three.  The global
 *       lock must be held by the caller.
 *
 * Results:
 *       Number of pages to lock on this host.
 *
 * Side effects:
 *       lockedPageLimit.host is refreshed.
 *
 *----------------------------------------------------------------------
 */

static INLINE unsigned
Vmx86LockedPageLimit(const VMDriver* vm)
{
   ASSERT(HostIF_GlobalLockIsHeld());

   lockedPageLimit.host = HostIF_EstimateLockedPageLimit(vm, numLockedPages);

   /* Pick the smaller administrative limit, then clamp it by the host one. */
   if (lockedPageLimit.configured < lockedPageLimit.dynamic) {
      return MIN(lockedPageLimit.configured, lockedPageLimit.host);
   }
   return MIN(lockedPageLimit.dynamic, lockedPageLimit.host);
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86LockedPageLimitForAdmissonControl --
 *
 *       Locked page limit used when deciding whether to admit a VM.
 *       Only two limits are considered here:
 *
 *       lockedPageLimit.configured is controlled by UI,
 *       lockedPageLimit.dynamic is controlled by authd's hardLimitMonitor.
 *
 *       We can lock the MIN of these values.
 *
 *       Using lockedPageLimit.host would be too pessimistic.  After admission
 *       of a new VM but before allocation/locking memory for the new VM
 *       our memory sharing code will put pressure on other VMs and should
 *       produce enough free pages to successfully finish poweron.
 *
 *       The global lock must be held by the caller.
 *
 * Results:
 *       Number of pages to lock on this host.
 *
 * Side effects:
 *       None
 *
 *----------------------------------------------------------------------
 */

static INLINE unsigned
Vmx86LockedPageLimitForAdmissonControl(void)
{
   ASSERT(HostIF_GlobalLockIsHeld());

   if (lockedPageLimit.configured < lockedPageLimit.dynamic) {
      return lockedPageLimit.configured;
   }
   return lockedPageLimit.dynamic;
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86HasFreePages --
 *
 *       Decide whether numPages more pages can be locked.
 *
 *       With checkVM set, only the per-vm limit is examined: an
 *       admitted vm must stay within vm->memInfo.maxAllocation (a vm
 *       that has not been admitted is not limited here).  With checkVM
 *       clear, only the host-wide limit from Vmx86LockedPageLimit()
 *       is examined.
 *
 *       Callers must ensure driver-wide and VM serialization
 *       typically by using HostIF_GlobalLock() and HostIF_VMLock().
 *
 * Results:
 *       TRUE if pages can be locked, FALSE otherwise
 *
 * Side effects:
 *       None
 *
 *----------------------------------------------------------------------
 */

static INLINE Bool
Vmx86HasFreePages(VMDriver *vm,
                  unsigned int numPages,
                  Bool checkVM)
{
   unsigned limit;

   /*
    * All comparisons below are arranged so that no subtraction can wrap:
    * a limit (global or per-vm) may have been lowered below the number
    * of pages that are already locked.
    */
   ASSERT(HostIF_GlobalLockIsHeld() &&
          (!checkVM || HostIF_VMLockIsHeld(vm)));

   if (checkVM) {
      /* Per-vm limit. */
      ASSERT(HostIF_VMLockIsHeld(vm));
      if (!vm->memInfo.admitted) {
         return TRUE;
      }
      if (vm->memInfo.maxAllocation <= vm->memInfo.locked ||
          vm->memInfo.maxAllocation - vm->memInfo.locked < numPages) {
         return FALSE;
      }
      return TRUE;
   }

   /* Global limit. */
   limit = Vmx86LockedPageLimit(vm);
   if (limit <= numLockedPages || limit - numLockedPages < numPages) {
      return FALSE;
   }
   return TRUE;
}



/*
 *----------------------------------------------------------------------
 *
 * Vmx86CowFreeID --
 *
 *       Mark reference count location id as unused in the COWPage ref
 *       count array.  
 *
 * Results:
 *       Clears ref count for all pages by vm with id.  
 *
 * Side effects:
 *       None
 *
 *----------------------------------------------------------------------
 */

static void
Vmx86COWFreeID(int id)
{ 
   int i,tmp;

   ASSERT(HostIF_GlobalLockIsHeld());

   /* deleting head of the list */
   if (id == cowIDsAllocated) {
      tmp = cowIDList[cowIDsAllocated];
      cowIDList[cowIDsAllocated] = cowIDsUnused;
      cowIDsAllocated = tmp;
      cowIDsUnused = id;
      return;
   }
      
   for (i = cowIDsAllocated; cowIDList[i] != INVALID_COWID; i = cowIDList[i]) {
      if (cowIDList[i] == id) {
         cowIDList[i] = cowIDList[id];
         cowIDList[id] = cowIDsUnused;
         cowIDsUnused = id;
         return;
      }
   }
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86COWAllocID --
 *
 *       Mark a reference count location as used in the COWPage ref
 *       count array: the first id on the unused list is moved to the
 *       front of the allocated list.  The global lock must be held and
 *       an unused id must be available.
 *
 * Results:
 *       The index a vm should use when ref counting COW pages.
 *
 * Side effects:
 *       cowIDList, cowIDsAllocated and cowIDsUnused are updated.
 *
 *----------------------------------------------------------------------
 */

static int 
Vmx86COWAllocID(void)
{
   int newID;

   ASSERT(HostIF_GlobalLockIsHeld());
   newID = cowIDsUnused;
   ASSERT(newID != INVALID_COWID);

   /* Pop newID off the unused list ... */
   cowIDsUnused = cowIDList[newID];

   /* ... and push it onto the front of the allocated list. */
   cowIDList[newID] = cowIDsAllocated;
   cowIDsAllocated = newID;

   return newID;
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86COWPageRef --
 *
 *       Determines the total ref count for a COW page by summing the
 *       per-vm counts of every id on the allocated cowID list.
 *
 * Results:
 *       ref count for all valid vms for this COW page; 0 when entry
 *       is NULL.
 *
 * Side effects:
 *       None
 *
 *----------------------------------------------------------------------
 */

static uint32
Vmx86COWPageRef(COWPage *entry)
{
   uint32 total = 0;
   int id;

   if (entry == NULL) {
      return 0;
   }

   id = cowIDsAllocated;
   while (id != INVALID_COWID) {
      total += entry->ref[id];
      id = cowIDList[id];
   }
   return total;
}

/*
 *----------------------------------------------------------------------
 *
 * Vmx86COWPageIsStale --
 *
 *       Determines if COW page has a zero ref count and is NOT locked
 *       as a shared page.
 *
 * Results:
 *       TRUE if entry is unreferenced and not locked, FALSE otherwise.
 *
 * Side effects:
 *       None
 *
 *----------------------------------------------------------------------
 */

static INLINE Bool
Vmx86COWPageIsStale(COWPage *entry)
{
   ASSERT(entry != NULL);

   /* A locked entry is kept even when nobody references it. */
   if (entry->locked) {
      return FALSE;
   }
   return Vmx86COWPageRef(entry) == 0;
}

/*
 *----------------------------------------------------------------------
 *
 * Vmx86COWPageIsReferenced --
 *
 *       Determines if COW page has a non-zero total ref count, i.e.
 *       at least one vm holding a cowID still references it.
 *
 * Results:
 *       TRUE if entry is referenced, FALSE otherwise.
 *
 * Side effects:
 *       None
 *
 *----------------------------------------------------------------------
 */

static INLINE Bool
Vmx86COWPageIsReferenced(COWPage *entry)
{
   ASSERT(entry != NULL);

   if (Vmx86COWPageRef(entry) == 0) {
      return FALSE;
   }
   return TRUE;
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86COWAllocHint --
 *
 *       Create a COW hint for (vm, bpn, mpn) with hash key and link it
 *       at the head of its bucket in the COW hint hash table.  No hint
 *       is created once COW_MAX_HINTS hints exist.  The global lock
 *       must be held by the caller.
 *
 * Results:
 *       The new hint, or NULL if the hint limit was reached or kernel
 *       memory could not be allocated.
 *
 * Side effects:
 *       cowState.numHints is incremented on success.
 *
 *----------------------------------------------------------------------
 */

static COWHint *
Vmx86COWAllocHint(VMDriver *vm, BPN bpn, uint64 key, MPN mpn)
{
   COWHint *hint;
   COWHint **bucket;

   ASSERT(HostIF_GlobalLockIsHeld());

   if (cowState.numHints >= COW_MAX_HINTS) {
      return NULL;
   }

   hint = HostIF_AllocKernelMem(sizeof *hint, FALSE);
   if (hint == NULL) {
      return NULL;
   }

   hint->key = key;
   hint->vm = vm;
   hint->bpn = bpn;
   hint->origMPN = mpn;

   /* Push onto the front of the bucket chain. */
   bucket = &cowHintTable[(unsigned) key & COW_HINT_TABLE_MASK];
   hint->next = *bucket;
   *bucket = hint;

   cowState.numHints++;
   return hint;
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86COWFreeHint--
 *
 *       Deallocate a COW hint.  Prev must be the entry in the hash
 *       chain that points to hint, or NULL when hint is the first
 *       entry of the bucket selected by key.  The global lock must be
 *       held by the caller.
 *
 * Results:
 *       Hint is deleted from hash table, kernel memory is returned. Returns
 *       the hint AFTER hint argument (possibly NULL).
 *
 * Side effects:
 *       cowState.numHints is decremented.
 *
 *----------------------------------------------------------------------
 */

static COWHint *
Vmx86COWFreeHint(COWHint *hint, COWHint *prev, uint64 key)
{
   COWHint *successor = hint->next;
   COWHint **link;

   ASSERT(HostIF_GlobalLockIsHeld());

   /* Locate the pointer that currently refers to hint and bypass hint. */
   link = (prev == NULL) ? &cowHintTable[key & COW_HINT_TABLE_MASK]
                         : &prev->next;
   *link = successor;

   /* Scrub the entry before handing the memory back. */
   memset(hint, 0, sizeof *hint);
   cowState.numHints--;
   HostIF_FreeKernelMem(hint);

   return successor;
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86COWAllocPage--
 *
 *       Allocate a COW page entry for hash key, backed by a freshly
 *       allocated MPN, and link it at the head of its bucket in the
 *       COW page hash table.  All per-vm ref counts start at zero.
 *       The global lock must be held by the caller.
 *
 * Results:
 *       The new entry, or NULL if the COW page quota is exhausted, the
 *       host has no free lockable pages, or an allocation failed.
 *
 * Side effects:
 *       On success cowState.uniqueMPNs, cowState.totalUniqueMPNs and
 *       numLockedPages are incremented.
 *
 *----------------------------------------------------------------------
 */

static COWPage *
Vmx86COWAllocPage(VMDriver *vm, uint64 key, Bool locked)
{
   COWPage *entry;
   unsigned index;
   MPN mpn;

   ASSERT(HostIF_GlobalLockIsHeld());

   /*
    * We limit the number of unique MPNs for COW to 20%
    * of host memory.  Since we use an extra MPN for cow pages
    * this limit is important to avoid swapping the host to 
    * death if the vm decides to break cow on all its pages.
    */
   if (cowState.uniqueMPNs >= Vmx86LockedPageLimit(vm) / 5 ||
       !Vmx86HasFreePages(vm, 1, FALSE)) {
      return NULL;
   }

   mpn = HostIF_COWAllocPage();
   if (mpn == INVALID_MPN) {
      return NULL;
   }

   entry = HostIF_AllocKernelMem(sizeof *entry, FALSE);
   if (entry == NULL) {
      /* Do not leak the backing page when the bookkeeping entry fails. */
      HostIF_COWFreePage(mpn);
      return NULL;
   }

   /* Fully initialize the entry before linking it into the table. */
   entry->key = key;
   entry->sharedMPN = mpn;
   entry->locked = locked;
   /* Size taken from the array itself, not a hard-coded element size. */
   memset(entry->ref, 0, sizeof entry->ref);

   index = (unsigned) key & COW_PAGE_TABLE_MASK;
   entry->next = cowPageTable[index];
   cowPageTable[index] = entry;

   cowState.uniqueMPNs++;
   cowState.totalUniqueMPNs++;
   numLockedPages++;

   return entry;
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86COWFreePage--
 *
 *       Deallocate a COW page.  Prev must be the entry in the hash
 *       chain that points to page, or NULL when page is the first
 *       entry of the bucket selected by key.  The page must not be
 *       referenced by any vm and the global lock must be held.
 *
 * Results:
 *       Page is deleted from hash table, its MPN and its kernel memory
 *       are returned.
 *
 * Side effects:
 *       numLockedPages and cowState.uniqueMPNs are decremented.
 *
 *----------------------------------------------------------------------
 */

static void
Vmx86COWFreePage(COWPage *page, COWPage *prev, uint64 key)
{
   COWPage **link;

   ASSERT(HostIF_GlobalLockIsHeld());
   ASSERT(!Vmx86COWPageIsReferenced(page));
   ASSERT(numLockedPages > 0);

   /* Give the backing machine page back to the host. */
   numLockedPages--;
   HostIF_COWFreePage(page->sharedMPN);

   /* Unlink the entry from its bucket chain. */
   link = (prev == NULL) ? &cowPageTable[key & COW_PAGE_TABLE_MASK]
                         : &prev->next;
   *link = page->next;

   /* Scrub the entry before handing the memory back. */
   memset(page, 0, sizeof *page);
   cowState.uniqueMPNs--;
   HostIF_FreeKernelMem(page);
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86COWMPNToKey --
 *
 *       Compute the hash of machine page MPN.  The page is first read
 *       into buf with HostIF_ReadPage(); buf is then hashed with
 *       Hash_Page().
 *
 * Results:
 *       0 on success, in which case buf holds the page contents and
 *       *key the 64 bit hash of the page for use in COW.  Otherwise the
 *       non-zero status of HostIF_ReadPage() and *key is left untouched.
 *
 * Side effects:
 *       None
 *
 *----------------------------------------------------------------------
 */

static int
Vmx86COWMPNToKey(MPN mpn, char *buf, uint64 *key)
{
   int status = HostIF_ReadPage(mpn, (void *)buf, TRUE);

   if (status != 0) {
      return status;
   }
   *key = Hash_Page(buf);
   return 0;
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86COWStats --
 *
 *       Dump stats about page sharing.  We identify the
 *       VMMEM_COW_HOT_PAGES pages with the highest ref counts and
 *       report them to the vmx, ordered by increasing ref count,
 *       together with the global sharing counters.
 *
 *       NOTE(review): this walks cowPageTable without asserting the
 *       global lock; presumably the caller holds it -- confirm.
 *
 * Results:
 *       COW sharing statistics are filled into info.
 *
 * Side effects:
 *       None
 *
 *----------------------------------------------------------------------
 */

static void
Vmx86COWStats(VMMemCOWInfo *info)
{
   COWPage *hot[VMMEM_COW_HOT_PAGES];
   COWPage *page;
   COWPage *swap;
   uint32 refSum = 0;
   int bucket;
   int slot;
   int last;

   for (slot = 0; slot < VMMEM_COW_HOT_PAGES; slot++) {
      hot[slot] = NULL;
   }

   for (bucket = 0; bucket < COW_PAGE_TABLE_SIZE; bucket++) {
      for (page = cowPageTable[bucket]; page != NULL; page = page->next) {
         uint32 pageRef = Vmx86COWPageRef(page);

         refSum += pageRef;

         /*
          * hot[] is kept sorted by increasing ref count, so hot[0] is
          * the coolest candidate (an empty slot counts as 0).
          */
         if (pageRef <= Vmx86COWPageRef(hot[0])) {
            continue;
         }

         /* Replace the coolest candidate, then restore the ordering. */
         hot[0] = page;
         for (last = VMMEM_COW_HOT_PAGES - 1; last >= 0; last--) {
            for (slot = 0; slot < last; slot++) {
               if (Vmx86COWPageRef(hot[slot]) >
                   Vmx86COWPageRef(hot[slot + 1])) {
                  swap = hot[slot];
                  hot[slot] = hot[slot + 1];
                  hot[slot + 1] = swap;
               }
            }
         }
      }
   }

   for (slot = 0; slot < VMMEM_COW_HOT_PAGES; slot++) {
      if (hot[slot] == NULL) {
         /* Unused slot. */
         info->hot[slot].mpn = INVALID_MPN;
         info->hot[slot].key = 0;
         info->hot[slot].ref = 0;
      } else {
         info->hot[slot].mpn = hot[slot]->sharedMPN;
         info->hot[slot].key = hot[slot]->key;
         info->hot[slot].ref = Vmx86COWPageRef(hot[slot]);
      }
   }

   info->numRef = refSum;
   info->numHints = cowState.numHints;
   info->uniqueMPNs = cowState.uniqueMPNs;
   info->numBreaks = cowState.numBreaks;
   info->totalUniqueMPNs = cowState.totalUniqueMPNs;
}

   
/*
 *----------------------------------------------------------------------
 *
 * Vmx86COWPageTableWalk --
 *
 *       Walk the bucket of the cow page table selected by key and stop
 *       at the first entry that satisfies args:
 *
 *       COWMatchKey    - the entry's key equals key,
 *       COWMatchStale  - the entry is stale (see Vmx86COWPageIsStale),
 *       COWMatchLocked - the entry is locked.
 *
 *       The global lock must be held by the caller.
 *
 * Results:
 *       TRUE if a match is found; *entry is the match and *prev its
 *       predecessor in the chain (NULL if the match heads the bucket).
 *       FALSE otherwise, with *entry set to NULL.
 *
 * Side effects:
 *       None
 *
 *----------------------------------------------------------------------
 */

static Bool
Vmx86COWPageTableWalk(uint64 key, COWPage **entry, COWPage **prev, 
                      COWPageWalkArgs args)
{
   COWPage *cur;
   COWPage *before = NULL;

   ASSERT(HostIF_GlobalLockIsHeld());

   for (cur = cowPageTable[key & COW_PAGE_TABLE_MASK]; cur != NULL;
        before = cur, cur = cur->next) {
      Bool match;

      switch (args) {
      case COWMatchKey:
         match = cur->key == key;
         break;
      case COWMatchStale:
         match = Vmx86COWPageIsStale(cur);
         break;
      case COWMatchLocked:
         match = cur->locked;
         break;
      default:
         match = FALSE;
         break;
      }
      if (match) {
         break;
      }
   }

   *entry = cur;
   *prev = before;
   return (cur != NULL) ? TRUE : FALSE;
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86COWHintTableWalk --
 *
 *       Walk the bucket of the cow hint table selected by key.  A hint
 *       matches when its key equals key (only checked if matchKey) and
 *       its vm equals vm (only checked if matchVM).  The global lock
 *       must be held by the caller.
 *
 * Results:
 *       TRUE if a match is found; *entry is the match and *prev its
 *       predecessor in the chain (NULL if the match heads the bucket).
 *       FALSE otherwise, with *entry set to NULL.
 *
 * Side effects:
 *       Every hint visited before the match whose origMPN is no longer
 *       locked by its vm (per HostIF_IsLockedByMPN) is freed.
 *
 *----------------------------------------------------------------------
 */

static Bool
Vmx86COWHintTableWalk(VMDriver *vm, uint64 key,
                      COWHint **entry, COWHint **prev,
                      Bool matchVM, Bool matchKey)
{
   COWHint *cur;
   COWHint *before = NULL;

   ASSERT(HostIF_GlobalLockIsHeld());

   cur = cowHintTable[key & COW_HINT_TABLE_MASK];
   while (cur != NULL) {
      if (!HostIF_IsLockedByMPN(cur->vm, cur->origMPN)) {
         /* Dead hint: drop it and carry on with its successor. */
         cur = Vmx86COWFreeHint(cur, before, key);
         continue;
      }
      if ((!matchKey || cur->key == key) &&
          (!matchVM || cur->vm == vm)) {
         break;
      }
      before = cur;
      cur = cur->next;
   }

   *entry = cur;
   *prev = before;
   return (cur != NULL) ? TRUE : FALSE;
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86RemoveHintLocked --
 *
 *       Searches for the COW hint that vm holds for this mpn and
 *       removes it from the hint table.  The hint is looked up by the
 *       hash of the page's current contents, so nothing is removed if
 *       the page cannot be read or no hint with that hash exists.
 *
 *       Several hints of one vm can carry the same key (pages with
 *       identical contents), so the whole bucket is scanned for the
 *       hint whose origMPN is mpn instead of giving up after the first
 *       hint that matches key and vm.
 *
 *       The global lock must be held by the caller.
 *
 * Results:
 *       Hint is removed.
 *
 * Side effects:
 *       Overwrites cowBuf.  Hints met during the scan whose origMPN is
 *       no longer locked by their vm (per HostIF_IsLockedByMPN) are
 *       freed as well.
 *
 *----------------------------------------------------------------------
 */

void
Vmx86RemoveHintLocked(VMDriver *vm, MPN mpn)
{
   COWHint *hint, *prev;
   uint64 key;

   ASSERT(HostIF_GlobalLockIsHeld());

   if (Vmx86COWMPNToKey(mpn, cowBuf, &key) != 0) {
      return; // couldn't read mpn
   }

   prev = NULL;
   hint = cowHintTable[key & COW_HINT_TABLE_MASK];
   while (hint != NULL) {
      if (!HostIF_IsLockedByMPN(hint->vm, hint->origMPN)) {
         /* Same reaping of dead hints as Vmx86COWHintTableWalk(). */
         hint = Vmx86COWFreeHint(hint, prev, key);
      } else if (hint->vm == vm && hint->key == key &&
                 hint->origMPN == mpn) {
         // we found the hint for this exact page
         Vmx86COWFreeHint(hint, prev, key);
         return;
      } else {
         prev = hint;
         hint = hint->next;
      }
   }
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86_COWRemoveHint --
 *
 *       Acquires the global vmmon lock before removing a COW hint for
 *       mpn, and releases it afterwards.  The removal itself is done
 *       by Vmx86RemoveHintLocked().
 *
 * Results:
 *       Hint is removed.
 *
 * Side effects:
 *       The global lock is taken and released.
 *
 *----------------------------------------------------------------------
 */

void
Vmx86_COWRemoveHint(VMDriver *vm, MPN mpn)
{
   /* NOTE(review): 28 looks like a per-call-site lock id -- confirm. */
   HostIF_GlobalLock(28);
   Vmx86RemoveHintLocked(vm, mpn);
   HostIF_GlobalUnlock(28);
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86COWSharePage --
 *
 *       Attempt to share guest page "bpnToShare" which corresponds to 
 *       machine page mpn.  We first try to share with an already
 *       existing COW page.  If no matching page exists, we check
 *       the hint table for a potential sharing opportunity.  If 
 *       no matching hint exists, we add a hint for this page for
 *       future sharing opportunities.  
 *
 *       The global lock must be held by the caller.
 *
 * Results:
 *       Page may be shared, hint page may be added.  BpnToShare is
 *       INVALID_BPN if no sharing is possible and a hint cannot
 *       be added.  HintOnly Bool is TRUE iff a hint is added for this
 *       page.  MPN contains the potentially new MPN corresponding
 *       guest page "bpnToShare".  
 *
 * Side effects:
 *       Overwrites cowBuf and cowContents.  May queue a hint update
 *       on the vm that owns a matching hint.
 *
 *----------------------------------------------------------------------
 */

static void
Vmx86COWSharePage(VMDriver *vm, BPN *bpnToShare, MPN *mpn,
                  Bool *hintOnly)
{
   COWPage *entry, *prev;
   COWHint *hint, *prevHint;
   uint64 key, matchKey;
   BPN bpn = *bpnToShare;
   PShare_HintUpdate hintUpdate;
   VMDriver *hintVM;
   
   ASSERT(HostIF_GlobalLockIsHeld());
   
   /* Assume failure until one of the branches below succeeds. */
   *bpnToShare = INVALID_BPN;
   *hintOnly = FALSE;
   
   if (bpn == INVALID_BPN) {
      // nothing to share for this page
      return;
   }

   /* We fill in cowContents here and reference it throughout this function. */
   if (Vmx86COWMPNToKey(*mpn, cowContents, &key) != 0) {
      return; // couldn't read mpn
   }

   /* Drop any hint this vm already holds for the page. */
   Vmx86RemoveHintLocked(vm, *mpn);

   if (Vmx86COWPageTableWalk(key, &entry, &prev, COWMatchKey)) {
      /*
       * Match in the COW page table.  Do a full comparison and
       * share the page.  If a false match occurs, we abandon this
       * page altogether without trying to share it further.  
       * Note that we have already read *mpn into cowContents when
       * we computed the hash key.
       */
      ASSERT(entry->sharedMPN != INVALID_MPN);
      if (HostIF_ReadPage(entry->sharedMPN, cowBuf, TRUE) == 0 &&
          memcmp(cowBuf, cowContents, PAGE_SIZE) == 0) {
         HostIF_VMLock(vm, 25);
         entry->ref[vm->cowID]++;
         vm->memInfo.shared++;
         HostIF_VMUnlock(vm, 25);
         *mpn = entry->sharedMPN;
         *bpnToShare = bpn;
      }
   } else if (Vmx86COWHintTableWalk(vm, key, &hint, &prevHint,
                                    FALSE, TRUE)) {
      /*
       * Match in the hint table.  Refresh the hint hash to make sure
       * it is still valid.  If it is, create a shared page and queue
       * an update to the hint owner to try sharing the page again.
       */
      ASSERT(HostIF_IsLockedByMPN(hint->vm, hint->origMPN));
      hintVM = hint->vm;
      if (hintVM->hintIndex < PSHARE_HINT_UPDATES_MAX &&
          Vmx86COWMPNToKey(hint->origMPN, cowBuf, &matchKey) == 0) {
         hintUpdate.bpn = hint->bpn;
         if (matchKey != hint->key) {
            /* The hinted page changed; remember our page instead. */
            hintUpdate.status = PSHARE_HINT_STALE;
            if (Vmx86COWAllocHint(vm, bpn, key, *mpn) != NULL) {
               *hintOnly = TRUE;
               *bpnToShare = bpn;
            }
         } else {
            hintUpdate.status = PSHARE_HINT_MATCH;
            if ((entry = Vmx86COWAllocPage(vm, key, FALSE)) != NULL) {
               if (HostIF_WritePage(entry->sharedMPN, cowContents, TRUE) == 0) {
                  Vmx86COWFreeHint(hint, prevHint, key);
                  HostIF_VMLock(vm, 25);
                  entry->ref[vm->cowID]++;
                  vm->memInfo.shared++;
                  HostIF_VMUnlock(vm, 25);
                  *mpn = entry->sharedMPN;
                  *bpnToShare = bpn;
               } else {
                  /*
                   * The new shared page could not be populated.  Leaving
                   * it in the table would leak a locked page and defeat
                   * every later attempt to share pages with this key (the
                   * content comparison would never succeed).  The entry
                   * was just linked at the head of its bucket and the
                   * global lock is still held, so it has no predecessor.
                   */
                  Vmx86COWFreePage(entry, NULL, key);
               }
            }
         }
         HostIF_VMLock(hintVM, 25);
         hintVM->hintUpdate[hintVM->hintIndex++] = hintUpdate;
         HostIF_VMUnlock(hintVM, 25);
      }
   } else {
      /* 
       * No match in either the hint table or page table.
       * Add a hint for this page for future sharing.
       */
      if (Vmx86COWAllocHint(vm, bpn, key, *mpn) != NULL) {
         *hintOnly = TRUE;
         *bpnToShare = bpn;
      }
   }
}

/*
 *----------------------------------------------------------------------
 *
 * Vmx86COWFreeAllResources --
 *
 *      Clean up COW state associated with this vm.  Nothing is done
 *      for a vm that holds no cowID.  Otherwise the global lock must
 *      be held by the caller.
 *
 * Results:
 *      All hints owned by vm are freed, vm's ref count is cleared on
 *      every COW page, and every COW page that is stale afterwards is
 *      freed.
 *
 * Side effects:
 *       None.
 *
 *----------------------------------------------------------------------
 */

static void
Vmx86COWFreeAllResources(VMDriver *vm)
{
   COWHint *hint, *prevHint;
   COWPage *page, *prevPage;
   int bucket;

   if (vm->cowID == INVALID_COWID) {
      return;
   }
   ASSERT(HostIF_GlobalLockIsHeld());

   /*
    * Drop every hint owned by this vm.  The walk is restarted after
    * each removal because freeing a hint invalidates the cursor.
    */
   for (bucket = 0; bucket < COW_HINT_TABLE_SIZE; bucket++) {
      while (Vmx86COWHintTableWalk(vm, bucket, &hint, &prevHint,
                                   TRUE, FALSE)) {
         Vmx86COWFreeHint(hint, prevHint, hint->key);
      }
   }

   /*
    * We could reuse the cowID for this vm so we need to explicitly clear
    * the ref counts in the hash table.
    */
   for (bucket = 0; bucket < COW_PAGE_TABLE_SIZE; bucket++) {
      for (page = cowPageTable[bucket]; page != NULL; page = page->next) {
         page->ref[vm->cowID] = 0;
      }
   }

   /*
    * Now we can reap unreferenced pages.
    */
   for (bucket = 0; bucket < COW_PAGE_TABLE_SIZE; bucket++) {
      while (Vmx86COWPageTableWalk(bucket, &page, &prevPage,
                                   COWMatchStale)) {
         Vmx86COWFreePage(page, prevPage, page->key);
      }
   }
}


#ifdef VMX86_DEBUG

/*
 *----------------------------------------------------------------------
 *
 * Vmx86VMIsRegistered --
 *
 *      Check if "vm" is on the list of VMDrivers.  When needsLock is
 *      TRUE the global lock is taken around the search; otherwise the
 *      caller must already hold it.
 *
 * Results:
 *      Return TRUE iff "vm" is on the list of VMDrivers.
 *
 * Side effects:
 *      none.
 *
 *----------------------------------------------------------------
 */

static Bool
Vmx86VMIsRegistered(VMDriver *vm, Bool needsLock) 
{
   VMDriver *cur;

   ASSERT(needsLock || HostIF_GlobalLockIsHeld());

   if (needsLock) {
      HostIF_GlobalLock(5);
   }

   /* Stop on vm itself or at the end of the list, whichever comes first. */
   cur = vmDriverList;
   while (cur != NULL && cur != vm) {
      cur = cur->nextDriver;
   }

   if (needsLock) {
      HostIF_GlobalUnlock(5);
   }

   return (cur != NULL) ? TRUE : FALSE;
}

#endif

/*
 *----------------------------------------------------------------------
 *
 * Vmx86RegisterVMOnList --
 *
 *      Add a VM to the list of registered VMs and increment
 *      the count of VMs.  The VM is also given a cowID and its
 *      own count field is incremented.
 *
 *      A VM that is already on the list is rejected with a warning
 *      before any state is modified, so a duplicate registration
 *      neither consumes a cowID nor skews the counters.
 *
 *      The global lock must be held by the caller.
 *
 * Results:
 *      none.
 *
 * Side effects:
 *      Add VM to linked list.
 *      Increment count of VMs.
 *
 *----------------------------------------------------------------
 */

static void
Vmx86RegisterVMOnList(VMDriver *vm) 
{
   VMDriver **vmp;

   ASSERT(HostIF_GlobalLockIsHeld());

   /* Find the tail of the list, bailing out if vm is already present. */
   for (vmp = &vmDriverList; *vmp != NULL; vmp = &(*vmp)->nextDriver) {
      if (*vmp == vm) {
         Warning("VM %p already registered on the list of VMs.\n", vm);
         return;
      }
   }

   /* Only now that registration is certain to happen, update the state. */
   ASSERT(vm->cowID == INVALID_COWID);
   vm->cowID = Vmx86COWAllocID();
   vm->count++;
   vmCount++;
   *vmp = vm;
}

/*
 *----------------------------------------------------------------------
 *
 * Vmx86DeleteVMFromList --
 *
 *      Delete a VM from the list of registered VMs and decrement
 *      the count of VMs. This function should be called on any
 *      VM registered on the VMDriverList before invoking 
 *      Vmx86FreeAllVMResources to free its memory.
 *
 *      A VM that is not on the list only triggers a warning.  The
 *      global lock must be held by the caller.
 *
 * Results:
 *      none.
 *
 * Side effects:
 *      Remove VM from linked list.
 *      Decrement count of VMs.
 *      Release the VM's COW resources and cowID, and subtract its
 *      locked pages from numLockedPages.
 *
 *----------------------------------------------------------------
 */

static void
Vmx86DeleteVMFromList(VMDriver *vm) 
{
   VMDriver **link;

   ASSERT(HostIF_GlobalLockIsHeld());

   /* Find the pointer that refers to vm. */
   link = &vmDriverList;
   while (*link != vm) {
      if (*link == NULL) {
         Warning("VM %p is not on the list of registered VMs.\n", vm);
         return;
      }
      link = &(*link)->nextDriver;
   }

   /* Unlink it. */
   *link = vm->nextDriver;
   vmCount--;

   /* Give back everything the VM held in the page sharing tables. */
   Vmx86COWFreeAllResources(vm);
   Vmx86COWFreeID(vm->cowID);   
   numLockedPages -= vm->memInfo.locked;
}

/*
 *----------------------------------------------------------------------
 *
 * Vmx86FreeAllVMResources
 *
 *     Free the resources allocated for a vm that is not registered
 *     on the VMDriverList.  Except in the case of Vmx86_CreateVM(), 
 *     this should be called only after a call to Vmx86DeleteVMFromList().
 *     The global lock must NOT be held by the caller.  A NULL vm is
 *     ignored.
 *
 * Results:
 *      None
 *
 * Side effects:
 *      Memory freed.
 *
 *----------------------------------------------------------------------
 */

static void
Vmx86FreeAllVMResources(VMDriver *vm)
{
   ASSERT(!HostIF_GlobalLockIsHeld());

   if (vm == NULL) {
      return;
   }
   ASSERT(!Vmx86VMIsRegistered(vm, TRUE));

#ifdef USE_PERFCTRS_HOSTED
   /*
    * Can't log in PerfCtr_Release() when we're holding the lock
    * and (in any case) the VM has been dequeued.
    */
   PerfCtr_Release(vm, FALSE);
#endif

   /* Withdraw this VM's host clock rate request. */
   Vmx86_SetHostClockRate(vm, 0);

   HostIF_FreeAllResources(vm);

   if (vm->vmhost != NULL) {
      HostIF_FreeKernelMem(vm->vmhost);
   }
   HostIF_FreeKernelMem(vm);
}

/*
 *----------------------------------------------------------------------
 *
 * Vmx86_COWCopyPage --
 *
 *       Break COW sharing on sharedMPN.  If sharedMPN is no longer
 *       referenced, it is returned to the host.
 *
 * Results:
 *       None.
 *
 * Side effects:
 *       Decrement ref count on sharedMPN and potentially free the
 *       shared page.
 *
 *----------------------------------------------------------------------
 */

void
Vmx86_COWCopyPage(VMDriver *vm, MPN sharedMPN)
{
   uint64 hashKey;
   COWPage *page, *prevPage;

   HostIF_GlobalLock(26);
   HostIF_VMLock(vm, 26);

   /*
    * Derive the key for sharedMPN and look it up.  Do nothing unless the
    * key can be computed, an entry is found, and that entry tracks
    * exactly this MPN.
    */
   if (Vmx86COWMPNToKey(sharedMPN, cowBuf, &hashKey) != 0 ||
       !Vmx86COWPageTableWalk(hashKey, &page, &prevPage, COWMatchKey) ||
       sharedMPN != page->sharedMPN) {
      goto unlock;
   }

   /* Drop this VM's reference and update the sharing statistics. */
   ASSERT(page->ref[vm->cowID] > 0);
   page->ref[vm->cowID]--;
   vm->memInfo.shared--;
   cowState.numBreaks++;

   /* Free the entry once Vmx86COWPageIsStale() reports it stale. */
   if (Vmx86COWPageIsStale(page)) {
      Vmx86COWFreePage(page, prevPage, hashKey);
   }

unlock:
   HostIF_VMUnlock(vm, 26);
   HostIF_GlobalUnlock(26);
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86_COWSharePages --
 *
 *      Attempt to share page list described by info.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      VCPU thread continues.
 *
 *----------------------------------------------------------------------
 */

void
Vmx86_COWSharePages(VMDriver *vm, COWShareInfo *info)
{
   /* Single page-sized scratch buffer; only touched under the global lock. */
   static char pageBuf[PAGE_SIZE];
   PShare_List *shareList = (PShare_List *)pageBuf;
   int idx;

   HostIF_GlobalLock(25);

   if (HostIF_ReadPage(info->pshareMPN, pageBuf, TRUE) != 0) {
      /* Could not fetch the request page at all. */
      info->shareFailure = TRUE;
   } else {
      /*
       * Process each request in the page, then write the (possibly
       * updated) page back to the same MPN.
       *
       * NOTE(review): info->numPages is not checked here against the
       * capacity of the PShare_List arrays -- confirm the caller bounds it.
       */
      for (idx = 0; idx < info->numPages; idx++) {
         Vmx86COWSharePage(vm, &shareList->bpnList[idx],
                           &shareList->mpnList[idx],
                           &shareList->hintOnlyList[idx]);
      }
      if (HostIF_WritePage(info->pshareMPN, pageBuf, TRUE) != 0) {
         info->shareFailure = TRUE;
      }
   }

   // hints waiting for us from other vms?
   info->updateHints = vm->hintIndex > 0;

   HostIF_GlobalUnlock(25);
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86_COWGetHintUpdates --
 *
 *      Attempt to get hint updates.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      Returned hints are removed from the VM's pending hint updates.
 *
 *----------------------------------------------------------------------
 */

void
Vmx86_COWGetHintUpdates(VMDriver *vm, COWHintInfo *info)
{
   unsigned nCopy;

   ASSERT(info->nUpdates <= PSHARE_HINT_BATCH_PAGES_MAX);

   HostIF_VMLock(vm, 23);

   /* Hand back at most as many hints as the caller asked for. */
   nCopy = MIN(vm->hintIndex, info->nUpdates);

   /*
    * The hints returned are the last nCopy entries below hintIndex;
    * lowering hintIndex first makes it point at the start of that run.
    */
   vm->hintIndex = vm->hintIndex - nCopy;
   memcpy(info->updates, vm->hintUpdate + vm->hintIndex,
          nCopy * sizeof(PShare_HintUpdate));

   /* Tell the caller how many entries were actually copied. */
   info->nUpdates = nCopy;

   HostIF_VMUnlock(vm, 23);
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86_COWCheckPages --
 *
 *      Verifies the consistency of memory pages.  This allows the monitor
 *      to react to potential memory corruption or a disagreement in cow
 *      state between the vmm and vmmon.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      Warnings/messages could be spewed if memory corruption is found.
 *
 *----------------------------------------------------------------------
 */

void
Vmx86_COWCheckPages(VMDriver *vm, COWCheckInfo *info)
{
   unsigned pageIdx;

   for (pageIdx = 0; pageIdx < info->numPages; pageIdx++) {
      PShare_COWCheckInfo *cur = &info->check[pageIdx];
      COWPage *page, *prevPage;
      uint64 hashKey;
      Bool isHostCOW;

      cur->checkOK = TRUE;

      /* The global lock is taken and dropped once per checked page. */
      HostIF_GlobalLock(29);

      /*
       * The host considers the page COW-shared for this VM only if the
       * key can be computed, an entry is found for it, that entry tracks
       * the very MPN the vmm reported, and this VM holds a reference.
       */
      isHostCOW = Vmx86COWMPNToKey(cur->vmmMPN, cowBuf, &hashKey) == 0 &&
                  Vmx86COWPageTableWalk(hashKey, &page, &prevPage,
                                        COWMatchKey) &&
                  page->sharedMPN == cur->vmmMPN &&
                  page->ref[vm->cowID] > 0;

      cur->hostCOW = isHostCOW ? TRUE : FALSE;
      cur->hostMPN = isHostCOW ? page->sharedMPN : cur->vmmMPN;

      HostIF_GlobalUnlock(29);
   }
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86_COWGetZeroMPN --
 *
 *      Retrieve shared mpn for the zero page.  
 *
 * Results:
 *      Single zero mpn.
 *
 * Side effects:
 *      None.
 *
 *----------------------------------------------------------------------
 */

MPN
Vmx86_COWGetZeroMPN(void)
{
   uint64 key;
   COWPage *entry, *prev;
   MPN mpn = INVALID_MPN;

   HostIF_GlobalLock(33);
   memset(cowBuf, 0, PAGE_SIZE);
   key = Hash_Page(cowBuf);
   if (Vmx86COWPageTableWalk(key, &entry, &prev, COWMatchKey)) {
      if (HostIF_ReadPage(entry->sharedMPN, cowContents, TRUE) == 0 &&
          memcmp(cowBuf, cowContents, PAGE_SIZE) == 0) {
         mpn = entry->sharedMPN;
      }
   }
   HostIF_GlobalUnlock(33);
   return mpn;
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86_InitCOWList --
 *
 *       Set up the list of remaining cowID's.  These index into the ref
 *       count field of the cow page structure.
 *
 * Results:
 *       Sets up global data.
 *
 * Side effects:
 *       None
 *
 *----------------------------------------------------------------------
 */

void
Vmx86_InitCOWList(void)
{
   COWPage *zeroPage;
   uint64 zeroKey;
   int id, status;

   HostIF_GlobalLock(32);

   /*
    * Chain every cowID to its successor; the last one terminates the
    * chain.  The unused list starts at id 0 and the allocated list
    * starts out as INVALID_COWID.
    */
   for (id = 0; id < MAX_VMS - 1; id++) {
      cowIDList[id] = id + 1;
   }
   cowIDList[MAX_VMS - 1] = INVALID_COWID;
   cowIDsUnused = 0;
   cowIDsAllocated = INVALID_COWID;

   /*
    * Always keep a locked reference to the zero page: allocate an entry
    * keyed by the hash of an all-zero page and fill its MPN with zeroes.
    */
   memset(cowBuf, 0, PAGE_SIZE);
   zeroKey = Hash_Page(cowBuf);
   zeroPage = Vmx86COWAllocPage(NULL, zeroKey, TRUE);
   if (zeroPage != NULL) {
      status = HostIF_WritePage(zeroPage->sharedMPN, cowBuf, TRUE);
      ASSERT_NOT_IMPLEMENTED(status == 0);
   }

   HostIF_GlobalUnlock(32);
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86_COWCleanup --
 *
 *      For every index below COW_PAGE_TABLE_SIZE, free each COW page
 *      entry that Vmx86COWPageTableWalk() finds with COWMatchLocked,
 *      then check that a COWMatchKey walk finds nothing for any index.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      COW page entries are freed.  The global lock is acquired and
 *      released.
 *
 *----------------------------------------------------------------------
 */

void
Vmx86_COWCleanup(void)
{
   COWPage *entry, *prev;
   int i;

   HostIF_GlobalLock(34);
   for (i = 0; i < COW_PAGE_TABLE_SIZE; i++) {
      /* Each successful walk yields one entry, which is freed at once. */
      while (Vmx86COWPageTableWalk(i, &entry, &prev, COWMatchLocked)) {
         Vmx86COWFreePage(entry, prev, entry->key);
      }
   }

   // all entries should be gone now!
   for (i = 0; i < COW_PAGE_TABLE_SIZE; i++) {
      /*
       * This is a check, not a loop: nothing here removes the entry, so
       * repeating the walk would spin forever with the global lock held
       * in any build where ASSERT does not halt execution.
       */
      if (Vmx86COWPageTableWalk(i, &entry, &prev, COWMatchKey)) {
         ASSERT(FALSE);
      }
   }
   HostIF_GlobalUnlock(34);
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86ReserveFreePages --
 *
 *       Returns TRUE and increases locked page counts if the vm can lock 
 *       more pages.  This is true if we are below the host's hard memory 
 *       limit and this vm has not exceeded its maximum allocation.
 *       The function is thread-safe.
 *
 * Results:
 *       TRUE if pages are reserved for locking, FALSE otherwise
 *
 * Side effects:
 *       The global lock and VM's lock are acquired and released.
 *
 *----------------------------------------------------------------------
 */

static Bool
Vmx86ReserveFreePages(VMDriver *vm,
                      unsigned int numPages)
{
   Bool reserved = FALSE;
   int attempt;

   ASSERT(vm);

   /* Up to three attempts, each made with both locks held. */
   for (attempt = 0; attempt < 3; attempt++) {
      Bool vmLimitOK;

      HostIF_GlobalLock(17);
      HostIF_VMLock(vm, 0);

      // Check VM's limit and don't wait.
      vmLimitOK = Vmx86HasFreePages(vm, numPages, TRUE);
      if (vmLimitOK) {
         // Now check against the global limit.
         reserved = Vmx86HasFreePages(vm, numPages, FALSE);
         if (reserved) {
            numLockedPages += numPages;
            vm->memInfo.locked += numPages;
         }
      }

      HostIF_VMUnlock(vm, 0);
      HostIF_GlobalUnlock(17);

      /*
       * Stop on success, and also when the VM's own limit is the
       * obstacle: waiting for free pages cannot help in that case.
       */
      if (!vmLimitOK || reserved) {
         break;
      }

      /*
       * There are not enough pages -- the locks are dropped, so wait for
       * the host and/or other VMs to produce free pages.
       */
      HostIF_WaitForFreePages(10);
   }

   return reserved;
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86UnreserveFreePages --
 *
 *       Decreases the global and VM's locked page counts. 
 *       The function is thread-safe. 
 *
 * Results:
 *       void
 *
 * Side effects:
 *       The global lock and VM's lock are acquired and released.
 *
 *----------------------------------------------------------------------
 */

static void
Vmx86UnreserveFreePages(VMDriver *vm,
                        unsigned int numPages)
{
   ASSERT(vm);

   HostIF_GlobalLock(18);
   HostIF_VMLock(vm, 1);

   /* Host-wide count of locked pages. */
   ASSERT(numLockedPages >= numPages);
   numLockedPages -= numPages;

   /* This VM's count of locked pages. */
   ASSERT(vm->memInfo.locked >= numPages);
   vm->memInfo.locked -= numPages;

   HostIF_VMUnlock(vm, 1);
   HostIF_GlobalUnlock(18);
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86_CreateVM --
 *
 *      Allocate and initialize a driver structure for a virtual machine.
 *
 * Results:
 *      VMDriver structure or NULL on error.
 *
 * Side effects:
 *       Memory allocated.
 *
 *----------------------------------------------------------------------
 */

VMDriver *
Vmx86_CreateVM(void *uniqHandle,             // IN: Unique id for VM to be
                                             //     created
               uintptr_t processId)          // IN: Process creating VM
{
   VMDriver *vm = HostIF_AllocKernelMem(sizeof *vm, TRUE);
   Bool registered = FALSE;
   Vcpuid vcpu;

   if (vm == NULL) {
      return NULL;
   }
   memset(vm, 0, sizeof *vm);

   /* The id is derived from the address of the structure itself. */
   ASSERT(sizeof (uintptr_t) == sizeof vm);
   vm->id = (unsigned)(uintptr_t)vm >> 1;
   vm->uniqHandle = uniqHandle;
   vm->processID = processId;
   vm->memInfo.admitted = FALSE;
   vm->hintIndex = 0;
   vm->cowID = INVALID_COWID;
   for (vcpu = 0; vcpu < MAX_INITBLOCK_CPUS; vcpu++) {
      vm->currentHostCpu[vcpu] = INVALID_HOST_CPU;
   }

   /*
    * Register the VM only if host-specific init succeeded and the
    * MAX_VMS cap has not been reached.
    */
   if (!HostIF_Init(vm)) {
      HostIF_GlobalLock(0);
      if (vmCount < MAX_VMS) {
         Vmx86RegisterVMOnList(vm);
         registered = TRUE;
      }
      HostIF_GlobalUnlock(0);
   }

   if (registered) {
      return vm;
   }

   /*
    * The VM is not on a list, "vmCount" has not been incremented,
    * "vm->cowID" is INVALID_COWID, and either the VM's mutex hasn't
    * been initialized or we've only taken the global lock and checked
    * a counter since, so we know that the VM has not yet locked any
    * pages.
    */
   ASSERT(vm->memInfo.locked == 0);
   Vmx86FreeAllVMResources(vm);
   return NULL;
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86_BindVM  --
 *
 *      Bind to an existing VM.
 *
 * Results:
 *      VMDriver structure or NULL on error.
 *
 * Side effects:
 *	Increment VM reference count.
 *
 *----------------------------------------------------------------------
 */

VMDriver *
Vmx86_BindVM(int id)
{
   VMDriver *cur;

   HostIF_GlobalLock(14);

   /* Walk the registered VMs until one with a matching id turns up. */
   cur = vmDriverList;
   while (cur != NULL && cur->id != id) {
      cur = cur->nextDriver;
   }

   /* A successful bind takes one more reference on the VM. */
   if (cur != NULL) {
      cur->count++;
   }

   HostIF_GlobalUnlock(14);

   /* NULL when no registered VM has this id. */
   return cur;
}

/*
 *----------------------------------------------------------------------
 *
 * Vmx86_ReleaseVM  --
 *
 *      Release a VM (either created here or from a bind).
 *
 * Results:
 *      zero if successful
 *
 * Side effects:
 *	Decrement VM reference count.
 *      Release resources (those that are left) when count reaches 0.
 *
 *----------------------------------------------------------------------
 */
int
Vmx86_ReleaseVM(VMDriver *vm)
{
   Bool lastRef;

   HostIF_GlobalLock(1);

   /*
    * Do reference counting first: only the release that drops the
    * count to zero tears the VM down.
    */
   vm->count--;
   lastRef = !(vm->count > 0);
   if (lastRef) {
      ASSERT(vm->count == 0);
      Vmx86DeleteVMFromList(vm);
   }

   HostIF_GlobalUnlock(1);

   /* The resources are freed after the global lock has been dropped. */
   if (lastRef) {
      Vmx86FreeAllVMResources(vm);
   }

   return 0;
}

/*
 *------------------------------------------------------------------------------
 *
 * Vmx86_InitVM --
 *
 *    Initialization of the VM.  Expects all initial arguments
 *    to be part of the InitBlock structure.
 *
 * Results:
 *    0 on success
 *    != 0 on failure
 *
 * Side effects:
 *    Many
 *
 *------------------------------------------------------------------------------
 */

int
Vmx86_InitVM(VMDriver *vm,          // IN
             InitBlock *initParams) // IN: Initial params from the VM
{
   /* Counts calls that reach the failure-injection check below. */
   static uint32 initCount = 0;
   int status;

   /* Reject init blocks that are malformed or ask for too many VCPUs. */
   if (initParams->magicNumber != INIT_BLOCK_MAGIC) {
      Warning("Bad magic number for init block 0x%x\n", initParams->magicNumber);
      return 1;
   }
   if (initParams->numVCPUs >= MAX_INITBLOCK_CPUS) {
      Warning("Too many VCPUs for init block %d\n", initParams->numVCPUs);
      return 1;
   }
   vm->numVCPUs = initParams->numVCPUs;

   HostIF_InitFP(vm);
   HostIF_InitEvent(vm);

   /*
    * Initialize the driver's part of the cross-over page used to
    * talk to the monitor
    */
   status = Task_InitCrosspage(vm, initParams);
   if (status != 0) {
      Warning("Task crosspage init died with retval=%d\n", status);
      /*
       *  Note that any clean-up of resources will be handled during
       *  power-off when Vmx86_ReleaseVM() is called as part of
       *  MonitorLoop_PowerOff().
       */
      return 1;
   }

   /*
    *  Check if we want to arbitrarily fail every N VM initializations.
    *  Useful in testing PR 72482.
    */
   if (initParams->vmInitFailurePeriod != 0) {
      initCount++;
      if (initCount % initParams->vmInitFailurePeriod == 0) {
         Warning("VM initialization failed on %d iteration\n", initCount);
         return 1;
      }
   }

   return 0;
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86_LateInitVM --
 *
 *      Do late initialization of the driver.
 *	This should be called after Vmx86_CreateVM and
 *	after all the user-level device initialization.
 *
 * Results: 
 *	non-zero on error, zero on success;
 *
 * Side effects:
 *      None.
 *
 *----------------------------------------------------------------------
 */

int
Vmx86_LateInitVM(VMDriver *vm)
{
   /*
    * No late initialization is performed here: vm is not used and
    * success (zero) is always reported.
    */
   return 0;
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86_SetStartTime --
 *
 *      Initialize the data structures that track the starting time of the
 *      virtual machine.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      none
 *
 *
 *----------------------------------------------------------------------
 */

void
Vmx86_SetStartTime(VmTimeStart *st)	// OUT: return value
{
   uintptr_t savedFlags;

   /*
    * Take the cycle counter and uptime samples back to back, bracketed
    * by SAVE_FLAGS/CLEAR_INTERRUPTS and RESTORE_FLAGS.
    */
   SAVE_FLAGS(savedFlags);
   CLEAR_INTERRUPTS();

   st->count = RDTSC();
   st->time = HostIF_ReadUptime();

   RESTORE_FLAGS(savedFlags);
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86_GetkHzEstimate
 *
 *      Return an estimate of the processor's kHz rating, based on
 *      the ratio of the cycle counter and system uptime since the
 *      driver was loaded.
 *      This function could be called (on Windows) at IRQL DISPATCH_LEVEL.
 *
 * Results:
 *      Processor speed in kHz.
 *
 * Side effects:
 *      None.
 *
 *----------------------------------------------------------------------
 */

uint32
Vmx86_GetkHzEstimate(VmTimeStart *st)	// IN: start time
{
   uint64 cDiff, tDiff, freq, hz;
   uintptr_t tmp;
   static uint32 kHz; 

   /* 
    * Cache and return the first result for consistency. 
    * TSC values can be changed without notification.
    * TSC frequency can vary too (SpeedStep, slowing clock on HALT, etc.)
    */
   if (kHz != 0) {
      return kHz;
   }

   /*
    * Sample the cycle counter and the uptime back to back, bracketed by
    * SAVE_FLAGS/CLEAR_INTERRUPTS and RESTORE_FLAGS, and take the
    * differences from the values recorded in st.
    */
   SAVE_FLAGS(tmp);
   CLEAR_INTERRUPTS();

   cDiff = RDTSC() - st->count;
   tDiff = HostIF_ReadUptime() - st->time;

   RESTORE_FLAGS(tmp);

   if (tDiff == 0) {
      goto failure;
   }

   /*
    * Compute the CPU speed in kHz, which is cDiff / (tDiff /
    * HostIF_UptimeFrequency()) / 1000.  We need to do the computation
    * carefully to avoid overflow or undue loss of precision.  Also,
    * on Linux we can't do a 64/64=64 bit division directly, as the
    * gcc stub for that is not linked into the kernel.
    */
   freq = HostIF_UptimeFrequency();
#if defined VM_X86_64 || !defined linux
   /* A zero frequency would make the overflow test below divide by zero. */
   if (freq == 0) {
      goto failure;
   }
   /* Scale both differences down until cDiff * freq fits in 64 bits. */
   while (cDiff > ((uint64) -1) / freq) {
      cDiff >>= 1;
      tDiff >>= 1;
   }
   /* The scaling above may have shifted a small tDiff down to zero. */
   if (tDiff == 0) {
      goto failure;
   }
   hz  = (cDiff * freq) / tDiff;
   kHz = (uint32) ((hz + 500) / 1000);
#else
   {
      uint32 tmpkHz;
      /* On Linux we can't do a 64/64=64 bit division, as the gcc stub
       * for that is not linked into the kernel.  We'll assume that cDiff
       * * freq fits into 64 bits and that tDiff fits into 32 bits.  This
       * is safe given the values used on Linux.
       */
      Div643264(cDiff * freq, tDiff, &hz, &tmp);
      hz += 500;
      /*
       * If result in kHz cannot fit into 32 bits, we would get a divide
       * by zero exception.
       */
      if ((uint32)(hz >> 32) >= 1000) {
         goto failure;
      }
      Div643232(hz, 1000, &tmpkHz, &tmp);
      kHz = tmpkHz;
   }
#endif
   return kHz;

failure:
   /*
    * No estimate could be computed.  The guard must use the macro name
    * defined at the top of this file (VMW_HAS_CPUKHZ); with any other
    * spelling the fallback is silently compiled out.  Without the
    * fallback, kHz is returned unchanged (still 0 at this point).
    */
#ifdef VMW_HAS_CPUKHZ
   /* If we have some reasonable value, use it... */
   kHz = cpu_khz;
#endif
   return kHz;
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86_SetHostClockRate --
 *
 *      The monitor wants to poll for events at the given rate. If no VM
 *      is specified, then 'rate' is ignored and the last set rate is set
 *      again.
 *
 * Results:
 *      0 for success, host-specific error code for failure.
 *
 * Side effects:
 *      May increase the host timer interrupt rate, etc.
 *
 *----------------------------------------------------------------------
 */

int
Vmx86_SetHostClockRate(VMDriver *vm,  // IN: VM instance pointer
                       int rate)      // IN: rate in Hz
{
   VMDriver *cur;
   unsigned maxRate = 0;
   int status = 0;

   /* No VM given: just re-apply the last global rate. */
   if (vm == NULL) {
      Log("Resetting last set host clock rate of %d\n", globalFastClockRate);
      HostIF_FastClockLock(0);
      status = HostIF_SetFastClockRate(globalFastClockRate);
      HostIF_FastClockUnlock(0);
      return status;
   }

   /* Nothing to do if this VM already requests exactly this rate. */
   if (vm->fastClockRate == rate) {
      return 0;
   }

   vm->fastClockRate = rate;

   /*
    * Loop through all vms to find new max rate.  The fast clock lock is
    * taken before the global lock and held until the rate is applied.
    */
   HostIF_FastClockLock(2);
   HostIF_GlobalLock(19);
   cur = vmDriverList;
   while (cur != NULL) {
      if (cur->fastClockRate > maxRate) {
         maxRate = cur->fastClockRate;
      }
      cur = cur->nextDriver;
   }
   HostIF_GlobalUnlock(19);

   /* Only reprogram the host clock when the maximum actually changed. */
   if (maxRate != globalFastClockRate) {
      Log("host clock rate change request %d -> %d\n",
          globalFastClockRate, maxRate);
      globalFastClockRate = maxRate;
      status = HostIF_SetFastClockRate(globalFastClockRate);
   }
   HostIF_FastClockUnlock(2);

   return status;
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86_MonitorPollIPI --
 *
 *      Check for VCPUs that are in the monitor and need an IPI to
 *      fire their next MonitorPoll callback.  Should be called once
 *      per fast timer interrupt if the fast timer is in use.
 *      Otherwise does not need to be called at all, as the normal
 *      timer interrupts will wake up MonitorPoll often enough.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      May send IPIs.
 *
 *----------------------------------------------------------------------
 */

void
Vmx86_MonitorPollIPI(void)
{
   VMDriver *vm;
   /* Real TSC, sampled once and reused for every VM examined below. */
   VmAbsoluteTS rNow = RDTSC();

   /*
    * Loop through all vms -- needs the global lock to protect vmDriverList.
    */
   HostIF_GlobalLock(21);
   for (vm = vmDriverList; vm != NULL; vm = vm->nextDriver) {
      VMCrossPage *crosspage;
      VmAbsoluteTS pNow;
      Vcpuid v, vv;
      VCPUSet firstVCS;
      VmAbsoluteTS t, firstTS;

      /*
       * Convert real TSC to pseudo TSC.  This code will have to
       * change when/if we start to support MP hosts with
       * unsynchronized TSCs (see PR 20499), but for now it's OK to
       * just use the tscAdjustment from VCPU 0's crosspage.
       */
      crosspage = vm->crosspage[0];
      if (!crosspage) {
         continue;  // VCPU is not initialized yet
      }
      pNow = rNow + crosspage->tscAdjustment;

      /*
       * Loop through VCPUs in this VM to find the most up-to-date
       * copy of <monitorPollFirstTS, monitorPollFirstVCPU>.
       *
       * VCPU 0's copy is the starting candidate; vv records which
       * VCPU's crosspage the chosen copy came from.
       */
      vv = 0;
      firstVCS = crosspage->monitorPollFirstVCS;
      firstTS = crosspage->monitorPollFirstTS;
      for (v = 1; v < vm->numVCPUs; v++) {
         crosspage = vm->crosspage[v];
         if (!crosspage) {
            continue;  // VCPU is not initialized yet
         }
         t = crosspage->monitorPollFirstTS;
         /*
          * A zero timestamp is skipped; otherwise a copy whose timestamp
          * compares greater than the current candidate's replaces it.
          * NOTE(review): exact COMPARE_TS semantics are defined
          * elsewhere -- confirm against its definition.
          */
         if (t && COMPARE_TS(t, >, firstTS)) {
            firstVCS = crosspage->monitorPollFirstVCS;
            firstTS = t;
            vv = v;  // remember where we found it
         }
      }
         
      /*
       * Check if it's time to send an IPI
       */
      if (!VCPUSet_IsEmpty(firstVCS) && COMPARE_TS(pNow, >, firstTS)) {
         Bool sentIPI = HostIF_IPI(vm, firstVCS, FALSE);
         if (sentIPI) {
            /* Clear the set in the crosspage the chosen copy came from. */
            vm->crosspage[vv]->monitorPollFirstVCS = VCPUSet_Empty(); // done
#if defined(linux)
            // On Linux, HostIF_IPI is always a broadcast, so there is no
            // point in doing it for more than one VM.
            break;
#endif
         }
      }
   }
   HostIF_GlobalUnlock(21);
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86_CurrentVM --
 *
 *      Return the VMDriver structure associated with
 *      the handle
 *
 *
 * Results:
 *      VMDriver * structure.NULL if not available
 *
 *
 * Side effects:
 *      None.
 *
 *----------------------------------------------------------------------
 */
VMDriver *
Vmx86_CurrentVM(void *uniqHandle)
{
  VMDriver *vm;

  HostIF_GlobalLock(2);

  vm = vmDriverList;

  while (vm) {
     if (vm->uniqHandle == uniqHandle) {
        break;
     }
     vm = vm->nextDriver;
  }

  HostIF_GlobalUnlock(2);

  return vm;
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86_GetVMforProcess  --
 *
 *      Return the VMDriver structure associated with
 *      a process
 *
 *
 * Results:
 *      VMDriver * structure.NULL if not available
 *
 *
 * Side effects:
 *      None.
 *
 *----------------------------------------------------------------------
 */

VMDriver *
Vmx86_GetVMforProcess(uintptr_t processId)
{
   VMDriver *cur;

   HostIF_GlobalLock(3);

   /* Stop at the first registered VM recorded for this process id. */
   for (cur = vmDriverList; cur != NULL; cur = cur->nextDriver) {
      if (cur->processID == processId) {
         break;
      }
   }

   HostIF_GlobalUnlock(3);

   /*
    * NULL if no VM matched.  No reference is taken here on the VM that
    * is returned.
    */
   return cur;
}

/*
 *----------------------------------------------------------------------
 *
 * Vmx86_GetNumVMs  --
 *
 *      Return the number of VMs.
 *
 * Results:
 *      The number of VMs.
 *
 * Side effects:
 *      None.
 *
 *----------------------------------------------------------------------
 */
int32
Vmx86_GetNumVMs(void)
{
   /* The counter is read as-is; the global lock is not taken here. */
   return vmCount;
}

/*
 *----------------------------------------------------------------------
 *
 * Vmx86_GetTotalMemUsage  --
 *
 *      Add up the main memory size of every admitted VM on the list,
 *      each rounded up to a whole megabyte and converted with
 *      PAGES_2_MBYTES.
 *
 * Results:
 *      The accumulated total.
 *
 * Side effects:
 *      The global lock is acquired and released.
 *
 *----------------------------------------------------------------------
 */
int32
Vmx86_GetTotalMemUsage(void)
{
   VMDriver *vm;
   int totalmem = 0;

   HostIF_GlobalLock(15);

   for (vm = vmDriverList; vm != NULL; vm = vm->nextDriver) {
      /*
       * The VM lock is not strictly necessary as the vm will
       * stay on the list until we release the global lock and
       * because of order in which "admitted" and "mainMemSize"
       * are set when each VM is admitted.
       */
      if (vm->memInfo.admitted) {
          totalmem += PAGES_2_MBYTES(ROUNDUP(vm->memInfo.mainMemSize,
                                             MBYTES_2_PAGES(1)));
      }
   }
   
   HostIF_GlobalUnlock(15);
   return totalmem;
}

/*
 * Return memPct percent of 'paged' (rounded down) plus all of 'nonpaged'.
 *
 * The percentage is taken from the quotient and remainder of paged / 100
 * separately.  That gives exactly floor(memPct * paged / 100) but never
 * forms the full memPct * paged product, which wraps around in unsigned
 * arithmetic for large page counts.  Only unsigned-width division is
 * needed.
 */
static INLINE unsigned
Vmx86MinAllocationFunc(unsigned paged, unsigned nonpaged, unsigned memPct)
{
   unsigned wholeHundreds = paged / 100;
   unsigned remainder = paged % 100;

   return wholeHundreds * memPct + (remainder * memPct) / 100 + nonpaged;
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86MinAllocation --
 *
 *      Computes the minimum number of pages that must be allocated to a
 *      specific vm.  The minAllocation for a vm is defined as 
 *      some percentage of guest memory plus 100% of nonpagable (overhead) 
 *      memory.  
 * 
 * Results:
 *	The minAllocation for this vm.  
 *	
 *
 * Side effects:
 *      Analyzes the vm info, requiring the vm lock.
 *
 *----------------------------------------------------------------------
 */

static INLINE unsigned
Vmx86MinAllocation(VMDriver *vm, unsigned memPct)
{
   unsigned pagedPages, overheadPages;

   /* The memInfo fields are read under the VM lock. */
   ASSERT(HostIF_VMLockIsHeld(vm));

   pagedPages = vm->memInfo.paged;
   overheadPages = vm->memInfo.nonpaged;
   return Vmx86MinAllocationFunc(pagedPages, overheadPages, memPct);
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86CalculateGlobalMinAllocation --
 *
 *      Computes the sum of minimum allocations of each vm assuming a given
 *      percentage of guest memory must fit within host RAM. 
 *      
 * Results:
 *	Number of pages that must fit within host ram for a given overcommit
 *      level.
 *	
 *
 * Side effects:
 *      None. The actual minAllocations of each vm are NOT updated during
 *      this computation.
 *
 *----------------------------------------------------------------------
 */

static unsigned
Vmx86CalculateGlobalMinAllocation(unsigned memPct)
{
   VMDriver *vm;
   unsigned minAllocation = 0;
   
   ASSERT(HostIF_GlobalLockIsHeld());
   /* Pages of other vms required to fit inside the hard limit. */
   for (vm = vmDriverList; vm; vm = vm->nextDriver) {  
      HostIF_VMLock(vm, 2);
      if (vm->memInfo.admitted) {
         minAllocation += Vmx86MinAllocation(vm, memPct);
      }
      HostIF_VMUnlock(vm, 2);
   }
   return minAllocation;
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86UpdateMinAllocations --
 *
 *      Updates the minimum allocation for each vm based on the global
 *      overcommitment percentage. 
 * 
 * Results:
 *      minAllocations for vms are changed.
 *	
 *
 * Side effects:
 *      None.
 *
 *----------------------------------------------------------------------
 */

static INLINE_SINGLE_CALLER void
Vmx86UpdateMinAllocations(unsigned memPct)
{
   VMDriver *vm;
   ASSERT(HostIF_GlobalLockIsHeld());
   /* Pages of other vms required to fit inside the hard limit. */
   for (vm = vmDriverList; vm; vm = vm->nextDriver) {
      HostIF_VMLock(vm, 3);
      if (vm->memInfo.admitted) {
         vm->memInfo.minAllocation = Vmx86MinAllocation(vm, memPct);
      }
      HostIF_VMUnlock(vm, 3);
   }
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86_SetConfiguredLockedPagesLimit --
 *
 *      Set the user defined limit on the number of pages that can
 *      be locked.  This limit can be raised at any time but lowered
 *      only if at most 1 vm is running(powering on).  This is the case
 *      to avoid having a user lower the limit as vms are running and
 *      inadvertently cause these vms to crash as they are starved of
 *      memory.  
 *      
 *
 * Results:
 *      Returns TRUE on success and FALSE on failure to set the limit
 *
 * Side effects:
 *      Hard limit may be changed.
 *
 *----------------------------------------------------------------------
 */

Bool
Vmx86_SetConfiguredLockedPagesLimit(unsigned limit)
{
   Bool retval = FALSE;

   HostIF_GlobalLock(4);
   /*
    * Raising (or keeping) the limit is always allowed.  Lowering it is
    * allowed only while at most one VM exists, which includes the case
    * of no VM at all: with nothing running, nothing can be starved.
    */
   if (limit >= lockedPageLimit.configured || vmCount <= 1) {
      lockedPageLimit.configured = limit;
      retval = TRUE;
   }
   HostIF_GlobalUnlock(4);

   return retval;
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86_SetDynamicLockedPagesLimit --
 *
 *      Set the dynamic locked page limit.  This limit is determined by
 *      authd in response to host pressure.  It can be both raised and
 *      lowered at any time.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      Hard limit may be changed.
 *
 *----------------------------------------------------------------------
 */

void
Vmx86_SetDynamicLockedPagesLimit(unsigned limit)
{
   /* Unconditional store of the new dynamic limit, under the global lock. */
   HostIF_GlobalLock(11);
   lockedPageLimit.dynamic = limit;
   HostIF_GlobalUnlock(11);
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86_GetLockedPagesLimit --
 *
 *      Get the hard limit for the number of locked pages.  This is the 
 *      minimum of all the limits: host, configured, and dynamic.
 *
 * Results:
 *      The hard limit, as computed by Vmx86LockedPageLimit() for vm.
 *
 * Side effects:
 *      None in this function itself.
 *
 *----------------------------------------------------------------------
 */

int32
Vmx86_GetLockedPagesLimit(VMDriver* vm)
{
   /* Thin public wrapper: the limit is whatever Vmx86LockedPageLimit() returns. */
   return Vmx86LockedPageLimit(vm);
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86_InitializeLockedPagesLimit --
 *
 *      Set the unconditional locked pages limit.
 *      Driver-wide serialization must be ensured by the caller.
 *
 * Results:
 *      Sets the page limits accordingly.
 *
 * Side effects:
 *      None.
 *
 *----------------------------------------------------------------------
 */

void
Vmx86_InitializeLockedPagesLimit(unsigned host,
                                 unsigned config,
                                 unsigned dynamic)
{
   /*
    * All three components of lockedPageLimit are overwritten without
    * taking any lock in this function.
    */
   lockedPageLimit.host = host;
   lockedPageLimit.configured = config;
   lockedPageLimit.dynamic = dynamic;
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86_LockPage --
 *
 *      Lock a page.
 *
 * Results:
 *      None
 *
 * Side effects:
 *      Number of global and per-VM locked pages increased.
 *
 *----------------------------------------------------------------------
 */

MPN
Vmx86_LockPage(VMDriver *vm,                 // IN: VMDriver
               void* addr,                   // IN: VA of the page to lock
               Bool allowMultipleMPNsPerVA)  // IN: allow locking many pages with the same VA
{
   MPN lockedMPN;

   /* Atomically check and reserve locked memory */
   if (!Vmx86ReserveFreePages(vm, 1)) {
      return PAGE_LOCK_LIMIT_EXCEEDED;
   }

   HostIF_VMLock(vm, 4);
   lockedMPN = HostIF_LockPage(vm, addr, allowMultipleMPNsPerVA);
   HostIF_VMUnlock(vm, 4);

   if (PAGE_LOCK_SUCCESS(lockedMPN)) {
      return lockedMPN;
   }

   /* The lock failed, so give back the page reserved above. */
   Vmx86UnreserveFreePages(vm, 1);
   return lockedMPN;
}



/*
 *----------------------------------------------------------------------
 *
 * Vmx86_UnlockPage --
 *
 *      Unlock a page.
 *
 * Results:
 *      
 *
 * Side effects:
 *      Number of global and per-VM locked pages decreased.
 *
 *----------------------------------------------------------------------
 */
int
Vmx86_UnlockPage(VMDriver *vm, void *addr)
{
   int status;

   HostIF_VMLock(vm, 5);
   status = HostIF_UnlockPage(vm, addr);
   HostIF_VMUnlock(vm, 5);

   /* On failure the locked-page counts are left as they were. */
   if (!PAGE_LOCK_SUCCESS(status)) {
      return status;
   }

   Vmx86UnreserveFreePages(vm, 1);
   return status;
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86_UnlockPageByMPN --
 *
 *      Unlock a page.
 *
 * Results:
 *      
 *
 * Side effects:
 *      Number of global and per-VM locked pages decreased.
 *
 *----------------------------------------------------------------------
 */

int
Vmx86_UnlockPageByMPN(VMDriver *vm, // IN: VMDriver
                      MPN mpn,      // IN: the page to unlock
                      void* addr)   // IN: optional valid VA for this MPN
{
   int status;

   HostIF_VMLock(vm, 6);
   status = HostIF_UnlockPageByMPN(vm, mpn, addr);
   HostIF_VMUnlock(vm, 6);

   /* On failure the locked-page counts are left as they were. */
   if (!PAGE_LOCK_SUCCESS(status)) {
      return status;
   }

   Vmx86UnreserveFreePages(vm, 1);
   return status;
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86_AllocLockedPages --
 *
 *      Allocate physical locked pages from the kernel.
 *
 *      Initially the pages are not mapped to any user or kernel 
 *      address space.
 *
 * Results:
 *      non-negative value on partial/full completion: actual number of
 *      allocated MPNs. MPNs of the allocated pages are copied to 
 *      the caller's buffer.
 *
 *	negative system specific error code on error (NTSTATUS on Windows, etc.)
 *
 * Side effects:
 *      Number of global and per-VM locked pages is increased.
 *
 *----------------------------------------------------------------------
 */

int
Vmx86_AllocLockedPages(VMDriver* vm,         // IN: VMDriver
                       MPN32* mpns,          // OUT: buffer for allocated MPNs
                       unsigned numPages,    // IN: number of pages to allocate
                       Bool kernelMPNBuffer) // IN: is the MPN buffer in kernel or user address space?
{
   int nAllocated;

   /* Reserve room for the whole request up front. */
   if (!Vmx86ReserveFreePages(vm, numPages)) {
      return PAGE_LOCK_LIMIT_EXCEEDED;
   }

   HostIF_VMLock(vm, 7);
   nAllocated = HostIF_AllocLockedPages(vm, mpns, numPages, kernelMPNBuffer);
   HostIF_VMUnlock(vm, 7);

   /*
    * Give back whatever part of the reservation went unused: all of it
    * on error, or the shortfall on a partial allocation.
    */
   if (nAllocated < 0 || nAllocated < numPages) {
      Vmx86UnreserveFreePages(vm, nAllocated < 0 ? numPages
                                                 : numPages - nAllocated);
   }

   return nAllocated;
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86_FreeLockedPages --
 *
 *      Frees physical locked pages from the kernel previously allocated 
 *      by Vmx86_AllocLockedPages().
 *
 * Results:
 *	0 on success,
 *	non-0 system specific error code on error (NTSTATUS on Windows, etc.)
 *
 * Side effects:
 *      Number of global and per-VM locked pages is decreased.
 *
 *----------------------------------------------------------------------
 */

int
Vmx86_FreeLockedPages(VMDriver *vm,         // IN: VM instance pointer
                      const MPN32* mpns,    // IN: user or kernel array of MPNs to free
                      unsigned numPages,    // IN: number of pages to free
                      Bool kernelMPNBuffer) // IN: is the MPN buffer in kernel or user address space?
{
   int status;

   HostIF_VMLock(vm, 8);
   status = HostIF_FreeLockedPages(vm, mpns, numPages, kernelMPNBuffer);
   HostIF_VMUnlock(vm, 8);

   /* The locked-page counts are only lowered when the free succeeded. */
   if (status != 0) {
      return status;
   }

   Vmx86UnreserveFreePages(vm, numPages);
   return status;
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86_IsAnonPage --
 *
 *      Queries the driver to see if the mpn is an anonymous page.
 *
 * Results:
 *      True if mpn is an anonymous page, false otherwise.
 *
 * Side effects:
 *      None.
 *
 *----------------------------------------------------------------------
 */

Bool
Vmx86_IsAnonPage(VMDriver *vm,       // IN: VM instance pointer
                 const MPN32 mpn)    // IN: MPN we are asking about
{
   Bool isAnon;

   /* The host-specific query is made with the VM lock held. */
   HostIF_VMLock(vm, 16);
   isAnon = HostIF_IsAnonPage(vm, mpn);
   HostIF_VMUnlock(vm, 16);

   return isAnon;
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86_GetLockedPageList --
 *
 *      puts MPNs of pages that were allocated by HostIF_AllocLockedPages()
 *      into user mode buffer.
 *
 * Results:
 *	non-negative number of the MPNs in the buffer on success.
 *	negative error code on error.
 *
 * Side effects:
 *      none
 *
 *----------------------------------------------------------------------
 */

int
Vmx86_GetLockedPageList(VMDriver* vm,          // IN: VM instance pointer
                        MPN32* mpns,           // OUT: user mode buffer for MPNs
                        unsigned int numPages) // IN: size of the buffer in MPNs
{
   int numMPNs;

   /* The host-specific list walk is done with the per-VM lock held. */
   HostIF_VMLock(vm, 9);
   numMPNs = HostIF_GetLockedPageList(vm, mpns, numPages);
   HostIF_VMUnlock(vm, 9);

   /* Non-negative: number of MPNs stored; negative: error code. */
   return numMPNs;
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86_GetMemInfo --
 *
 *      Return the info about all VMs.
 *
 * Results:
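 *      TRUE if outArgs was filled in, FALSE if outArgsLength is too
 *      small to hold the requested number of VM entries.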
 *
 * Side effects:
 *      VMGetMemInfoArgs is filled in.
 *
 *----------------------------------------------------------------------
 */

Bool
Vmx86_GetMemInfo(VMDriver *curVM,          // IN: calling VM, may be NULL
                 int32 curVMOnly,          // IN: non-zero to report only curVM
                 VMMemInfoArgs *outArgs,   // OUT: filled-in memory info
                 int outArgsLength)        // IN: size of outArgs in bytes
{
   VMDriver *cur;
   int slot;
   int numWanted;
   int outSize;

   HostIF_GlobalLock(7);

   /* vmCount is only read while the global lock is held. */
   if (curVMOnly) {
      numWanted = 1;
   } else {
      numWanted = vmCount;
   }

   /* Refuse to write past the end of the caller's buffer. */
   outSize = VM_GET_MEM_INFO_SIZE(numWanted);
   if (outSize > outArgsLength) {
      HostIF_GlobalUnlock(7);
      return FALSE;
   }

   /* Global (not per-VM) state. */
   outArgs->numVMs = numWanted;
   outArgs->numLockedPages = numLockedPages;
   outArgs->lockedPageLimit = lockedPageLimit;
   outArgs->maxLockedPages = Vmx86LockedPageLimit(curVM);
   outArgs->globalMinAllocation = Vmx86CalculateGlobalMinAllocation(minVmMemPct);
   outArgs->minVmMemPct = minVmMemPct;
   /* Stays -1 unless the caller is located below. */
   outArgs->callerIndex = -1;
   Vmx86COWStats(&outArgs->cowInfo);

   /* Per-VM entries are only produced when a calling VM was supplied. */
   if (curVM != NULL && numWanted == 1) {
      /*
       * NOTE(review): unlike the loop below, this copy is made without
       * taking curVM's VM lock -- confirm whether that is intentional.
       */
      outArgs->memInfo[0] = curVM->memInfo;
      outArgs->callerIndex = 0;
   } else if (curVM != NULL) {
      /* Snapshot every VM on the driver list, noting the caller's slot. */
      for (cur = vmDriverList, slot = 0;
           cur != NULL && slot < vmCount;
           cur = cur->nextDriver, slot++) {
         if (cur == curVM) {
            outArgs->callerIndex = slot;
         }
         HostIF_VMLock(cur, 10);
         outArgs->memInfo[slot] = cur->memInfo;
         HostIF_VMUnlock(cur, 10);
      }
   }

   HostIF_GlobalUnlock(7);

   return TRUE;
}


/*
 *-----------------------------------------------------------------------------
 *
 * Vmx86_GetMemInfoCopy --
 *
 *    Return the information about all VMs by copying the data out to user
 *    memory.
 *
 *    On input, outArgs->numVMs indicates how much space has been allocated for
 *    the information. On output, it indicates how much space has been
 *    filled --hpreg
 *
 * Results:
 *    TRUE on success
 *    FALSE on failure
 *
 * Side effects:
 *    None
 *
 *-----------------------------------------------------------------------------
 */

Bool
Vmx86_GetMemInfoCopy(VMDriver *curVM,           // IN
                     VMMemInfoArgs *outArgs)    // IN/OUT
{
   Bool copied = FALSE;
   VMMemInfoArgs *scratch;
   VMDriver *cur;

   ASSERT(curVM);

   /* Kernel-side staging buffer, sized for the maximum number of VMs. */
   scratch = HostIF_AllocKernelMem(VM_GET_MEM_INFO_SIZE(MAX_VMS), TRUE);
   if (!scratch) {
      goto done;
   }

   /* Fetch the caller's header; its numVMs says how many slots it has. */
   if (HostIF_CopyFromUser(scratch, outArgs, VM_GET_MEM_INFO_SIZE(1))) {
      goto release;
   }

   HostIF_GlobalLock(8);

   /* vmCount may only be read with the global lock held. */
   if (scratch->numVMs < vmCount) {
      /* The caller's buffer cannot hold an entry for every VM. */
      HostIF_GlobalUnlock(8);
      goto release;
   }

   /* Global (not per-VM) state. */
   scratch->numLockedPages = numLockedPages;
   scratch->lockedPageLimit = lockedPageLimit;
   scratch->maxLockedPages = Vmx86LockedPageLimit(curVM);
   scratch->globalMinAllocation = Vmx86CalculateGlobalMinAllocation(minVmMemPct);
   scratch->minVmMemPct = minVmMemPct;
   Vmx86COWStats(&scratch->cowInfo);

   /* Walk the driver list; numVMs doubles as the output slot index. */
   scratch->numVMs = 0;
   cur = vmDriverList;
   while (cur) {
      ASSERT(scratch->numVMs < vmCount);
      if (cur == curVM) {
         scratch->callerIndex = scratch->numVMs;
      }
      HostIF_VMLock(cur, 11);
      scratch->memInfo[scratch->numVMs] = cur->memInfo;
      HostIF_VMUnlock(cur, 11);

      cur = cur->nextDriver;
      scratch->numVMs++;
   }
   ASSERT(scratch->numVMs == vmCount);

   HostIF_GlobalUnlock(8);

   /* Copy out only the part that was filled; zero return means success. */
   if (!HostIF_CopyToUser(outArgs, scratch,
                          VM_GET_MEM_INFO_SIZE(scratch->numVMs))) {
      copied = TRUE;
   }

release:
   HostIF_FreeKernelMem(scratch);
done:
   return copied;
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86SetMemoryUsage --
 *
 *      Updates the paged and nonpaged reserved memory values for the vm
 *      and recomputes its minimum and maximum allocations.  The caller
 *      must hold the VM lock.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      curVM->memInfo is updated.
 *
 *----------------------------------------------------------------------
 */

static void
Vmx86SetMemoryUsage(VMDriver *curVM, unsigned paged, unsigned nonpaged)
{
   /* Callers must already hold curVM's VM lock; memInfo is updated in place. */
   ASSERT(HostIF_VMLockIsHeld(curVM));
   curVM->memInfo.paged         = paged;
   curVM->memInfo.nonpaged      = nonpaged;
   /*
    * NOTE(review): Vmx86MinAllocation() is handed curVM, so it presumably
    * reads the paged/nonpaged values stored just above -- keep this order.
    */
   curVM->memInfo.minAllocation = Vmx86MinAllocation(curVM, minVmMemPct);
   /* The maximum allocation is everything the VM has reserved. */
   curVM->memInfo.maxAllocation = paged + nonpaged;
}

/*
 *----------------------------------------------------------------------
 *
 * Vmx86_Admit --
 *
 *      Set the memory management information about this VM and handles
 *      admission control. We allow vm to power on if there is room for
 *      the minimum allocation for all running vms in memory.  Note that 
 *      the hard memory limit can change dynamically in windows so we
 *      don't have guarantees due to admission control.  
 *
 * Results:
 *      Returns global information about the memory state in args as well
 *      as a value indicating whether or not the virtual machine was
 *      started.
 *
 * Side effects:
 *      None
 *
 *----------------------------------------------------------------------
 */

void
Vmx86_Admit(VMDriver *curVM,       // IN: VM asking to be admitted
            VMMemInfoArgs *args)   // IN/OUT: requested sizes in, global state out
{
   unsigned int globalMinAllocation;

   HostIF_GlobalLock(9);
   /* Update the overcommitment level and minimums for all vms. */
   minVmMemPct = args->minVmMemPct;
   Vmx86UpdateMinAllocations(minVmMemPct);
   globalMinAllocation = Vmx86CalculateGlobalMinAllocation(args->minVmMemPct);

   HostIF_VMLock(curVM, 12);

   Vmx86SetMemoryUsage(curVM, args->memInfo->paged, args->memInfo->nonpaged);
   curVM->memInfo.shares = args->memInfo->shares;
   curVM->memInfo.usedPct = 100;
   curVM->memInfo.mainMemSize = args->memInfo->mainMemSize;
   // Preliminary admission control to put memory pressure on other VMs.
   if (globalMinAllocation + curVM->memInfo.minAllocation <=
       Vmx86LockedPageLimitForAdmissonControl()) {
      curVM->memInfo.admitted = TRUE;
   } else {
      curVM->memInfo.admitted = FALSE;
   }

#ifndef linux
   {
      unsigned int allocatedPages = 0;
      unsigned int nonpaged = args->memInfo->nonpaged;
      unsigned int freedPages;
      signed int pages;
      MPN32 *mpns;

      /*
       * More admission control: Get enough memory for the nonpaged portion
       * of the VM.  Drop locks for this long operation.
       * XXX Timeout?
       */
      HostIF_VMUnlock(curVM, 12);
      HostIF_GlobalUnlock(9);

#define ALLOCATE_CHUNK_SIZE 64
      mpns = HostIF_AllocKernelMem(nonpaged * sizeof(MPN32), FALSE);
      if (mpns != NULL) {
         /* Grab the pages chunk by chunk until done or the host refuses. */
         while (allocatedPages < nonpaged) {
            pages = Vmx86_AllocLockedPages(curVM, mpns + allocatedPages,
                                           MIN(ALLOCATE_CHUNK_SIZE,
                                               nonpaged - allocatedPages),
                                           TRUE);
            if (pages <= 0) {
               break;
            }
            allocatedPages += pages;
         }

         /*
          * Free the allocated pages.
          * XXX Do not free the pages but hand them directly to the admitted VM.
          */
         for (freedPages = 0;
              freedPages < allocatedPages;
              freedPages += ALLOCATE_CHUNK_SIZE) {
            Vmx86_FreeLockedPages(curVM, mpns + freedPages,
                                  MIN(ALLOCATE_CHUNK_SIZE,
                                      allocatedPages - freedPages),
                                  TRUE);
         }
         HostIF_FreeKernelMem(mpns);
      }
#undef ALLOCATE_CHUNK_SIZE

      /*
       * Re-take the locks BEFORE touching memInfo: every other access to
       * curVM->memInfo in this file is made with the VM lock held.
       */
      HostIF_GlobalLock(9);
      HostIF_VMLock(curVM, 12);

      if (allocatedPages != nonpaged) {
         curVM->memInfo.admitted = FALSE; // undo admission
      }
   }
#endif

   /*
    * Return global state to the caller.  On non-linux hosts the locks were
    * dropped above, so globalMinAllocation is the value computed before the
    * trial allocation.
    */
   args->memInfo[0] = curVM->memInfo;
   args->numVMs = vmCount;
   args->numLockedPages = numLockedPages;
   args->maxLockedPages = Vmx86LockedPageLimit(curVM);
   args->lockedPageLimit = lockedPageLimit;
   args->globalMinAllocation = globalMinAllocation;
   args->minVmMemPct = minVmMemPct;
   HostIF_VMUnlock(curVM, 12);
   HostIF_GlobalUnlock(9);
}

/*
 *----------------------------------------------------------------------
 *
 * Vmx86_Readmit --
 *
 *      Adjusts the nonpaged reservation of a VM by pageDelta pages.
 *      The change is accepted when the resulting nonpaged size is not
 *      negative and either the new global minimum allocation still fits
 *      within the locked page limit or the reservation is not growing.
 *
 * Results:
 *      TRUE if the new reservation was applied, FALSE otherwise.
 *
 * Side effects:
 *      curVM->memInfo is updated on success.
 *
 *----------------------------------------------------------------------
 */

Bool
Vmx86_Readmit(VMDriver *curVM, int32 pageDelta)
{
   Bool accepted = FALSE;
   unsigned othersMinAllocation;
   unsigned proposedMinAllocation;
   int32 newNonpaged;

   HostIF_GlobalLock(31);
   othersMinAllocation = Vmx86CalculateGlobalMinAllocation(minVmMemPct);
   HostIF_VMLock(curVM, 25);

   newNonpaged = curVM->memInfo.nonpaged + pageDelta;
   if (newNonpaged < 0) {
      /* Cannot shrink the reservation below zero pages. */
      goto unlock;
   }

   /* Swap this VM's current minimum for the one it would have afterwards. */
   othersMinAllocation -= Vmx86MinAllocation(curVM, minVmMemPct);
   proposedMinAllocation = Vmx86MinAllocationFunc(curVM->memInfo.paged,
                                                  newNonpaged,
                                                  minVmMemPct);

   /* Shrinking (or no change) is always allowed; growth must fit the limit. */
   if (othersMinAllocation + proposedMinAllocation <= Vmx86LockedPageLimit(curVM) ||
       pageDelta <= 0) {
      Vmx86SetMemoryUsage(curVM, curVM->memInfo.paged, newNonpaged);
      accepted = TRUE;
   }

unlock:
   HostIF_VMUnlock(curVM, 25);
   HostIF_GlobalUnlock(31);
   return accepted;
}
                    
/*
 *----------------------------------------------------------------------
 *
 * Vmx86_SetMemUsage --
 *
 *      Set the memory usage by this vm based on its memSample data.
 *
 * Results:
 *      Updates memory allocations.
 *
 * Side effects:
 *      None
 *
 *----------------------------------------------------------------------
 */

void
Vmx86_SetMemUsage(VMDriver *curVM,    // IN: VM instance pointer
                  unsigned usedPct)   // IN: percentage of memory in use
{
   /* Values above 100 are not a valid percentage; silently ignore them. */
   if (usedPct > 100) {
      return;
   }

   HostIF_VMLock(curVM, 13);
   curVM->memInfo.usedPct = usedPct;
   HostIF_VMUnlock(curVM, 13);
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86_PAEEnabled --
 *
 *      Is PAE enabled?
 *
 * Results:
 *      TRUE if PAE enabled.
 *
 * Side effects:
 *      None.
 *
 *----------------------------------------------------------------------
 */

Bool
Vmx86_PAEEnabled(void)
{
   /* The local's width follows the build target (64-bit vs 32-bit). */
#ifdef VM_X86_64
   uint64 cr4;
#else
   uint32 cr4;
#endif

   /* Read CR4 and test the PAE bit. */
   GET_CR4(cr4);
   return (cr4 & CR4_PAE) != 0;
}


/*
 *----------------------------------------------------------------------
 * Vmx86_VTCapableCPU --
 * 
 *   Verify that the VT is enabled on the CPU.
 *----------------------------------------------------------------------
 */

Bool
Vmx86_VTCapableCPU(void)
{
   /* Both the VMX-enable bit and the lock bit must be set in FEATCTL. */
   const uint64 requiredBits = MSR_FEATCTL_VMXE | MSR_FEATCTL_LOCK;
   uint64 featCtl = __GET_MSR(MSR_FEATCTL);

   return (featCtl & requiredBits) == requiredBits;
}


/*
 *----------------------------------------------------------------------
 * Vmx86_VTSupportedCPU --
 * 
 *   Verify that the CPU has the VT capabilities required to run the
 *   VT-enabled monitor.
 *----------------------------------------------------------------------
 */

/*
 * Vmx86VTComputeMandatoryBits --
 *
 *   Reads the given VMX capability MSR and adjusts the requested control
 *   bits with it: bits set in the MSR's low dword are forced on, and the
 *   result is masked with the MSR's high dword.
 */
static uint32
Vmx86VTComputeMandatoryBits(int msrNum,    // IN: capability MSR to read
                            uint32 bits)   // IN: control bits requested
{
   uint64 capability = __GET_MSR(msrNum);
   uint32 forcedOn = LODWORD(capability);   /* bits that end up set */
   uint32 allowedOn = HIDWORD(capability);  /* mask of bits that may stay set */

   return (forcedOn | bits) & allowedOn;
}

Bool
Vmx86_VTSupportedCPU(void)
{
   uint64 basic;
   unsigned vmcsMemType;
   unsigned vmcsBytes;

   /*
    * Each group of required controls must survive the adjustment imposed by
    * its capability MSR; any required bit that gets cleared is unsupported.
    * The MSRs are checked in order and the first failure ends the check.
    */
   if (VT64_REQUIRED_PINBASED_CTLS
       & ~Vmx86VTComputeMandatoryBits(MSR_VMX_PINBASED_CTLS,
                                      VT64_REQUIRED_PINBASED_CTLS)) {
      return FALSE;
   }
   if (VT64_REQUIRED_PROCBASED_CTLS
       & ~Vmx86VTComputeMandatoryBits(MSR_VMX_PROCBASED_CTLS,
                                      VT64_REQUIRED_PROCBASED_CTLS)) {
      return FALSE;
   }
   if (VT64_REQUIRED_ENTRY_CTLS
       & ~Vmx86VTComputeMandatoryBits(MSR_VMX_ENTRY_CTLS,
                                      VT64_REQUIRED_ENTRY_CTLS)) {
      return FALSE;
   }
   if (VT64_REQUIRED_EXIT_CTLS
       & ~Vmx86VTComputeMandatoryBits(MSR_VMX_EXIT_CTLS,
                                      VT64_REQUIRED_EXIT_CTLS)) {
      return FALSE;
   }

   basic = __GET_MSR(MSR_VMX_BASIC);

   /* The 4-bit field at bit 50 must report the write-back memory type. */
   vmcsMemType = (unsigned) ((basic >> 50) & 0xf);
   if (vmcsMemType != MTRR_TYPE_WB) {
      return FALSE;
   }

   /*
    * With bit 47 set, the low 9 bits of the high dword are taken as a page
    * count; otherwise the low 13 bits are taken as a byte count.
    */
   if (basic & (1ULL << 47)) {
      vmcsBytes = (unsigned) ((HIDWORD(basic) & 0x1ff) * PAGE_SIZE);
   } else {
      vmcsBytes = (unsigned) (HIDWORD(basic) & 0x1fff);
   }

   /* The VMCS must fit within a single page. */
   if (vmcsBytes > PAGE_SIZE) {
      return FALSE;
   }

   return TRUE;
}
