iPXE
hyperv.c
Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2014 Michael Brown <mbrown@fensystems.co.uk>.
00003  *
00004  * This program is free software; you can redistribute it and/or
00005  * modify it under the terms of the GNU General Public License as
00006  * published by the Free Software Foundation; either version 2 of the
00007  * License, or (at your option) any later version.
00008  *
00009  * This program is distributed in the hope that it will be useful, but
00010  * WITHOUT ANY WARRANTY; without even the implied warranty of
00011  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012  * General Public License for more details.
00013  *
00014  * You should have received a copy of the GNU General Public License
00015  * along with this program; if not, write to the Free Software
00016  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
00017  * 02110-1301, USA.
00018  *
00019  * You can also choose to distribute this program under the terms of
00020  * the Unmodified Binary Distribution Licence (as given in the file
00021  * COPYING.UBDL), provided that you have satisfied its requirements.
00022  */
00023 
00024 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
00025 
00026 /** @file
00027  *
00028  * Hyper-V driver
00029  *
00030  */
00031 
00032 #include <stdlib.h>
00033 #include <stdarg.h>
00034 #include <string.h>
00035 #include <unistd.h>
00036 #include <assert.h>
00037 #include <errno.h>
00038 #include <byteswap.h>
00039 #include <pic8259.h>
00040 #include <ipxe/malloc.h>
00041 #include <ipxe/device.h>
00042 #include <ipxe/timer.h>
00043 #include <ipxe/quiesce.h>
00044 #include <ipxe/cpuid.h>
00045 #include <ipxe/msr.h>
00046 #include <ipxe/hyperv.h>
00047 #include <ipxe/vmbus.h>
00048 #include "hyperv.h"
00049 
00050 /** Maximum time to wait for a message response, in milliseconds
00051  *
00052  * This is a policy decision; hv_wait_for_message() polls once per millisecond up to this limit.
00053  */
00054 #define HV_MESSAGE_MAX_WAIT_MS 1000
00055 
00056 /** Hyper-V timer frequency (fixed 10MHz) */
00057 #define HV_TIMER_HZ 10000000
00058 
00059 /** Hyper-V timer scale factor: right shift applied by hv_currticks() (used to avoid 64-bit division) */
00060 #define HV_TIMER_SHIFT 18
00061 
00062 /**
00063  * Convert a Hyper-V status code to an iPXE status code
00064  *
00065  * @v status            Hyper-V status code
00066  * @ret rc              iPXE status code (before negation; callers in this file apply the minus sign)
00067  */
00068 #define EHV( status ) EPLATFORM ( EINFO_EPLATFORM, (status) )
00069 
00070 /**
00071  * Allocate zeroed pages
00072  *
00073  * @v hv                Hyper-V hypervisor
00074  * @v ...               Page addresses to fill in, terminated by NULL
00075  * @ret rc              Return status code
00076  */
00077 __attribute__ (( sentinel )) int
00078 hv_alloc_pages ( struct hv_hypervisor *hv, ... ) {
00079         va_list args;
00080         void **page;
00081         int i;
00082 
00083         /* Allocate and zero pages */
00084         va_start ( args, hv );
00085         for ( i = 0 ; ( ( page = va_arg ( args, void ** ) ) != NULL ); i++ ) {
00086                 *page = malloc_dma ( PAGE_SIZE, PAGE_SIZE );
00087                 if ( ! *page )
00088                         goto err_alloc;
00089                 memset ( *page, 0, PAGE_SIZE );
00090         }
00091         va_end ( args );
00092 
00093         return 0;
00094 
00095  err_alloc:
00096         va_end ( args );
00097         va_start ( args, hv );
00098         for ( ; i >= 0 ; i-- ) {
00099                 page = va_arg ( args, void ** );
00100                 free_dma ( *page, PAGE_SIZE );
00101         }
00102         va_end ( args );
00103         return -ENOMEM;
00104 }
00105 
00106 /**
00107  * Free pages
00108  *
00109  * @v hv                Hyper-V hypervisor
00110  * @v ...               Page addresses, terminated by NULL
00111  */
00112 __attribute__ (( sentinel )) void
00113 hv_free_pages ( struct hv_hypervisor *hv, ... ) {
00114         va_list args;
00115         void *page;
00116 
00117         va_start ( args, hv );
00118         while ( ( page = va_arg ( args, void * ) ) != NULL )
00119                 free_dma ( page, PAGE_SIZE );
00120         va_end ( args );
00121 }
00122 
00123 /**
00124  * Allocate message buffer
00125  *
00126  * @v hv                Hyper-V hypervisor
00127  * @ret rc              Return status code
00128  */
00129 static int hv_alloc_message ( struct hv_hypervisor *hv ) {
00130 
00131         /* Allocate buffer.  Must be aligned to at least 8 bytes and
00132          * must not cross a page boundary, so align on its own size.
00133          */
00134         hv->message = malloc_dma ( sizeof ( *hv->message ),
00135                                    sizeof ( *hv->message ) );
00136         if ( ! hv->message )
00137                 return -ENOMEM;
00138 
00139         return 0;
00140 }
00141 
00142 /**
00143  * Free message buffer
00144  *
00145  * @v hv                Hyper-V hypervisor
00146  */
00147 static void hv_free_message ( struct hv_hypervisor *hv ) {
00148 
00149         /* Free buffer */
00150         free_dma ( hv->message, sizeof ( *hv->message ) );
00151 }
00152 
00153 /**
00154  * Check whether or not we are running in Hyper-V
00155  *
00156  * @ret rc              Return status code
00157  */
00158 static int hv_check_hv ( void ) {
00159         struct x86_features features;
00160         uint32_t interface_id;
00161         uint32_t discard_ebx;
00162         uint32_t discard_ecx;
00163         uint32_t discard_edx;
00164 
00165         /* Check for presence of a hypervisor (not necessarily Hyper-V) */
00166         x86_features ( &features );
00167         if ( ! ( features.intel.ecx & CPUID_FEATURES_INTEL_ECX_HYPERVISOR ) ) {
00168                 DBGC ( HV_INTERFACE_ID, "HV not running in a hypervisor\n" );
00169                 return -ENODEV;
00170         }
00171 
00172         /* Check that hypervisor is Hyper-V */
00173         cpuid ( HV_CPUID_INTERFACE_ID, 0, &interface_id, &discard_ebx,
00174                 &discard_ecx, &discard_edx );
00175         if ( interface_id != HV_INTERFACE_ID ) {
00176                 DBGC ( HV_INTERFACE_ID, "HV not running in Hyper-V (interface "
00177                        "ID %#08x)\n", interface_id );
00178                 return -ENODEV;
00179         }
00180 
00181         return 0;
00182 }
00183 
00184 /**
00185  * Check required features
00186  *
00187  * @v hv                Hyper-V hypervisor
00188  * @ret rc              Return status code
00189  */
00190 static int hv_check_features ( struct hv_hypervisor *hv ) {
00191         uint32_t available;
00192         uint32_t permissions;
00193         uint32_t discard_ecx;
00194         uint32_t discard_edx;
00195 
00196         /* Check that required features and privileges are available */
00197         cpuid ( HV_CPUID_FEATURES, 0, &available, &permissions, &discard_ecx,
00198                 &discard_edx );
00199         if ( ! ( available & HV_FEATURES_AVAIL_HYPERCALL_MSR ) ) {
00200                 DBGC ( hv, "HV %p has no hypercall MSRs (features %08x:%08x)\n",
00201                        hv, available, permissions );
00202                 return -ENODEV;
00203         }
00204         if ( ! ( available & HV_FEATURES_AVAIL_SYNIC_MSR ) ) {
00205                 DBGC ( hv, "HV %p has no SynIC MSRs (features %08x:%08x)\n",
00206                        hv, available, permissions );
00207                 return -ENODEV;
00208         }
00209         if ( ! ( permissions & HV_FEATURES_PERM_POST_MESSAGES ) ) {
00210                 DBGC ( hv, "HV %p cannot post messages (features %08x:%08x)\n",
00211                        hv, available, permissions );
00212                 return -EACCES;
00213         }
00214         if ( ! ( permissions & HV_FEATURES_PERM_SIGNAL_EVENTS ) ) {
00215                 DBGC ( hv, "HV %p cannot signal events (features %08x:%08x)",
00216                        hv, available, permissions );
00217                 return -EACCES;
00218         }
00219 
00220         return 0;
00221 }
00222 
00223 /**
00224  * Check that Gen 2 UEFI firmware is not running
00225  *
00226  * @v hv                Hyper-V hypervisor
00227  * @ret rc              Return status code
00228  *
00229  * We must not steal ownership from the Gen 2 UEFI firmware, since
00230  * doing so will cause an immediate crash.  Avoid this by checking for
00231  * the guest OS identity known to be used by the Gen 2 UEFI firmware.
00232  */
00233 static int hv_check_uefi ( struct hv_hypervisor *hv ) {
00234         uint64_t guest_os_id;
00235 
00236         /* Check for UEFI firmware's guest OS identity */
00237         guest_os_id = rdmsr ( HV_X64_MSR_GUEST_OS_ID );
00238         if ( guest_os_id == HV_GUEST_OS_ID_UEFI ) {
00239                 DBGC ( hv, "HV %p is owned by UEFI firmware\n", hv );
00240                 return -ENOTSUP;
00241         }
00242 
00243         return 0;
00244 }
00245 
00246 /**
00247  * Map hypercall page
00248  *
00249  * @v hv                Hyper-V hypervisor
00250  */
00251 static void hv_map_hypercall ( struct hv_hypervisor *hv ) {
00252         union {
00253                 struct {
00254                         uint32_t ebx;
00255                         uint32_t ecx;
00256                         uint32_t edx;
00257                 } __attribute__ (( packed ));
00258                 char text[ 13 /* "bbbbccccdddd" + NUL */ ];
00259         } vendor_id;
00260         uint32_t build;
00261         uint32_t version;
00262         uint32_t discard_eax;
00263         uint32_t discard_ecx;
00264         uint32_t discard_edx;
00265         uint64_t guest_os_id;
00266         uint64_t hypercall;
00267 
00268         /* Report guest OS identity */
00269         guest_os_id = rdmsr ( HV_X64_MSR_GUEST_OS_ID );
00270         if ( guest_os_id != 0 ) {
00271                 DBGC ( hv, "HV %p guest OS ID MSR was %#08llx\n",
00272                        hv, guest_os_id );
00273         }
00274         guest_os_id = HV_GUEST_OS_ID_IPXE;
00275         DBGC2 ( hv, "HV %p guest OS ID MSR is %#08llx\n", hv, guest_os_id );
00276         wrmsr ( HV_X64_MSR_GUEST_OS_ID, guest_os_id );
00277 
00278         /* Get hypervisor system identity (for debugging) */
00279         cpuid ( HV_CPUID_VENDOR_ID, 0, &discard_eax, &vendor_id.ebx,
00280                 &vendor_id.ecx, &vendor_id.edx );
00281         vendor_id.text[ sizeof ( vendor_id.text ) - 1 ] = '\0';
00282         cpuid ( HV_CPUID_HYPERVISOR_ID, 0, &build, &version, &discard_ecx,
00283                 &discard_edx );
00284         DBGC ( hv, "HV %p detected \"%s\" version %d.%d build %d\n", hv,
00285                vendor_id.text, ( version >> 16 ), ( version & 0xffff ), build );
00286 
00287         /* Map hypercall page */
00288         hypercall = rdmsr ( HV_X64_MSR_HYPERCALL );
00289         hypercall &= ( PAGE_SIZE - 1 );
00290         hypercall |= ( virt_to_phys ( hv->hypercall ) | HV_HYPERCALL_ENABLE );
00291         DBGC2 ( hv, "HV %p hypercall MSR is %#08llx\n", hv, hypercall );
00292         wrmsr ( HV_X64_MSR_HYPERCALL, hypercall );
00293 }
00294 
00295 /**
00296  * Unmap hypercall page
00297  *
00298  * @v hv                Hyper-V hypervisor
00299  */
00300 static void hv_unmap_hypercall ( struct hv_hypervisor *hv ) {
00301         uint64_t hypercall;
00302         uint64_t guest_os_id;
00303 
00304         /* Unmap the hypercall page */
00305         hypercall = rdmsr ( HV_X64_MSR_HYPERCALL );
00306         hypercall &= ( ( PAGE_SIZE - 1 ) & ~HV_HYPERCALL_ENABLE );
00307         DBGC2 ( hv, "HV %p hypercall MSR is %#08llx\n", hv, hypercall );
00308         wrmsr ( HV_X64_MSR_HYPERCALL, hypercall );
00309 
00310         /* Reset the guest OS identity */
00311         guest_os_id = 0;
00312         DBGC2 ( hv, "HV %p guest OS ID MSR is %#08llx\n", hv, guest_os_id );
00313         wrmsr ( HV_X64_MSR_GUEST_OS_ID, guest_os_id );
00314 }
00315 
00316 /**
00317  * Map synthetic interrupt controller
00318  *
00319  * @v hv                Hyper-V hypervisor
00320  */
00321 static void hv_map_synic ( struct hv_hypervisor *hv ) {
00322         uint64_t simp;
00323         uint64_t siefp;
00324         uint64_t scontrol;
00325 
00326         /* Zero SynIC message and event pages */
00327         memset ( hv->synic.message, 0, PAGE_SIZE );
00328         memset ( hv->synic.event, 0, PAGE_SIZE );
00329 
00330         /* Map SynIC message page */
00331         simp = rdmsr ( HV_X64_MSR_SIMP );
00332         simp &= ( PAGE_SIZE - 1 );
00333         simp |= ( virt_to_phys ( hv->synic.message ) | HV_SIMP_ENABLE );
00334         DBGC2 ( hv, "HV %p SIMP MSR is %#08llx\n", hv, simp );
00335         wrmsr ( HV_X64_MSR_SIMP, simp );
00336 
00337         /* Map SynIC event page */
00338         siefp = rdmsr ( HV_X64_MSR_SIEFP );
00339         siefp &= ( PAGE_SIZE - 1 );
00340         siefp |= ( virt_to_phys ( hv->synic.event ) | HV_SIEFP_ENABLE );
00341         DBGC2 ( hv, "HV %p SIEFP MSR is %#08llx\n", hv, siefp );
00342         wrmsr ( HV_X64_MSR_SIEFP, siefp );
00343 
00344         /* Enable SynIC */
00345         scontrol = rdmsr ( HV_X64_MSR_SCONTROL );
00346         scontrol |= HV_SCONTROL_ENABLE;
00347         DBGC2 ( hv, "HV %p SCONTROL MSR is %#08llx\n", hv, scontrol );
00348         wrmsr ( HV_X64_MSR_SCONTROL, scontrol );
00349 }
00350 
00351 /**
00352  * Unmap synthetic interrupt controller, leaving SCONTROL untouched
00353  *
00354  * @v hv                Hyper-V hypervisor
00355  */
00356 static void hv_unmap_synic_no_scontrol ( struct hv_hypervisor *hv ) {
00357         uint64_t siefp;
00358         uint64_t simp;
00359 
00360         /* Unmap SynIC event page */
00361         siefp = rdmsr ( HV_X64_MSR_SIEFP );
00362         siefp &= ( ( PAGE_SIZE - 1 ) & ~HV_SIEFP_ENABLE );
00363         DBGC2 ( hv, "HV %p SIEFP MSR is %#08llx\n", hv, siefp );
00364         wrmsr ( HV_X64_MSR_SIEFP, siefp );
00365 
00366         /* Unmap SynIC message page */
00367         simp = rdmsr ( HV_X64_MSR_SIMP );
00368         simp &= ( ( PAGE_SIZE - 1 ) & ~HV_SIMP_ENABLE );
00369         DBGC2 ( hv, "HV %p SIMP MSR is %#08llx\n", hv, simp );
00370         wrmsr ( HV_X64_MSR_SIMP, simp );
00371 }
00372 
00373 /**
00374  * Unmap synthetic interrupt controller
00375  *
00376  * @v hv                Hyper-V hypervisor
00377  */
00378 static void hv_unmap_synic ( struct hv_hypervisor *hv ) {
00379         uint64_t scontrol;
00380 
00381         /* Disable SynIC */
00382         scontrol = rdmsr ( HV_X64_MSR_SCONTROL );
00383         scontrol &= ~HV_SCONTROL_ENABLE;
00384         DBGC2 ( hv, "HV %p SCONTROL MSR is %#08llx\n", hv, scontrol );
00385         wrmsr ( HV_X64_MSR_SCONTROL, scontrol );
00386 
00387         /* Unmap SynIC event and message pages */
00388         hv_unmap_synic_no_scontrol ( hv );
00389 }
00390 
00391 /**
00392  * Enable synthetic interrupt
00393  *
00394  * @v hv                Hyper-V hypervisor
00395  * @v sintx             Synthetic interrupt number
00396  */
00397 void hv_enable_sint ( struct hv_hypervisor *hv, unsigned int sintx ) {
00398         unsigned long msr = HV_X64_MSR_SINT ( sintx );
00399         uint64_t sint;
00400 
00401         /* Enable synthetic interrupt
00402          *
00403          * We have to enable the interrupt, otherwise messages will
00404          * not be delivered (even though the documentation implies
00405          * that polling for messages is possible).  We enable AutoEOI
00406          * and hook the interrupt to the obsolete IRQ13 (FPU
00407          * exception) vector, which will be implemented as a no-op.
00408          */
00409         sint = rdmsr ( msr );
00410         sint &= ~( HV_SINT_MASKED | HV_SINT_VECTOR_MASK );
00411         sint |= ( HV_SINT_AUTO_EOI |
00412                   HV_SINT_VECTOR ( IRQ_INT ( 13 /* See comment above */ ) ) );
00413         DBGC2 ( hv, "HV %p SINT%d MSR is %#08llx\n", hv, sintx, sint );
00414         wrmsr ( msr, sint );
00415 }
00416 
00417 /**
00418  * Disable synthetic interrupt
00419  *
00420  * @v hv                Hyper-V hypervisor
00421  * @v sintx             Synthetic interrupt number
00422  */
00423 void hv_disable_sint ( struct hv_hypervisor *hv, unsigned int sintx ) {
00424         unsigned long msr = HV_X64_MSR_SINT ( sintx );
00425         uint64_t sint;
00426 
00427         /* Do nothing if interrupt is already disabled */
00428         sint = rdmsr ( msr );
00429         if ( sint & HV_SINT_MASKED )
00430                 return;
00431 
00432         /* Disable synthetic interrupt */
00433         sint &= ~HV_SINT_AUTO_EOI;
00434         sint |= HV_SINT_MASKED;
00435         DBGC2 ( hv, "HV %p SINT%d MSR is %#08llx\n", hv, sintx, sint );
00436         wrmsr ( msr, sint );
00437 }
00438 
00439 /**
00440  * Post message
00441  *
00442  * @v hv                Hyper-V hypervisor
00443  * @v id                Connection ID
00444  * @v type              Message type
00445  * @v data              Message
00446  * @v len               Length of message
00447  * @ret rc              Return status code
00448  */
00449 int hv_post_message ( struct hv_hypervisor *hv, unsigned int id,
00450                       unsigned int type, const void *data, size_t len ) {
00451         struct hv_post_message *msg = &hv->message->posted;
00452         int status;
00453         int rc;
00454 
00455         /* Sanity check */
00456         assert ( len <= sizeof ( msg->data ) );
00457 
00458         /* Construct message */
00459         memset ( msg, 0, sizeof ( *msg ) );
00460         msg->id = cpu_to_le32 ( id );
00461         msg->type = cpu_to_le32 ( type );
00462         msg->len = cpu_to_le32 ( len );
00463         memcpy ( msg->data, data, len );
00464         DBGC2 ( hv, "HV %p connection %d posting message type %#08x:\n",
00465                 hv, id, type );
00466         DBGC2_HDA ( hv, 0, msg->data, len );
00467 
00468         /* Post message */
00469         if ( ( status = hv_call ( hv, HV_POST_MESSAGE, msg, NULL ) ) != 0 ) {
00470                 rc = -EHV ( status );
00471                 DBGC ( hv, "HV %p could not post message to %#08x: %s\n",
00472                        hv, id, strerror ( rc ) );
00473                 return rc;
00474         }
00475 
00476         return 0;
00477 }
00478 
00479 /**
00480  * Wait for received message
00481  *
00482  * @v hv                Hyper-V hypervisor
00483  * @v sintx             Synthetic interrupt number
00484  * @ret rc              Return status code
00485  */
00486 int hv_wait_for_message ( struct hv_hypervisor *hv, unsigned int sintx ) {
00487         struct hv_message *msg = &hv->message->received;
00488         struct hv_message *src = &hv->synic.message[sintx];
00489         unsigned int retries;
00490         size_t len;
00491 
00492         /* Wait for message to arrive */
00493         for ( retries = 0 ; retries < HV_MESSAGE_MAX_WAIT_MS ; retries++ ) {
00494 
00495                 /* Check for message */
00496                 if ( src->type ) {
00497 
00498                         /* Copy message */
00499                         memset ( msg, 0, sizeof ( *msg ) );
00500                         len = src->len;
00501                         assert ( len <= sizeof ( *msg ) );
00502                         memcpy ( msg, src,
00503                                  ( offsetof ( typeof ( *msg ), data ) + len ) );
00504                         DBGC2 ( hv, "HV %p SINT%d received message type "
00505                                 "%#08x:\n", hv, sintx,
00506                                 le32_to_cpu ( msg->type ) );
00507                         DBGC2_HDA ( hv, 0, msg->data, len );
00508 
00509                         /* Consume message */
00510                         src->type = 0;
00511 
00512                         return 0;
00513                 }
00514 
00515                 /* Trigger message delivery */
00516                 wrmsr ( HV_X64_MSR_EOM, 0 );
00517 
00518                 /* Delay */
00519                 mdelay ( 1 );
00520         }
00521 
00522         DBGC ( hv, "HV %p SINT%d timed out waiting for message\n",
00523                hv, sintx );
00524         return -ETIMEDOUT;
00525 }
00526 
00527 /**
00528  * Signal event
00529  *
00530  * @v hv                Hyper-V hypervisor
00531  * @v id                Connection ID
00532  * @v flag              Flag number
00533  * @ret rc              Return status code
00534  */
00535 int hv_signal_event ( struct hv_hypervisor *hv, unsigned int id,
00536                       unsigned int flag ) {
00537         struct hv_signal_event *event = &hv->message->signalled;
00538         int status;
00539         int rc;
00540 
00541         /* Construct event */
00542         memset ( event, 0, sizeof ( *event ) );
00543         event->id = cpu_to_le32 ( id );
00544         event->flag = cpu_to_le16 ( flag );
00545 
00546         /* Signal event */
00547         if ( ( status = hv_call ( hv, HV_SIGNAL_EVENT, event, NULL ) ) != 0 ) {
00548                 rc = -EHV ( status );
00549                 DBGC ( hv, "HV %p could not signal event to %#08x: %s\n",
00550                        hv, id, strerror ( rc ) );
00551                 return rc;
00552         }
00553 
00554         return 0;
00555 }
00556 
00557 /**
00558  * Probe root device
00559  *
00560  * @v rootdev           Root device
00561  * @ret rc              Return status code
00562  */
00563 static int hv_probe ( struct root_device *rootdev ) {
00564         struct hv_hypervisor *hv;
00565         int rc;
00566 
00567         /* Check we are running in Hyper-V */
00568         if ( ( rc = hv_check_hv() ) != 0 )
00569                 goto err_check_hv;
00570 
00571         /* Allocate and initialise structure */
00572         hv = zalloc ( sizeof ( *hv ) );
00573         if ( ! hv ) {
00574                 rc = -ENOMEM;
00575                 goto err_alloc;
00576         }
00577 
00578         /* Check features */
00579         if ( ( rc = hv_check_features ( hv ) ) != 0 )
00580                 goto err_check_features;
00581 
00582         /* Check that Gen 2 UEFI firmware is not running */
00583         if ( ( rc = hv_check_uefi ( hv ) ) != 0 )
00584                 goto err_check_uefi;
00585 
00586         /* Allocate pages */
00587         if ( ( rc = hv_alloc_pages ( hv, &hv->hypercall, &hv->synic.message,
00588                                      &hv->synic.event, NULL ) ) != 0 )
00589                 goto err_alloc_pages;
00590 
00591         /* Allocate message buffer */
00592         if ( ( rc = hv_alloc_message ( hv ) ) != 0 )
00593                 goto err_alloc_message;
00594 
00595         /* Map hypercall page */
00596         hv_map_hypercall ( hv );
00597 
00598         /* Map synthetic interrupt controller */
00599         hv_map_synic ( hv );
00600 
00601         /* Probe Hyper-V devices */
00602         if ( ( rc = vmbus_probe ( hv, &rootdev->dev ) ) != 0 )
00603                 goto err_vmbus_probe;
00604 
00605         rootdev_set_drvdata ( rootdev, hv );
00606         return 0;
00607 
00608         vmbus_remove ( hv, &rootdev->dev );
00609  err_vmbus_probe:
00610         hv_unmap_synic ( hv );
00611         hv_unmap_hypercall ( hv );
00612         hv_free_message ( hv );
00613  err_alloc_message:
00614         hv_free_pages ( hv, hv->hypercall, hv->synic.message, hv->synic.event,
00615                         NULL );
00616  err_alloc_pages:
00617  err_check_uefi:
00618  err_check_features:
00619         free ( hv );
00620  err_alloc:
00621  err_check_hv:
00622         return rc;
00623 }
00624 
00625 /**
00626  * Remove root device
00627  *
00628  * @v rootdev           Root device
00629  */
00630 static void hv_remove ( struct root_device *rootdev ) {
00631         struct hv_hypervisor *hv = rootdev_get_drvdata ( rootdev );
00632 
00633         vmbus_remove ( hv, &rootdev->dev );
00634         hv_unmap_synic ( hv );
00635         hv_unmap_hypercall ( hv );
00636         hv_free_message ( hv );
00637         hv_free_pages ( hv, hv->hypercall, hv->synic.message, hv->synic.event,
00638                         NULL );
00639         free ( hv );
00640         rootdev_set_drvdata ( rootdev, NULL );
00641 }
00642 
00643 /** Hyper-V root device driver: binds hv_probe() and hv_remove() as the root device's entry points */
00644 static struct root_driver hv_root_driver = {
00645         .probe = hv_probe,
00646         .remove = hv_remove,
00647 };
00648 
00649 /** Hyper-V root device: named "Hyper-V" and driven by hv_root_driver */
00650 struct root_device hv_root_device __root_device = {
00651         .dev = { .name = "Hyper-V" },
00652         .driver = &hv_root_driver,
00653 };
00654 
00655 /**
00656  * Quiesce system
00657  *
00658  */
00659 static void hv_quiesce ( void ) {
00660         struct hv_hypervisor *hv = rootdev_get_drvdata ( &hv_root_device );
00661         unsigned int i;
00662 
00663         /* Do nothing if we are not running in Hyper-V */
00664         if ( ! hv )
00665                 return;
00666 
00667         /* The "enlightened" portions of the Windows Server 2016 boot
00668          * process will not cleanly take ownership of an active
00669          * Hyper-V connection.  Experimentation shows that the minimum
00670          * requirement is that we disable the SynIC message page
00671          * (i.e. zero the SIMP MSR).
00672          *
00673          * We cannot perform a full shutdown of the Hyper-V
00674          * connection.  Experimentation shows that if we disable the
00675          * SynIC (i.e. zero the SCONTROL MSR) then Windows Server 2016
00676          * will enter an indefinite wait loop.
00677          *
00678          * Attempt to create a safe handover environment by resetting
00679          * all MSRs except for SCONTROL.
00680          *
00681          * Note that we do not shut down our VMBus devices, since we
00682          * may need to unquiesce the system and continue operation.
00683          */
00684 
00685         /* Disable all synthetic interrupts */
00686         for ( i = 0 ; i <= HV_SINT_MAX ; i++ )
00687                 hv_disable_sint ( hv, i );
00688 
00689         /* Unmap synthetic interrupt controller, leaving SCONTROL
00690          * enabled (see above).
00691          */
00692         hv_unmap_synic_no_scontrol ( hv );
00693 
00694         /* Unmap hypercall page */
00695         hv_unmap_hypercall ( hv );
00696 
00697         DBGC ( hv, "HV %p quiesced\n", hv );
00698 }
00699 
00700 /**
00701  * Unquiesce system
00702  *
00703  */
00704 static void hv_unquiesce ( void ) {
00705         struct hv_hypervisor *hv = rootdev_get_drvdata ( &hv_root_device );
00706         uint64_t simp;
00707         int rc;
00708 
00709         /* Do nothing if we are not running in Hyper-V */
00710         if ( ! hv )
00711                 return;
00712 
00713         /* Experimentation shows that the "enlightened" portions of
00714          * Windows Server 2016 will break our Hyper-V connection at
00715          * some point during a SAN boot.  Surprisingly it does not
00716          * change the guest OS ID MSR, but it does leave the SynIC
00717          * message page disabled.
00718          *
00719          * Our own explicit quiescing procedure will also disable the
00720          * SynIC message page.  We can therefore use the SynIC message
00721          * page enable bit as a heuristic to determine when we need to
00722          * reestablish our Hyper-V connection.
00723          */
00724         simp = rdmsr ( HV_X64_MSR_SIMP );
00725         if ( simp & HV_SIMP_ENABLE )
00726                 return;
00727 
00728         /* Remap hypercall page */
00729         hv_map_hypercall ( hv );
00730 
00731         /* Remap synthetic interrupt controller */
00732         hv_map_synic ( hv );
00733 
00734         /* Reset Hyper-V devices */
00735         if ( ( rc = vmbus_reset ( hv, &hv_root_device.dev ) ) != 0 ) {
00736                 DBGC ( hv, "HV %p could not unquiesce: %s\n",
00737                        hv, strerror ( rc ) );
00738                 /* Nothing we can do */
00739                 return;
00740         }
00741 }
00742 
00743 /** Hyper-V quiescer: registers hv_quiesce() and hv_unquiesce() as the quiesce/unquiesce handlers */
00744 struct quiescer hv_quiescer __quiescer = {
00745         .quiesce = hv_quiesce,
00746         .unquiesce = hv_unquiesce,
00747 };
00748 
00749 /**
00750  * Probe timer
00751  *
00752  * @ret rc              Return status code
00753  */
00754 static int hv_timer_probe ( void ) {
00755         uint32_t available;
00756         uint32_t discard_ebx;
00757         uint32_t discard_ecx;
00758         uint32_t discard_edx;
00759         int rc;
00760 
00761         /* Check we are running in Hyper-V */
00762         if ( ( rc = hv_check_hv() ) != 0 )
00763                 return rc;
00764 
00765         /* Check for available reference counter */
00766         cpuid ( HV_CPUID_FEATURES, 0, &available, &discard_ebx, &discard_ecx,
00767                 &discard_edx );
00768         if ( ! ( available & HV_FEATURES_AVAIL_TIME_REF_COUNT_MSR ) ) {
00769                 DBGC ( HV_INTERFACE_ID, "HV has no time reference counter\n" );
00770                 return -ENODEV;
00771         }
00772 
00773         return 0;
00774 }
00775 
00776 /**
00777  * Get current system time in ticks
00778  *
00779  * @ret ticks           Current time, in ticks
00780  */
00781 static unsigned long hv_currticks ( void ) {
00782 
00783         /* Calculate time using a combination of bit shifts and
00784          * multiplication (to avoid a 64-bit division).
00785          */
00786         return ( ( rdmsr ( HV_X64_MSR_TIME_REF_COUNT ) >> HV_TIMER_SHIFT ) *
00787                  ( TICKS_PER_SEC / ( HV_TIMER_HZ >> HV_TIMER_SHIFT ) ) );
00788 }
00789 
00790 /**
00791  * Delay for a fixed number of microseconds
00792  *
00793  * @v usecs             Number of microseconds for which to delay
00794  */
00795 static void hv_udelay ( unsigned long usecs ) {
00796         uint32_t start;
00797         uint32_t elapsed;
00798         uint32_t threshold;
00799 
00800         /* Spin until specified number of 10MHz ticks have elapsed */
00801         start = rdmsr ( HV_X64_MSR_TIME_REF_COUNT );
00802         threshold = ( usecs * ( HV_TIMER_HZ / 1000000 ) );
00803         do {
00804                 elapsed = ( rdmsr ( HV_X64_MSR_TIME_REF_COUNT ) - start );
00805         } while ( elapsed < threshold );
00806 }
00807 
00808 /** Hyper-V timer: registered at TIMER_PREFERRED, backed by the time reference counter MSR */
00809 struct timer hv_timer __timer ( TIMER_PREFERRED ) = {
00810         .name = "Hyper-V",
00811         .probe = hv_timer_probe,
00812         .currticks = hv_currticks,
00813         .udelay = hv_udelay,
00814 };
00815 
00816 /* Drag in objects via hv_root_device */
00817 REQUIRING_SYMBOL ( hv_root_device );
00818 
00819 /* Drag in netvsc driver */
00820 REQUIRE_OBJECT ( netvsc );