From 67a39e089a5f52cf7721a556b6df564e7143ab83 Mon Sep 17 00:00:00 2001
From: Kimihiro Nonaka <nonakap@gmail.com>
Date: Thu, 15 Mar 2018 20:19:21 +0900
Subject: [PATCH 1/5] Added Hyper-V guest support from FreeBSD and OpenBSD.
 (WIP)

---
 sys/arch/amd64/amd64/genassym.cf |    2 +
 sys/arch/amd64/amd64/vector.S    |   45 +
 sys/arch/amd64/conf/GENERIC      |    6 +
 sys/arch/amd64/conf/files.amd64  |    4 +
 sys/arch/i386/conf/files.i386    |    4 +
 sys/arch/i386/i386/genassym.cf   |    2 +
 sys/arch/i386/i386/vector.S      |   42 +
 sys/arch/x86/acpi/vmbus_acpi.c   |  105 ++
 sys/arch/x86/conf/files.x86      |   27 +
 sys/arch/x86/include/cpu.h       |    2 +
 sys/arch/x86/include/intrdefs.h  |   11 +-
 sys/arch/x86/isa/clock.c         |    4 +-
 sys/arch/x86/x86/cpu.c           |   12 +-
 sys/arch/x86/x86/hyperv.c        |  813 +++++++++++
 sys/arch/x86/x86/hypervreg.h     |  575 ++++++++
 sys/arch/x86/x86/hypervvar.h     |  121 ++
 sys/arch/x86/x86/intr.c          |   21 +
 sys/arch/x86/x86/lapic.c         |   74 +-
 sys/arch/x86/x86/vmbus.c         | 2172 ++++++++++++++++++++++++++++++
 sys/arch/x86/x86/vmbusvar.h      |  272 ++++
 20 files changed, 4272 insertions(+), 42 deletions(-)
 create mode 100644 sys/arch/x86/acpi/vmbus_acpi.c
 create mode 100644 sys/arch/x86/x86/hyperv.c
 create mode 100644 sys/arch/x86/x86/hypervreg.h
 create mode 100644 sys/arch/x86/x86/hypervvar.h
 create mode 100644 sys/arch/x86/x86/vmbus.c
 create mode 100644 sys/arch/x86/x86/vmbusvar.h

diff --git a/sys/arch/amd64/amd64/genassym.cf b/sys/arch/amd64/amd64/genassym.cf
index f52cf5cc328..ec848eba682 100644
--- a/sys/arch/amd64/amd64/genassym.cf
+++ b/sys/arch/amd64/amd64/genassym.cf
@@ -317,11 +317,13 @@ define	IS_LWP			offsetof(struct intrsource, is_lwp)
 
 define	IPL_NONE		IPL_NONE
 define	IPL_PREEMPT		IPL_PREEMPT
+define	IPL_NET			IPL_NET
 define	IPL_CLOCK		IPL_CLOCK
 define	IPL_HIGH		IPL_HIGH
 
 define	LIR_IPI			LIR_IPI
 define	LIR_TIMER		LIR_TIMER
+define	LIR_HYPERV		LIR_HYPERV
 
 define	SIR_NET			SIR_NET
 define	SIR_CLOCK		SIR_CLOCK
diff --git a/sys/arch/amd64/amd64/vector.S b/sys/arch/amd64/amd64/vector.S
index 225386aea74..dc199895937 100644
--- a/sys/arch/amd64/amd64/vector.S
+++ b/sys/arch/amd64/amd64/vector.S
@@ -84,6 +84,9 @@
 #include "ioapic.h"
 #include "lapic.h"
 #include "assym.h"
+#ifndef XEN
+#include "vmbus.h"
+#endif
 
 	.text
 
@@ -262,6 +265,48 @@ IDTVEC(intr_lapic_ltimer)
 IDTVEC_END(intr_lapic_ltimer)
 	TEXT_USER_END
 
+#if NVMBUS > 0
+	/*
+	 * Hyper-V event channel upcall interrupt handler.
+	 * Only used when the hypervisor supports direct vector callbacks.
+	 */
+IDTVEC(recurse_hyperv_upcall)
+	INTR_RECURSE_HWFRAME
+	pushq	$0
+	pushq	$T_ASTFLT
+	INTR_RECURSE_ENTRY
+	jmp	1f
+IDTVEC_END(recurse_hyperv_upcall)
+NENTRY(handle_hyperv_upcall)
+	movl	CPUVAR(ILEVEL),%ebx
+	cmpl	$IPL_NET,%ebx
+	jae	2f
+	jmp	1f
+END(handle_hyperv_upcall)
+IDTVEC(resume_hyperv_upcall)
+1:
+	incl	CPUVAR(IDEPTH)
+	movl	$IPL_NET,CPUVAR(ILEVEL)
+	sti
+	pushq	%rbx
+	movq	%rsp,%rsi
+	call	_C_LABEL(hyperv_intr)
+	jmp	_C_LABEL(Xdoreti)
+2:
+	orl	$(1 << LIR_HYPERV),CPUVAR(IPENDING)
+	INTRFASTEXIT
+IDTVEC_END(resume_hyperv_upcall)
+
+	TEXT_USER_BEGIN
+IDTVEC(intr_hyperv_upcall)
+	pushq	$0
+	pushq	$T_ASTFLT
+	INTRENTRY
+	jmp	_C_LABEL(handle_hyperv_upcall)
+IDTVEC_END(intr_hyperv_upcall)
+	TEXT_USER_END
+#endif	/* NVMBUS > 0 */
+
 #endif /* NLAPIC > 0 */
 
 #ifndef XEN
diff --git a/sys/arch/amd64/conf/GENERIC b/sys/arch/amd64/conf/GENERIC
index cf27d75c54f..4272ae800aa 100644
--- a/sys/arch/amd64/conf/GENERIC
+++ b/sys/arch/amd64/conf/GENERIC
@@ -83,6 +83,7 @@ makeoptions	SPECTRE_V2_GCC_MITIGATION=1	# GCC Spectre variant 2
 acpicpu*	at cpu?		# ACPI CPU (including frequency scaling)
 coretemp*	at cpu?		# Intel on-die thermal sensor
 est0		at cpu0		# Intel Enhanced SpeedStep (non-ACPI)
+hyperv0 	at cpu0		# Microsoft Hyper-V
 #odcm0		at cpu0		# On-demand clock modulation
 powernow0	at cpu0		# AMD PowerNow! and Cool'n'Quiet (non-ACPI)
 vmt0		at cpu0		# VMware Tools
@@ -1247,6 +1248,11 @@ vioif*	at virtio?			# Virtio network device
 viornd* at virtio?			# Virtio entropy device
 vioscsi* at virtio?			# Virtio SCSI device
 
+# Hyper-V devices
+vmbus*		at acpi?		# Hyper-V VMBus
+#hvn*		at vmbus?		# Hyper-V NetVSC
+#hvs*		at vmbus?		# Hyper-V StorVSC
+
 # Pull in optional local configuration
 cinclude "arch/amd64/conf/GENERIC.local"
 
diff --git a/sys/arch/amd64/conf/files.amd64 b/sys/arch/amd64/conf/files.amd64
index 7a2da885879..2a96fd319af 100644
--- a/sys/arch/amd64/conf/files.amd64
+++ b/sys/arch/amd64/conf/files.amd64
@@ -188,5 +188,9 @@ include "dev/apm/files.apm"
 include "dev/acpi/files.acpi"
 file	arch/amd64/acpi/acpi_wakeup_low.S	acpi
 
+# Hyper-V VMBus
+attach	vmbus at acpinodebus with vmbus_acpi
+file	arch/x86/acpi/vmbus_acpi.c		vmbus_acpi
+
 include	"arch/amd64/conf/majors.amd64"
 endif #xen
diff --git a/sys/arch/i386/conf/files.i386 b/sys/arch/i386/conf/files.i386
index dc74369ee5f..e837daf1e38 100644
--- a/sys/arch/i386/conf/files.i386
+++ b/sys/arch/i386/conf/files.i386
@@ -410,6 +410,10 @@ include	"arch/i386/pnpbios/files.pnpbios"
 include "dev/acpi/files.acpi"
 file	arch/i386/acpi/acpi_wakeup_low.S	acpi
 
+# Hyper-V VMBus
+attach	vmbus at acpinodebus with vmbus_acpi
+file	arch/x86/acpi/vmbus_acpi.c		vmbus_acpi
+
 # Obsolete vesabios/vesafb flags
 obsolete	defflag	opt_vesabios.h	VESABIOSVERBOSE
 obsolete	defparam opt_vesafb.h	VESAFB_WIDTH VESAFB_HEIGHT VESAFB_DEPTH
diff --git a/sys/arch/i386/i386/genassym.cf b/sys/arch/i386/i386/genassym.cf
index e29b7a4e6d6..3043db181a6 100644
--- a/sys/arch/i386/i386/genassym.cf
+++ b/sys/arch/i386/i386/genassym.cf
@@ -319,6 +319,7 @@ define	IS_LWP			offsetof(struct intrsource, is_lwp)
 
 define	IPL_NONE		IPL_NONE
 define	IPL_PREEMPT		IPL_PREEMPT
+define	IPL_NET			IPL_NET
 define	IPL_SCHED		IPL_SCHED
 define	IPL_CLOCK		IPL_CLOCK
 define	IPL_HIGH		IPL_HIGH
@@ -329,6 +330,7 @@ define	IPL_SOFTSERIAL		IPL_SOFTSERIAL
 
 define	LIR_IPI			LIR_IPI
 define	LIR_TIMER		LIR_TIMER
+define	LIR_HYPERV		LIR_HYPERV
 
 define	SIR_NET			SIR_NET
 define	SIR_CLOCK		SIR_CLOCK
diff --git a/sys/arch/i386/i386/vector.S b/sys/arch/i386/i386/vector.S
index 69cee42b8a5..2ef961644b2 100644
--- a/sys/arch/i386/i386/vector.S
+++ b/sys/arch/i386/i386/vector.S
@@ -85,6 +85,9 @@ __KERNEL_RCSID(0, "$NetBSD: vector.S,v 1.77 2018/04/03 07:20:52 christos Exp $")
 
 #include "ioapic.h"
 #include "lapic.h"
+#ifndef XEN
+#include "vmbus.h"
+#endif
 
 #include "assym.h"
 
@@ -317,6 +320,45 @@ IDTVEC(resume_lapic_ltimer)
 	orl	$(1 << LIR_TIMER),CPUVAR(IPENDING)
 	INTRFASTEXIT
 IDTVEC_END(resume_lapic_ltimer)
+
+#if NVMBUS > 0
+	/*
+	 * Hyper-V event channel upcall interrupt handler.
+	 * Only used when the hypervisor supports direct vector callbacks.
+	 */
+IDTVEC(recurse_hyperv_upcall)
+	INTR_RECURSE_HWFRAME
+	pushl	$0
+	pushl	$T_ASTFLT
+	INTRENTRY
+	jmp	1f
+IDTVEC_END(recurse_hyperv_upcall)
+IDTVEC(intr_hyperv_upcall)
+	pushl	$0
+	pushl	$T_ASTFLT
+	INTRENTRY
+	movl	CPUVAR(ILEVEL),%ebx
+	cmpl	$IPL_NET,%ebx
+	jae	2f
+	jmp	1f
+IDTVEC_END(intr_hyperv_upcall)
+IDTVEC(resume_hyperv_upcall)
+1:
+	pushl	%ebx
+	IDEPTH_INCR
+	movl	$IPL_NET,CPUVAR(ILEVEL)
+	sti
+	pushl	%esp
+	call	_C_LABEL(hyperv_intr)
+	addl	$4,%esp
+	cli
+	jmp	_C_LABEL(Xdoreti)
+2:
+	orl	$(1 << LIR_HYPERV),CPUVAR(IPENDING)
+	INTRFASTEXIT
+IDTVEC_END(resume_hyperv_upcall)
+#endif	/* NVMBUS > 0 */
+
 #endif /* NLAPIC > 0 */
 
 
diff --git a/sys/arch/x86/acpi/vmbus_acpi.c b/sys/arch/x86/acpi/vmbus_acpi.c
new file mode 100644
index 00000000000..7cf89ddc8e3
--- /dev/null
+++ b/sys/arch/x86/acpi/vmbus_acpi.c
@@ -0,0 +1,105 @@
+/*	$NetBSD$	*/
+
+/*
+ * Copyright (c) 2018 Kimihiro Nonaka <nonaka@NetBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD$");
+
+#include <sys/param.h>
+#include <sys/device.h>
+#include <sys/systm.h>
+#include <sys/kmem.h>
+
+#include <dev/acpi/acpireg.h>
+#include <dev/acpi/acpivar.h>
+
+#include <x86/x86/hypervreg.h>
+#include <x86/x86/hypervvar.h>
+#include <x86/x86/vmbusvar.h>
+
+#define _COMPONENT	ACPI_RESOURCE_COMPONENT
+ACPI_MODULE_NAME	("vmbus_acpi")
+
+static int	vmbus_acpi_match(device_t, cfdata_t, void *);
+static void	vmbus_acpi_attach(device_t, device_t, void *);
+static int	vmbus_acpi_detach(device_t, int);
+
+struct vmbus_acpi_softc {
+	struct vmbus_softc sc;
+};
+
+CFATTACH_DECL_NEW(vmbus_acpi, sizeof(struct vmbus_acpi_softc),
+    vmbus_acpi_match, vmbus_acpi_attach, vmbus_acpi_detach, NULL);
+
+static const char * const vmbus_acpi_ids[] = {
+	"VMBUS",
+	"VMBus",
+	NULL
+};
+
+static int
+vmbus_acpi_match(device_t parent, cfdata_t match, void *opaque)
+{
+	struct acpi_attach_args *aa = opaque;
+
+	if (!vmbus_match(parent, match, opaque))
+		return 0;
+
+	if (aa->aa_node->ad_type != ACPI_TYPE_DEVICE)
+		return 0;
+
+	return acpi_match_hid(aa->aa_node->ad_devinfo, vmbus_acpi_ids);
+}
+
+static void
+vmbus_acpi_attach(device_t parent, device_t self, void *opaque)
+{
+	struct vmbus_acpi_softc *sc = device_private(self);
+	struct acpi_attach_args *aa = opaque;
+
+	sc->sc.sc_dev = self;
+	sc->sc.sc_dmat = aa->aa_dmat64 ? aa->aa_dmat64 : aa->aa_dmat;
+
+	if (vmbus_attach(&sc->sc))
+		return;
+
+	(void)pmf_device_register(self, NULL, NULL);
+}
+
+static int
+vmbus_acpi_detach(device_t self, int flags)
+{
+	struct vmbus_acpi_softc *sc = device_private(self);
+	int rv;
+
+	rv = vmbus_detach(&sc->sc, flags);
+	if (rv)
+		return rv;
+
+	pmf_device_deregister(self);
+
+	return 0;
+}
diff --git a/sys/arch/x86/conf/files.x86 b/sys/arch/x86/conf/files.x86
index fe8daa298ac..061308304c9 100644
--- a/sys/arch/x86/conf/files.x86
+++ b/sys/arch/x86/conf/files.x86
@@ -73,6 +73,33 @@ device	vmt: sysmon_power, sysmon_taskq
 attach	vmt at cpufeaturebus
 file	arch/x86/x86/vmt.c		vmt
 
+device	hyperv
+attach	hyperv at cpufeaturebus
+file	arch/x86/x86/hyperv.c		hyperv needs-flag
+
+define	hypervvmbus {}
+device	vmbus: hypervvmbus
+file	arch/x86/x86/vmbus.c		vmbus needs-flag
+
+device	hvheartbeat
+attach	hvheartbeat at hypervvmbus
+file	arch/x86/x86/hvheartbeat_vmbus.c hvheartbeat
+
+device	hvkvp
+attach	hvkvp at hypervvmbus
+file	arch/x86/x86/hvkvp_vmbus.c	hvkvp
+
+device	hvshutdown
+attach	hvshutdown at hypervvmbus
+file	arch/x86/x86/hvshutdown_vmbus.c	hvshutdown
+
+device	hvtimesync
+attach	hvtimesync at hypervvmbus
+file	arch/x86/x86/hvtimesync_vmbus.c	hvtimesync
+
+file	arch/x86/x86/vmbusic.c		hvheartbeat | hvkvp | hvshutdown |
+					hvtimesync
+
 file	arch/x86/x86/apic.c		ioapic | lapic
 file	arch/x86/x86/bus_dma.c		machdep
 file	arch/x86/x86/bus_space.c	machdep
diff --git a/sys/arch/x86/include/cpu.h b/sys/arch/x86/include/cpu.h
index 0177fd2503c..0de5fe457e6 100644
--- a/sys/arch/x86/include/cpu.h
+++ b/sys/arch/x86/include/cpu.h
@@ -474,6 +474,8 @@ void	startrtclock(void);
 void	i8254_delay(unsigned int);
 void	i8254_microtime(struct timeval *);
 void	i8254_initclocks(void);
+unsigned int gettick(void);
+extern void (*x86_delay)(unsigned int);
 #endif
 
 /* cpu.c */
diff --git a/sys/arch/x86/include/intrdefs.h b/sys/arch/x86/include/intrdefs.h
index 9c8824fd188..5c1a5c32daf 100644
--- a/sys/arch/x86/include/intrdefs.h
+++ b/sys/arch/x86/include/intrdefs.h
@@ -27,6 +27,7 @@
  */
 #define LIR_IPI		31
 #define LIR_TIMER	30
+#define LIR_HYPERV	29
 
 /*
  * XXX These should be lowest numbered, but right now would
@@ -34,11 +35,11 @@
  * means that soft interrupt take priority over hardware
  * interrupts when lowering the priority level!
  */
-#define	SIR_SERIAL	29
-#define	SIR_NET		28
-#define	SIR_BIO		27
-#define	SIR_CLOCK	26
-#define	SIR_PREEMPT	25
+#define	SIR_SERIAL	28
+#define	SIR_NET		27
+#define	SIR_BIO		26
+#define	SIR_CLOCK	25
+#define	SIR_PREEMPT	24
 
 /*
  * Maximum # of interrupt sources per CPU. 32 to fit in one word.
diff --git a/sys/arch/x86/isa/clock.c b/sys/arch/x86/isa/clock.c
index eb206e44734..2c2fa157137 100644
--- a/sys/arch/x86/isa/clock.c
+++ b/sys/arch/x86/isa/clock.c
@@ -183,8 +183,8 @@ int clock_debug = 0;
 #define DPRINTF(arg)
 #endif
 
-/* Used by lapic.c */
-unsigned int	gettick(void);
+void (*x86_delay)(unsigned int) = i8254_delay;
+
 void		sysbeep(int, int);
 static void     tickle_tc(void);
 
diff --git a/sys/arch/x86/x86/cpu.c b/sys/arch/x86/x86/cpu.c
index f3a593c1514..c117f84f316 100644
--- a/sys/arch/x86/x86/cpu.c
+++ b/sys/arch/x86/x86/cpu.c
@@ -774,7 +774,7 @@ cpu_start_secondary(struct cpu_info *ci)
 	KASSERT(cpu_starting == NULL);
 	cpu_starting = ci;
 	for (i = 100000; (!(ci->ci_flags & CPUF_PRESENT)) && i > 0; i--) {
-		i8254_delay(10);
+		x86_delay(10);
 	}
 
 	if ((ci->ci_flags & CPUF_PRESENT) == 0) {
@@ -810,7 +810,7 @@ cpu_boot_secondary(struct cpu_info *ci)
 
 	atomic_or_32(&ci->ci_flags, CPUF_GO);
 	for (i = 100000; (!(ci->ci_flags & CPUF_RUNNING)) && i > 0; i--) {
-		i8254_delay(10);
+		x86_delay(10);
 	}
 	if ((ci->ci_flags & CPUF_RUNNING) == 0) {
 		aprint_error_dev(ci->ci_dev, "failed to start\n");
@@ -1070,7 +1070,7 @@ mp_cpu_start(struct cpu_info *ci, paddr_t target)
 			    __func__);
 			return error;
 		}
-		i8254_delay(10000);
+		x86_delay(10000);
 
 		error = x86_ipi_startup(ci->ci_cpuid, target / PAGE_SIZE);
 		if (error != 0) {
@@ -1078,7 +1078,7 @@ mp_cpu_start(struct cpu_info *ci, paddr_t target)
 			    __func__);
 			return error;
 		}
-		i8254_delay(200);
+		x86_delay(200);
 
 		error = x86_ipi_startup(ci->ci_cpuid, target / PAGE_SIZE);
 		if (error != 0) {
@@ -1086,7 +1086,7 @@ mp_cpu_start(struct cpu_info *ci, paddr_t target)
 			    __func__);
 			return error;
 		}
-		i8254_delay(200);
+		x86_delay(200);
 	}
 
 	return 0;
@@ -1244,7 +1244,7 @@ cpu_get_tsc_freq(struct cpu_info *ci)
 
 	if (cpu_hascounter()) {
 		last_tsc = cpu_counter_serializing();
-		i8254_delay(100000);
+		x86_delay(100000);
 		ci->ci_data.cpu_cc_freq =
 		    (cpu_counter_serializing() - last_tsc) * 10;
 	}
diff --git a/sys/arch/x86/x86/hyperv.c b/sys/arch/x86/x86/hyperv.c
new file mode 100644
index 00000000000..02fc991d081
--- /dev/null
+++ b/sys/arch/x86/x86/hyperv.c
@@ -0,0 +1,813 @@
+/*	$NetBSD$	*/
+
+/*-
+ * Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * Implements low-level interactions with Hyper-V/Azure
+ */
+#include <sys/cdefs.h>
+#ifdef __KERNEL_RCSID
+__KERNEL_RCSID(0, "$NetBSD$");
+#endif
+#ifdef __FBSDID
+__FBSDID("$FreeBSD: head/sys/dev/hyperv/vmbus/hyperv.c 331757 2018-03-30 02:25:12Z emaste $");
+#endif
+
+#ifdef _KERNEL_OPT
+#include "lapic.h"
+#endif
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/device.h>
+#include <sys/bus.h>
+#include <sys/kmem.h>
+#include <sys/module.h>
+#include <sys/pmf.h>
+#include <sys/sysctl.h>
+#include <sys/timetc.h>
+
+#include <uvm/uvm_extern.h>
+
+#include <machine/cpufunc.h>
+#include <machine/cputypes.h>
+#include <machine/cpuvar.h>
+#include <machine/cpu_counter.h>
+
+#include <x86/x86/hypervreg.h>
+#include <x86/x86/hypervvar.h>
+
+struct hyperv_softc {
+	device_t		sc_dev;
+
+	struct sysctllog	*sc_log;
+};
+
+struct hyperv_hypercall_ctx {
+	void		*hc_addr;
+	paddr_t		hc_paddr;
+};
+
+static struct hyperv_hypercall_ctx hyperv_hypercall_ctx;
+
+static u_int	hyperv_get_timecount(struct timecounter *);
+
+u_int hyperv_ver_major;
+
+u_int hyperv_features;
+u_int hyperv_recommends;
+
+static u_int hyperv_pm_features;
+static u_int hyperv_features3;
+
+const struct sysctlnode *hyperv_sysctl_node;
+
+static char hyperv_version_str[64];
+static char hyperv_features_str[256];
+static char hyperv_pm_features_str[256];
+static char hyperv_features3_str[256];
+
+hyperv_tc64_t hyperv_tc64;
+
+static struct timecounter hyperv_timecounter = {
+	.tc_get_timecount = hyperv_get_timecount,
+	.tc_counter_mask = 0xffffffff,
+	.tc_frequency = HYPERV_TIMER_FREQ,
+	.tc_name = "Hyper-V",
+	.tc_quality = 2000,
+};
+
+static int	hyperv_match(device_t, cfdata_t, void *);
+static void	hyperv_attach(device_t, device_t, void *);
+static int	hyperv_detach(device_t, int);
+
+CFATTACH_DECL_NEW(hyperv, sizeof(struct hyperv_softc),
+    hyperv_match, hyperv_attach, hyperv_detach, NULL);
+
+static void	hyperv_hypercall_memfree(void);
+static bool	hyperv_init_hypercall(void);
+static int	hyperv_sysctl_setup_root(struct hyperv_softc *);
+
+static u_int
+hyperv_get_timecount(struct timecounter *tc)
+{
+
+	return (u_int)rdmsr(MSR_HV_TIME_REF_COUNT);
+}
+
+static uint64_t
+hyperv_tc64_rdmsr(void)
+{
+
+	return rdmsr(MSR_HV_TIME_REF_COUNT);
+}
+
+#ifdef __amd64__
+/*
+ * Reference TSC
+ */
+struct hyperv_ref_tsc {
+	struct hyperv_reftsc	*tsc_ref;
+	paddr_t			tsc_paddr;
+};
+
+static struct hyperv_ref_tsc hyperv_ref_tsc;
+
+static struct timecounter hyperv_tsc_timecounter = {
+	.tc_get_timecount = NULL,	/* based on CPU vendor. */
+	.tc_counter_mask = 0xffffffff,
+	.tc_frequency = HYPERV_TIMER_FREQ,
+	.tc_name = "Hyper-V-TSC",
+	.tc_quality = 3000,
+};
+
+static __inline u_int
+atomic_load_acq_int(volatile u_int *p)
+{
+	u_int r = *p;
+	__insn_barrier();
+	return r;
+}
+
+#define HYPERV_TSC_TIMECOUNT(fence)					\
+static uint64_t								\
+hyperv_tc64_tsc_##fence(void)						\
+{									\
+	struct hyperv_reftsc *tsc_ref = hyperv_ref_tsc.tsc_ref;		\
+	uint32_t seq;							\
+									\
+	while ((seq = atomic_load_acq_int(&tsc_ref->tsc_seq)) != 0) {	\
+		uint64_t disc, ret, tsc;				\
+		uint64_t scale = tsc_ref->tsc_scale;			\
+		int64_t ofs = tsc_ref->tsc_ofs;				\
+									\
+		x86_##fence();						\
+		tsc = cpu_counter();					\
+									\
+		/* ret = ((tsc * scale) >> 64) + ofs */			\
+		__asm__ __volatile__ ("mulq %3" :			\
+		    "=d" (ret), "=a" (disc) :				\
+		    "a" (tsc), "r" (scale));				\
+		ret += ofs;						\
+									\
+		__insn_barrier();					\
+		if (tsc_ref->tsc_seq == seq)				\
+			return ret;					\
+									\
+		/* Sequence changed; re-sync. */			\
+	}								\
+	/* Fallback to the generic timecounter, i.e. rdmsr. */		\
+	return rdmsr(MSR_HV_TIME_REF_COUNT);				\
+}									\
+									\
+static u_int								\
+hyperv_tsc_timecount_##fence(struct timecounter *tc __unused)		\
+{									\
+									\
+	return hyperv_tc64_tsc_##fence();				\
+}
+
+HYPERV_TSC_TIMECOUNT(lfence);
+HYPERV_TSC_TIMECOUNT(mfence);
+
+static bool
+hyperv_tsc_tcinit(void)
+{
+	hyperv_tc64_t tc64 = NULL;
+	uint64_t orig_msr, msr;
+
+	if ((hyperv_features &
+	     (CPUID_HV_MSR_TIME_REFCNT | CPUID_HV_MSR_REFERENCE_TSC)) !=
+	    (CPUID_HV_MSR_TIME_REFCNT | CPUID_HV_MSR_REFERENCE_TSC) ||
+	    (cpu_feature[0] & CPUID_SSE2) == 0)	/* SSE2 for mfence/lfence */
+		return false;
+
+	switch (cpu_vendor) {
+	case CPUVENDOR_AMD:
+		hyperv_tsc_timecounter.tc_get_timecount =
+		    hyperv_tsc_timecount_mfence;
+		tc64 = hyperv_tc64_tsc_mfence;
+		break;
+
+	case CPUVENDOR_INTEL:
+		hyperv_tsc_timecounter.tc_get_timecount =
+		    hyperv_tsc_timecount_lfence;
+		tc64 = hyperv_tc64_tsc_lfence;
+		break;
+
+	default:
+		/* Unsupported CPU vendors. */
+		return false;
+	}
+
+	hyperv_ref_tsc.tsc_ref = (void *)uvm_km_alloc(kernel_map,
+	    PAGE_SIZE, PAGE_SIZE, UVM_KMF_WIRED | UVM_KMF_ZERO);
+	if (hyperv_ref_tsc.tsc_ref == NULL) {
+		aprint_error("Hyper-V: reference TSC page allocation failed\n");
+		return false;
+	}
+
+	if (!pmap_extract(pmap_kernel(), (vaddr_t)hyperv_ref_tsc.tsc_ref,
+	    &hyperv_ref_tsc.tsc_paddr)) {
+		aprint_error("Hyper-V: reference TSC page setup failed\n");
+		uvm_km_free(kernel_map, (vaddr_t)hyperv_ref_tsc.tsc_ref,
+		    PAGE_SIZE, UVM_KMF_WIRED);
+		hyperv_ref_tsc.tsc_ref = NULL;
+		return false;
+	}
+
+	orig_msr = rdmsr(MSR_HV_REFERENCE_TSC);
+	msr = MSR_HV_REFTSC_ENABLE | (orig_msr & MSR_HV_REFTSC_RSVD_MASK) |
+	    ((hyperv_ref_tsc.tsc_paddr >> PAGE_SHIFT) << MSR_HV_REFTSC_PGSHIFT);
+	wrmsr(MSR_HV_REFERENCE_TSC, msr);
+
+	/* Install 64-bit timecounter method for other modules to use. */
+	hyperv_tc64 = tc64;
+
+	/* Register "enlightened" timecounter. */
+	tc_init(&hyperv_tsc_timecounter);
+
+	return true;
+}
+#endif
+
+static void
+delay_tc(unsigned int n)
+{
+	struct timecounter *tc;
+	uint64_t end, now;
+	u_int last, u;
+
+	tc = timecounter;
+	if (tc->tc_quality <= 0) {
+		x86_delay(n);
+		return;
+	}
+
+	now = 0;
+	end = tc->tc_frequency * n / 1000000;
+	last = tc->tc_get_timecount(tc) & tc->tc_counter_mask;
+	do {
+		x86_pause();
+		u = tc->tc_get_timecount(tc) & tc->tc_counter_mask;
+		if (u < last)
+			now += tc->tc_counter_mask - last + u + 1;
+		else
+			now += u - last;
+		last = u;
+	} while (now < end);
+}
+
+static __inline uint64_t
+hyperv_hypercall_md(volatile void *hc_addr, uint64_t in_val, uint64_t in_paddr,
+    uint64_t out_paddr)
+{
+	uint64_t status;
+
+#ifdef __amd64__
+	__asm__ __volatile__ ("mov %0, %%r8" : : "r" (out_paddr): "r8");
+	__asm__ __volatile__ ("call *%3" : "=a" (status) : "c" (in_val),
+	    "d" (in_paddr), "m" (hc_addr));
+#else
+	uint32_t in_val_hi = in_val >> 32;
+	uint32_t in_val_lo = in_val & 0xFFFFFFFF;
+	uint32_t status_hi, status_lo;
+	uint32_t in_paddr_hi = in_paddr >> 32;
+	uint32_t in_paddr_lo = in_paddr & 0xFFFFFFFF;
+	uint32_t out_paddr_hi = out_paddr >> 32;
+	uint32_t out_paddr_lo = out_paddr & 0xFFFFFFFF;
+
+	__asm__ __volatile__ ("call *%8" : "=d" (status_hi), "=a" (status_lo) :
+	    "d" (in_val_hi), "a" (in_val_lo),
+	    "b" (in_paddr_hi), "c" (in_paddr_lo),
+	    "D" (out_paddr_hi), "S" (out_paddr_lo),
+	    "m" (hc_addr));
+	status = status_lo | ((uint64_t)status_hi << 32);
+#endif
+
+	return status;
+}
+
+__inline uint64_t
+hyperv_hypercall(uint64_t control, paddr_t in_paddr, paddr_t out_paddr)
+{
+
+	if (hyperv_hypercall_ctx.hc_addr == NULL)
+		return ~HYPERCALL_STATUS_SUCCESS;
+
+	return hyperv_hypercall_md(hyperv_hypercall_ctx.hc_addr, control,
+	    in_paddr, out_paddr);
+}
+
+uint64_t
+hyperv_hypercall_post_message(paddr_t msg)
+{
+
+	return hyperv_hypercall(HYPERCALL_POST_MESSAGE, msg, 0);
+}
+
+uint64_t
+hyperv_hypercall_signal_event(paddr_t monprm)
+{
+
+	return hyperv_hypercall(HYPERCALL_SIGNAL_EVENT, monprm, 0);
+}
+
+int
+hyperv_guid2str(const struct hyperv_guid *guid, char *buf, size_t sz)
+{
+	const uint8_t *d = guid->hv_guid;
+
+	return snprintf(buf, sz, "%02x%02x%02x%02x-"
+	    "%02x%02x-%02x%02x-%02x%02x-"
+	    "%02x%02x%02x%02x%02x%02x",
+	    d[3], d[2], d[1], d[0],
+	    d[5], d[4], d[7], d[6], d[8], d[9],
+	    d[10], d[11], d[12], d[13], d[14], d[15]);
+}
+
+static bool
+hyperv_identify(void)
+{
+	char buf[256];
+	u_int regs[4];
+	u_int maxleaf;
+
+	if (vm_guest != VM_GUEST_HV)
+		return false;
+
+	x86_cpuid(CPUID_LEAF_HV_MAXLEAF, regs);
+	maxleaf = regs[0];
+	if (maxleaf < CPUID_LEAF_HV_LIMITS)
+		return false;
+
+	x86_cpuid(CPUID_LEAF_HV_INTERFACE, regs);
+	if (regs[0] != CPUID_HV_IFACE_HYPERV)
+		return false;
+
+	x86_cpuid(CPUID_LEAF_HV_FEATURES, regs);
+	if (!(regs[0] & CPUID_HV_MSR_HYPERCALL)) {
+		/*
+		 * Hyper-V w/o Hypercall is impossible; someone
+		 * is faking Hyper-V.
+		 */
+		return false;
+	}
+
+	hyperv_features = regs[0];
+	hyperv_pm_features = regs[2];
+	hyperv_features3 = regs[3];
+
+	x86_cpuid(CPUID_LEAF_HV_IDENTITY, regs);
+	hyperv_ver_major = regs[1] >> 16;
+	snprintf(hyperv_version_str, sizeof(hyperv_version_str),
+	    "%d.%d.%d [SP%d]",
+	    hyperv_ver_major, regs[1] & 0xffff, regs[0], regs[2]);
+	aprint_verbose("Hyper-V Version: %s\n", hyperv_version_str);
+
+	snprintb(hyperv_features_str, sizeof(hyperv_features_str),
+	    "\020"
+	    "\001VPRUNTIME"	/* MSR_HV_VP_RUNTIME */
+	    "\002TMREFCNT"	/* MSR_HV_TIME_REF_COUNT */
+	    "\003SYNIC"		/* MSRs for SynIC */
+	    "\004SYNTM"		/* MSRs for SynTimer */
+	    "\005APIC"		/* MSR_HV_{EOI,ICR,TPR} */
+	    "\006HYPERCALL"	/* MSR_HV_{GUEST_OS_ID,HYPERCALL} */
+	    "\007VPINDEX"	/* MSR_HV_VP_INDEX */
+	    "\010RESET"		/* MSR_HV_RESET */
+	    "\011STATS"		/* MSR_HV_STATS_ */
+	    "\012REFTSC"	/* MSR_HV_REFERENCE_TSC */
+	    "\013IDLE"		/* MSR_HV_GUEST_IDLE */
+	    "\014TMFREQ"	/* MSR_HV_{TSC,APIC}_FREQUENCY */
+	    "\015DEBUG",	/* MSR_HV_SYNTH_DEBUG_ */
+	    hyperv_features);
+	aprint_verbose("  Features=%s\n", hyperv_features_str);
+	snprintb(buf, sizeof(buf),
+	    "\020"
+	    "\005C3HPET",	/* HPET is required for C3 state */
+	    (hyperv_pm_features & ~CPUPM_HV_CSTATE_MASK));
+	snprintf(hyperv_pm_features_str, sizeof(hyperv_pm_features_str),
+	    "%s [C%u]", buf, CPUPM_HV_CSTATE(hyperv_pm_features));
+	aprint_verbose("  PM Features=%s\n", hyperv_pm_features_str);
+	snprintb(hyperv_features3_str, sizeof(hyperv_features3_str),
+	    "\020"
+	    "\001MWAIT"		/* MWAIT */
+	    "\002DEBUG"		/* guest debug support */
+	    "\003PERFMON"	/* performance monitor */
+	    "\004PCPUDPE"	/* physical CPU dynamic partition event */
+	    "\005XMMHC"		/* hypercall input through XMM regs */
+	    "\006IDLE"		/* guest idle support */
+	    "\007SLEEP"		/* hypervisor sleep support */
+	    "\010NUMA"		/* NUMA distance query support */
+	    "\011TMFREQ"	/* timer frequency query (TSC, LAPIC) */
+	    "\012SYNCMC"	/* inject synthetic machine checks */
+	    "\013CRASH"		/* MSRs for guest crash */
+	    "\014DEBUGMSR"	/* MSRs for guest debug */
+	    "\015NPIEP"		/* NPIEP */
+	    "\016HVDIS",	/* disabling hypervisor */
+	    hyperv_features3);
+	aprint_verbose("  Features3=%s\n", hyperv_features3_str);
+
+	x86_cpuid(CPUID_LEAF_HV_RECOMMENDS, regs);
+	hyperv_recommends = regs[0];
+	aprint_verbose("  Recommends: %08x %08x\n", regs[0], regs[1]);
+
+	x86_cpuid(CPUID_LEAF_HV_LIMITS, regs);
+	aprint_verbose("  Limits: Vcpu:%d Lcpu:%d Int:%d\n",
+	    regs[0], regs[1], regs[2]);
+
+	if (maxleaf >= CPUID_LEAF_HV_HWFEATURES) {
+		x86_cpuid(CPUID_LEAF_HV_HWFEATURES, regs);
+		aprint_verbose("  HW Features: %08x, AMD: %08x\n",
+		    regs[0], regs[3]);
+	}
+
+	return true;
+}
+
+bool
+hyperv_init(void)
+{
+
+	if (!hyperv_identify()) {
+		/* Not Hyper-V; reset guest id to the generic one. */
+		if (vm_guest == VM_GUEST_HV)
+			vm_guest = VM_GUEST_VM;
+		return false;
+	}
+
+	/* Set guest id */
+	wrmsr(MSR_HV_GUEST_OS_ID, MSR_HV_GUESTID_OSTYPE_NETBSD |
+	    (uint64_t)__NetBSD_Version__ << MSR_HV_GUESTID_VERSION_SHIFT);
+
+	if (hyperv_features & CPUID_HV_MSR_TIME_REFCNT) {
+		/* Register Hyper-V timecounter */
+		tc_init(&hyperv_timecounter);
+
+		/*
+		 * Install 64-bit timecounter method for other modules to use.
+		 */
+		hyperv_tc64 = hyperv_tc64_rdmsr;
+#ifdef __amd64__
+		hyperv_tsc_tcinit();
+#endif
+
+		/* delay with timecounter */
+		x86_delay = delay_func = delay_tc;
+	}
+
+#if NLAPIC > 0
+	if ((hyperv_features & CPUID_HV_MSR_TIME_FREQ) &&
+	    (hyperv_features3 & CPUID3_HV_TIME_FREQ)) {
+		extern uint32_t lapic_per_second;
+
+		lapic_per_second = rdmsr(MSR_HV_APIC_FREQUENCY);
+	}
+#endif
+
+	return hyperv_init_hypercall();
+}
+
+bool
+hyperv_is_initialized(void)
+{
+	uint64_t msr;
+
+	if (vm_guest != VM_GUEST_HV)
+		return false;
+	if (rdmsr_safe(MSR_HV_HYPERCALL, &msr) == EFAULT)
+		return false;
+	return (msr & MSR_HV_HYPERCALL_ENABLE) ? true : false;
+}
+
+static int
+hyperv_match(device_t parent, cfdata_t cf, void *aux)
+{
+	struct cpufeature_attach_args *cfaa = aux;
+	struct cpu_info *ci = cfaa->ci;
+
+	if (strcmp(cfaa->name, "vm") != 0)
+		return 0;
+	if ((ci->ci_flags & (CPUF_BSP|CPUF_SP|CPUF_PRIMARY)) == 0)
+		return 0;
+	if (vm_guest != VM_GUEST_HV)
+		return 0;
+
+	return 1;
+}
+
+static void
+hyperv_attach(device_t parent, device_t self, void *aux)
+{
+	struct hyperv_softc *sc = device_private(self);
+
+	sc->sc_dev = self;
+
+	aprint_naive("\n");
+	aprint_normal(": Hyper-V\n");
+
+	if (!hyperv_is_initialized()) {
+		if (rdmsr(MSR_HV_GUEST_OS_ID) == 0) {
+			if (!hyperv_init()) {
+				aprint_error_dev(self, "initialize failed\n");
+				return;
+			}
+		}
+		hyperv_init_hypercall();
+	}
+
+	(void) pmf_device_register(self, NULL, NULL);
+
+	(void) hyperv_sysctl_setup_root(sc);
+}
+
+static int
+hyperv_detach(device_t self, int flags)
+{
+	struct hyperv_softc *sc = device_private(self);
+	uint64_t hc;
+
+	/* Disable Hypercall */
+	hc = rdmsr(MSR_HV_HYPERCALL);
+	wrmsr(MSR_HV_HYPERCALL, hc & MSR_HV_HYPERCALL_RSVD_MASK);
+	hyperv_hypercall_memfree();
+
+	if (hyperv_features & CPUID_HV_MSR_TIME_REFCNT)
+		tc_detach(&hyperv_timecounter);
+
+	wrmsr(MSR_HV_GUEST_OS_ID, 0);
+
+	pmf_device_deregister(self);
+
+	if (sc->sc_log != NULL) {
+		sysctl_teardown(&sc->sc_log);
+		sc->sc_log = NULL;
+	}
+
+	return 0;
+}
+
+static void
+hyperv_hypercall_memfree(void)
+{
+
+	if (hyperv_hypercall_ctx.hc_addr != NULL) {
+		uvm_km_free(kernel_map, (vaddr_t)hyperv_hypercall_ctx.hc_addr,
+		    PAGE_SIZE, UVM_KMF_WIRED);
+		hyperv_hypercall_ctx.hc_addr = NULL;
+	}
+}
+
+static bool
+hyperv_init_hypercall(void)
+{
+	uint64_t hc, hc_orig;
+
+	hyperv_hypercall_ctx.hc_addr = (void *)uvm_km_alloc(kernel_map,
+	    PAGE_SIZE, PAGE_SIZE, UVM_KMF_WIRED | UVM_KMF_EXEC);
+	KASSERT(hyperv_hypercall_ctx.hc_addr != NULL);
+	memset(hyperv_hypercall_ctx.hc_addr, 0xcc, PAGE_SIZE);
+	wbinvd();
+	x86_flush();
+
+	/* The hypercall page must be both readable and executable */
+	uvm_km_protect(kernel_map, (vaddr_t)hyperv_hypercall_ctx.hc_addr,
+	    PAGE_SIZE, VM_PROT_READ | VM_PROT_EXECUTE);
+
+	if (!pmap_extract(pmap_kernel(), (vaddr_t)hyperv_hypercall_ctx.hc_addr,
+	    &hyperv_hypercall_ctx.hc_paddr)) {
+		aprint_error("Hyper-V: Hypercall page setup failed\n");
+		hyperv_hypercall_memfree();
+		/* Can't perform any Hyper-V specific actions */
+		vm_guest = VM_GUEST_VM;
+		return false;
+	}
+
+	/* Get the 'reserved' bits, which require preservation. */
+	hc_orig = rdmsr(MSR_HV_HYPERCALL);
+
+	/*
+	 * Setup the Hypercall page.
+	 *
+	 * NOTE: 'reserved' bits MUST be preserved.
+	 */
+	hc = (atop(hyperv_hypercall_ctx.hc_paddr) << MSR_HV_HYPERCALL_PGSHIFT) |
+	    (hc_orig & MSR_HV_HYPERCALL_RSVD_MASK) |
+	    MSR_HV_HYPERCALL_ENABLE;
+	wrmsr(MSR_HV_HYPERCALL, hc);
+
+	/*
+	 * Confirm that Hypercall page did get setup.
+	 */
+	hc = rdmsr(MSR_HV_HYPERCALL);
+	if (!(hc & MSR_HV_HYPERCALL_ENABLE)) {
+		aprint_error("Hyper-V: Hypercall setup failed\n");
+		hyperv_hypercall_memfree();
+		/* Can't perform any Hyper-V specific actions */
+		vm_guest = VM_GUEST_VM;
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Hyper-V bus_dma utilities.
+ */
+/*
+ * hyperv_dma_alloc(dmat, dma, size, alignment, boundary, nsegs)
+ *
+ *	Allocate, map and load a DMA buffer described by "dma" using the
+ *	standard four-step bus_dma sequence.  Returns the buffer's KVA,
+ *	or NULL on failure (with "dma" returned to its empty state).
+ *	Must not be called on an already-populated hyperv_dma.
+ */
+void *
+hyperv_dma_alloc(bus_dma_tag_t dmat, struct hyperv_dma *dma, bus_size_t size,
+    bus_size_t alignment, bus_size_t boundary, int nsegs)
+{
+	int rseg, error;
+
+	KASSERT(dma->segs == NULL);
+
+	dma->nsegs = nsegs;
+	dma->segs = kmem_zalloc(sizeof(*dma->segs) * nsegs, KM_SLEEP);
+
+	error = bus_dmamem_alloc(dmat, size, alignment, boundary, dma->segs,
+	    nsegs, &rseg, BUS_DMA_WAITOK);
+	if (error) {
+		aprint_error("%s: bus_dmamem_alloc failed: error=%d\n",
+		    __func__, error);
+		goto fail1;
+	}
+	error = bus_dmamem_map(dmat, dma->segs, rseg, size, &dma->addr,
+	    BUS_DMA_WAITOK);
+	if (error) {
+		aprint_error("%s: bus_dmamem_map failed: error=%d\n",
+		    __func__, error);
+		goto fail2;
+	}
+	error = bus_dmamap_create(dmat, size, rseg, size, boundary,
+	    BUS_DMA_WAITOK, &dma->map);
+	if (error) {
+		aprint_error("%s: bus_dmamap_create failed: error=%d\n",
+		    __func__, error);
+		goto fail3;
+	}
+	error = bus_dmamap_load(dmat, dma->map, dma->addr, size, NULL,
+	    BUS_DMA_WAITOK | BUS_DMA_READ | BUS_DMA_WRITE);
+	if (error) {
+		aprint_error("%s: bus_dmamap_load failed: error=%d\n",
+		    __func__, error);
+		goto fail4;
+	}
+
+	return dma->addr;
+
+	/* Unwind in reverse order of the steps above. */
+fail4:	bus_dmamap_destroy(dmat, dma->map);
+fail3:	bus_dmamem_unmap(dmat, dma->addr, size);
+	dma->addr = NULL;
+fail2:	bus_dmamem_free(dmat, dma->segs, rseg);
+fail1:	kmem_free(dma->segs, sizeof(*dma->segs) * nsegs);
+	dma->segs = NULL;
+	dma->nsegs = 0;
+	return NULL;
+}
+
+/*
+ * hyperv_dma_free(dmat, dma)
+ *
+ *	Tear down a buffer set up by hyperv_dma_alloc(): unload and
+ *	destroy the map, unmap and free the backing memory, and release
+ *	the segment array.
+ */
+void
+hyperv_dma_free(bus_dma_tag_t dmat, struct hyperv_dma *dma)
+{
+	bus_size_t size = dma->map->dm_mapsize;
+	/*
+	 * NOTE(review): assumes dm_nsegs equals the rseg count that
+	 * bus_dmamem_alloc() returned in hyperv_dma_alloc() -- confirm.
+	 */
+	int rsegs = dma->map->dm_nsegs;
+
+	bus_dmamap_unload(dmat, dma->map);
+	bus_dmamap_destroy(dmat, dma->map);
+	bus_dmamem_unmap(dmat, dma->addr, size);
+	dma->addr = NULL;
+	bus_dmamem_free(dmat, dma->segs, rsegs);
+	kmem_free(dma->segs, sizeof(*dma->segs) * dma->nsegs);
+	dma->segs = NULL;
+	dma->nsegs = 0;
+}
+
+/*
+ * hyperv_sysctl_setup(sc, hyperv_node)
+ *
+ *	Create the read-only string leaves ("version", "features",
+ *	"pm_features", "features3") under the given hyperv sysctl node.
+ *	Returns 0 on success or the first sysctl_createv() error.
+ */
+static int
+hyperv_sysctl_setup(struct hyperv_softc *sc,
+    const struct sysctlnode *hyperv_node)
+{
+	int error;
+
+	error = sysctl_createv(&sc->sc_log, 0, &hyperv_node, NULL,
+	    CTLFLAG_READONLY, CTLTYPE_STRING, "version", NULL,
+	    NULL, 0, hyperv_version_str,
+	    0, CTL_CREATE, CTL_EOL);
+	if (error)
+		return error;
+
+	error = sysctl_createv(&sc->sc_log, 0, &hyperv_node, NULL,
+	    CTLFLAG_READONLY, CTLTYPE_STRING, "features", NULL,
+	    NULL, 0, hyperv_features_str,
+	    0, CTL_CREATE, CTL_EOL);
+	if (error)
+		return error;
+
+	error = sysctl_createv(&sc->sc_log, 0, &hyperv_node, NULL,
+	    CTLFLAG_READONLY, CTLTYPE_STRING, "pm_features", NULL,
+	    NULL, 0, hyperv_pm_features_str,
+	    0, CTL_CREATE, CTL_EOL);
+	if (error)
+		return error;
+
+	error = sysctl_createv(&sc->sc_log, 0, &hyperv_node, NULL,
+	    CTLFLAG_READONLY, CTLTYPE_STRING, "features3", NULL,
+	    NULL, 0, hyperv_features3_str,
+	    0, CTL_CREATE, CTL_EOL);
+	if (error)
+		return error;
+
+	return 0;
+}
+
+/*
+ * hyperv_sysctl_setup_root(sc)
+ *
+ *	Create the machdep.<device> sysctl subtree and populate it via
+ *	hyperv_sysctl_setup().  On success the new node is published
+ *	through the global hyperv_sysctl_node; on failure everything
+ *	created so far is torn down and the error returned.
+ */
+static int
+hyperv_sysctl_setup_root(struct hyperv_softc *sc)
+{
+	const struct sysctlnode *machdep_node, *hyperv_node;
+	int error;
+
+	error = sysctl_createv(&sc->sc_log, 0, NULL, &machdep_node,
+	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "machdep", NULL,
+	    NULL, 0, NULL, 0, CTL_MACHDEP, CTL_EOL);
+	if (error)
+		goto fail;
+
+	/* Node is named after the device instance, e.g. "hyperv0". */
+	error = sysctl_createv(&sc->sc_log, 0, &machdep_node, &hyperv_node,
+	    CTLFLAG_PERMANENT, CTLTYPE_NODE, device_xname(sc->sc_dev), NULL,
+	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
+	if (error)
+		goto fail;
+
+	error = hyperv_sysctl_setup(sc, hyperv_node);
+	if (error)
+		goto fail;
+
+	hyperv_sysctl_node = hyperv_node;
+
+	return 0;
+
+fail:
+	sysctl_teardown(&sc->sc_log);
+	sc->sc_log = NULL;
+	return error;
+}
+
+MODULE(MODULE_CLASS_DRIVER, hyperv, NULL);
+
+#ifdef _MODULE
+#include "ioconf.c"
+#endif
+
+/*
+ * hyperv_modcmd(cmd, aux)
+ *
+ *	Module control entry point for the "hyperv" driver module.
+ *
+ *	NOTE(review): on MODULE_CMD_INIT, hyperv_init() is called even
+ *	when config_init_component() failed, and its return value is
+ *	ignored; MODULE_CMD_FINI does not undo hyperv_init().  Confirm
+ *	this is intended.
+ */
+static int
+hyperv_modcmd(modcmd_t cmd, void *aux)
+{
+	int rv = 0;
+
+	switch (cmd) {
+	case MODULE_CMD_INIT:
+#ifdef _MODULE
+		rv = config_init_component(cfdriver_ioconf_hyperv,
+		    cfattach_ioconf_hyperv, cfdata_ioconf_hyperv);
+#endif
+		hyperv_init();
+		break;
+
+	case MODULE_CMD_FINI:
+#ifdef _MODULE
+		rv = config_fini_component(cfdriver_ioconf_hyperv,
+		    cfattach_ioconf_hyperv, cfdata_ioconf_hyperv);
+#endif
+		break;
+
+	default:
+		rv = ENOTTY;
+		break;
+	}
+
+	return rv;
+}
diff --git a/sys/arch/x86/x86/hypervreg.h b/sys/arch/x86/x86/hypervreg.h
new file mode 100644
index 00000000000..591231e6471
--- /dev/null
+++ b/sys/arch/x86/x86/hypervreg.h
@@ -0,0 +1,575 @@
+/*	$NetBSD$	*/
+/*	$OpenBSD: hypervreg.h,v 1.10 2017/01/05 13:17:22 mikeb Exp $	*/
+
+/*-
+ * Copyright (c) 2009-2012,2016 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef _HYPERVREG_H_
+#define _HYPERVREG_H_
+
+#if defined(_KERNEL)
+
+#define VMBUS_CONNID_MESSAGE		1
+#define VMBUS_CONNID_EVENT		2
+#define VMBUS_SINT_MESSAGE		2
+#define VMBUS_SINT_TIMER		4
+
+#define VMBUS_GPADL_START		0xffff /* 0x10000 effectively */
+
+/*
+ * 16-byte GUID, used e.g. to identify VMBus channel types and
+ * instances (see struct vmbus_chanmsg_choffer below).
+ */
+struct hyperv_guid {
+	uint8_t		hv_guid[16];
+} __packed;
+
+/*
+ * $FreeBSD: head/sys/dev/hyperv/vmbus/hyperv_reg.h 303283 2016-07-25 03:12:40Z sephe $
+ */
+
+/*
+ * Hyper-V Synthetic MSRs
+ */
+
+#define MSR_HV_GUEST_OS_ID		0x40000000
+#define MSR_HV_GUESTID_BUILD_MASK	0xffffULL
+#define MSR_HV_GUESTID_VERSION_MASK	0x0000ffffffff0000ULL
+#define MSR_HV_GUESTID_VERSION_SHIFT	16
+#define MSR_HV_GUESTID_OSID_MASK	0x00ff000000000000ULL
+#define MSR_HV_GUESTID_OSID_SHIFT	48
+#define MSR_HV_GUESTID_OSTYPE_MASK	0x7f00000000000000ULL
+#define MSR_HV_GUESTID_OSTYPE_SHIFT	56
+#define MSR_HV_GUESTID_OPENSRC		0x8000000000000000ULL
+#define MSR_HV_GUESTID_OSID_OPENBSD	0x0001000000000000ULL
+#define MSR_HV_GUESTID_OSID_NETBSD	0x0002000000000000ULL
+#define MSR_HV_GUESTID_OSTYPE_LINUX	\
+	((0x01ULL << MSR_HV_GUESTID_OSTYPE_SHIFT) | MSR_HV_GUESTID_OPENSRC)
+#define MSR_HV_GUESTID_OSTYPE_FREEBSD	\
+	((0x02ULL << MSR_HV_GUESTID_OSTYPE_SHIFT) | MSR_HV_GUESTID_OPENSRC)
+#define MSR_HV_GUESTID_OSTYPE_OPENBSD	\
+	((0x02ULL << MSR_HV_GUESTID_OSTYPE_SHIFT) | MSR_HV_GUESTID_OPENSRC | \
+	 MSR_HV_GUESTID_OSID_OPENBSD)
+#define MSR_HV_GUESTID_OSTYPE_NETBSD	\
+	((0x02ULL << MSR_HV_GUESTID_OSTYPE_SHIFT) | MSR_HV_GUESTID_OPENSRC | \
+	 MSR_HV_GUESTID_OSID_NETBSD)
+
+#define MSR_HV_HYPERCALL		0x40000001
+#define MSR_HV_HYPERCALL_ENABLE		0x0001ULL
+#define MSR_HV_HYPERCALL_RSVD_MASK	0x0ffeULL
+#define MSR_HV_HYPERCALL_PGSHIFT	12
+
+#define MSR_HV_VP_INDEX			0x40000002
+
+#define MSR_HV_TIME_REF_COUNT		0x40000020
+
+#define MSR_HV_REFERENCE_TSC		0x40000021
+#define MSR_HV_REFTSC_ENABLE		0x0001ULL
+#define MSR_HV_REFTSC_RSVD_MASK		0x0ffeULL
+#define MSR_HV_REFTSC_PGSHIFT		12
+
+#define MSR_HV_TSC_FREQUENCY		0x40000022
+
+#define MSR_HV_APIC_FREQUENCY		0x40000023
+
+#define MSR_HV_SCONTROL			0x40000080
+#define MSR_HV_SCTRL_ENABLE		0x0001ULL
+#define MSR_HV_SCTRL_RSVD_MASK		0xfffffffffffffffeULL
+
+#define MSR_HV_SIEFP			0x40000082
+#define MSR_HV_SIEFP_ENABLE		0x0001ULL
+#define MSR_HV_SIEFP_RSVD_MASK		0x0ffeULL
+#define MSR_HV_SIEFP_PGSHIFT		12
+
+#define MSR_HV_SIMP			0x40000083
+#define MSR_HV_SIMP_ENABLE		0x0001ULL
+#define MSR_HV_SIMP_RSVD_MASK		0x0ffeULL
+#define MSR_HV_SIMP_PGSHIFT		12
+
+#define MSR_HV_EOM			0x40000084
+
+#define MSR_HV_SINT0			0x40000090
+#define MSR_HV_SINT_VECTOR_MASK		0x00ffULL
+#define MSR_HV_SINT_RSVD1_MASK		0xff00ULL
+#define MSR_HV_SINT_MASKED		0x00010000ULL
+#define MSR_HV_SINT_AUTOEOI		0x00020000ULL
+#define MSR_HV_SINT_RSVD2_MASK		0xfffffffffffc0000ULL
+#define MSR_HV_SINT_RSVD_MASK		(MSR_HV_SINT_RSVD1_MASK |	\
+					 MSR_HV_SINT_RSVD2_MASK)
+
+#define MSR_HV_STIMER0_CONFIG		0x400000b0
+#define MSR_HV_STIMER_CFG_ENABLE	0x0001ULL
+#define MSR_HV_STIMER_CFG_PERIODIC	0x0002ULL
+#define MSR_HV_STIMER_CFG_LAZY		0x0004ULL
+#define MSR_HV_STIMER_CFG_AUTOEN	0x0008ULL
+#define MSR_HV_STIMER_CFG_SINT_MASK	0x000f0000ULL
+#define MSR_HV_STIMER_CFG_SINT_SHIFT	16
+
+#define MSR_HV_STIMER0_COUNT		0x400000b1
+
+/*
+ * CPUID leaves
+ */
+
+#define CPUID_LEAF_HV_MAXLEAF		0x40000000
+
+#define CPUID_LEAF_HV_INTERFACE		0x40000001
+#define CPUID_HV_IFACE_HYPERV		0x31237648	/* HV#1 */
+
+#define CPUID_LEAF_HV_IDENTITY		0x40000002
+
+#define CPUID_LEAF_HV_FEATURES		0x40000003
+/* EAX: features */
+#define CPUID_HV_MSR_TIME_REFCNT	0x0002	/* MSR_HV_TIME_REF_COUNT */
+#define CPUID_HV_MSR_SYNIC		0x0004	/* MSRs for SynIC */
+#define CPUID_HV_MSR_SYNTIMER		0x0008	/* MSRs for SynTimer */
+#define CPUID_HV_MSR_APIC		0x0010	/* MSR_HV_{EOI,ICR,TPR} */
+#define CPUID_HV_MSR_HYPERCALL		0x0020	/* MSR_HV_GUEST_OS_ID
+						 * MSR_HV_HYPERCALL */
+#define CPUID_HV_MSR_VP_INDEX		0x0040	/* MSR_HV_VP_INDEX */
+#define CPUID_HV_MSR_REFERENCE_TSC	0x0200	/* MSR_HV_REFERENCE_TSC */
+#define CPUID_HV_MSR_GUEST_IDLE		0x0400	/* MSR_HV_GUEST_IDLE */
+#define CPUID_HV_MSR_TIME_FREQ		0x0800	/* MSR_HV_xxx_FREQUENCY */
+/* ECX: power management features */
+#define CPUPM_HV_CSTATE_MASK		0x000f	/* deepest C-state */
+#define CPUPM_HV_C3_HPET		0x0010	/* C3 requires HPET */
+#define CPUPM_HV_CSTATE(f)		((f) & CPUPM_HV_CSTATE_MASK)
+/* EDX: features3 */
+#define CPUID3_HV_MWAIT			0x0001	/* MWAIT */
+#define CPUID3_HV_XMM_HYPERCALL		0x0010	/* Hypercall input through
+						 * XMM regs */
+#define CPUID3_HV_GUEST_IDLE		0x0020	/* guest idle */
+#define CPUID3_HV_NUMA			0x0080	/* NUMA distance query */
+#define CPUID3_HV_TIME_FREQ		0x0100	/* timer frequency query
+						 * (TSC, LAPIC) */
+#define CPUID3_HV_MSR_CRASH		0x0400	/* MSRs for guest crash */
+
+#define CPUID_LEAF_HV_RECOMMENDS	0x40000004
+#define CPUID_LEAF_HV_LIMITS		0x40000005
+#define CPUID_LEAF_HV_HWFEATURES	0x40000006
+
+/*
+ * Hyper-V Monitor Notification Facility
+ */
+struct hyperv_mon_param {
+	uint32_t	mp_connid;
+	uint16_t	mp_evtflag_ofs;
+	uint16_t	mp_rsvd;
+} __packed;
+
+/*
+ * Hyper-V message types
+ */
+#define HYPERV_MSGTYPE_NONE		0
+#define HYPERV_MSGTYPE_CHANNEL		1
+#define HYPERV_MSGTYPE_TIMER_EXPIRED	0x80000010
+
+/*
+ * Hypercall status codes
+ */
+#define HYPERCALL_STATUS_SUCCESS	0x0000
+
+/*
+ * Hypercall input values
+ */
+#define HYPERCALL_POST_MESSAGE		0x005c
+#define HYPERCALL_SIGNAL_EVENT		0x005d
+
+/*
+ * Hypercall input parameters
+ */
+#define HYPERCALL_PARAM_ALIGN		8
+#if 0
+/*
+ * XXX
+ * <<Hypervisor Top Level Functional Specification 4.0b>> requires
+ * input parameters size to be multiple of 8, however, many post
+ * message input parameters do _not_ meet this requirement.
+ */
+#define HYPERCALL_PARAM_SIZE_ALIGN	8
+#endif
+
+/*
+ * HYPERCALL_POST_MESSAGE
+ */
+#define HYPERCALL_POSTMSGIN_DSIZE_MAX	240
+#define HYPERCALL_POSTMSGIN_SIZE	256
+
+/* Input parameter block for HYPERCALL_POST_MESSAGE. */
+struct hyperv_hypercall_postmsg_in {
+	uint32_t	hc_connid;	/* VMBUS_CONNID_ */
+	uint32_t	hc_rsvd;
+	uint32_t	hc_msgtype;	/* VMBUS_MSGTYPE_ */
+	uint32_t	hc_dsize;	/* bytes of hc_data[] in use */
+	uint8_t		hc_data[HYPERCALL_POSTMSGIN_DSIZE_MAX];
+} __packed;
+
+/*
+ * $FreeBSD: head/sys/dev/hyperv/include/vmbus.h 306389 2016-09-28 04:25:25Z sephe $
+ */
+
+/*
+ * VMBUS version is 32 bit, upper 16 bit for major_number and lower
+ * 16 bit for minor_number.
+ *
+ * 0.13  --  Windows Server 2008
+ * 1.1   --  Windows 7
+ * 2.4   --  Windows 8
+ * 3.0   --  Windows 8.1
+ * 4.0   --  Windows 10
+ */
+#define VMBUS_VERSION_WS2008		((0 << 16) | (13))
+#define VMBUS_VERSION_WIN7		((1 << 16) | (1))
+#define VMBUS_VERSION_WIN8		((2 << 16) | (4))
+#define VMBUS_VERSION_WIN8_1		((3 << 16) | (0))
+#define VMBUS_VERSION_WIN10		((4 << 16) | (0))
+
+#define VMBUS_VERSION_MAJOR(ver)	(((uint32_t)(ver)) >> 16)
+#define VMBUS_VERSION_MINOR(ver)	(((uint32_t)(ver)) & 0xffff)
+
+/*
+ * GPA stuffs.
+ */
+struct vmbus_gpa_range {
+	uint32_t	gpa_len;
+	uint32_t	gpa_ofs;
+	uint64_t	gpa_page[0];
+} __packed;
+
+/* This is actually vmbus_gpa_range.gpa_page[1] */
+struct vmbus_gpa {
+	uint32_t	gpa_len;
+	uint32_t	gpa_ofs;
+	uint64_t	gpa_page;
+} __packed;
+
+#define VMBUS_CHANPKT_SIZE_SHIFT	3
+
+#define VMBUS_CHANPKT_GETLEN(pktlen)	\
+	(((int)(pktlen)) << VMBUS_CHANPKT_SIZE_SHIFT)
+
+struct vmbus_chanpkt_hdr {
+	uint16_t	cph_type;	/* VMBUS_CHANPKT_TYPE_ */
+	uint16_t	cph_hlen;	/* header len, in 8 bytes */
+	uint16_t	cph_tlen;	/* total len, in 8 bytes */
+	uint16_t	cph_flags;	/* VMBUS_CHANPKT_FLAG_ */
+	uint64_t	cph_tid;
+} __packed;
+
+#define VMBUS_CHANPKT_TYPE_INBAND	0x0006
+#define VMBUS_CHANPKT_TYPE_RXBUF	0x0007
+#define VMBUS_CHANPKT_TYPE_GPA		0x0009
+#define VMBUS_CHANPKT_TYPE_COMP		0x000b
+
+#define VMBUS_CHANPKT_FLAG_RC		0x0001	/* report completion */
+
+#define VMBUS_CHANPKT_CONST_DATA(pkt)			\
+	((const void *)((const uint8_t *)(pkt) +	\
+	    VMBUS_CHANPKT_GETLEN((pkt)->cph_hlen)))
+
+/*
+ * $FreeBSD: head/sys/dev/hyperv/vmbus/vmbus_reg.h 305405 2016-09-05 03:21:31Z sephe $
+ */
+
+/*
+ * Hyper-V SynIC message format.
+ */
+
+#define VMBUS_MSG_DSIZE_MAX		240
+#define VMBUS_MSG_SIZE			256
+
+struct vmbus_message {
+	uint32_t	msg_type;	/* VMBUS_MSGTYPE_ */
+	uint8_t		msg_dsize;	/* data size */
+	uint8_t		msg_flags;	/* VMBUS_MSGFLAG_ */
+	uint16_t	msg_rsvd;
+	uint64_t	msg_id;
+	uint8_t		msg_data[VMBUS_MSG_DSIZE_MAX];
+} __packed;
+
+#define VMBUS_MSGFLAG_PENDING		0x01
+
+/*
+ * Hyper-V SynIC event flags
+ */
+
+#define VMBUS_EVTFLAGS_SIZE	256
+#define VMBUS_EVTFLAGS_MAX	((VMBUS_EVTFLAGS_SIZE / LONG_BIT) * 8)
+#define VMBUS_EVTFLAG_LEN	LONG_BIT
+#define VMBUS_EVTFLAG_MASK	(LONG_BIT - 1)
+
+struct vmbus_evtflags {
+	ulong		evt_flags[VMBUS_EVTFLAGS_MAX];
+} __packed;
+
+/*
+ * Hyper-V Monitor Notification Facility
+ */
+
+struct vmbus_mon_trig {
+	uint32_t	mt_pending;
+	uint32_t	mt_armed;
+} __packed;
+
+#define VMBUS_MONTRIGS_MAX	4
+#define VMBUS_MONTRIG_LEN	32
+
+struct vmbus_mnf {
+	uint32_t	mnf_state;
+	uint32_t	mnf_rsvd1;
+
+	struct vmbus_mon_trig
+			mnf_trigs[VMBUS_MONTRIGS_MAX];
+	uint8_t		mnf_rsvd2[536];
+
+	uint16_t	mnf_lat[VMBUS_MONTRIGS_MAX][VMBUS_MONTRIG_LEN];
+	uint8_t		mnf_rsvd3[256];
+
+	struct hyperv_mon_param
+			mnf_param[VMBUS_MONTRIGS_MAX][VMBUS_MONTRIG_LEN];
+	uint8_t		mnf_rsvd4[1984];
+} __packed;
+
+/*
+ * Buffer ring
+ */
+struct vmbus_bufring {
+	/*
+	 * If br_windex == br_rindex, this bufring is empty; this
+	 * means we can _not_ write data to the bufring, if the
+	 * write is going to make br_windex same as br_rindex.
+	 */
+	volatile uint32_t	br_windex;
+	volatile uint32_t	br_rindex;
+
+	/*
+	 * Interrupt mask {0,1}
+	 *
+	 * For TX bufring, host set this to 1, when it is processing
+	 * the TX bufring, so that we can safely skip the TX event
+	 * notification to host.
+	 *
+	 * For RX bufring, once this is set to 1 by us, host will not
+	 * further dispatch interrupts to us, even if there are data
+	 * pending on the RX bufring.  This effectively disables the
+	 * interrupt of the channel to which this RX bufring is attached.
+	 */
+	volatile uint32_t	br_imask;
+
+	uint8_t			br_rsvd[4084];
+	uint8_t			br_data[0];
+} __packed;
+
+/*
+ * Channel
+ */
+
+#define VMBUS_CHAN_MAX_COMPAT	256
+#define VMBUS_CHAN_MAX		(VMBUS_EVTFLAG_LEN * VMBUS_EVTFLAGS_MAX)
+
+/*
+ * Channel packets
+ */
+
+#define VMBUS_CHANPKT_SIZE_ALIGN	(1 << VMBUS_CHANPKT_SIZE_SHIFT)
+
+#define VMBUS_CHANPKT_SETLEN(pktlen, len)		\
+do {							\
+	(pktlen) = (len) >> VMBUS_CHANPKT_SIZE_SHIFT;	\
+} while (0)
+
+struct vmbus_chanpkt {
+	struct vmbus_chanpkt_hdr cp_hdr;
+} __packed;
+
+struct vmbus_chanpkt_sglist {
+	struct vmbus_chanpkt_hdr cp_hdr;
+	uint32_t	cp_rsvd;
+	uint32_t	cp_gpa_cnt;
+	struct vmbus_gpa cp_gpa[0];
+} __packed;
+
+struct vmbus_chanpkt_prplist {
+	struct vmbus_chanpkt_hdr cp_hdr;
+	uint32_t	cp_rsvd;
+	uint32_t	cp_range_cnt;
+	struct vmbus_gpa_range cp_range[0];
+} __packed;
+
+/*
+ * Channel messages
+ * - Embedded in vmbus_message.msg_data, e.g. response and notification.
+ * - Embedded in hyperv_hypercall_postmsg_in.hc_data, e.g. request.
+ */
+
+#define VMBUS_CHANMSG_CHOFFER			1	/* NOTE */
+#define VMBUS_CHANMSG_CHRESCIND			2	/* NOTE */
+#define VMBUS_CHANMSG_CHREQUEST			3	/* REQ */
+#define VMBUS_CHANMSG_CHOFFER_DONE		4	/* NOTE */
+#define VMBUS_CHANMSG_CHOPEN			5	/* REQ */
+#define VMBUS_CHANMSG_CHOPEN_RESP		6	/* RESP */
+#define VMBUS_CHANMSG_CHCLOSE			7	/* REQ */
+#define VMBUS_CHANMSG_GPADL_CONN		8	/* REQ */
+#define VMBUS_CHANMSG_GPADL_SUBCONN		9	/* REQ */
+#define VMBUS_CHANMSG_GPADL_CONNRESP		10	/* RESP */
+#define VMBUS_CHANMSG_GPADL_DISCONN		11	/* REQ */
+#define VMBUS_CHANMSG_GPADL_DISCONNRESP		12	/* RESP */
+#define VMBUS_CHANMSG_CHFREE			13	/* REQ */
+#define VMBUS_CHANMSG_CONNECT			14	/* REQ */
+#define VMBUS_CHANMSG_CONNECT_RESP		15	/* RESP */
+#define VMBUS_CHANMSG_DISCONNECT		16	/* REQ */
+#define VMBUS_CHANMSG_COUNT			17
+#define VMBUS_CHANMSG_MAX			22
+
+struct vmbus_chanmsg_hdr {
+	uint32_t	chm_type;	/* VMBUS_CHANMSG_* */
+	uint32_t	chm_rsvd;
+} __packed;
+
+/* VMBUS_CHANMSG_CONNECT */
+struct vmbus_chanmsg_connect {
+	struct vmbus_chanmsg_hdr chm_hdr;
+	uint32_t	chm_ver;
+	uint32_t	chm_rsvd;
+	uint64_t	chm_evtflags;
+	uint64_t	chm_mnf1;
+	uint64_t	chm_mnf2;
+} __packed;
+
+/* VMBUS_CHANMSG_CONNECT_RESP */
+struct vmbus_chanmsg_connect_resp {
+	struct vmbus_chanmsg_hdr chm_hdr;
+	uint8_t		chm_done;
+} __packed;
+
+/* VMBUS_CHANMSG_CHREQUEST */
+struct vmbus_chanmsg_chrequest {
+	struct vmbus_chanmsg_hdr chm_hdr;
+} __packed;
+
+/* VMBUS_CHANMSG_DISCONNECT */
+struct vmbus_chanmsg_disconnect {
+	struct vmbus_chanmsg_hdr chm_hdr;
+} __packed;
+
+/* VMBUS_CHANMSG_CHOPEN */
+struct vmbus_chanmsg_chopen {
+	struct vmbus_chanmsg_hdr chm_hdr;
+	uint32_t	chm_chanid;
+	uint32_t	chm_openid;
+	uint32_t	chm_gpadl;
+	uint32_t	chm_vcpuid;
+	uint32_t	chm_txbr_pgcnt;
+#define VMBUS_CHANMSG_CHOPEN_UDATA_SIZE	120
+	uint8_t		chm_udata[VMBUS_CHANMSG_CHOPEN_UDATA_SIZE];
+} __packed;
+
+/* VMBUS_CHANMSG_CHOPEN_RESP */
+struct vmbus_chanmsg_chopen_resp {
+	struct vmbus_chanmsg_hdr chm_hdr;
+	uint32_t	chm_chanid;
+	uint32_t	chm_openid;
+	uint32_t	chm_status;
+} __packed;
+
+/* VMBUS_CHANMSG_GPADL_CONN */
+struct vmbus_chanmsg_gpadl_conn {
+	struct vmbus_chanmsg_hdr chm_hdr;
+	uint32_t	chm_chanid;
+	uint32_t	chm_gpadl;
+	uint16_t	chm_range_len;
+	uint16_t	chm_range_cnt;
+	struct vmbus_gpa_range chm_range;
+} __packed;
+
+#define VMBUS_CHANMSG_GPADL_CONN_PGMAX		26
+
+/* VMBUS_CHANMSG_GPADL_SUBCONN */
+struct vmbus_chanmsg_gpadl_subconn {
+	struct vmbus_chanmsg_hdr chm_hdr;
+	uint32_t	chm_msgno;
+	uint32_t	chm_gpadl;
+	uint64_t	chm_gpa_page[0];
+} __packed;
+
+#define VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX	28
+
+/* VMBUS_CHANMSG_GPADL_CONNRESP */
+struct vmbus_chanmsg_gpadl_connresp {
+	struct vmbus_chanmsg_hdr chm_hdr;
+	uint32_t	chm_chanid;
+	uint32_t	chm_gpadl;
+	uint32_t	chm_status;
+} __packed;
+
+/* VMBUS_CHANMSG_CHCLOSE */
+struct vmbus_chanmsg_chclose {
+	struct vmbus_chanmsg_hdr chm_hdr;
+	uint32_t	chm_chanid;
+} __packed;
+
+/* VMBUS_CHANMSG_GPADL_DISCONN */
+struct vmbus_chanmsg_gpadl_disconn {
+	struct vmbus_chanmsg_hdr chm_hdr;
+	uint32_t	chm_chanid;
+	uint32_t	chm_gpadl;
+} __packed;
+
+/* VMBUS_CHANMSG_CHFREE */
+struct vmbus_chanmsg_chfree {
+	struct vmbus_chanmsg_hdr chm_hdr;
+	uint32_t	chm_chanid;
+} __packed;
+
+/* VMBUS_CHANMSG_CHRESCIND */
+struct vmbus_chanmsg_chrescind {
+	struct vmbus_chanmsg_hdr chm_hdr;
+	uint32_t	chm_chanid;
+} __packed;
+
+/* VMBUS_CHANMSG_CHOFFER */
+struct vmbus_chanmsg_choffer {
+	struct vmbus_chanmsg_hdr chm_hdr;
+	struct hyperv_guid chm_chtype;
+	struct hyperv_guid chm_chinst;
+	uint64_t	chm_chlat;	/* unit: 100ns */
+	uint32_t	chm_chrev;
+	uint32_t	chm_svrctx_sz;
+	uint16_t	chm_chflags;
+	uint16_t	chm_mmio_sz;	/* unit: MB */
+	uint8_t		chm_udata[120];
+	uint16_t	chm_subidx;
+	uint16_t	chm_rsvd;
+	uint32_t	chm_chanid;
+	uint8_t		chm_montrig;
+	uint8_t		chm_flags1;	/* VMBUS_CHOFFER_FLAG1_ */
+	uint16_t	chm_flags2;
+	uint32_t	chm_connid;
+} __packed;
+
+#define VMBUS_CHOFFER_FLAG1_HASMNF	0x01
+
+#endif	/* _KERNEL */
+
+#endif	/* _HYPERVREG_H_ */
diff --git a/sys/arch/x86/x86/hypervvar.h b/sys/arch/x86/x86/hypervvar.h
new file mode 100644
index 00000000000..b82987253bf
--- /dev/null
+++ b/sys/arch/x86/x86/hypervvar.h
@@ -0,0 +1,121 @@
+/*	$NetBSD$	*/
+
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/dev/hyperv/include/hyperv.h 326255 2017-11-27 14:52:40Z pfg $
+ */
+
+#ifndef _HYPERVVAR_H_
+#define _HYPERVVAR_H_
+
+#if defined(_KERNEL)
+
+#include <sys/bus.h>
+#include <sys/timex.h>
+
+#define HYPERV_TIMER_NS_FACTOR	100ULL
+#define HYPERV_TIMER_FREQ	(NANOSECOND / HYPERV_TIMER_NS_FACTOR)
+
+#endif	/* _KERNEL */
+
+/*
+ * Hyper-V Reference TSC
+ */
+/*
+ * Page-sized structure shared with the hypervisor (MSR_HV_REFERENCE_TSC).
+ * Field semantics follow the Hyper-V TLFS reference-TSC definition:
+ * seq is a validity/update sequence, scale and ofs convert raw TSC
+ * readings -- confirm against the spec version in use.
+ */
+struct hyperv_reftsc {
+	volatile uint32_t	tsc_seq;
+	volatile uint32_t	tsc_rsvd1;
+	volatile uint64_t	tsc_scale;
+	volatile int64_t	tsc_ofs;
+} __packed __aligned(PAGE_SIZE);
+#ifdef __CTASSERT
+__CTASSERT(sizeof(struct hyperv_reftsc) == PAGE_SIZE);
+#endif
+
+#if defined(_KERNEL)
+
+#define HYPERV_GUID_STRLEN	40
+
+struct hyperv_guid;
+struct trapframe;
+struct sysctlnode;
+
+int	hyperv_guid2str(const struct hyperv_guid *, char *, size_t);
+
+/*
+ * hyperv_tc64 could be NULL, if there were no suitable Hyper-V
+ * specific timecounter.
+ */
+typedef uint64_t (*hyperv_tc64_t)(void);
+extern hyperv_tc64_t hyperv_tc64;
+
+extern u_int hyperv_ver_major;
+extern u_int hyperv_features;		/* CPUID_HV_MSR_ */
+extern u_int hyperv_recommends;
+
+extern const struct sysctlnode *hyperv_sysctl_node;
+
+uint64_t hyperv_hypercall(uint64_t, paddr_t, paddr_t);
+uint64_t hyperv_hypercall_post_message(paddr_t);
+uint64_t hyperv_hypercall_signal_event(paddr_t);
+
+bool	hyperv_init(void);
+bool	hyperv_is_initialized(void);
+void	hyperv_intr(struct trapframe *);
+
+/*
+ * Hyper-V bus_dma utilities.
+ */
+/*
+ * Book-keeping for one DMA buffer managed by hyperv_dma_alloc() /
+ * hyperv_dma_free().
+ */
+struct hyperv_dma {
+	bus_dmamap_t		map;	/* loaded DMA map */
+	bus_dma_segment_t	*segs;	/* kmem-allocated segment array */
+	void			*addr;	/* KVA of the mapped buffer */
+	int			nsegs;	/* entries allocated in segs[] */
+};
+
+/*
+ * Bus address of the buffer's first DMA segment.  Only valid after a
+ * successful hyperv_dma_alloc().
+ */
+static __inline bus_addr_t
+hyperv_dma_get_paddr(struct hyperv_dma *dma)
+{
+	return dma->map->dm_segs[0].ds_addr;
+}
+
+void *hyperv_dma_alloc(bus_dma_tag_t, struct hyperv_dma *, bus_size_t,
+    bus_size_t, bus_size_t, int);
+void hyperv_dma_free(bus_dma_tag_t, struct hyperv_dma *);
+
+/*
+ * Vector used for Hyper-V Interrupts.
+ */
+extern void Xintr_hyperv_upcall(void);
+extern void Xresume_hyperv_upcall(void);
+extern void Xrecurse_hyperv_upcall(void);
+
+#endif	/* _KERNEL */
+
+#endif	/* _HYPERVVAR_H_ */
diff --git a/sys/arch/x86/x86/intr.c b/sys/arch/x86/x86/intr.c
index 838e6667afa..144ad42e6a1 100644
--- a/sys/arch/x86/x86/intr.c
+++ b/sys/arch/x86/x86/intr.c
@@ -166,6 +166,12 @@ __KERNEL_RCSID(0, "$NetBSD: intr.c,v 1.125 2018/04/04 22:52:59 christos Exp $");
 #include "lapic.h"
 #include "pci.h"
 #include "acpica.h"
+#ifndef XEN
+#include "vmbus.h"
+#if NVMBUS > 0
+#include <x86/x86/hypervvar.h>
+#endif
+#endif
 
 #if NIOAPIC > 0 || NACPICA > 0
 #include <machine/i82093var.h>
@@ -1425,6 +1431,9 @@ struct intrhand fake_softbio_intrhand;
 struct intrhand fake_timer_intrhand;
 struct intrhand fake_ipi_intrhand;
 struct intrhand fake_preempt_intrhand;
+#if NVMBUS > 0
+struct intrhand fake_hyperv_intrhand;
+#endif
 
 #if NLAPIC > 0 && defined(MULTIPROCESSOR)
 static const char *x86_ipi_names[X86_NIPI] = X86_IPI_NAMES;
@@ -1488,6 +1497,18 @@ cpu_intr_init(struct cpu_info *ci)
 		evcnt_attach_dynamic(&ci->ci_ipi_events[i], EVCNT_TYPE_MISC,
 		    NULL, device_xname(ci->ci_dev), x86_ipi_names[i]);
 #endif
+
+#if NVMBUS > 0
+	isp = kmem_zalloc(sizeof(*isp), KM_SLEEP);
+	isp->is_recurse = Xrecurse_hyperv_upcall;
+	isp->is_resume = Xresume_hyperv_upcall;
+	fake_hyperv_intrhand.ih_level = IPL_NET;
+	isp->is_handlers = &fake_hyperv_intrhand;
+	isp->is_pic = &local_pic;
+	ci->ci_isources[LIR_HYPERV] = isp;
+	evcnt_attach_dynamic(&isp->is_evcnt, EVCNT_TYPE_INTR, NULL,
+	    device_xname(ci->ci_dev), "hyperv");
+#endif
 #endif
 
 #if defined(__HAVE_PREEMPTION)
diff --git a/sys/arch/x86/x86/lapic.c b/sys/arch/x86/x86/lapic.c
index 0d32e737b42..54c788ed156 100644
--- a/sys/arch/x86/x86/lapic.c
+++ b/sys/arch/x86/x86/lapic.c
@@ -587,7 +587,6 @@ lapic_initclocks(void)
 	lapic_eoi();
 }
 
-extern unsigned int gettick(void);	/* XXX put in header file */
 extern u_long rtclock_tval; /* XXX put in header file */
 extern void (*initclock_func)(void); /* XXX put in header file */
 
@@ -605,41 +604,58 @@ extern void (*initclock_func)(void); /* XXX put in header file */
 void
 lapic_calibrate_timer(struct cpu_info *ci)
 {
-	unsigned int seen, delta, initial_i8254, initial_lapic;
-	unsigned int cur_i8254, cur_lapic;
-	uint64_t tmp;
+	struct timecounter *tc;
+	timecounter_get_t *tick_func;
+	unsigned int tval, mask, delta, initial_counter, initial_lapic;
+	unsigned int cur_counter, cur_lapic;
+	uint64_t seen, end, tmp, freq;
 	int i;
 	char tbuf[9];
 
-	aprint_debug_dev(ci->ci_dev, "calibrating local timer\n");
+	if (lapic_per_second == 0) {
+		aprint_debug_dev(ci->ci_dev, "calibrating local timer\n");
+
+		tc = timecounter;
+		if (tc->tc_quality <= 0) {
+			tick_func = (timecounter_get_t *)gettick;
+			tval = rtclock_tval;
+			mask = ~0u;
+			freq = TIMER_FREQ;
+		} else {
+			tick_func = tc->tc_get_timecount;
+			tval = mask = tc->tc_counter_mask;
+			freq = tc->tc_frequency;
+		}
+		end = freq / 100;
 
-	/*
-	 * Configure timer to one-shot, interrupt masked,
-	 * large positive number.
-	 */
-	lapic_writereg(LAPIC_LVTT, LAPIC_LVTT_M);
-	lapic_writereg(LAPIC_DCR_TIMER, LAPIC_DCRT_DIV1);
-	lapic_writereg(LAPIC_ICR_TIMER, 0x80000000);
+		/*
+		 * Configure timer to one-shot, interrupt masked,
+		 * large positive number.
+		 */
+		lapic_writereg(LAPIC_LVTT, LAPIC_LVTT_M);
+		lapic_writereg(LAPIC_DCR_TIMER, LAPIC_DCRT_DIV1);
+		lapic_writereg(LAPIC_ICR_TIMER, 0x80000000);
 
-	x86_disable_intr();
+		x86_disable_intr();
 
-	initial_lapic = lapic_gettick();
-	initial_i8254 = gettick();
+		initial_lapic = lapic_gettick();
+		initial_counter = tick_func(tc) & mask;
 
-	for (seen = 0; seen < TIMER_FREQ / 100; seen += delta) {
-		cur_i8254 = gettick();
-		if (cur_i8254 > initial_i8254)
-			delta = rtclock_tval - (cur_i8254 - initial_i8254);
-		else
-			delta = initial_i8254 - cur_i8254;
-		initial_i8254 = cur_i8254;
-	}
-	cur_lapic = lapic_gettick();
+		for (seen = 0; seen < end; seen += delta) {
+			cur_counter = tick_func(tc) & mask;
+			if (cur_counter > initial_counter)
+				delta = tval - (cur_counter - initial_counter);
+			else
+				delta = initial_counter - cur_counter;
+			initial_counter = cur_counter;
+		}
+		cur_lapic = lapic_gettick();
 
-	x86_enable_intr();
+		x86_enable_intr();
 
-	tmp = initial_lapic - cur_lapic;
-	lapic_per_second = (tmp * TIMER_FREQ + seen / 2) / seen;
+		tmp = initial_lapic - cur_lapic;
+		lapic_per_second = (tmp * freq + seen / 2) / seen;
+	}
 
 	humanize_number(tbuf, sizeof(tbuf), lapic_per_second, "Hz", 1000);
 
@@ -777,7 +793,7 @@ i82489_ipi_init(int target)
 
 	i82489_writereg(LAPIC_ICRLO, LAPIC_DLMODE_INIT | LAPIC_LEVEL_ASSERT);
 	i82489_icr_wait();
-	i8254_delay(10000);
+	x86_delay(10000);
 	i82489_writereg(LAPIC_ICRLO,
 	    LAPIC_DLMODE_INIT | LAPIC_TRIGGER_LEVEL | LAPIC_LEVEL_DEASSERT);
 	i82489_icr_wait();
@@ -849,7 +865,7 @@ x2apic_ipi_init(int target)
 
 	x2apic_write_icr(target, LAPIC_DLMODE_INIT | LAPIC_LEVEL_ASSERT);
 
-	i8254_delay(10000);
+	x86_delay(10000);
 
 	x2apic_write_icr(0,
 	    LAPIC_DLMODE_INIT | LAPIC_TRIGGER_LEVEL | LAPIC_LEVEL_DEASSERT);
diff --git a/sys/arch/x86/x86/vmbus.c b/sys/arch/x86/x86/vmbus.c
new file mode 100644
index 00000000000..d77956f579b
--- /dev/null
+++ b/sys/arch/x86/x86/vmbus.c
@@ -0,0 +1,2172 @@
+/*	$NetBSD$	*/
+/*	$OpenBSD: hyperv.c,v 1.43 2017/06/27 13:56:15 mikeb Exp $	*/
+
+/*-
+ * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * Copyright (c) 2016 Mike Belopuhov <mike@esdenera.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * The OpenBSD port was done under funding by Esdenera Networks GmbH.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/device.h>
+#include <sys/atomic.h>
+#include <sys/bitops.h>
+#include <sys/bus.h>
+#include <sys/cpu.h>
+#include <sys/intr.h>
+#include <sys/kmem.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/xcall.h>
+
+#include <uvm/uvm_extern.h>
+
+#include <x86/x86/hypervreg.h>
+#include <x86/x86/hypervvar.h>
+#include <x86/x86/vmbusvar.h>
+
+/* Command submission flags */
+#define HCF_SLEEPOK	0x0000
+#define HCF_NOSLEEP	0x0002	/* M_NOWAIT */
+#define HCF_NOREPLY	0x0004
+
+static void	vmbus_doattach(device_t);
+static int	vmbus_alloc_dma(struct vmbus_softc *);
+static void	vmbus_free_dma(struct vmbus_softc *);
+static int	vmbus_init_interrupts(struct vmbus_softc *);
+static void	vmbus_deinit_interrupts(struct vmbus_softc *);
+static void	vmbus_init_synic(void *, void *);
+static void	vmbus_deinit_synic(void *, void *);
+
+static int	vmbus_connect(struct vmbus_softc *);
+static int	vmbus_cmd(struct vmbus_softc *, void *, size_t, void *, size_t,
+		    int);
+static int	vmbus_start(struct vmbus_softc *, struct vmbus_msg *, paddr_t);
+static int	vmbus_reply(struct vmbus_softc *, struct vmbus_msg *);
+static void	vmbus_wait(struct vmbus_softc *,
+		    int (*done)(struct vmbus_softc *, struct vmbus_msg *),
+		    struct vmbus_msg *, void *, const char *);
+static uint16_t vmbus_intr_signal(struct vmbus_softc *, paddr_t);
+static void	vmbus_event_proc(struct vmbus_softc *, int);
+static void	vmbus_event_proc_compat(struct vmbus_softc *, int);
+static void	vmbus_event_proc_dummy(struct vmbus_softc *, int);
+static void	vmbus_message_softintr(void *);
+static void	vmbus_channel_response(struct vmbus_softc *,
+		    struct vmbus_chanmsg_hdr *);
+static void	vmbus_channel_offer(struct vmbus_softc *,
+		    struct vmbus_chanmsg_hdr *);
+static void	vmbus_channel_rescind(struct vmbus_softc *,
+		    struct vmbus_chanmsg_hdr *);
+static void	vmbus_channel_delivered(struct vmbus_softc *,
+		    struct vmbus_chanmsg_hdr *);
+static int	vmbus_channel_scan(struct vmbus_softc *);
+static void	vmbus_channel_cpu_default(struct vmbus_channel *);
+static void	vmbus_process_offer(struct vmbus_softc *, struct vmbus_offer *);
+static struct vmbus_channel *
+		vmbus_channel_lookup(struct vmbus_softc *, uint32_t);
+static int	vmbus_channel_ring_create(struct vmbus_channel *, uint32_t);
+static void	vmbus_channel_ring_destroy(struct vmbus_channel *);
+static void	vmbus_channel_pause(struct vmbus_channel *);
+static uint32_t	vmbus_channel_unpause(struct vmbus_channel *);
+static uint32_t	vmbus_channel_ready(struct vmbus_channel *);
+static int	vmbus_attach_icdevs(struct vmbus_softc *);
+static int	vmbus_attach_devices(struct vmbus_softc *);
+
+/* Global softc pointer, for hyperv_intr() which has no device cookie. */
+static struct vmbus_softc *vmbus_sc;
+
+/*
+ * Message dispatch table, indexed by VMBUS_CHANMSG_* type of an
+ * incoming host message: hmd_handler processes the message, and for
+ * responses hmd_request names the request type they complete (used by
+ * vmbus_channel_response() to match a queued request).
+ */
+static const struct {
+	int	hmd_response;
+	int	hmd_request;
+	void	(*hmd_handler)(struct vmbus_softc *,
+		    struct vmbus_chanmsg_hdr *);
+} vmbus_msg_dispatch[] = {
+	{ 0,					0, NULL },
+	{ VMBUS_CHANMSG_CHOFFER,		0, vmbus_channel_offer },
+	{ VMBUS_CHANMSG_CHRESCIND,		0, vmbus_channel_rescind },
+	{ VMBUS_CHANMSG_CHREQUEST,		VMBUS_CHANMSG_CHOFFER, NULL },
+	{ VMBUS_CHANMSG_CHOFFER_DONE,		0, vmbus_channel_delivered },
+	{ VMBUS_CHANMSG_CHOPEN,			0, NULL },
+	{ VMBUS_CHANMSG_CHOPEN_RESP,		VMBUS_CHANMSG_CHOPEN,
+	  vmbus_channel_response },
+	{ VMBUS_CHANMSG_CHCLOSE,		0, NULL },
+	{ VMBUS_CHANMSG_GPADL_CONN,		0, NULL },
+	{ VMBUS_CHANMSG_GPADL_SUBCONN,		0, NULL },
+	{ VMBUS_CHANMSG_GPADL_CONNRESP,		VMBUS_CHANMSG_GPADL_CONN,
+	  vmbus_channel_response },
+	{ VMBUS_CHANMSG_GPADL_DISCONN,		0, NULL },
+	{ VMBUS_CHANMSG_GPADL_DISCONNRESP,	VMBUS_CHANMSG_GPADL_DISCONN,
+	  vmbus_channel_response },
+	{ VMBUS_CHANMSG_CHFREE,			0, NULL },
+	{ VMBUS_CHANMSG_CONNECT,		0, NULL },
+	{ VMBUS_CHANMSG_CONNECT_RESP,		VMBUS_CHANMSG_CONNECT,
+	  vmbus_channel_response },
+	{ VMBUS_CHANMSG_DISCONNECT,		0, NULL },
+};
+
+/*
+ * Well-known Hyper-V channel/device class GUIDs, compared against
+ * channel offers; hyperv_guid_sprint() maps them to short identifiers.
+ */
+const struct hyperv_guid hyperv_guid_network = {
+	{ 0x63, 0x51, 0x61, 0xf8, 0x3e, 0xdf, 0xc5, 0x46,
+	  0x91, 0x3f, 0xf2, 0xd2, 0xf9, 0x65, 0xed, 0x0e }
+};
+
+const struct hyperv_guid hyperv_guid_ide = {
+	{ 0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44,
+	  0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5 }
+};
+
+const struct hyperv_guid hyperv_guid_scsi = {
+	{ 0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d,
+	  0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f }
+};
+
+const struct hyperv_guid hyperv_guid_shutdown = {
+	{ 0x31, 0x60, 0x0b, 0x0e, 0x13, 0x52, 0x34, 0x49,
+	  0x81, 0x8b, 0x38, 0xd9, 0x0c, 0xed, 0x39, 0xdb }
+};
+
+const struct hyperv_guid hyperv_guid_timesync = {
+	{ 0x30, 0xe6, 0x27, 0x95, 0xae, 0xd0, 0x7b, 0x49,
+	  0xad, 0xce, 0xe8, 0x0a, 0xb0, 0x17, 0x5c, 0xaf }
+};
+
+const struct hyperv_guid hyperv_guid_heartbeat = {
+	{ 0x39, 0x4f, 0x16, 0x57, 0x15, 0x91, 0x78, 0x4e,
+	  0xab, 0x55, 0x38, 0x2f, 0x3b, 0xd5, 0x42, 0x2d }
+};
+
+const struct hyperv_guid hyperv_guid_kvp = {
+	{ 0xe7, 0xf4, 0xa0, 0xa9, 0x45, 0x5a, 0x96, 0x4d,
+	  0xb8, 0x27, 0x8a, 0x84, 0x1e, 0x8c, 0x03, 0xe6 }
+};
+
+const struct hyperv_guid hyperv_guid_vss = {
+	{ 0x29, 0x2e, 0xfa, 0x35, 0x23, 0xea, 0x36, 0x42,
+	  0x96, 0xae, 0x3a, 0x6e, 0xba, 0xcb, 0xa4, 0x40 }
+};
+
+const struct hyperv_guid hyperv_guid_dynmem = {
+	{ 0xdc, 0x74, 0x50, 0x52, 0x85, 0x89, 0xe2, 0x46,
+	  0x80, 0x57, 0xa3, 0x07, 0xdc, 0x18, 0xa5, 0x02 }
+};
+
+const struct hyperv_guid hyperv_guid_mouse = {
+	{ 0x9e, 0xb6, 0xa8, 0xcf, 0x4a, 0x5b, 0xc0, 0x4c,
+	  0xb9, 0x8b, 0x8b, 0xa1, 0xa1, 0xf3, 0xf9, 0x5a }
+};
+
+const struct hyperv_guid hyperv_guid_kbd = {
+	{ 0x6d, 0xad, 0x12, 0xf9, 0x17, 0x2b, 0xea, 0x48,
+	  0xbd, 0x65, 0xf9, 0x27, 0xa6, 0x1c, 0x76, 0x84 }
+};
+
+const struct hyperv_guid hyperv_guid_video = {
+	{ 0x02, 0x78, 0x0a, 0xda, 0x77, 0xe3, 0xac, 0x4a,
+	  0x8e, 0x77, 0x05, 0x58, 0xeb, 0x10, 0x73, 0xf8 }
+};
+
+const struct hyperv_guid hyperv_guid_fc = {
+	{ 0x4a, 0xcc, 0x9b, 0x2f, 0x69, 0x00, 0xf3, 0x4a,
+	  0xb7, 0x6b, 0x6f, 0xd0, 0xbe, 0x52, 0x8c, 0xda }
+};
+
+const struct hyperv_guid hyperv_guid_fcopy = {
+	{ 0xe3, 0x4b, 0xd1, 0x34, 0xe4, 0xde, 0xc8, 0x41,
+	  0x9a, 0xe7, 0x6b, 0x17, 0x49, 0x77, 0xc1, 0x92 }
+};
+
+const struct hyperv_guid hyperv_guid_pcie = {
+	{ 0x1d, 0xf6, 0xc4, 0x44, 0x44, 0x44, 0x00, 0x44,
+	  0x9d, 0x52, 0x80, 0x2e, 0x27, 0xed, 0xe1, 0x9f }
+};
+
+const struct hyperv_guid hyperv_guid_netdir = {
+	{ 0x3d, 0xaf, 0x2e, 0x8c, 0xa7, 0x32, 0x09, 0x4b,
+	  0xab, 0x99, 0xbd, 0x1f, 0x1c, 0x86, 0xb5, 0x01 }
+};
+
+const struct hyperv_guid hyperv_guid_rdesktop = {
+	{ 0xf4, 0xac, 0x6a, 0x27, 0x15, 0xac, 0x6c, 0x42,
+	  0x98, 0xdd, 0x75, 0x21, 0xad, 0x3f, 0x01, 0xfe }
+};
+
+/* Automatic Virtual Machine Activation (AVMA) Services */
+const struct hyperv_guid hyperv_guid_avma1 = {
+	{ 0x55, 0xb2, 0x87, 0x44, 0x8c, 0xb8, 0x3f, 0x40,
+	  0xbb, 0x51, 0xd1, 0xf6, 0x9c, 0xf1, 0x7f, 0x87 }
+};
+
+const struct hyperv_guid hyperv_guid_avma2 = {
+	{ 0xf4, 0xba, 0x75, 0x33, 0x15, 0x9e, 0x30, 0x4b,
+	  0xb7, 0x65, 0x67, 0xac, 0xb1, 0x0d, 0x60, 0x7b }
+};
+
+const struct hyperv_guid hyperv_guid_avma3 = {
+	{ 0xa0, 0x1f, 0x22, 0x99, 0xad, 0x24, 0xe2, 0x11,
+	  0xbe, 0x98, 0x00, 0x1a, 0xa0, 0x1b, 0xbf, 0x6e }
+};
+
+const struct hyperv_guid hyperv_guid_avma4 = {
+	{ 0x16, 0x57, 0xe6, 0xf8, 0xb3, 0x3c, 0x06, 0x4a,
+	  0x9a, 0x60, 0x18, 0x89, 0xc5, 0xcc, 0xca, 0xb5 }
+};
+
+/*
+ * Autoconf match: attach only unit 0, and only when running as a
+ * Hyper-V guest whose hypervisor advertises the SynIC feature.
+ */
+int
+vmbus_match(device_t parent, cfdata_t cf, void *aux)
+{
+
+	if (cf->cf_unit != 0 ||
+	    vm_guest != VM_GUEST_HV ||
+	    !(hyperv_features & CPUID_HV_MSR_SYNIC))
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Early attach: announce the device, record the softc in the global
+ * vmbus_sc (needed by hyperv_intr()), create the message pool cache
+ * and defer the real work to vmbus_doattach() via config_interrupts().
+ */
+int
+vmbus_attach(struct vmbus_softc *sc)
+{
+
+	aprint_naive("\n");
+	aprint_normal(": Hyper-V VMBus\n");
+
+	vmbus_sc = sc;
+
+	/* Messages are allocated/freed from IPL_NET code paths. */
+	sc->sc_msgpool = pool_cache_init(sizeof(struct vmbus_msg), 8, 0, 0,
+	    "hvmsg", NULL, IPL_NET, NULL, NULL, NULL);
+	sc->sc_event_proc = vmbus_event_proc_dummy;
+
+	config_interrupts(sc->sc_dev, vmbus_doattach);
+
+	return 0;
+}
+
+/*
+ * Deferred attach, run from config_interrupts() once interrupts work:
+ * allocate DMA pages, set up the SynIC and IDT vector, negotiate a
+ * protocol version with the host, then scan the bus and attach
+ * channel devices.  On failure, tear the resources down again.
+ */
+static void
+vmbus_doattach(device_t self)
+{
+	struct vmbus_softc *sc = device_private(self);
+
+	if (vmbus_alloc_dma(sc))
+		goto cleanup;
+
+	if (vmbus_init_interrupts(sc))
+		goto cleanup;
+
+	if (vmbus_connect(sc))
+		goto cleanup;
+
+	aprint_normal_dev(sc->sc_dev, "protocol %d.%d\n",
+	    VMBUS_VERSION_MAJOR(sc->sc_proto),
+	    VMBUS_VERSION_MINOR(sc->sc_proto));
+
+	if (sc->sc_proto == VMBUS_VERSION_WS2008 ||
+	    sc->sc_proto == VMBUS_VERSION_WIN7) {
+		/* Pre-Win8 hosts: compat event processing, fewer channels. */
+		sc->sc_event_proc = vmbus_event_proc_compat;
+		sc->sc_channel_max = VMBUS_CHAN_MAX_COMPAT;
+	} else {
+		sc->sc_event_proc = vmbus_event_proc;
+		sc->sc_channel_max = VMBUS_CHAN_MAX;
+	}
+
+	if (vmbus_channel_scan(sc))
+		goto cleanup;
+
+	/* Attach heartbeat, KVP and other "internal" services */
+	vmbus_attach_icdevs(sc);
+
+	/* Attach devices with external drivers */
+	vmbus_attach_devices(sc);
+
+	return;
+
+cleanup:
+	vmbus_deinit_interrupts(sc);
+	vmbus_free_dma(sc);
+}
+
+/*
+ * Detach: undo vmbus_doattach()'s interrupt and DMA setup.
+ */
+int
+vmbus_detach(struct vmbus_softc *sc, int flags)
+{
+
+	vmbus_deinit_interrupts(sc);
+	vmbus_free_dma(sc);
+
+	return 0;
+}
+
+/*
+ * Hyper-V interrupt handler, entered from the dedicated IDT vector
+ * (and also called directly, with frame == NULL, from the NOSLEEP
+ * polling paths in vmbus_start()/vmbus_wait()).  Runs channel event
+ * processing for the current CPU and schedules the message softint
+ * when this CPU's SynIC message slot holds a message.
+ */
+void
+hyperv_intr(struct trapframe *frame)
+{
+	struct vmbus_softc *sc = vmbus_sc;
+	struct vmbus_message *msg;
+	int cpu;
+
+	kpreempt_disable();
+
+	cpu = cpu_index(curcpu());
+
+	sc->sc_event_proc(sc, cpu);
+
+	msg = (struct vmbus_message *)sc->sc_percpu[cpu].simp +
+	    VMBUS_SINT_MESSAGE;
+	if (__predict_false(msg->msg_type != HYPERV_MSGTYPE_NONE))
+		softint_schedule(sc->sc_msg_sih);
+
+	kpreempt_enable();
+}
+
+/*
+ * Allocate the DMA-able pages shared with the hypervisor: a per-CPU
+ * SynIC message page (simp) and event flag page (siep), a global
+ * event flag page split into write/read halves, and the monitor
+ * pages.  Returns ENOMEM on failure; partial allocations are left
+ * for the caller's cleanup path to release via vmbus_free_dma().
+ */
+static int
+vmbus_alloc_dma(struct vmbus_softc *sc)
+{
+	CPU_INFO_ITERATOR cii;
+	struct cpu_info *ci;
+	uint8_t *events;
+	int i;
+
+	/*
+	 * Per-CPU messages and event flags.
+	 */
+	for (CPU_INFO_FOREACH(cii, ci)) {
+		struct vmbus_percpu_data *pd = &sc->sc_percpu[cpu_index(ci)];
+		void *ptr;
+
+		ptr = hyperv_dma_alloc(sc->sc_dmat, &pd->simp_dma,
+		    PAGE_SIZE, PAGE_SIZE, 0, 1);
+		if (ptr == NULL)
+			return ENOMEM;
+		pd->simp = ptr;
+
+		ptr = hyperv_dma_alloc(sc->sc_dmat, &pd->siep_dma,
+		    PAGE_SIZE, PAGE_SIZE, 0, 1);
+		if (ptr == NULL)
+			return ENOMEM;
+		pd->siep = ptr;
+	}
+
+	events = hyperv_dma_alloc(sc->sc_dmat, &sc->sc_events_dma,
+	    PAGE_SIZE, PAGE_SIZE, 0, 1);
+	if (events == NULL)
+		return ENOMEM;
+	/* First half: guest-to-host flags; second half: host-to-guest. */
+	sc->sc_wevents = (u_long *)events;
+	sc->sc_revents = (u_long *)(events + (PAGE_SIZE / 2));
+	sc->sc_events = events;
+
+	for (i = 0; i < __arraycount(sc->sc_monitor); i++) {
+		sc->sc_monitor[i] = hyperv_dma_alloc(sc->sc_dmat,
+		    &sc->sc_monitor_dma[i], PAGE_SIZE, PAGE_SIZE, 0, 1);
+		if (sc->sc_monitor[i] == NULL)
+			return ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Release everything vmbus_alloc_dma() obtained; safe to call on a
+ * partially allocated softc (pointers are NULLed as they are freed).
+ */
+static void
+vmbus_free_dma(struct vmbus_softc *sc)
+{
+	CPU_INFO_ITERATOR cii;
+	struct cpu_info *ci;
+	int i;
+
+	if (sc->sc_events != NULL) {
+		sc->sc_events = sc->sc_wevents = sc->sc_revents = NULL;
+		hyperv_dma_free(sc->sc_dmat, &sc->sc_events_dma);
+	}
+
+	for (i = 0; i < __arraycount(sc->sc_monitor); i++) {
+		sc->sc_monitor[i] = NULL;
+		hyperv_dma_free(sc->sc_dmat, &sc->sc_monitor_dma[i]);
+	}
+
+	for (CPU_INFO_FOREACH(cii, ci)) {
+		struct vmbus_percpu_data *pd = &sc->sc_percpu[cpu_index(ci)];
+
+		if (pd->simp != NULL) {
+			pd->simp = NULL;
+			hyperv_dma_free(sc->sc_dmat, &pd->simp_dma);
+		}
+		if (pd->siep != NULL) {
+			pd->siep = NULL;
+			hyperv_dma_free(sc->sc_dmat, &pd->siep_dma);
+		}
+	}
+}
+
+/*
+ * Set up interrupt-side state: request/response queues and locks,
+ * the message softint, a dedicated IDT vector for the Hyper-V ISR,
+ * and (via xcall broadcast) the SynIC on every CPU.
+ */
+static int
+vmbus_init_interrupts(struct vmbus_softc *sc)
+{
+
+	TAILQ_INIT(&sc->sc_reqs);
+	mutex_init(&sc->sc_req_lock, MUTEX_DEFAULT, IPL_NET);
+
+	TAILQ_INIT(&sc->sc_rsps);
+	mutex_init(&sc->sc_rsp_lock, MUTEX_DEFAULT, IPL_NET);
+
+	sc->sc_proto = VMBUS_VERSION_WS2008;
+
+	/* XXX event_tq */
+
+	sc->sc_msg_sih = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE,
+	    vmbus_message_softintr, sc);
+	if (sc->sc_msg_sih == NULL)
+		return -1;
+
+	/*
+	 * All Hyper-V ISR required resources are setup, now let's find a
+	 * free IDT vector for Hyper-V ISR and set it up.
+	 */
+	mutex_enter(&cpu_lock);
+	sc->sc_idtvec = idt_vec_alloc(APIC_LEVEL(NIPL), IDT_INTR_HIGH);
+	mutex_exit(&cpu_lock);
+	KASSERT(sc->sc_idtvec > 0);
+	idt_vec_set(sc->sc_idtvec, Xintr_hyperv_upcall);
+
+	/* Program the SynIC on all CPUs before marking it enabled. */
+	xc_wait(xc_broadcast(0, vmbus_init_synic, sc, NULL));
+	atomic_or_32(&sc->sc_flags, VMBUS_SCFLAG_SYNIC);
+
+	return 0;
+}
+
+/*
+ * Tear down interrupt-side state in reverse order: disable the SynIC
+ * on all CPUs, then free the softint and the IDT vector.  Safe to
+ * call on a partially initialized softc.
+ */
+static void
+vmbus_deinit_interrupts(struct vmbus_softc *sc)
+{
+
+	if (ISSET(sc->sc_flags, VMBUS_SCFLAG_SYNIC)) {
+		xc_wait(xc_broadcast(0, vmbus_deinit_synic, sc, NULL));
+		atomic_and_32(&sc->sc_flags, (uint32_t)~VMBUS_SCFLAG_SYNIC);
+	}
+
+	/* XXX event_tq */
+
+	if (sc->sc_msg_sih != NULL) {
+		softint_disestablish(sc->sc_msg_sih);
+		sc->sc_msg_sih = NULL;
+	}
+
+	if (sc->sc_idtvec > 0) {
+		idt_vec_free(sc->sc_idtvec);
+		sc->sc_idtvec = 0;
+	}
+}
+
+/*
+ * Per-CPU SynIC setup, run on every CPU via xcall: record the virtual
+ * processor id, point the SIMP/SIEFP MSRs at this CPU's message and
+ * event flag pages, route the message and timer SINTs to our IDT
+ * vector with auto-EOI, and finally enable the SynIC.
+ */
+static void
+vmbus_init_synic(void *arg1, void *arg2)
+{
+	struct vmbus_softc *sc = arg1;
+	struct vmbus_percpu_data *pd;
+	uint64_t val, orig;
+	uint32_t sint;
+
+	kpreempt_disable();
+
+	pd = &sc->sc_percpu[cpu_index(curcpu())];
+
+	if (hyperv_features & CPUID_HV_MSR_VP_INDEX) {
+		/* Save virtual processor id. */
+		pd->vcpuid = rdmsr(MSR_HV_VP_INDEX);
+	} else {
+		/* Set virtual processor id to 0 for compatibility. */
+		pd->vcpuid = 0;
+	}
+
+	/*
+	 * Setup the SynIC message.
+	 */
+	orig = rdmsr(MSR_HV_SIMP);
+	val = MSR_HV_SIMP_ENABLE | (orig & MSR_HV_SIMP_RSVD_MASK) |
+	    ((hyperv_dma_get_paddr(&pd->simp_dma) >> PAGE_SHIFT) <<
+	     MSR_HV_SIMP_PGSHIFT);
+	wrmsr(MSR_HV_SIMP, val);
+
+	/*
+	 * Setup the SynIC event flags.
+	 */
+	orig = rdmsr(MSR_HV_SIEFP);
+	val = MSR_HV_SIEFP_ENABLE | (orig & MSR_HV_SIEFP_RSVD_MASK) |
+	    ((hyperv_dma_get_paddr(&pd->siep_dma) >> PAGE_SHIFT) <<
+	     MSR_HV_SIEFP_PGSHIFT);
+	wrmsr(MSR_HV_SIEFP, val);
+
+	/*
+	 * Configure and unmask SINT for message and event flags.
+	 */
+	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
+	orig = rdmsr(sint);
+	val = sc->sc_idtvec | MSR_HV_SINT_AUTOEOI |
+	    (orig & MSR_HV_SINT_RSVD_MASK);
+	wrmsr(sint, val);
+
+	/*
+	 * Configure and unmask SINT for timer.
+	 */
+	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
+	orig = rdmsr(sint);
+	val = sc->sc_idtvec | MSR_HV_SINT_AUTOEOI |
+	    (orig & MSR_HV_SINT_RSVD_MASK);
+	wrmsr(sint, val);
+
+	/*
+	 * All done; enable SynIC.
+	 */
+	orig = rdmsr(MSR_HV_SCONTROL);
+	val = MSR_HV_SCTRL_ENABLE | (orig & MSR_HV_SCTRL_RSVD_MASK);
+	wrmsr(MSR_HV_SCONTROL, val);
+
+	kpreempt_enable();
+}
+
+/*
+ * Per-CPU SynIC teardown, run on every CPU via xcall: disable the
+ * SynIC, mask the message and timer SINTs and unhook the message and
+ * event flag pages (reverse of vmbus_init_synic()).
+ */
+static void
+vmbus_deinit_synic(void *arg1, void *arg2)
+{
+	uint64_t orig;
+	uint32_t sint;
+
+	kpreempt_disable();
+
+	/*
+	 * Disable SynIC.
+	 */
+	orig = rdmsr(MSR_HV_SCONTROL);
+	wrmsr(MSR_HV_SCONTROL, (orig & MSR_HV_SCTRL_RSVD_MASK));
+
+	/*
+	 * Mask message and event flags SINT.
+	 */
+	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
+	orig = rdmsr(sint);
+	wrmsr(sint, orig | MSR_HV_SINT_MASKED);
+
+	/*
+	 * Mask timer SINT.
+	 */
+	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
+	orig = rdmsr(sint);
+	wrmsr(sint, orig | MSR_HV_SINT_MASKED);
+
+	/*
+	 * Teardown SynIC message.
+	 */
+	orig = rdmsr(MSR_HV_SIMP);
+	wrmsr(MSR_HV_SIMP, (orig & MSR_HV_SIMP_RSVD_MASK));
+
+	/*
+	 * Teardown SynIC event flags.
+	 */
+	orig = rdmsr(MSR_HV_SIEFP);
+	wrmsr(MSR_HV_SIEFP, (orig & MSR_HV_SIEFP_RSVD_MASK));
+
+	kpreempt_enable();
+}
+
+/*
+ * Negotiate the VMBus protocol version: send CONNECT for each known
+ * version, newest first, until the host acknowledges one (chm_done).
+ * On success records the version, marks the softc connected and
+ * seeds the GPADL handle counter; returns ENXIO if no version is
+ * accepted, or the vmbus_cmd() error on transport failure.
+ */
+static int
+vmbus_connect(struct vmbus_softc *sc)
+{
+	static const uint32_t versions[] = {
+		VMBUS_VERSION_WIN8_1,
+		VMBUS_VERSION_WIN8,
+		VMBUS_VERSION_WIN7,
+		VMBUS_VERSION_WS2008
+	};
+	struct vmbus_chanmsg_connect cmd;
+	struct vmbus_chanmsg_connect_resp rsp;
+	int i, rv;
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.chm_hdr.chm_type = VMBUS_CHANMSG_CONNECT;
+	cmd.chm_evtflags = hyperv_dma_get_paddr(&sc->sc_events_dma);
+	cmd.chm_mnf1 = hyperv_dma_get_paddr(&sc->sc_monitor_dma[0]);
+	cmd.chm_mnf2 = hyperv_dma_get_paddr(&sc->sc_monitor_dma[1]);
+
+	memset(&rsp, 0, sizeof(rsp));
+
+	for (i = 0; i < __arraycount(versions); i++) {
+		cmd.chm_ver = versions[i];
+		rv = vmbus_cmd(sc, &cmd, sizeof(cmd), &rsp, sizeof(rsp), 0);
+		if (rv) {
+			DPRINTF("%s: CONNECT failed\n",
+			    device_xname(sc->sc_dev));
+			return rv;
+		}
+		if (rsp.chm_done) {
+			atomic_or_32(&sc->sc_flags, VMBUS_SCFLAG_CONNECTED);
+			sc->sc_proto = versions[i];
+			sc->sc_handle = VMBUS_GPADL_START;
+			break;
+		}
+	}
+	if (i == __arraycount(versions)) {
+		aprint_error_dev(sc->sc_dev,
+		    "failed to negotiate protocol version\n");
+		return ENXIO;
+	}
+
+	return 0;
+}
+
+/*
+ * Send a channel protocol command to the host and, unless HCF_NOREPLY
+ * is set, wait for the matching response and copy it into rsp.
+ *
+ * flags: HCF_NOREPLY - fire and forget, no response expected;
+ *        HCF_NOSLEEP - poll instead of sleeping while waiting.
+ * Returns 0 on success, EMSGSIZE for oversized payloads, or the
+ * vmbus_start() error.
+ */
+static int
+vmbus_cmd(struct vmbus_softc *sc, void *cmd, size_t cmdlen, void *rsp,
+    size_t rsplen, int flags)
+{
+	struct vmbus_msg *msg;
+	paddr_t pa;
+	int rv;
+
+	if (cmdlen > VMBUS_MSG_DSIZE_MAX) {
+		/* %zu: cmdlen is size_t (%lu would be wrong on i386). */
+		aprint_error_dev(sc->sc_dev, "payload too large (%zu)\n",
+		    cmdlen);
+		return EMSGSIZE;
+	}
+
+	/*
+	 * NOTE(review): PR_WAITOK is used even for HCF_NOSLEEP callers;
+	 * confirm this is never reached from a non-sleepable context.
+	 */
+	msg = pool_cache_get_paddr(sc->sc_msgpool, PR_WAITOK, &pa);
+	memset(msg, 0, sizeof(*msg));
+	msg->msg_req.hc_dsize = cmdlen;
+	memcpy(msg->msg_req.hc_data, cmd, cmdlen);
+
+	if (!(flags & HCF_NOREPLY)) {
+		msg->msg_rsp = rsp;
+		msg->msg_rsplen = rsplen;
+	} else
+		msg->msg_flags |= MSGF_NOQUEUE;
+
+	if (flags & HCF_NOSLEEP)
+		msg->msg_flags |= MSGF_NOSLEEP;
+
+	rv = vmbus_start(sc, msg, pa);
+	if (rv == 0)
+		rv = vmbus_reply(sc, msg);
+	pool_cache_put_paddr(sc->sc_msgpool, msg, pa);
+	return rv;
+}
+
+/*
+ * Post a message to the host via the post-message hypercall, retrying
+ * with increasing delays on transient failure.  Unless MSGF_NOQUEUE,
+ * the message is first queued on sc_reqs so the host's response can
+ * be matched to it later (it is dequeued again on failure).
+ * Returns 0 on success, EIO when all retries are exhausted.
+ */
+static int
+vmbus_start(struct vmbus_softc *sc, struct vmbus_msg *msg, paddr_t msg_pa)
+{
+	static const int delays[] = {
+		100, 100, 100, 500, 500, 5000, 5000, 5000
+	};
+	const char *wchan = "hvstart";
+	uint16_t status;
+	int i, s;
+
+	msg->msg_req.hc_connid = VMBUS_CONNID_MESSAGE;
+	msg->msg_req.hc_msgtype = 1;
+
+	if (!(msg->msg_flags & MSGF_NOQUEUE)) {
+		mutex_enter(&sc->sc_req_lock);
+		TAILQ_INSERT_TAIL(&sc->sc_reqs, msg, msg_entry);
+		mutex_exit(&sc->sc_req_lock);
+	}
+
+	for (i = 0; i < __arraycount(delays); i++) {
+		status = hyperv_hypercall_post_message(
+		    msg_pa + offsetof(struct vmbus_msg, msg_req));
+		if (status == HYPERCALL_STATUS_SUCCESS)
+			break;
+
+		if (msg->msg_flags & MSGF_NOSLEEP) {
+			/* Can't sleep: busy-wait and poll for completions. */
+			delay(delays[i]);
+			s = splnet();
+			hyperv_intr(NULL);
+			splx(s);
+		} else
+			tsleep(wchan, PRIBIO, wchan, 1);
+	}
+	if (status != HYPERCALL_STATUS_SUCCESS) {
+		aprint_error_dev(sc->sc_dev,
+		    "posting vmbus message failed with %d\n", status);
+		if (!(msg->msg_flags & MSGF_NOQUEUE)) {
+			mutex_enter(&sc->sc_req_lock);
+			TAILQ_REMOVE(&sc->sc_reqs, msg, msg_entry);
+			mutex_exit(&sc->sc_req_lock);
+		}
+		return EIO;
+	}
+
+	return 0;
+}
+
+/*
+ * vmbus_wait() predicate: non-zero once msg has been moved to the
+ * response queue by vmbus_channel_response().
+ */
+static int
+vmbus_reply_done(struct vmbus_softc *sc, struct vmbus_msg *msg)
+{
+	struct vmbus_msg *m;
+
+	mutex_enter(&sc->sc_rsp_lock);
+	TAILQ_FOREACH(m, &sc->sc_rsps, msg_entry) {
+		if (m == msg) {
+			mutex_exit(&sc->sc_rsp_lock);
+			return 1;
+		}
+	}
+	mutex_exit(&sc->sc_rsp_lock);
+	return 0;
+}
+
+/*
+ * Wait for the host's response to msg (no-op for MSGF_NOQUEUE
+ * messages) and remove it from the response queue.
+ */
+static int
+vmbus_reply(struct vmbus_softc *sc, struct vmbus_msg *msg)
+{
+
+	if (msg->msg_flags & MSGF_NOQUEUE)
+		return 0;
+
+	vmbus_wait(sc, vmbus_reply_done, msg, msg, "hvreply");
+
+	mutex_enter(&sc->sc_rsp_lock);
+	TAILQ_REMOVE(&sc->sc_rsps, msg, msg_entry);
+	mutex_exit(&sc->sc_rsp_lock);
+
+	return 0;
+}
+
+/*
+ * Block until cond(sc, msg) holds.  MSGF_NOSLEEP messages busy-wait
+ * and poll by calling hyperv_intr() directly at splnet; everything
+ * else tsleeps on wchan for a tick at a time.
+ */
+static void
+vmbus_wait(struct vmbus_softc *sc,
+    int (*cond)(struct vmbus_softc *, struct vmbus_msg *),
+    struct vmbus_msg *msg, void *wchan, const char *wmsg)
+{
+	int s;
+
+	while (!cond(sc, msg)) {
+		if (msg->msg_flags & MSGF_NOSLEEP) {
+			delay(1000);
+			s = splnet();
+			hyperv_intr(NULL);
+			splx(s);
+		} else
+			tsleep(wchan, PRIBIO, wmsg ? wmsg : "hvwait", 1);
+	}
+}
+
+/*
+ * Signal the host about an event on the connection whose monitor
+ * parameters live at physical address con_pa; returns the truncated
+ * hypercall status.
+ */
+static uint16_t
+vmbus_intr_signal(struct vmbus_softc *sc, paddr_t con_pa)
+{
+	uint64_t status;
+
+	status = hyperv_hypercall_signal_event(con_pa);
+	return (uint16_t)status;
+}
+
+#if LONG_BIT == 64
+#define ffsl(v)	ffs64(v)
+#elif LONG_BIT == 32
+#define ffsl(v)	ffs32(v)
+#else
+#error	unknown LONG_BIT
+#endif	/* LONG_BIT */
+
+/*
+ * Scan an array of event-flag words: atomically claim each non-zero
+ * word, then schedule every channel whose bit was set.  Channel id 0
+ * is the channel protocol connection itself and is skipped; unknown
+ * or non-open channels are logged and ignored.
+ */
+static void
+vmbus_event_flags_proc(struct vmbus_softc *sc, volatile u_long *revents,
+    int maxrow)
+{
+	struct vmbus_channel *ch;
+	u_long pending;
+	uint32_t chanid, chanid_base;
+	int row, chanid_ofs;
+
+	for (row = 0; row < maxrow; row++) {
+		if (revents[row] == 0)
+			continue;
+
+		/* Claim the whole word so concurrent signals aren't lost. */
+		pending = atomic_swap_ulong(&revents[row], 0);
+		chanid_base = row * LONG_BIT;
+
+		while ((chanid_ofs = ffsl(pending)) != 0) {
+			chanid_ofs--;	/* NOTE: ffs is 1-based */
+			pending &= ~(1UL << chanid_ofs);
+
+			chanid = chanid_base + chanid_ofs;
+			/* vmbus channel protocol message */
+			if (chanid == 0)
+				continue;
+
+			ch = vmbus_channel_lookup(sc, chanid);
+			if (ch == NULL) {
+				aprint_error_dev(sc->sc_dev,
+				    "unhandled event on %d\n", chanid);
+				continue;
+			}
+			if (ch->ch_state != VMBUS_CHANSTATE_OPENED) {
+				aprint_error_dev(sc->sc_dev,
+				    "channel %d is not active\n", chanid);
+				continue;
+			}
+			ch->ch_evcnt.ev_count++;
+			vmbus_channel_schedule(ch);
+		}
+	}
+}
+
+/*
+ * Event processing for Win8+ hosts: the per-CPU SynIC event flag
+ * page carries one bit per channel, so it can be scanned directly.
+ */
+static void
+vmbus_event_proc(struct vmbus_softc *sc, int cpu)
+{
+	struct vmbus_evtflags *evt;
+
+	/*
+	 * On Host with Win8 or above, the event page can be
+	 * checked directly to get the id of the channel
+	 * that has the pending interrupt.
+	 */
+	evt = (struct vmbus_evtflags *)sc->sc_percpu[cpu].siep +
+	    VMBUS_SINT_MESSAGE;
+
+	vmbus_event_flags_proc(sc, evt->evt_flags,
+	    __arraycount(evt->evt_flags));
+}
+
+/*
+ * Event processing for pre-Win8 hosts: bit 0 of the per-CPU page is
+ * only a doorbell; the real per-channel flags live in the shared
+ * receive half of the global event page (sc_revents).
+ */
+static void
+vmbus_event_proc_compat(struct vmbus_softc *sc, int cpu)
+{
+	struct vmbus_evtflags *evt;
+
+	evt = (struct vmbus_evtflags *)sc->sc_percpu[cpu].siep +
+	    VMBUS_SINT_MESSAGE;
+
+	if (test_bit(0, &evt->evt_flags[0])) {
+		clear_bit(0, &evt->evt_flags[0]);
+		/*
+		 * receive size is 1/2 page and divide that by 4 bytes
+		 */
+		vmbus_event_flags_proc(sc, sc->sc_revents,
+		    VMBUS_CHAN_MAX_COMPAT / VMBUS_EVTFLAG_LEN);
+	}
+}
+
+/* Placeholder used until attach installs a real event handler. */
+static void
+vmbus_event_proc_dummy(struct vmbus_softc *sc __unused, int cpu __unused)
+{
+}
+
+/*
+ * Message softint: drain this CPU's SynIC message slot, dispatching
+ * each channel protocol message through vmbus_msg_dispatch.  After
+ * consuming a slot, write MSR_HV_EOM when the host flagged another
+ * message as pending so it redelivers it.
+ */
+static void
+vmbus_message_softintr(void *arg)
+{
+	struct vmbus_softc *sc = arg;
+	struct vmbus_message *msg;
+	struct vmbus_chanmsg_hdr *hdr;
+	uint32_t type;
+	int cpu = cpu_index(curcpu());
+
+	for (;;) {
+		msg = (struct vmbus_message *)sc->sc_percpu[cpu].simp +
+		    VMBUS_SINT_MESSAGE;
+		if (msg->msg_type == HYPERV_MSGTYPE_NONE)
+			break;
+
+		hdr = (struct vmbus_chanmsg_hdr *)msg->msg_data;
+		type = hdr->chm_type;
+		if (type >= VMBUS_CHANMSG_COUNT) {
+			aprint_error_dev(sc->sc_dev,
+			    "unhandled message type %u flags %#x\n", type,
+			    msg->msg_flags);
+		} else {
+			if (vmbus_msg_dispatch[type].hmd_handler) {
+				vmbus_msg_dispatch[type].hmd_handler(sc, hdr);
+			} else {
+				aprint_error_dev(sc->sc_dev,
+				    "unhandled message type %u\n", type);
+			}
+		}
+
+		/* Free the slot before checking for a pending follow-up. */
+		msg->msg_type = HYPERV_MSGTYPE_NONE;
+		membar_sync();
+		if (msg->msg_flags & VMBUS_MSGFLAG_PENDING)
+			wrmsr(MSR_HV_EOM, 0);
+	}
+}
+
+/*
+ * Handle a response-type message: find the queued request whose type
+ * matches (per vmbus_msg_dispatch), copy the response payload into
+ * the waiter's buffer, move the message to the response queue and
+ * wake the waiter.  msg is NULL after the loop if nothing matched.
+ */
+static void
+vmbus_channel_response(struct vmbus_softc *sc, struct vmbus_chanmsg_hdr *rsphdr)
+{
+	struct vmbus_msg *msg;
+	struct vmbus_chanmsg_hdr *reqhdr;
+	int req;
+
+	req = vmbus_msg_dispatch[rsphdr->chm_type].hmd_request;
+	mutex_enter(&sc->sc_req_lock);
+	TAILQ_FOREACH(msg, &sc->sc_reqs, msg_entry) {
+		reqhdr = (struct vmbus_chanmsg_hdr *)&msg->msg_req.hc_data;
+		if (reqhdr->chm_type == req) {
+			TAILQ_REMOVE(&sc->sc_reqs, msg, msg_entry);
+			break;
+		}
+	}
+	mutex_exit(&sc->sc_req_lock);
+	if (msg != NULL) {
+		memcpy(msg->msg_rsp, rsphdr, msg->msg_rsplen);
+		mutex_enter(&sc->sc_rsp_lock);
+		TAILQ_INSERT_TAIL(&sc->sc_rsps, msg, msg_entry);
+		mutex_exit(&sc->sc_rsp_lock);
+		wakeup(msg);
+	}
+}
+
+/*
+ * Queue a CHOFFER message for later processing by
+ * vmbus_channel_scan().  Runs from the message softint, hence
+ * KM_NOSLEEP; allocation failure is fatal by design here.
+ */
+static void
+vmbus_channel_offer(struct vmbus_softc *sc, struct vmbus_chanmsg_hdr *hdr)
+{
+	struct vmbus_offer *co;
+
+	co = kmem_intr_zalloc(sizeof(*co), KM_NOSLEEP);
+	if (co == NULL)
+		panic("%s: couldn't allocate offer", __func__);
+
+	memcpy(&co->co_chan, hdr, sizeof(co->co_chan));
+
+	mutex_enter(&sc->sc_offer_lock);
+	SIMPLEQ_INSERT_TAIL(&sc->sc_offers, co, co_entry);
+	mutex_exit(&sc->sc_offer_lock);
+}
+
+/*
+ * CHRESCIND handler.  Currently only logs the revocation; the
+ * channel itself is not torn down here (WIP).
+ */
+static void
+vmbus_channel_rescind(struct vmbus_softc *sc, struct vmbus_chanmsg_hdr *hdr)
+{
+	const struct vmbus_chanmsg_chrescind *cmd;
+
+	cmd = (const struct vmbus_chanmsg_chrescind *)hdr;
+	aprint_normal_dev(sc->sc_dev, "revoking channel %u\n", cmd->chm_chanid);
+}
+
+/*
+ * CHOFFER_DONE handler: all offers have arrived; flag it and wake
+ * vmbus_channel_scan(), which sleeps on &sc->sc_offers.
+ */
+static void
+vmbus_channel_delivered(struct vmbus_softc *sc, struct vmbus_chanmsg_hdr *hdr)
+{
+
+	atomic_or_32(&sc->sc_flags, VMBUS_SCFLAG_OFFERS_DELIVERED);
+	wakeup(&sc->sc_offers);
+}
+
+/*
+ * Render a channel type GUID into str: the short identifier for
+ * well-known device classes, otherwise the raw GUID string via
+ * hyperv_guid2str().
+ */
+static void
+hyperv_guid_sprint(struct hyperv_guid *guid, char *str, size_t size)
+{
+	static const struct {
+		const struct hyperv_guid *guid;
+		const char *ident;
+	} map[] = {
+		{ &hyperv_guid_network,		"network" },
+		{ &hyperv_guid_ide,		"ide" },
+		{ &hyperv_guid_scsi,		"scsi" },
+		{ &hyperv_guid_shutdown,	"shutdown" },
+		{ &hyperv_guid_timesync,	"timesync" },
+		{ &hyperv_guid_heartbeat,	"heartbeat" },
+		{ &hyperv_guid_kvp,		"kvp" },
+		{ &hyperv_guid_vss,		"vss" },
+		{ &hyperv_guid_dynmem,		"dynamic-memory" },
+		{ &hyperv_guid_mouse,		"mouse" },
+		{ &hyperv_guid_kbd,		"keyboard" },
+		{ &hyperv_guid_video,		"video" },
+		{ &hyperv_guid_fc,		"fiber-channel" },
+		{ &hyperv_guid_fcopy,		"file-copy" },
+		{ &hyperv_guid_pcie,		"pcie-passthrough" },
+		{ &hyperv_guid_netdir,		"network-direct" },
+		{ &hyperv_guid_rdesktop,	"remote-desktop" },
+		{ &hyperv_guid_avma1,		"avma-1" },
+		{ &hyperv_guid_avma2,		"avma-2" },
+		{ &hyperv_guid_avma3,		"avma-3" },
+		{ &hyperv_guid_avma4,		"avma-4" },
+	};
+	int i;
+
+	for (i = 0; i < __arraycount(map); i++) {
+		if (memcmp(guid, map[i].guid, sizeof(*guid)) == 0) {
+			strlcpy(str, map[i].ident, size);
+			return;
+		}
+	}
+	hyperv_guid2str(guid, str, size);
+}
+
+/*
+ * vmbus_wait() predicate: non-zero once vmbus_channel_delivered()
+ * has flagged that all offers arrived.
+ */
+static int
+vmbus_channel_scan_done(struct vmbus_softc *sc, struct vmbus_msg *msg __unused)
+{
+
+	return ISSET(sc->sc_flags, VMBUS_SCFLAG_OFFERS_DELIVERED);
+}
+
+/*
+ * Request all channel offers from the host (CHREQUEST), wait until
+ * the host signals delivery complete, then process each queued offer
+ * into a channel.  Returns 0 on success, -1 if CHREQUEST fails.
+ */
+static int
+vmbus_channel_scan(struct vmbus_softc *sc)
+{
+	struct vmbus_chanmsg_hdr hdr;
+	struct vmbus_chanmsg_choffer rsp;
+	struct vmbus_offer *co;
+
+	SIMPLEQ_INIT(&sc->sc_offers);
+	mutex_init(&sc->sc_offer_lock, MUTEX_DEFAULT, IPL_NET);
+
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.chm_type = VMBUS_CHANMSG_CHREQUEST;
+
+	if (vmbus_cmd(sc, &hdr, sizeof(hdr), &rsp, sizeof(rsp), HCF_NOREPLY)) {
+		DPRINTF("%s: CHREQUEST failed\n", device_xname(sc->sc_dev));
+		return -1;
+	}
+
+	/*
+	 * NOTE(review): &hdr is cast to struct vmbus_msg * but only
+	 * msg_flags is read through it in vmbus_wait(); hdr is smaller
+	 * than struct vmbus_msg, so that read is out of bounds — confirm
+	 * and pass a real (zeroed) vmbus_msg instead.
+	 */
+	vmbus_wait(sc, vmbus_channel_scan_done, (struct vmbus_msg *)&hdr,
+	    &sc->sc_offers, "hvscan");
+
+	TAILQ_INIT(&sc->sc_channels);
+	mutex_init(&sc->sc_channel_lock, MUTEX_DEFAULT, IPL_NET);
+
+	mutex_enter(&sc->sc_offer_lock);
+	while (!SIMPLEQ_EMPTY(&sc->sc_offers)) {
+		co = SIMPLEQ_FIRST(&sc->sc_offers);
+		SIMPLEQ_REMOVE_HEAD(&sc->sc_offers, co_entry);
+		mutex_exit(&sc->sc_offer_lock);
+
+		vmbus_process_offer(sc, co);
+		kmem_free(co, sizeof(*co));
+
+		mutex_enter(&sc->sc_offer_lock);
+	}
+	mutex_exit(&sc->sc_offer_lock);
+
+	return 0;
+}
+
+/*
+ * Allocate and minimally initialize a channel: DMA-backed monitor
+ * parameter block, reference count of 1, empty sub-channel list,
+ * state CLOSED.  Returns NULL if the monitor block can't be
+ * allocated.
+ */
+static struct vmbus_channel *
+vmbus_channel_alloc(struct vmbus_softc *sc)
+{
+	struct vmbus_channel *ch;
+
+	ch = kmem_zalloc(sizeof(*ch), KM_SLEEP);
+
+	ch->ch_monprm = hyperv_dma_alloc(sc->sc_dmat, &ch->ch_monprm_dma,
+	    sizeof(*ch->ch_monprm), 8, 0, 1);
+	if (ch->ch_monprm == NULL) {
+		aprint_error_dev(sc->sc_dev, "monprm alloc failed\n");
+		kmem_free(ch, sizeof(*ch));
+		return NULL;
+	}
+	memset(ch->ch_monprm, 0, sizeof(*ch->ch_monprm));
+
+	ch->ch_refs = 1;
+	ch->ch_sc = sc;
+	mutex_init(&ch->ch_subchannel_lock, MUTEX_DEFAULT, IPL_NET);
+	TAILQ_INIT(&ch->ch_subchannels);
+
+	ch->ch_state = VMBUS_CHANSTATE_CLOSED;
+
+	return ch;
+}
+
+/*
+ * Free a channel.  The channel must be closed, unreferenced and own
+ * no sub-channels (asserted under DIAGNOSTIC).
+ */
+static void
+vmbus_channel_free(struct vmbus_channel *ch)
+{
+	struct vmbus_softc *sc = ch->ch_sc;
+
+	KASSERTMSG(TAILQ_EMPTY(&ch->ch_subchannels) &&
+	    ch->ch_subchannel_count == 0, "still owns sub-channels");
+	KASSERTMSG(ch->ch_state == 0 || ch->ch_state == VMBUS_CHANSTATE_CLOSED,
+	    "free busy channel");
+	KASSERTMSG(ch->ch_refs == 0, "channel %u: invalid refcnt %d",
+	    ch->ch_id, ch->ch_refs);
+
+	hyperv_dma_free(sc->sc_dmat, &ch->ch_monprm_dma);
+	mutex_destroy(&ch->ch_subchannel_lock);
+	/* XXX ch_evcnt */
+	softint_disestablish(ch->ch_taskq);
+	kmem_free(ch, sizeof(*ch));
+}
+
+/*
+ * Link a newly offered channel into the softc.  Primary channels
+ * (subidx 0) go on sc_channels; sub-channels are attached to their
+ * primary — matched by type/instance GUID — and inherit its device.
+ * Returns EINVAL for channel id 0, out-of-range ids, a duplicated
+ * primary, or a sub-channel with no primary.
+ */
+static int
+vmbus_channel_add(struct vmbus_channel *nch)
+{
+	struct vmbus_softc *sc = nch->ch_sc;
+	struct vmbus_channel *ch;
+	u_int refs;
+
+	if (nch->ch_id == 0) {
+		aprint_debug_dev(sc->sc_dev, "got channel 0 offer, discard\n");
+		return EINVAL;
+	} else if (nch->ch_id >= sc->sc_channel_max) {
+		aprint_error_dev(sc->sc_dev, "invalid channel %u offer\n",
+		    nch->ch_id);
+		return EINVAL;
+	}
+
+	/* Look for an existing channel with the same type/instance GUIDs. */
+	mutex_enter(&sc->sc_channel_lock);
+	TAILQ_FOREACH(ch, &sc->sc_channels, ch_entry) {
+		if (!memcmp(&ch->ch_type, &nch->ch_type, sizeof(ch->ch_type)) &&
+		    !memcmp(&ch->ch_inst, &nch->ch_inst, sizeof(ch->ch_inst)))
+			break;
+	}
+	if (VMBUS_CHAN_ISPRIMARY(nch)) {
+		if (ch == NULL) {
+			TAILQ_INSERT_TAIL(&sc->sc_channels, nch, ch_entry);
+			mutex_exit(&sc->sc_channel_lock);
+			goto done;
+		} else {
+			mutex_exit(&sc->sc_channel_lock);
+			aprint_error_dev(sc->sc_dev,
+			    "duplicated primary channel %u\n", nch->ch_id);
+			return EINVAL;
+		}
+	} else {
+		if (ch == NULL) {
+			mutex_exit(&sc->sc_channel_lock);
+			aprint_error_dev(sc->sc_dev, "no primary channel %u\n",
+			    nch->ch_id);
+			return EINVAL;
+		}
+	}
+	mutex_exit(&sc->sc_channel_lock);
+
+	KASSERT(!VMBUS_CHAN_ISPRIMARY(nch));
+	KASSERT(ch != NULL);
+
+	/*
+	 * ch_refs starts at 1 (vmbus_channel_alloc).  Unlike FreeBSD's
+	 * atomic_fetchadd_int(), atomic_add_int_nv() returns the NEW
+	 * value, so a freshly added sub-channel must now be at 2.
+	 */
+	refs = atomic_add_int_nv(&nch->ch_refs, 1);
+	KASSERT(refs == 2);
+
+	nch->ch_primary_channel = ch;
+	nch->ch_dev = ch->ch_dev;
+
+	mutex_enter(&ch->ch_subchannel_lock);
+	TAILQ_INSERT_TAIL(&ch->ch_subchannels, nch, ch_subentry);
+	ch->ch_subchannel_count++;
+	mutex_exit(&ch->ch_subchannel_lock);
+	/* Wake anyone waiting for sub-channels on the primary. */
+	wakeup(ch);
+
+done:
+	vmbus_channel_cpu_default(nch);
+
+	return 0;
+}
+
+/*
+ * Bind a channel's interrupt delivery to the given CPU, recording
+ * both the kernel cpu index and the Hyper-V virtual processor id.
+ * Pre-Win8 protocol versions only support vcpu 0.
+ */
+void
+vmbus_channel_cpu_set(struct vmbus_channel *ch, int cpu)
+{
+	struct vmbus_softc *sc = ch->ch_sc;
+
+	KASSERTMSG(cpu >= 0 && cpu < ncpu, "invalid cpu %d", cpu);
+
+	if (sc->sc_proto == VMBUS_VERSION_WS2008 ||
+	    sc->sc_proto == VMBUS_VERSION_WIN7) {
+		/* Only cpu0 is supported */
+		cpu = 0;
+	}
+
+	ch->ch_cpuid = cpu;
+	ch->ch_vcpu = sc->sc_percpu[cpu].vcpuid;
+}
+
+/*
+ * Round-robin channel-to-CPU assignment, shared across all channels
+ * via a static counter.
+ */
+void
+vmbus_channel_cpu_rr(struct vmbus_channel *ch)
+{
+	static uint32_t vmbus_channel_nextcpu;
+	int cpu;
+
+	cpu = atomic_add_32_nv(&vmbus_channel_nextcpu, 1) % ncpu;
+	vmbus_channel_cpu_set(ch, cpu);
+}
+
+static void
+vmbus_channel_cpu_default(struct vmbus_channel *ch)
+{
+
+	/*
+	 * By default, pin the channel to cpu0.  Devices having
+	 * special channel-cpu mapping requirement should call
+	 * vmbus_channel_cpu_{set,rr}().
+	 */
+	vmbus_channel_cpu_set(ch, 0);
+}
+
+/*
+ * Turn a CHOFFER message from the host into a vmbus_channel object
+ * and link it into the softc's channel list via vmbus_channel_add().
+ */
+static void
+vmbus_process_offer(struct vmbus_softc *sc, struct vmbus_offer *co)
+{
+	struct vmbus_channel *ch;
+
+	ch = vmbus_channel_alloc(sc);
+	if (ch == NULL) {
+		aprint_error_dev(sc->sc_dev, "allocate channel %u failed\n",
+		    co->co_chan.chm_chanid);
+		return;
+	}
+
+	/*
+	 * By default we setup state to enable batched reading.
+	 * A specific service can choose to disable this prior
+	 * to opening the channel.
+	 */
+	ch->ch_flags |= CHF_BATCHED;
+
+	/* Printable form of the channel-type GUID, for messages below. */
+	hyperv_guid_sprint(&co->co_chan.chm_chtype, ch->ch_ident,
+	    sizeof(ch->ch_ident));
+
+	/* WS2008 hosts don't supply a per-channel connection id. */
+	ch->ch_monprm->mp_connid = VMBUS_CONNID_EVENT;
+	if (sc->sc_proto > VMBUS_VERSION_WS2008)
+		ch->ch_monprm->mp_connid = co->co_chan.chm_connid;
+
+	if (co->co_chan.chm_flags1 & VMBUS_CHOFFER_FLAG1_HASMNF) {
+		/* Locate this channel's trigger bit in the monitor page. */
+		ch->ch_mgroup = co->co_chan.chm_montrig / VMBUS_MONTRIG_LEN;
+		ch->ch_mindex = co->co_chan.chm_montrig % VMBUS_MONTRIG_LEN;
+		ch->ch_flags |= CHF_MONITOR;
+	}
+
+	ch->ch_id = co->co_chan.chm_chanid;
+	/* Sub-index 0 marks a primary channel (VMBUS_CHAN_ISPRIMARY). */
+	ch->ch_subidx = co->co_chan.chm_subidx;
+
+	memcpy(&ch->ch_type, &co->co_chan.chm_chtype, sizeof(ch->ch_type));
+	memcpy(&ch->ch_inst, &co->co_chan.chm_chinst, sizeof(ch->ch_inst));
+
+	if (VMBUS_CHAN_ISPRIMARY(ch)) {
+		/* set primary channel mgmt wq */
+	} else {
+		/* set sub channel mgmt wq */
+	}
+
+	if (vmbus_channel_add(ch) != 0) {
+		vmbus_channel_free(ch);
+		return;
+	}
+
+	ch->ch_state = VMBUS_CHANSTATE_OFFERED;
+
+#ifdef HYPERV_DEBUG
+	printf("%s: channel %u: \"%s\"", device_xname(sc->sc_dev), ch->ch_id,
+	    ch->ch_ident);
+	if (ch->ch_flags & CHF_MONITOR)
+		printf(", monitor %u\n", co->co_chan.chm_montrig);
+	else
+		printf("\n");
+#endif
+}
+
+/*
+ * Tell the host the guest is done with the channel (CHFREE).
+ * Fire-and-forget: no reply is awaited (HCF_NOREPLY).
+ */
+static int
+vmbus_channel_release(struct vmbus_channel *ch)
+{
+	struct vmbus_softc *sc = ch->ch_sc;
+	struct vmbus_chanmsg_chfree cmd;
+	int rv;
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.chm_hdr.chm_type = VMBUS_CHANMSG_CHFREE;
+	cmd.chm_chanid = ch->ch_id;
+
+	rv = vmbus_cmd(sc, &cmd, sizeof(cmd), NULL, 0, HCF_NOREPLY);
+	if (rv) {
+		DPRINTF("%s: CHFREE failed with %d\n", device_xname(sc->sc_dev),
+		    rv);
+	}
+	return rv;
+}
+
+/*
+ * Sleep until at least "cnt" sub-channels have been offered on the
+ * primary channel "prich", then return an array of pointers to the
+ * first "cnt" of them.  The array (not the channels) is owned by
+ * the caller and must be released with vmbus_subchannel_put().
+ */
+struct vmbus_channel **
+vmbus_subchannel_get(struct vmbus_channel *prich, int cnt)
+{
+	struct vmbus_channel **ret, *ch;
+	int i;
+
+	KASSERT(cnt > 0);
+
+	ret = kmem_alloc(sizeof(struct vmbus_channel *) * cnt, KM_SLEEP);
+
+	mutex_enter(&prich->ch_subchannel_lock);
+
+	/* Woken by vmbus_channel_add() when a sub-channel arrives. */
+	while (prich->ch_subchannel_count < cnt)
+		/* XXX use condvar(9) instead of mtsleep */
+		mtsleep(prich, PRIBIO, "hvvmsubch", 0,
+		    &prich->ch_subchannel_lock);
+
+	i = 0;
+	TAILQ_FOREACH(ch, &prich->ch_subchannels, ch_subentry) {
+		ret[i] = ch;	/* XXX inc refs */
+
+		if (++i == cnt)
+			break;
+	}
+
+	mutex_exit(&prich->ch_subchannel_lock);
+
+	return ret;
+}
+
+/* Release the pointer array obtained from vmbus_subchannel_get(). */
+void
+vmbus_subchannel_put(struct vmbus_channel **subch, int cnt)
+{
+
+	kmem_free(subch, sizeof(struct vmbus_channel *) * cnt);
+}
+
+/*
+ * Map a host relid (channel id) back to its channel object, or
+ * NULL if no channel with that id is on the softc's list.
+ */
+static struct vmbus_channel *
+vmbus_channel_lookup(struct vmbus_softc *sc, uint32_t relid)
+{
+	struct vmbus_channel *ch;
+
+	TAILQ_FOREACH(ch, &sc->sc_channels, ch_entry) {
+		if (ch->ch_id == relid)
+			break;
+	}
+	/* TAILQ_FOREACH leaves ch == NULL when the list is exhausted. */
+	return ch;
+}
+
+/*
+ * Allocate the channel's shared ring-buffer pair (TX half followed
+ * by RX half in one DMA allocation) and register the memory with
+ * the host via a GPADL handle.  Each half is "buflen" rounded up
+ * to a page plus the vmbus_bufring header.
+ */
+static int
+vmbus_channel_ring_create(struct vmbus_channel *ch, uint32_t buflen)
+{
+	struct vmbus_softc *sc = ch->ch_sc;
+
+	buflen = roundup(buflen, PAGE_SIZE) + sizeof(struct vmbus_bufring);
+	ch->ch_ring_size = 2 * buflen;
+	ch->ch_ring = hyperv_dma_alloc(sc->sc_dmat, &ch->ch_ring_dma,
+	    ch->ch_ring_size, PAGE_SIZE, 0, 1);	/* page aligned memory */
+	if (ch->ch_ring == NULL) {
+		aprint_error_dev(sc->sc_dev,
+		    "failed to allocate channel ring\n");
+		return ENOMEM;
+	}
+
+	/* TX (write) direction occupies the first half. */
+	memset(&ch->ch_wrd, 0, sizeof(ch->ch_wrd));
+	ch->ch_wrd.rd_ring = (struct vmbus_bufring *)ch->ch_ring;
+	ch->ch_wrd.rd_size = buflen;
+	ch->ch_wrd.rd_dsize = buflen - sizeof(struct vmbus_bufring);
+	mutex_init(&ch->ch_wrd.rd_lock, MUTEX_DEFAULT, IPL_NET);
+
+	/* RX (read) direction occupies the second half. */
+	memset(&ch->ch_rrd, 0, sizeof(ch->ch_rrd));
+	ch->ch_rrd.rd_ring = (struct vmbus_bufring *)((uint8_t *)ch->ch_ring +
+	    buflen);
+	ch->ch_rrd.rd_size = buflen;
+	ch->ch_rrd.rd_dsize = buflen - sizeof(struct vmbus_bufring);
+	mutex_init(&ch->ch_rrd.rd_lock, MUTEX_DEFAULT, IPL_NET);
+
+	if (vmbus_handle_alloc(ch, &ch->ch_ring_dma, ch->ch_ring_size,
+	    &ch->ch_ring_gpadl)) {
+		aprint_error_dev(sc->sc_dev,
+		    "failed to obtain a PA handle for the ring\n");
+		vmbus_channel_ring_destroy(ch);
+		return ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Tear down the channel's ring buffers.  The GPADL handle is
+ * revoked *before* the backing DMA memory is freed: until the host
+ * acknowledges GPADL_DISCONN it may still access the ring pages,
+ * so freeing them first would hand the host recycled memory.
+ */
+static void
+vmbus_channel_ring_destroy(struct vmbus_channel *ch)
+{
+	struct vmbus_softc *sc = ch->ch_sc;
+
+	/* Only disconnect if a handle was actually established. */
+	if (ch->ch_ring_gpadl != 0) {
+		vmbus_handle_free(ch, ch->ch_ring_gpadl);
+		ch->ch_ring_gpadl = 0;
+	}
+
+	hyperv_dma_free(sc->sc_dmat, &ch->ch_ring_dma);
+	ch->ch_ring = NULL;
+
+	mutex_destroy(&ch->ch_wrd.rd_lock);
+	memset(&ch->ch_wrd, 0, sizeof(ch->ch_wrd));
+	mutex_destroy(&ch->ch_rrd.rd_lock);
+	memset(&ch->ch_rrd, 0, sizeof(ch->ch_rrd));
+}
+
+/*
+ * Open the channel: create its rings if not yet done, then issue
+ * CHOPEN with up to "udatalen" bytes of service-specific data.
+ * On success "handler" is invoked with "arg" whenever the channel
+ * has data pending.  Returns 0 or an errno; on failure the rings
+ * are destroyed and the channel reverts to OFFERED.
+ */
+int
+vmbus_channel_open(struct vmbus_channel *ch, size_t buflen, void *udata,
+    size_t udatalen, void (*handler)(void *), void *arg)
+{
+	struct vmbus_softc *sc = ch->ch_sc;
+	struct vmbus_chanmsg_chopen cmd;
+	struct vmbus_chanmsg_chopen_resp rsp;
+	int rv = EINVAL;
+
+	if (ch->ch_ring == NULL &&
+	    (rv = vmbus_channel_ring_create(ch, buflen))) {
+		DPRINTF("%s: failed to create channel ring\n",
+		    device_xname(sc->sc_dev));
+		return rv;
+	}
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.chm_hdr.chm_type = VMBUS_CHANMSG_CHOPEN;
+	cmd.chm_openid = ch->ch_id;
+	cmd.chm_chanid = ch->ch_id;
+	cmd.chm_gpadl = ch->ch_ring_gpadl;
+	/* TX ring page count; the RX ring takes the remainder. */
+	cmd.chm_txbr_pgcnt = ch->ch_wrd.rd_size >> PAGE_SHIFT;
+	cmd.chm_vcpuid = ch->ch_vcpu;
+	if (udata && udatalen > 0)
+		memcpy(cmd.chm_udata, udata, udatalen);
+
+	memset(&rsp, 0, sizeof(rsp));
+
+	/*
+	 * Install the handler and mark the channel open before the
+	 * command is sent: events may arrive as soon as the host
+	 * processes CHOPEN.
+	 */
+	ch->ch_handler = handler;
+	ch->ch_ctx = arg;
+	ch->ch_state = VMBUS_CHANSTATE_OPENED;
+
+	rv = vmbus_cmd(sc, &cmd, sizeof(cmd), &rsp, sizeof(rsp), 0);
+	if (rv) {
+		vmbus_channel_ring_destroy(ch);
+		DPRINTF("%s: CHOPEN failed with %d\n", device_xname(sc->sc_dev),
+		    rv);
+		ch->ch_handler = NULL;
+		ch->ch_ctx = NULL;
+		ch->ch_state = VMBUS_CHANSTATE_OFFERED;
+		return rv;
+	}
+	return 0;
+}
+
+/*
+ * Drop a reference on the channel and, on the final drop, release
+ * it on the host and free it.  Sub-channels are additionally
+ * unlinked from their primary, waking any waiter sleeping in
+ * vmbus_subchannel_get().
+ *
+ * NOTE(review): atomic_add_int_nv() returns the *new* value, so
+ * "refs == 1" means one reference is still outstanding after this
+ * decrement; confirm this is intended rather than "== 0".
+ */
+static void
+vmbus_channel_detach(struct vmbus_channel *ch)
+{
+	u_int refs;
+
+	refs = atomic_add_int_nv(&ch->ch_refs, -1);
+	if (refs == 1) {
+		/* XXX on workqueue? */
+		if (VMBUS_CHAN_ISPRIMARY(ch)) {
+			vmbus_channel_release(ch);
+			vmbus_channel_free(ch);
+		} else {
+			struct vmbus_channel *prich = ch->ch_primary_channel;
+
+			vmbus_channel_release(ch);
+
+			mutex_enter(&prich->ch_subchannel_lock);
+			TAILQ_REMOVE(&prich->ch_subchannels, ch, ch_subentry);
+			prich->ch_subchannel_count--;
+			mutex_exit(&prich->ch_subchannel_lock);
+			wakeup(prich);
+
+			vmbus_channel_free(ch);
+		}
+	}
+}
+
+/*
+ * Send CHCLOSE and tear down the channel's ring buffers.  The
+ * channel object itself stays allocated; its state transitions
+ * OPENED -> CLOSING -> CLOSED (left in CLOSING on failure).
+ */
+static int
+vmbus_channel_close_internal(struct vmbus_channel *ch)
+{
+	struct vmbus_softc *sc = ch->ch_sc;
+	struct vmbus_chanmsg_chclose cmd;
+	int rv;
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.chm_hdr.chm_type = VMBUS_CHANMSG_CHCLOSE;
+	cmd.chm_chanid = ch->ch_id;
+
+	ch->ch_state = VMBUS_CHANSTATE_CLOSING;
+	rv = vmbus_cmd(sc, &cmd, sizeof(cmd), NULL, 0, HCF_NOREPLY);
+	if (rv) {
+		DPRINTF("%s: CHCLOSE failed with %d\n",
+		    device_xname(sc->sc_dev), rv);
+		return rv;
+	}
+	ch->ch_state = VMBUS_CHANSTATE_CLOSED;
+	vmbus_channel_ring_destroy(ch);
+	return 0;
+}
+
+/*
+ * Close a single channel directly.  A sub-channel additionally
+ * drops the reference taken when it was attached to its primary.
+ */
+int
+vmbus_channel_close_direct(struct vmbus_channel *ch)
+{
+	int rv;
+
+	rv = vmbus_channel_close_internal(ch);
+	if (!VMBUS_CHAN_ISPRIMARY(ch))
+		vmbus_channel_detach(ch);
+	return rv;
+}
+
+/*
+ * Close a primary channel and all of its sub-channels.  Called
+ * with a primary channel; sub-channels are closed implicitly and
+ * return immediately.
+ */
+int
+vmbus_channel_close(struct vmbus_channel *ch)
+{
+	struct vmbus_channel **subch;
+	int i, cnt, rv;
+
+	if (!VMBUS_CHAN_ISPRIMARY(ch))
+		return 0;
+
+	/*
+	 * Snapshot the count: detaching a sub-channel below shrinks
+	 * ch_subchannel_count, so it must not be re-read as the
+	 * loop bound.
+	 */
+	cnt = ch->ch_subchannel_count;
+	if (cnt > 0) {
+		subch = vmbus_subchannel_get(ch, cnt);
+		for (i = 0; i < cnt; i++) {
+			rv = vmbus_channel_close_internal(subch[i]);
+			(void) rv;	/* XXX */
+			/* Detach the sub-channel, not the primary. */
+			vmbus_channel_detach(subch[i]);
+		}
+		vmbus_subchannel_put(subch, cnt);
+	}
+
+	return vmbus_channel_close_internal(ch);
+}
+
+/*
+ * Signal the host that this channel has pending data: set the
+ * channel's bit in the shared write-event page, then either trip
+ * its monitor-trigger bit (monitored channels, coalesced by the
+ * host) or issue an immediate hypercall signal.
+ */
+static inline void
+vmbus_channel_setevent(struct vmbus_softc *sc, struct vmbus_channel *ch)
+{
+	struct vmbus_mon_trig *mtg;
+
+	/* Each uint32_t represents 32 channels */
+	set_bit(ch->ch_id, sc->sc_wevents);
+	if (ch->ch_flags & CHF_MONITOR) {
+		mtg = &sc->sc_monitor[1]->mnf_trigs[ch->ch_mgroup];
+		set_bit(ch->ch_mindex, &mtg->mt_pending);
+	} else
+		vmbus_intr_signal(sc, hyperv_dma_get_paddr(&ch->ch_monprm_dma));
+}
+
+/*
+ * Softint handler for batched channels: run the channel callback
+ * while data is pending, then unmask the ring.  If more data
+ * arrived between the callback and the unmask, re-mask and
+ * reschedule so no event is lost.
+ */
+static void
+vmbus_channel_intr(void *arg)
+{
+	struct vmbus_channel *ch = arg;
+
+	if (vmbus_channel_ready(ch))
+		ch->ch_handler(ch->ch_ctx);
+
+	/* unpause returns the bytes still readable after unmasking */
+	if (vmbus_channel_unpause(ch) == 0)
+		return;
+
+	vmbus_channel_pause(ch);
+	vmbus_channel_schedule(ch);
+}
+
+/*
+ * Establish the softint used for deferred (batched) processing of
+ * this channel.  Returns 0 on success, -1 on failure.
+ * NOTE(review): the "name" argument is currently unused.
+ */
+int
+vmbus_channel_setdeferred(struct vmbus_channel *ch, const char *name)
+{
+
+	ch->ch_taskq = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE,
+	    vmbus_channel_intr, ch);
+	if (ch->ch_taskq == NULL)
+		return -1;
+	return 0;
+}
+
+/*
+ * Dispatch channel work: batched channels mask the RX ring and
+ * defer to the softint; unbatched channels run their callback
+ * directly in the caller's (interrupt) context.
+ */
+void
+vmbus_channel_schedule(struct vmbus_channel *ch)
+{
+
+	if (ch->ch_handler) {
+		if (ch->ch_flags & CHF_BATCHED) {
+			vmbus_channel_pause(ch);
+			softint_schedule(ch->ch_taskq);
+		} else
+			ch->ch_handler(ch->ch_ctx);
+	}
+}
+
+/*
+ * Copy "datalen" bytes into the TX ring at the local producer
+ * index, wrapping at the end of the data area.  Called from
+ * vmbus_ring_write() with the ring locked and space verified.
+ */
+static __inline void
+vmbus_ring_put(struct vmbus_ring_data *wrd, uint8_t *data, uint32_t datalen)
+{
+	int left = MIN(datalen, wrd->rd_dsize - wrd->rd_prod);
+
+	/* First chunk up to the end of the buffer, rest from offset 0. */
+	memcpy(&wrd->rd_ring->br_data[wrd->rd_prod], data, left);
+	memcpy(&wrd->rd_ring->br_data[0], data + left, datalen - left);
+	wrd->rd_prod += datalen;
+	if (wrd->rd_prod >= wrd->rd_dsize)
+		wrd->rd_prod -= wrd->rd_dsize;
+}
+
+/*
+ * Copy "datalen" bytes out of the RX ring at the local consumer
+ * index, wrapping at the end of the data area.  With "peek" set
+ * the consumer index is left unchanged so the data can be re-read.
+ */
+static inline void
+vmbus_ring_get(struct vmbus_ring_data *rrd, uint8_t *data, uint32_t datalen,
+    int peek)
+{
+	int left = MIN(datalen, rrd->rd_dsize - rrd->rd_cons);
+
+	memcpy(data, &rrd->rd_ring->br_data[rrd->rd_cons], left);
+	memcpy(data + left, &rrd->rd_ring->br_data[0], datalen - left);
+	if (!peek) {
+		rrd->rd_cons += datalen;
+		if (rrd->rd_cons >= rrd->rd_dsize)
+			rrd->rd_cons -= rrd->rd_dsize;
+	}
+}
+
+/*
+ * Derive the writable ("towrite") and readable ("toread") byte
+ * counts of a ring from its shared read/write indices.  Either
+ * output pointer may be NULL when the caller only needs one side.
+ */
+static __inline void
+vmbus_ring_avail(struct vmbus_ring_data *rd, uint32_t *towrite,
+    uint32_t *toread)
+{
+	uint32_t ridx = rd->rd_ring->br_rindex;
+	uint32_t widx = rd->rd_ring->br_windex;
+	uint32_t used, space;
+
+	/* Bytes currently queued between reader and writer. */
+	if (widx >= ridx)
+		used = widx - ridx;
+	else
+		used = rd->rd_dsize - (ridx - widx);
+	space = rd->rd_dsize - used;
+
+	if (towrite)
+		*towrite = space;
+	if (toread)
+		*toread = used;
+}
+
+/*
+ * Write an iovec chain into the TX ring followed by a 64-bit
+ * trailer holding the previous producer index, then publish the
+ * new write index.  Sets *needsig when the host must be signalled:
+ * interrupts unmasked and the ring just went empty -> non-empty.
+ * Returns EAGAIN when the ring lacks space.  Caller holds rd_lock.
+ */
+static int
+vmbus_ring_write(struct vmbus_ring_data *wrd, struct iovec *iov, int iov_cnt,
+    int *needsig)
+{
+	uint64_t indices = 0;
+	uint32_t avail, oprod, datalen = sizeof(indices);
+	int i;
+
+	for (i = 0; i < iov_cnt; i++)
+		datalen += iov[i].iov_len;
+
+	KASSERT(datalen <= wrd->rd_dsize);
+
+	vmbus_ring_avail(wrd, &avail, NULL);
+	if (avail <= datalen) {
+		DPRINTF("%s: avail %u datalen %u\n", __func__, avail, datalen);
+		return EAGAIN;
+	}
+
+	oprod = wrd->rd_prod;
+
+	for (i = 0; i < iov_cnt; i++)
+		vmbus_ring_put(wrd, iov[i].iov_base, iov[i].iov_len);
+
+	indices = (uint64_t)oprod << 32;
+	vmbus_ring_put(wrd, (uint8_t *)&indices, sizeof(indices));
+
+	/* Data must be globally visible before the index update. */
+	membar_sync();
+	wrd->rd_ring->br_windex = wrd->rd_prod;
+	membar_sync();
+
+	/* Signal when the ring transitions from being empty to non-empty */
+	if (wrd->rd_ring->br_imask == 0 &&
+	    wrd->rd_ring->br_rindex == oprod)
+		*needsig = 1;
+	else
+		*needsig = 0;
+
+	return 0;
+}
+
+/*
+ * Send an inline-data packet on the channel: header + payload,
+ * zero-padded to an 8-byte boundary.  "rid" is the transaction id
+ * echoed back in the host's completion.  Signals the host if the
+ * ring write requires it.
+ */
+int
+vmbus_channel_send(struct vmbus_channel *ch, void *data, uint32_t datalen,
+    uint64_t rid, int type, uint32_t flags)
+{
+	struct vmbus_softc *sc = ch->ch_sc;
+	struct vmbus_chanpkt cp;
+	struct iovec iov[3];
+	uint32_t pktlen, pktlen_aligned;
+	uint64_t zeropad = 0;
+	int rv, needsig = 0;
+
+	pktlen = sizeof(cp) + datalen;
+	pktlen_aligned = roundup(pktlen, sizeof(uint64_t));
+
+	cp.cp_hdr.cph_type = type;
+	cp.cp_hdr.cph_flags = flags;
+	/* Header/total lengths are carried in 8-byte units. */
+	VMBUS_CHANPKT_SETLEN(cp.cp_hdr.cph_hlen, sizeof(cp));
+	VMBUS_CHANPKT_SETLEN(cp.cp_hdr.cph_tlen, pktlen_aligned);
+	cp.cp_hdr.cph_tid = rid;
+
+	iov[0].iov_base = &cp;
+	iov[0].iov_len = sizeof(cp);
+
+	iov[1].iov_base = data;
+	iov[1].iov_len = datalen;
+
+	iov[2].iov_base = &zeropad;
+	iov[2].iov_len = pktlen_aligned - pktlen;
+
+	mutex_enter(&ch->ch_wrd.rd_lock);
+	rv = vmbus_ring_write(&ch->ch_wrd, iov, 3, &needsig);
+	mutex_exit(&ch->ch_wrd.rd_lock);
+	if (rv == 0 && needsig)
+		vmbus_channel_setevent(sc, ch);
+
+	return rv;
+}
+
+/*
+ * Send a packet referencing external pages via a scatter-gather
+ * list of "nsge" GPA entries, followed by "datalen" bytes of
+ * inline data, zero-padded to an 8-byte boundary.
+ */
+int
+vmbus_channel_send_sgl(struct vmbus_channel *ch, struct vmbus_gpa *sgl,
+    uint32_t nsge, void *data, uint32_t datalen, uint64_t rid)
+{
+	struct vmbus_softc *sc = ch->ch_sc;
+	struct vmbus_chanpkt_sglist cp;
+	struct iovec iov[4];
+	uint32_t buflen, pktlen, pktlen_aligned;
+	uint64_t zeropad = 0;
+	int rv, needsig = 0;
+
+	buflen = sizeof(struct vmbus_gpa) * nsge;
+	pktlen = sizeof(cp) + datalen + buflen;
+	pktlen_aligned = roundup(pktlen, sizeof(uint64_t));
+
+	cp.cp_hdr.cph_type = VMBUS_CHANPKT_TYPE_GPA;
+	/* RC: request a completion from the host. */
+	cp.cp_hdr.cph_flags = VMBUS_CHANPKT_FLAG_RC;
+	VMBUS_CHANPKT_SETLEN(cp.cp_hdr.cph_hlen, sizeof(cp) + buflen);
+	VMBUS_CHANPKT_SETLEN(cp.cp_hdr.cph_tlen, pktlen_aligned);
+	cp.cp_hdr.cph_tid = rid;
+	cp.cp_gpa_cnt = nsge;
+	cp.cp_rsvd = 0;
+
+	iov[0].iov_base = &cp;
+	iov[0].iov_len = sizeof(cp);
+
+	iov[1].iov_base = sgl;
+	iov[1].iov_len = buflen;
+
+	iov[2].iov_base = data;
+	iov[2].iov_len = datalen;
+
+	iov[3].iov_base = &zeropad;
+	iov[3].iov_len = pktlen_aligned - pktlen;
+
+	mutex_enter(&ch->ch_wrd.rd_lock);
+	rv = vmbus_ring_write(&ch->ch_wrd, iov, 4, &needsig);
+	mutex_exit(&ch->ch_wrd.rd_lock);
+	if (rv == 0 && needsig)
+		vmbus_channel_setevent(sc, ch);
+
+	return rv;
+}
+
+/*
+ * Send a packet referencing external pages via a single GPA range
+ * (page-range list) of "nprp" pages, followed by "datalen" bytes
+ * of inline data, zero-padded to an 8-byte boundary.
+ */
+int
+vmbus_channel_send_prpl(struct vmbus_channel *ch, struct vmbus_gpa_range *prpl,
+    uint32_t nprp, void *data, uint32_t datalen, uint64_t rid)
+{
+	struct vmbus_softc *sc = ch->ch_sc;
+	struct vmbus_chanpkt_prplist cp;
+	struct iovec iov[4];
+	uint32_t buflen, pktlen, pktlen_aligned;
+	uint64_t zeropad = 0;
+	int rv, needsig = 0;
+
+	/* Range header plus nprp page numbers. */
+	buflen = sizeof(struct vmbus_gpa_range) * (nprp + 1);
+	pktlen = sizeof(cp) + datalen + buflen;
+	pktlen_aligned = roundup(pktlen, sizeof(uint64_t));
+
+	cp.cp_hdr.cph_type = VMBUS_CHANPKT_TYPE_GPA;
+	cp.cp_hdr.cph_flags = VMBUS_CHANPKT_FLAG_RC;
+	VMBUS_CHANPKT_SETLEN(cp.cp_hdr.cph_hlen, sizeof(cp) + buflen);
+	VMBUS_CHANPKT_SETLEN(cp.cp_hdr.cph_tlen, pktlen_aligned);
+	cp.cp_hdr.cph_tid = rid;
+	cp.cp_range_cnt = 1;
+	cp.cp_rsvd = 0;
+
+	iov[0].iov_base = &cp;
+	iov[0].iov_len = sizeof(cp);
+
+	iov[1].iov_base = prpl;
+	iov[1].iov_len = buflen;
+
+	iov[2].iov_base = data;
+	iov[2].iov_len = datalen;
+
+	iov[3].iov_base = &zeropad;
+	iov[3].iov_len = pktlen_aligned - pktlen;
+
+	mutex_enter(&ch->ch_wrd.rd_lock);
+	rv = vmbus_ring_write(&ch->ch_wrd, iov, 4, &needsig);
+	mutex_exit(&ch->ch_wrd.rd_lock);
+	if (rv == 0 && needsig)
+		vmbus_channel_setevent(sc, ch);
+
+	return rv;
+}
+
+/*
+ * Non-destructively read "datalen" bytes from the RX ring.  The
+ * consumer index is left unchanged.  Returns EAGAIN if fewer than
+ * "datalen" bytes are queued.  Caller holds rd_lock.
+ */
+static int
+vmbus_ring_peek(struct vmbus_ring_data *rrd, void *data, uint32_t datalen)
+{
+	uint32_t avail;
+
+	KASSERT(datalen <= rrd->rd_dsize);
+
+	vmbus_ring_avail(rrd, NULL, &avail);
+	if (avail < datalen)
+		return EAGAIN;
+
+	vmbus_ring_get(rrd, (uint8_t *)data, datalen, 1);
+	return 0;
+}
+
+/*
+ * Consume "datalen" bytes from the RX ring, first skipping
+ * "offset" bytes (the packet header, when the caller only wants
+ * the payload) and finally the 64-bit index trailer, then publish
+ * the new read index to the host.  Caller holds rd_lock.
+ */
+static int
+vmbus_ring_read(struct vmbus_ring_data *rrd, void *data, uint32_t datalen,
+    uint32_t offset)
+{
+	uint64_t indices;
+	uint32_t avail;
+
+	KASSERT(datalen <= rrd->rd_dsize);
+
+	vmbus_ring_avail(rrd, NULL, &avail);
+	if (avail < datalen) {
+		DPRINTF("%s: avail %u datalen %u\n", __func__, avail, datalen);
+		return EAGAIN;
+	}
+
+	if (offset) {
+		rrd->rd_cons += offset;
+		if (rrd->rd_cons >= rrd->rd_dsize)
+			rrd->rd_cons -= rrd->rd_dsize;
+	}
+
+	vmbus_ring_get(rrd, (uint8_t *)data, datalen, 0);
+	/* Discard the trailing producer-index block. */
+	vmbus_ring_get(rrd, (uint8_t *)&indices, sizeof(indices), 0);
+
+	/* The reads above must complete before the index is published. */
+	membar_sync();
+	rrd->rd_ring->br_rindex = rrd->rd_cons;
+
+	return 0;
+}
+
+/*
+ * Receive one packet from the channel into "data".  With "raw" set
+ * the packet header is returned as well; otherwise only the
+ * payload is copied.  *rlen gets the number of bytes stored and
+ * *rid the sender's transaction id.  Returns 0, EAGAIN when no
+ * complete packet is queued, or EINVAL when "data" is too small.
+ */
+int
+vmbus_channel_recv(struct vmbus_channel *ch, void *data, uint32_t datalen,
+    uint32_t *rlen, uint64_t *rid, int raw)
+{
+	struct vmbus_softc *sc = ch->ch_sc;
+	struct vmbus_chanpkt_hdr cph;
+	uint32_t offset, pktlen;
+	int rv;
+
+	*rlen = 0;
+
+	mutex_enter(&ch->ch_rrd.rd_lock);
+
+	/* Peek the header first to size the rest of the packet. */
+	if ((rv = vmbus_ring_peek(&ch->ch_rrd, &cph, sizeof(cph))) != 0) {
+		mutex_exit(&ch->ch_rrd.rd_lock);
+		return rv;
+	}
+
+	offset = raw ? 0 : VMBUS_CHANPKT_GETLEN(cph.cph_hlen);
+	pktlen = VMBUS_CHANPKT_GETLEN(cph.cph_tlen) - offset;
+	if (pktlen > datalen) {
+		mutex_exit(&ch->ch_rrd.rd_lock);
+		aprint_error_dev(sc->sc_dev, "%s: pktlen %u datalen %u\n",
+		    __func__, pktlen, datalen);
+		return EINVAL;
+	}
+
+	rv = vmbus_ring_read(&ch->ch_rrd, data, pktlen, offset);
+	if (rv == 0) {
+		*rlen = pktlen;
+		*rid = cph.cph_tid;
+	}
+
+	mutex_exit(&ch->ch_rrd.rd_lock);
+
+	return rv;
+}
+
+/*
+ * Mask ring interrupts: tell the host not to signal us for this
+ * ring.  Fenced on both sides so the mask is ordered with respect
+ * to surrounding ring accesses.
+ */
+static inline void
+vmbus_ring_mask(struct vmbus_ring_data *rd)
+{
+
+	membar_sync();
+	rd->rd_ring->br_imask = 1;
+	membar_sync();
+}
+
+/*
+ * Unmask ring interrupts: allow the host to signal us again.
+ * Callers must re-check the ring for data that arrived while
+ * masked (see vmbus_channel_intr()).
+ */
+static inline void
+vmbus_ring_unmask(struct vmbus_ring_data *rd)
+{
+
+	membar_sync();
+	rd->rd_ring->br_imask = 0;
+	membar_sync();
+}
+
+/* Suppress host interrupts for the channel's RX ring. */
+static void
+vmbus_channel_pause(struct vmbus_channel *ch)
+{
+
+	vmbus_ring_mask(&ch->ch_rrd);
+}
+
+/*
+ * Re-enable host interrupts for the RX ring and return the number
+ * of bytes still readable, so the caller can detect data that
+ * arrived while the ring was masked.
+ */
+static uint32_t
+vmbus_channel_unpause(struct vmbus_channel *ch)
+{
+	uint32_t avail;
+
+	vmbus_ring_unmask(&ch->ch_rrd);
+	vmbus_ring_avail(&ch->ch_rrd, NULL, &avail);
+
+	return avail;
+}
+
+/* Return the number of bytes currently readable on the RX ring. */
+static uint32_t
+vmbus_channel_ready(struct vmbus_channel *ch)
+{
+	uint32_t avail;
+
+	vmbus_ring_avail(&ch->ch_rrd, NULL, &avail);
+
+	return avail;
+}
+
+/* How many PFNs can be referenced by the header */
+#define VMBUS_NPFNHDR	((VMBUS_MSG_DSIZE_MAX -	\
+	  sizeof(struct vmbus_chanmsg_gpadl_conn)) / sizeof(uint64_t))
+
+/* How many PFNs can be referenced by the body */
+#define VMBUS_NPFNBODY	((VMBUS_MSG_DSIZE_MAX -	\
+	  sizeof(struct vmbus_chanmsg_gpadl_subconn)) / sizeof(uint64_t))
+
+/*
+ * Connect a GPADL (Guest Physical Address Descriptor List) for the
+ * page-aligned DMA buffer "dma" of "buflen" bytes to channel "ch"
+ * and return the guest-assigned handle in *handle.  As many page
+ * frame numbers as fit are packed into the GPADL_CONN request; the
+ * remainder is sent in GPADL_SUBCONN follow-up messages.
+ */
+int
+vmbus_handle_alloc(struct vmbus_channel *ch, const struct hyperv_dma *dma,
+    uint32_t buflen, uint32_t *handle)
+{
+	struct vmbus_softc *sc = ch->ch_sc;
+	struct vmbus_chanmsg_gpadl_conn *hdr;
+	struct vmbus_chanmsg_gpadl_subconn *cmd;
+	struct vmbus_chanmsg_gpadl_connresp rsp;
+	struct vmbus_msg *msg;
+	int i, j, last, left, rv;
+	int bodylen = 0, ncmds = 0, pfn = 0;
+	uint64_t *frames;
+	paddr_t pa;
+	uint8_t *body = NULL;
+	/* Total number of pages to reference */
+	int total = atop(buflen);
+	/* Number of pages that will fit the header */
+	int inhdr = MIN(total, VMBUS_NPFNHDR);
+
+	KASSERT((buflen & PAGE_MASK) == 0);
+	KASSERT(buflen == (uint32_t)dma->map->dm_mapsize);
+
+	msg = pool_cache_get_paddr(sc->sc_msgpool, PR_WAITOK, &pa);
+	memset(msg, 0, sizeof(*msg));
+
+	/* Prepare array of frame addresses */
+	frames = kmem_zalloc(total * sizeof(*frames), KM_SLEEP);
+	for (i = 0, j = 0; i < dma->map->dm_nsegs && j < total; i++) {
+		bus_dma_segment_t *seg = &dma->map->dm_segs[i];
+		bus_addr_t addr = seg->ds_addr;
+
+		KASSERT((addr & PAGE_MASK) == 0);
+		KASSERT((seg->ds_len & PAGE_MASK) == 0);
+
+		while (addr < seg->ds_addr + seg->ds_len && j < total) {
+			frames[j++] = atop(addr);
+			addr += PAGE_SIZE;
+		}
+	}
+
+	msg->msg_req.hc_dsize = sizeof(struct vmbus_chanmsg_gpadl_conn) +
+	    inhdr * sizeof(uint64_t);
+	hdr = (struct vmbus_chanmsg_gpadl_conn *)msg->msg_req.hc_data;
+	msg->msg_rsp = &rsp;
+	msg->msg_rsplen = sizeof(rsp);
+
+	left = total - inhdr;
+
+	/* Allocate additional gpadl_body structures if required */
+	if (left > 0) {
+		/*
+		 * One SUBCONN message per VMBUS_NPFNBODY pages.  The
+		 * previous "left / N + left % N" over-estimated the
+		 * count and emitted empty trailing messages.
+		 */
+		ncmds = howmany(left, VMBUS_NPFNBODY);
+		bodylen = ncmds * VMBUS_MSG_DSIZE_MAX;
+		/* KM_SLEEP allocations cannot fail. */
+		body = kmem_zalloc(bodylen, KM_SLEEP);
+	}
+
+	*handle = atomic_add_int_nv(&sc->sc_handle, 1);
+
+	hdr->chm_hdr.chm_type = VMBUS_CHANMSG_GPADL_CONN;
+	hdr->chm_chanid = ch->ch_id;
+	hdr->chm_gpadl = *handle;
+
+	/* Single range for a contiguous buffer */
+	hdr->chm_range_cnt = 1;
+	hdr->chm_range_len = sizeof(struct vmbus_gpa_range) + total *
+	    sizeof(uint64_t);
+	hdr->chm_range.gpa_ofs = 0;
+	hdr->chm_range.gpa_len = buflen;
+
+	/* Fit as many pages as possible into the header */
+	for (i = 0; i < inhdr; i++)
+		hdr->chm_range.gpa_page[i] = frames[pfn++];
+
+	for (i = 0; i < ncmds; i++) {
+		cmd = (struct vmbus_chanmsg_gpadl_subconn *)(body +
+		    VMBUS_MSG_DSIZE_MAX * i);
+		cmd->chm_hdr.chm_type = VMBUS_CHANMSG_GPADL_SUBCONN;
+		cmd->chm_gpadl = *handle;
+		last = MIN(left, VMBUS_NPFNBODY);
+		for (j = 0; j < last; j++)
+			cmd->chm_gpa_page[j] = frames[pfn++];
+		left -= last;
+	}
+
+	rv = vmbus_start(sc, msg, pa);
+	if (rv != 0) {
+		DPRINTF("%s: GPADL_CONN failed\n", device_xname(sc->sc_dev));
+		goto out;
+	}
+	for (i = 0; i < ncmds; i++) {
+		int cmdlen = sizeof(*cmd);
+		cmd = (struct vmbus_chanmsg_gpadl_subconn *)(body +
+		    VMBUS_MSG_DSIZE_MAX * i);
+		/* Last element can be short */
+		if (i == ncmds - 1)
+			cmdlen += last * sizeof(uint64_t);
+		else
+			cmdlen += VMBUS_NPFNBODY * sizeof(uint64_t);
+		rv = vmbus_cmd(sc, cmd, cmdlen, NULL, 0, HCF_NOREPLY);
+		if (rv != 0) {
+			DPRINTF("%s: GPADL_SUBCONN (iteration %d/%d) failed "
+			    "with %d\n", device_xname(sc->sc_dev), i, ncmds,
+			    rv);
+			goto out;
+		}
+	}
+	rv = vmbus_reply(sc, msg);
+	if (rv != 0) {
+		DPRINTF("%s: GPADL allocation failed with %d\n",
+		    device_xname(sc->sc_dev), rv);
+	}
+
+ out:
+	if (bodylen > 0)
+		kmem_free(body, bodylen);
+	kmem_free(frames, total * sizeof(*frames));
+	pool_cache_put_paddr(sc->sc_msgpool, msg, pa);
+	if (rv)
+		return rv;
+
+	/* The host must acknowledge with the handle we proposed. */
+	KASSERT(*handle == rsp.chm_gpadl);
+
+	return 0;
+}
+
+/*
+ * Disconnect a GPADL handle from the channel (GPADL_DISCONN) and
+ * wait for the host's acknowledgement.
+ * NOTE(review): "rsp" reuses the request structure type; confirm
+ * no dedicated disconnect-response structure is required.
+ */
+void
+vmbus_handle_free(struct vmbus_channel *ch, uint32_t handle)
+{
+	struct vmbus_softc *sc = ch->ch_sc;
+	struct vmbus_chanmsg_gpadl_disconn cmd;
+	struct vmbus_chanmsg_gpadl_disconn rsp;
+	int rv;
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.chm_hdr.chm_type = VMBUS_CHANMSG_GPADL_DISCONN;
+	cmd.chm_chanid = ch->ch_id;
+	cmd.chm_gpadl = handle;
+
+	rv = vmbus_cmd(sc, &cmd, sizeof(cmd), &rsp, sizeof(rsp), 0);
+	if (rv) {
+		DPRINTF("%s: GPADL_DISCONN failed with %d\n",
+		    device_xname(sc->sc_dev), rv);
+	}
+}
+
+/*
+ * Autoconfiguration print function for vmbus children: identify
+ * unconfigured devices by their channel-type GUID string.
+ */
+static int
+vmbus_attach_print(void *aux, const char *name)
+{
+	const struct vmbus_attach_args *aa = aux;
+
+	if (name != NULL)
+		printf("\"%s\" at %s", aa->aa_ident, name);
+
+	return UNCONF;
+}
+
+/*
+ * Attach integration-component devices: offered channels without
+ * the monitor facility (CHF_MONITOR clear).
+ * NOTE(review): kmem_zalloc(..., KM_SLEEP) never returns NULL, so
+ * the ENOMEM branch below is dead code.
+ */
+static int
+vmbus_attach_icdevs(struct vmbus_softc *sc)
+{
+	struct vmbus_dev *dv;
+	struct vmbus_channel *ch;
+
+	SLIST_INIT(&sc->sc_icdevs);
+	mutex_init(&sc->sc_icdev_lock, MUTEX_DEFAULT, IPL_NET);
+
+	TAILQ_FOREACH(ch, &sc->sc_channels, ch_entry) {
+		if (ch->ch_state != VMBUS_CHANSTATE_OFFERED)
+			continue;
+		if (ch->ch_flags & CHF_MONITOR)
+			continue;
+
+		dv = kmem_zalloc(sizeof(*dv), KM_SLEEP);
+		if (dv == NULL) {
+			aprint_error_dev(sc->sc_dev,
+			    "failed to allocate ic device object\n");
+			return ENOMEM;
+		}
+		dv->dv_aa.aa_type = &ch->ch_type;
+		dv->dv_aa.aa_inst = &ch->ch_inst;
+		dv->dv_aa.aa_ident = ch->ch_ident;
+		dv->dv_aa.aa_chan = ch;
+		mutex_enter(&sc->sc_icdev_lock);
+		SLIST_INSERT_HEAD(&sc->sc_icdevs, dv, dv_entry);
+		mutex_exit(&sc->sc_icdev_lock);
+		ch->ch_dev = config_found_ia(sc->sc_dev, "hypervvmbus",
+		    &dv->dv_aa, vmbus_attach_print);
+	}
+	return 0;
+}
+
+/*
+ * Attach regular vmbus devices: offered channels that use the
+ * monitor facility (CHF_MONITOR set).
+ * NOTE(review): the kmem_zalloc(KM_SLEEP) NULL check is dead code.
+ */
+static int
+vmbus_attach_devices(struct vmbus_softc *sc)
+{
+	struct vmbus_dev *dv;
+	struct vmbus_channel *ch;
+
+	SLIST_INIT(&sc->sc_devs);
+	mutex_init(&sc->sc_dev_lock, MUTEX_DEFAULT, IPL_NET);
+
+	TAILQ_FOREACH(ch, &sc->sc_channels, ch_entry) {
+		if (ch->ch_state != VMBUS_CHANSTATE_OFFERED)
+			continue;
+		if (!(ch->ch_flags & CHF_MONITOR))
+			continue;
+
+		dv = kmem_zalloc(sizeof(*dv), KM_SLEEP);
+		if (dv == NULL) {
+			aprint_error_dev(sc->sc_dev,
+			    "failed to allocate device object\n");
+			return ENOMEM;
+		}
+		dv->dv_aa.aa_type = &ch->ch_type;
+		dv->dv_aa.aa_inst = &ch->ch_inst;
+		dv->dv_aa.aa_ident = ch->ch_ident;
+		dv->dv_aa.aa_chan = ch;
+		mutex_enter(&sc->sc_dev_lock);
+		SLIST_INSERT_HEAD(&sc->sc_devs, dv, dv_entry);
+		mutex_exit(&sc->sc_dev_lock);
+		ch->ch_dev = config_found_ia(sc->sc_dev, "hypervvmbus",
+		    &dv->dv_aa, vmbus_attach_print);
+	}
+	return 0;
+}
+
+MODULE(MODULE_CLASS_DRIVER, vmbus, "hyperv");
+
+#ifdef _MODULE
+#include "ioconf.c"
+#endif
+
+/*
+ * Module control entry point: wire the autoconfiguration tables
+ * in/out when built as a loadable module; no-op when built in.
+ */
+static int
+vmbus_modcmd(modcmd_t cmd, void *aux)
+{
+	int rv = 0;
+
+	switch (cmd) {
+	case MODULE_CMD_INIT:
+#ifdef _MODULE
+		rv = config_init_component(cfdriver_ioconf_vmbus,
+		    cfattach_ioconf_vmbus, cfdata_ioconf_vmbus);
+#endif
+		break;
+
+	case MODULE_CMD_FINI:
+#ifdef _MODULE
+		rv = config_fini_component(cfdriver_ioconf_vmbus,
+		    cfattach_ioconf_vmbus, cfdata_ioconf_vmbus);
+#endif
+		break;
+
+	default:
+		rv = ENOTTY;
+		break;
+	}
+
+	return rv;
+}
diff --git a/sys/arch/x86/x86/vmbusvar.h b/sys/arch/x86/x86/vmbusvar.h
new file mode 100644
index 00000000000..e129c0c989f
--- /dev/null
+++ b/sys/arch/x86/x86/vmbusvar.h
@@ -0,0 +1,272 @@
+/*	$NetBSD$	*/
+/*	$OpenBSD: hypervvar.h,v 1.13 2017/06/23 19:05:42 mikeb Exp $	*/
+
+/*
+ * Copyright (c) 2016 Mike Belopuhov <mike@esdenera.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _VMBUSVAR_H_
+#define _VMBUSVAR_H_
+
+#include <sys/param.h>
+#include <sys/device.h>
+#include <sys/atomic.h>
+#include <sys/bus.h>
+#include <sys/evcnt.h>
+#include <sys/mutex.h>
+#include <sys/pool.h>
+
+#include <x86/x86/hypervreg.h>
+#include <x86/x86/hypervvar.h>
+
+/* #define HYPERV_DEBUG */
+
+#ifdef HYPERV_DEBUG
+#define DPRINTF(x...)		printf(x)
+#else
+#define DPRINTF(x...)
+#endif
+
+typedef void (*vmbus_channel_callback_t)(void *);
+
+struct vmbus_softc;
+
+/*
+ * One in-flight request/response exchange with the hypervisor.
+ * The request is posted via the hypercall interface (hence the
+ * 8-byte alignment of msg_req) and the reply, if any, is copied
+ * into the caller-supplied msg_rsp buffer.
+ */
+struct vmbus_msg {
+	uint64_t			msg_flags;
+#define  MSGF_NOSLEEP			__BIT(0)
+#define  MSGF_NOQUEUE			__BIT(1)
+#define  MSGF_ORPHANED			__BIT(2)
+	struct hyperv_hypercall_postmsg_in msg_req __aligned(8);
+	void				*msg_rsp;	/* reply buffer */
+	size_t				msg_rsplen;	/* reply buffer size */
+	TAILQ_ENTRY(vmbus_msg)		msg_entry;
+};
+__CTASSERT((offsetof(struct vmbus_msg, msg_req) % 8) == 0);
+TAILQ_HEAD(vmbus_queue, vmbus_msg);
+
+/* A CHOFFER message queued for later channel creation. */
+struct vmbus_offer {
+	struct vmbus_chanmsg_choffer	co_chan;
+	SIMPLEQ_ENTRY(vmbus_offer)	co_entry;
+};
+SIMPLEQ_HEAD(vmbus_offers, vmbus_offer);
+
+/*
+ * Per-direction bookkeeping for a channel ring buffer.  rd_ring
+ * points into the GPADL-shared memory; rd_prod/rd_cons are local
+ * shadow indices into the rd_dsize-byte data area.
+ */
+struct vmbus_ring_data {
+	struct vmbus_bufring		*rd_ring;
+	uint32_t			rd_size;	/* incl. bufring header */
+	kmutex_t			rd_lock;
+	uint32_t			rd_prod;	/* local producer index */
+	uint32_t			rd_cons;	/* local consumer index */
+	uint32_t			rd_dsize;	/* data area size */
+};
+
+struct vmbus_channel;
+TAILQ_HEAD(vmbus_channels, vmbus_channel);
+
+/*
+ * One guest<->host communication pipe.  Primary channels
+ * (ch_subidx == 0) additionally anchor a list of sub-channels.
+ */
+struct vmbus_channel {
+	struct vmbus_softc		*ch_sc;
+	device_t			ch_dev;
+	u_int				ch_refs;	/* reference count */
+
+	int				ch_state;
+#define  VMBUS_CHANSTATE_OFFERED	1
+#define  VMBUS_CHANSTATE_OPENED		2
+#define  VMBUS_CHANSTATE_CLOSING	3
+#define  VMBUS_CHANSTATE_CLOSED		4
+	uint32_t			ch_id;		/* host relid */
+	uint16_t			ch_subidx;	/* 0 for primary */
+
+	struct hyperv_guid		ch_type;	/* device class GUID */
+	struct hyperv_guid		ch_inst;	/* instance GUID */
+	char				ch_ident[38];	/* printable type GUID */
+
+	/* Shared ring-buffer memory: TX half followed by RX half */
+	void				*ch_ring;
+	uint32_t			ch_ring_gpadl;
+	u_long				ch_ring_size;
+	struct hyperv_dma		ch_ring_dma;
+
+	struct vmbus_ring_data		ch_wrd;		/* write (TX) ring */
+	struct vmbus_ring_data		ch_rrd;		/* read (RX) ring */
+
+	int				ch_cpuid;	/* bound cpu index */
+	uint32_t			ch_vcpu;	/* Hyper-V VP id */
+
+	void				(*ch_handler)(void *);
+	void				*ch_ctx;	/* handler argument */
+	struct evcnt			ch_evcnt;
+	void				*ch_taskq;	/* deferred softint */
+
+	uint32_t			ch_flags;
+#define  CHF_BATCHED			__BIT(0)
+#define  CHF_MONITOR			__BIT(1)
+
+	/* Monitor-trigger location within the monitor page */
+	uint8_t				ch_mgroup;
+	uint8_t				ch_mindex;
+	struct hyperv_mon_param		*ch_monprm;
+	struct hyperv_dma		ch_monprm_dma;
+
+	TAILQ_ENTRY(vmbus_channel)	ch_entry;
+
+	/* Sub-channel bookkeeping (list valid on primary channels) */
+	kmutex_t			ch_subchannel_lock;
+	struct vmbus_channels		ch_subchannels;
+	u_int				ch_subchannel_count;
+	TAILQ_ENTRY(vmbus_channel)	ch_subentry;
+	struct vmbus_channel		*ch_primary_channel;
+};
+
+#define VMBUS_CHAN_ISPRIMARY(chan)	((chan)->ch_subidx == 0)
+
+/* Autoconfiguration attach arguments handed to vmbus children. */
+struct vmbus_attach_args {
+	struct hyperv_guid		*aa_type;	/* device class GUID */
+	struct hyperv_guid		*aa_inst;	/* instance GUID */
+	char				*aa_ident;	/* printable type GUID */
+	struct vmbus_channel		*aa_chan;
+};
+
+/* Wrapper linking attach arguments into the softc device lists. */
+struct vmbus_dev {
+	struct vmbus_attach_args	dv_aa;
+	SLIST_ENTRY(vmbus_dev)		dv_entry;
+};
+SLIST_HEAD(vmbus_devices, vmbus_dev);
+
+/* Per-CPU SynIC state; cache-line aligned to avoid false sharing. */
+struct vmbus_percpu_data {
+	void			*simp;	/* Synthetic Interrupt Message Page */
+	void			*siep;	/* Synthetic Interrupt Event Flags Page */
+	uint32_t		vcpuid;	/* Virtual cpuid */
+
+	/* Rarely used fields */
+	struct hyperv_dma	simp_dma;
+	struct hyperv_dma	siep_dma;
+} __aligned(CACHE_LINE_SIZE);
+
+/* Per-instance state of the vmbus bus driver. */
+struct vmbus_softc {
+	device_t		sc_dev;
+	bus_dma_tag_t		sc_dmat;
+
+	pool_cache_t		sc_msgpool;	/* vmbus_msg allocations */
+	void			(*sc_event_proc)(struct vmbus_softc *, int);
+	int			sc_channel_max;
+
+	void			*sc_msg_sih;	/* message softint handle */
+
+	u_long			*sc_wevents;	/* Write events */
+	u_long			*sc_revents;	/* Read events */
+	struct vmbus_mnf	*sc_monitor[2];	/* monitor pages (in/out) */
+	struct vmbus_percpu_data sc_percpu[MAXCPUS];
+
+	/*
+	 * Rarely used fields
+	 */
+	int			sc_idtvec;	/* interrupt vector */
+	uint32_t		sc_flags;
+#define  VMBUS_SCFLAG_SYNIC		__BIT(0)
+#define  VMBUS_SCFLAG_CONNECTED		__BIT(1)
+#define  VMBUS_SCFLAG_OFFERS_DELIVERED	__BIT(2)
+	uint32_t		sc_proto;	/* negotiated vmbus version */
+
+	/* Shared memory for Write/Read events */
+	void			*sc_events;
+	struct hyperv_dma	sc_events_dma;
+
+	struct hyperv_dma	sc_monitor_dma[2];
+
+	struct vmbus_queue 	sc_reqs;	/* Request queue */
+	kmutex_t		sc_req_lock;
+	struct vmbus_queue 	sc_rsps;	/* Response queue */
+	kmutex_t		sc_rsp_lock;
+
+	struct vmbus_offers	sc_offers;	/* pending CHOFFERs */
+	kmutex_t		sc_offer_lock;
+
+	struct vmbus_channels	sc_channels;	/* all known channels */
+	kmutex_t		sc_channel_lock;
+
+	/* GPADL handle generator (see vmbus_handle_alloc()) */
+	volatile uint32_t	sc_handle;
+
+	struct vmbus_devices	sc_icdevs;	/* integration components */
+	kmutex_t		sc_icdev_lock;
+
+	struct vmbus_devices	sc_devs;	/* monitored devices */
+	kmutex_t		sc_dev_lock;
+};
+
+/*
+ * Atomically clear bit "b" in the bitmap at "p".  The shifted
+ * constant is unsigned: "1 << 31" on a signed int is undefined
+ * behaviour.
+ */
+static __inline void
+clear_bit(u_int b, volatile void *p)
+{
+	atomic_and_32(((volatile u_int *)p) + (b >> 5), ~(1U << (b & 0x1f)));
+}
+
+/*
+ * Atomically set bit "b" in the bitmap at "p".  Unsigned shift
+ * avoids undefined behaviour for bit 31.
+ */
+static __inline void
+set_bit(u_int b, volatile void *p)
+{
+	atomic_or_32(((volatile u_int *)p) + (b >> 5), 1U << (b & 0x1f));
+}
+
+/*
+ * Test bit "b" in the bitmap at "p" (non-atomic read).  Unsigned
+ * shift avoids undefined behaviour for bit 31.
+ */
+static __inline int
+test_bit(u_int b, volatile void *p)
+{
+	return !!(((volatile u_int *)p)[b >> 5] & (1U << (b & 0x1f)));
+}
+
+extern const struct hyperv_guid hyperv_guid_network;
+extern const struct hyperv_guid hyperv_guid_ide;
+extern const struct hyperv_guid hyperv_guid_scsi;
+extern const struct hyperv_guid hyperv_guid_shutdown;
+extern const struct hyperv_guid hyperv_guid_timesync;
+extern const struct hyperv_guid hyperv_guid_heartbeat;
+extern const struct hyperv_guid hyperv_guid_kvp;
+extern const struct hyperv_guid hyperv_guid_vss;
+extern const struct hyperv_guid hyperv_guid_dynmem;
+extern const struct hyperv_guid hyperv_guid_mouse;
+extern const struct hyperv_guid hyperv_guid_kbd;
+extern const struct hyperv_guid hyperv_guid_video;
+extern const struct hyperv_guid hyperv_guid_fc;
+extern const struct hyperv_guid hyperv_guid_fcopy;
+extern const struct hyperv_guid hyperv_guid_pcie;
+extern const struct hyperv_guid hyperv_guid_netdir;
+extern const struct hyperv_guid hyperv_guid_rdesktop;
+extern const struct hyperv_guid hyperv_guid_avma1;
+extern const struct hyperv_guid hyperv_guid_avma2;
+extern const struct hyperv_guid hyperv_guid_avma3;
+extern const struct hyperv_guid hyperv_guid_avma4;
+
+int	vmbus_match(device_t, cfdata_t, void *);
+int	vmbus_attach(struct vmbus_softc *);
+int	vmbus_detach(struct vmbus_softc *, int);
+
+int	vmbus_handle_alloc(struct vmbus_channel *, const struct hyperv_dma *,
+	    uint32_t, uint32_t *);
+void	vmbus_handle_free(struct vmbus_channel *, uint32_t);
+int	vmbus_channel_open(struct vmbus_channel *, size_t, void *, size_t,
+	    vmbus_channel_callback_t, void *);
+int	vmbus_channel_close(struct vmbus_channel *);
+int	vmbus_channel_close_direct(struct vmbus_channel *);
+int	vmbus_channel_setdeferred(struct vmbus_channel *, const char *);
+void	vmbus_channel_schedule(struct vmbus_channel *);
+int	vmbus_channel_send(struct vmbus_channel *, void *, uint32_t, uint64_t,
+	    int, uint32_t);
+int	vmbus_channel_send_sgl(struct vmbus_channel *, struct vmbus_gpa *,
+	    uint32_t, void *, uint32_t, uint64_t);
+int	vmbus_channel_send_prpl(struct vmbus_channel *,
+	    struct vmbus_gpa_range *, uint32_t, void *, uint32_t, uint64_t);
+int	vmbus_channel_recv(struct vmbus_channel *, void *, uint32_t, uint32_t *,
+	    uint64_t *, int);
+void	vmbus_channel_cpu_set(struct vmbus_channel *, int);
+void	vmbus_channel_cpu_rr(struct vmbus_channel *);
+
+struct vmbus_channel **
+	vmbus_subchannel_get(struct vmbus_channel *, int);
+void	vmbus_subchannel_put(struct vmbus_channel **, int);
+
+#endif	/* _VMBUSVAR_H_ */
-- 
2.17.0

