diff --git a/dlls/ntdll/Makefile.in b/dlls/ntdll/Makefile.in
index 37bd6c86e31..89ee0286daa 100644
--- a/dlls/ntdll/Makefile.in
+++ b/dlls/ntdll/Makefile.in
@@ -49,6 +49,7 @@ SOURCES = \
 	unix/debug.c \
 	unix/env.c \
 	unix/file.c \
 	unix/loader.c \
 	unix/loadorder.c \
+	unix/msync.c \
 	unix/process.c \
diff --git a/dlls/ntdll/unix/loader.c b/dlls/ntdll/unix/loader.c
index 0dfee19f714..e4d3d3c8e99 100644
--- a/dlls/ntdll/unix/loader.c
+++ b/dlls/ntdll/unix/loader.c
@@ -90,6 +90,7 @@
 #include "winioctl.h"
 #include "winternl.h"
 #include "unix_private.h"
+#include "msync.h"
 #include "wine/list.h"
 #include "ntsyscalls.h"
 #include "wine/debug.h"
@@ -1900,6 +1901,7 @@ static void start_main_thread(void)
     signal_alloc_thread( teb );
     dbg_init();
     startup_info_size = server_init_process();
+    msync_init();
     virtual_map_user_shared_data();
     init_cpu_info();
     init_files();
diff --git a/dlls/ntdll/unix/msync.c b/dlls/ntdll/unix/msync.c
new file mode 100644
index 00000000000..f5f1f0aa82d
--- /dev/null
+++ b/dlls/ntdll/unix/msync.c
@@ -0,0 +1,1710 @@
+/*
+ * mach semaphore-based synchronization objects
+ *
+ * Copyright (C) 2018 Zebediah Figura
+ * Copyright (C) 2023 Marc-Aurel Zent
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#if 0
+#pragma makedep unix
+#endif
+
+#include "config.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#ifdef HAVE_SYS_STAT_H
+# include <sys/stat.h>
+#endif
+#ifdef HAVE_SYS_SYSCALL_H
+# include <sys/syscall.h>
+#endif
+#ifdef __APPLE__
+# include <mach/mach_init.h>
+# include <mach/mach_port.h>
+# include <mach/message.h>
+# include <mach/port.h>
+# include <mach/task.h>
+# include <mach/semaphore.h>
+# include <mach/mach_error.h>
+# include <servers/bootstrap.h>
+# include <os/lock.h>
+#endif
+#include <dlfcn.h>
+#include <sched.h>
+#include <unistd.h>
+
+#include "ntstatus.h"
+#define WIN32_NO_STATUS
+#include "windef.h"
+#include "winternl.h"
+#include "wine/debug.h"
+#include "wine/server.h"
+
+#include "unix_private.h"
+#include "msync.h"
+
+WINE_DEFAULT_DEBUG_CHANNEL(msync);
+
+static LONGLONG update_timeout( ULONGLONG end )
+{
+    LARGE_INTEGER now;
+    LONGLONG timeleft;
+
+    NtQuerySystemTime( &now );
+    timeleft = end - now.QuadPart;
+    if (timeleft < 0) timeleft = 0;
+    return timeleft;
+}
+
+static inline mach_timespec_t convert_to_mach_time( LONGLONG win32_time )
+{
+    mach_timespec_t ret;
+
+    ret.tv_sec = win32_time / (ULONGLONG)TICKSPERSEC;
+    ret.tv_nsec = (win32_time % TICKSPERSEC) * 100;
+    return ret;
+}
+
+#define UL_COMPARE_AND_WAIT_SHARED  0x3
+#define ULF_WAKE_ALL                0x00000100
+#define ULF_NO_ERRNO                0x01000000
+extern int __ulock_wake( uint32_t operation, void *addr, uint64_t wake_value );
+
+typedef int (*__ulock_wait2_ptr_t)( uint32_t operation, void *addr, uint64_t value,
+                                    uint64_t timeout_ns, uint64_t value2 );
+static __ulock_wait2_ptr_t __ulock_wait2;
+
+/*
+ * It is faster to do the syscall directly and inline everything; taken and
+ * slightly adapted from xnu/libsyscall/mach/mach_msg.c
+ */
+
+#define LIBMACH_OPTIONS64 (MACH_SEND_INTERRUPT|MACH_RCV_INTERRUPT)
+#define MACH64_SEND_MQ_CALL 0x0000000400000000ull
+
+typedef mach_msg_return_t (*mach_msg2_trap_ptr_t)( void *data, uint64_t options,
+    uint64_t msgh_bits_and_send_size, uint64_t msgh_remote_and_local_port,
+    uint64_t msgh_voucher_and_id, uint64_t desc_count_and_rcv_name,
+    uint64_t rcv_size_and_priority, uint64_t timeout );
+
+static mach_msg2_trap_ptr_t mach_msg2_trap;
+
+static inline mach_msg_return_t mach_msg2_internal( void *data, uint64_t option64, uint64_t msgh_bits_and_send_size,
+    uint64_t msgh_remote_and_local_port, uint64_t msgh_voucher_and_id, uint64_t desc_count_and_rcv_name,
+    uint64_t rcv_size_and_priority, uint64_t timeout)
+{
+    mach_msg_return_t mr;
+
+    mr = mach_msg2_trap( data, option64 & ~LIBMACH_OPTIONS64, msgh_bits_and_send_size,
+             msgh_remote_and_local_port, msgh_voucher_and_id, desc_count_and_rcv_name,
+             rcv_size_and_priority, timeout );
+
+    if (mr == MACH_MSG_SUCCESS)
+        return MACH_MSG_SUCCESS;
+
+    while (mr == MACH_SEND_INTERRUPTED)
+        mr = mach_msg2_trap( data, option64 & ~LIBMACH_OPTIONS64, msgh_bits_and_send_size,
+                 msgh_remote_and_local_port, msgh_voucher_and_id, desc_count_and_rcv_name,
+                 rcv_size_and_priority, timeout );
+
+    while (mr == MACH_RCV_INTERRUPTED)
+        mr = mach_msg2_trap( data, option64 & ~LIBMACH_OPTIONS64, msgh_bits_and_send_size & 0xffffffffull,
+                 msgh_remote_and_local_port, msgh_voucher_and_id, desc_count_and_rcv_name,
+                 rcv_size_and_priority, timeout);
+
+    return mr;
+}
+
+/* For older versions of macOS we need to provide a fallback in case there is no mach_msg2... */
+extern mach_msg_return_t mach_msg_trap( mach_msg_header_t *msg, mach_msg_option_t option,
+        mach_msg_size_t send_size, mach_msg_size_t rcv_size, mach_port_name_t rcv_name, mach_msg_timeout_t timeout,
+        mach_port_name_t notify );
+
+static inline mach_msg_return_t mach_msg2( mach_msg_header_t *data, uint64_t option64,
+    mach_msg_size_t send_size, mach_msg_size_t rcv_size, mach_port_t rcv_name, uint64_t timeout,
+    uint32_t priority)
+{
+    mach_msg_base_t *base;
+    mach_msg_size_t descriptors;
+
+    if (!mach_msg2_trap)
+        return mach_msg_trap( data, (mach_msg_option_t)option64, send_size,
+                              rcv_size, rcv_name, timeout, priority );
+
+    base = (mach_msg_base_t *)data;
+
+    if ((option64 & MACH_SEND_MSG) &&
+        (base->header.msgh_bits & MACH_MSGH_BITS_COMPLEX))
+        descriptors = base->body.msgh_descriptor_count;
+    else
+        descriptors = 0;
+
+#define MACH_MSG2_SHIFT_ARGS(lo, hi) ((uint64_t)hi << 32 | (uint32_t)lo)
+    return mach_msg2_internal(data, option64 | MACH64_SEND_MQ_CALL,
+               MACH_MSG2_SHIFT_ARGS(data->msgh_bits, send_size),
+               MACH_MSG2_SHIFT_ARGS(data->msgh_remote_port, data->msgh_local_port),
+               MACH_MSG2_SHIFT_ARGS(data->msgh_voucher_port, data->msgh_id),
+               MACH_MSG2_SHIFT_ARGS(descriptors, rcv_name),
+               MACH_MSG2_SHIFT_ARGS(rcv_size, priority), timeout);
+#undef MACH_MSG2_SHIFT_ARGS
+}
+
+/* this is a lot, but running out cripples performance */
+#define MAX_POOL_SEMAPHORES 1024
+#define POOL_SHRINK_THRESHOLD 30
+#define POOL_SHRINK_COUNT 10
+
+struct semaphore_memory_pool
+{
+    semaphore_t semaphores[MAX_POOL_SEMAPHORES];
+    semaphore_t *free_semaphores[MAX_POOL_SEMAPHORES];
+    unsigned int count;
+    unsigned int total;
+    os_unfair_lock lock;
+};
+
+static struct semaphore_memory_pool *pool;
+
+static void semaphore_pool_init(void)
+{
+    unsigned int i;
+
+    pool = malloc( sizeof(struct semaphore_memory_pool) );
+
+    pool->lock = OS_UNFAIR_LOCK_INIT;
+
+    for (i = 0; i < MAX_POOL_SEMAPHORES; i++)
+    {
+        pool->free_semaphores[i] = &pool->semaphores[i];
+    }
+
+    pool->count = 0;
+    pool->total = 0;
+}
+
+static inline semaphore_t *semaphore_pool_alloc(void)
+{
+    semaphore_t *new_semaphore;
+    kern_return_t kr;
+
+    os_unfair_lock_lock(&pool->lock);
+
+    if (pool->count == 0)
+    {
+        if (pool->total < MAX_POOL_SEMAPHORES)
+        {
+            TRACE("Dynamically growing semaphore pool\n");
+            kr = semaphore_create(mach_task_self(), &pool->semaphores[pool->total], SYNC_POLICY_FIFO, 0);
+            if (kr != KERN_SUCCESS)
+                ERR("Cannot create dynamic semaphore: %#x %s\n", kr, mach_error_string(kr));
+
+            new_semaphore = &pool->semaphores[pool->total];
+            pool->total++;
+
+            os_unfair_lock_unlock(&pool->lock);
+
+            return new_semaphore;
+        }
+        else
+        {
+            os_unfair_lock_unlock(&pool->lock);
+
+            WARN("Semaphore pool exhausted, consider increasing MAX_POOL_SEMAPHORES\n");
+            new_semaphore = malloc(sizeof(semaphore_t));
+            kr = semaphore_create(mach_task_self(), new_semaphore, SYNC_POLICY_FIFO, 0);
+            if (kr != KERN_SUCCESS)
+                ERR("Cannot create dynamic semaphore: %#x %s\n", kr, mach_error_string(kr));
+
+            return new_semaphore;
+        }
+    }
+
+    new_semaphore = pool->free_semaphores[pool->count - 1];
+    pool->count--;
+
+    os_unfair_lock_unlock(&pool->lock);
+
+    return new_semaphore;
+}
+
+static inline void semaphore_pool_free(semaphore_t *sem)
+{
+    int i;
+
+    os_unfair_lock_lock(&pool->lock);
+
+    if (sem < pool->semaphores || sem >= pool->semaphores + MAX_POOL_SEMAPHORES)
+    {
+        os_unfair_lock_unlock(&pool->lock);
+
+        semaphore_destroy(mach_task_self(), *sem);
+        free(sem);
+
+        return;
+    }
+
+    if (pool->count >= POOL_SHRINK_THRESHOLD)
+    {
+        TRACE("Dynamically shrinking semaphore pool\n");
+        for (i = 0; i < POOL_SHRINK_COUNT; i++)
+        {
+            semaphore_destroy(mach_task_self(), *sem);
+            pool->total--;
+        }
+    }
+    else
+    {
+        pool->free_semaphores[pool->count] = sem;
+        pool->count++;
+    }
+
+    os_unfair_lock_unlock(&pool->lock);
+}
+
+struct msync
+{
+    void *shm;              /* pointer to shm section */
+    enum msync_type type;
+    unsigned int shm_idx;
+};
+
+typedef struct
+{
+    mach_msg_header_t header;
+    mach_msg_body_t body;
+    mach_msg_port_descriptor_t descriptor;
+} mach_register_message_prolog_t;
+
+typedef struct
+{
+    mach_register_message_prolog_t prolog;
+    unsigned int shm_idx[MAXIMUM_WAIT_OBJECTS + 1];
+} mach_register_message_t;
+
+typedef struct
+{
+    mach_msg_header_t header;
+    unsigned int shm_idx[MAXIMUM_WAIT_OBJECTS + 1];
+} mach_unregister_message_t;
+
+static mach_port_t server_port;
+
+static const mach_msg_bits_t msgh_bits_complex_send = MACH_MSGH_BITS_SET(
+                MACH_MSG_TYPE_COPY_SEND, 0, 0, MACH_MSGH_BITS_COMPLEX);
+
+static const mach_msg_bits_t msgh_bits_send = MACH_MSGH_BITS_REMOTE(MACH_MSG_TYPE_COPY_SEND);
+
+static inline mach_msg_return_t server_register_wait( semaphore_t sem, unsigned int msgh_id,
+                                         struct msync **wait_objs, const int count )
+{
+    int i, is_mutex;
+    mach_msg_return_t mr;
+    __thread static mach_register_message_t message;
+
+    message.prolog.header.msgh_remote_port = server_port;
+    message.prolog.header.msgh_bits = msgh_bits_complex_send;
+    message.prolog.header.msgh_id = msgh_id;
+
+    message.prolog.body.msgh_descriptor_count = 1;
+
+    message.prolog.descriptor.name = sem;
+    message.prolog.descriptor.disposition = MACH_MSG_TYPE_COPY_SEND;
+    message.prolog.descriptor.type = MACH_MSG_PORT_DESCRIPTOR;
+
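+    /* Pass each object's shm index, flagging mutexes in bit 28, and bump its
+     * registered-wait count (slot index 3) before sending. */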
+    for (i = 0; i < count; i++)
+    {
+        is_mutex = wait_objs[i]->type == MSYNC_MUTEX ? 1 : 0;
+        message.shm_idx[i] = wait_objs[i]->shm_idx | (is_mutex << 28);
+        __atomic_add_fetch( (int *)(wait_objs[i]->shm) + 3, 1, __ATOMIC_SEQ_CST);
+    }
+
+    message.prolog.header.msgh_size = sizeof(mach_register_message_prolog_t) +
+                                      count * sizeof(unsigned int);
+
+    mr = mach_msg2( (mach_msg_header_t *)&message, MACH_SEND_MSG, message.prolog.header.msgh_size,
+                     0, MACH_PORT_NULL, MACH_MSG_TIMEOUT_NONE, 0 );
+
+    if (mr != MACH_MSG_SUCCESS)
+        ERR("Failed to send server register wait: %#x\n", mr);
+
+    return mr;
+}
+
+static inline void server_remove_wait( unsigned int msgh_id, struct msync **wait_objs, const int count )
+{
+    int i;
+    mach_msg_return_t mr;
+    __thread static mach_unregister_message_t message;
+
+    message.header.msgh_remote_port = server_port;
+    message.header.msgh_bits = msgh_bits_send;
+    message.header.msgh_id = msgh_id;
+
+    for (i = 0; i < count; i++)
+    {
+        int old_val, new_val;
+        do
+        {
+            old_val = __atomic_load_n( (int *)(wait_objs[i]->shm) + 3, __ATOMIC_SEQ_CST );
+            if (old_val <= 0) break;
+            new_val = old_val - 1;
+        } while (!__atomic_compare_exchange_n( (int *)(wait_objs[i]->shm) + 3, &old_val,
+                                               new_val, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST ));
+
+        message.shm_idx[i] = wait_objs[i]->shm_idx;
+    }
+
+    message.header.msgh_size = sizeof(mach_msg_header_t) +
+                               count * sizeof(unsigned int);
+
+    mr = mach_msg2( (mach_msg_header_t *)&message, MACH_SEND_MSG, message.header.msgh_size,
+                     0, MACH_PORT_NULL, MACH_MSG_TIMEOUT_NONE, 0 );
+
+    if (mr != MACH_MSG_SUCCESS)
+        ERR("Failed to send server remove wait: %#x\n", mr);
+}
+
+static NTSTATUS destroyed_wait( ULONGLONG *end )
+{
+    if (end)
+    {
+        usleep( update_timeout( *end ) / 10 );
+        return STATUS_TIMEOUT;
+    }
+    pause();
+    return STATUS_PENDING;
+}
+
+static inline int is_destroyed( struct msync **objs, int count)
+{
+    int i;
+
+    for (i = 0; i < count; i++)
+        if (__atomic_load_n( (int *)objs[i]->shm + 2, __ATOMIC_RELAXED ))
+            return 0;
+
+    return 1;
+}
+
+static inline NTSTATUS msync_wait_single( struct msync *wait_obj,
+                                          ULONGLONG *end, int tid )
+{
+    int ret, val = 0;
+    void *addr = wait_obj->shm;
+    ULONGLONG ns_timeleft = 0;
+
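+    /* For mutexes: a stored tid of 0 (free) or ~0 (abandoned) means the mutex
+     * can be acquired, so return STATUS_PENDING and let the caller retry the
+     * grab; otherwise block until the owning tid changes. */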
+    do
+    {
+        if (wait_obj->type == MSYNC_MUTEX)
+        {
+            val = __atomic_load_n( (int *)addr, __ATOMIC_ACQUIRE );
+            if (!val || val == ~0)
+                val = tid;
+        }
+
+        if (__atomic_load_n( (int *)addr, __ATOMIC_ACQUIRE ) != val)
+            return STATUS_PENDING;
+
+        if (end)
+        {
+            ns_timeleft = update_timeout( *end ) * 100;
+            if (!ns_timeleft) return STATUS_TIMEOUT;
+        }
+        ret = __ulock_wait2( UL_COMPARE_AND_WAIT_SHARED | ULF_NO_ERRNO, addr, val, ns_timeleft, 0 );
+    } while (ret == -EINTR);
+
+    if (ret == -ETIMEDOUT)
+        return STATUS_TIMEOUT;
+
+    if (is_destroyed( &wait_obj, 1 ))
+        return destroyed_wait( end );
+
+    return STATUS_SUCCESS;
+}
+
+static inline int resize_wait_objs( struct msync **wait_objs, struct msync **objs, int count )
+{
+    int read_index, write_index = 0;
+
+    for (read_index = 0; read_index < count; read_index++)
+    {
+        if (wait_objs[read_index] &&
+            __atomic_load_n( (int *)wait_objs[read_index]->shm + 2, __ATOMIC_RELAXED ))
+        {
+            objs[write_index] = wait_objs[read_index];
+            write_index++;
+        }
+    }
+
+    return write_index;
+}
+
+static inline int check_shm_contention( struct msync **wait_objs,
+                                        int count, int tid )
+{
+    int i, val;
+
+    for (i = 0; i < count; i++)
+    {
+        val = __atomic_load_n((int *)wait_objs[i]->shm, __ATOMIC_SEQ_CST);
+        if (wait_objs[i]->type == MSYNC_MUTEX)
+        {
+            if (val == 0 || val == ~0 || val == tid) return 1;
+        }
+        else
+        {
+            if (val != 0)  return 1;
+        }
+    }
+    return 0;
+}
+
+static NTSTATUS msync_wait_multiple( struct msync **wait_objs,
+                                     int count, ULONGLONG *end, int tid )
+{
+    semaphore_t *sem;
+    kern_return_t kr;
+    mach_msg_return_t mr;
+    unsigned int msgh_id;
+    __thread static struct msync *objs[MAXIMUM_WAIT_OBJECTS + 1];
+
+    count = resize_wait_objs( wait_objs, objs, count );
+
+    if (count == 1 && __ulock_wait2) return msync_wait_single( objs[0], end, tid );
+    if (!count) return destroyed_wait( end );
+
+    if (check_shm_contention( objs, count, tid ))
+        return STATUS_PENDING;
+
+    sem = semaphore_pool_alloc();
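+    /* Pack the waiting thread id and object count into the Mach message id;
+     * the same id is passed to server_remove_wait() below. */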
+    msgh_id = (tid << 8) | count;
+    mr = server_register_wait( *sem, msgh_id, objs, count );
+
+    if (mr != MACH_MSG_SUCCESS)
+    {
+        /* The OS failed to send the Mach message, either because the
+         * wineserver process is gone or because the semaphore returned by
+         * semaphore_pool_alloc is no longer valid.
+         * To avoid keeping a dead port in the pool, recreate the semaphore here.
+         * Worst case, this behaves effectively like a spinlock. */
+        semaphore_destroy( mach_task_self(), *sem) ;
+        kr = semaphore_create( mach_task_self(), sem, SYNC_POLICY_FIFO, 0 );
+
+        if (kr != KERN_SUCCESS)
+        {
+            ERR("Cannot create semaphore: %#x %s\n", kr, mach_error_string(kr));
+            semaphore_pool_free( sem );
+            return STATUS_PENDING;
+        }
+
+        mr = server_register_wait( *sem, msgh_id, objs, count );
+
+        if (mr != MACH_MSG_SUCCESS)
+        {
+            semaphore_pool_free( sem );
+            return STATUS_PENDING;
+        }
+    }
+
+    do
+    {
+        if (end)
+            kr = semaphore_timedwait( *sem,
+                        convert_to_mach_time( update_timeout( *end ) ) );
+        else
+            kr = semaphore_wait( *sem );
+    } while (kr == KERN_ABORTED);
+
+    semaphore_pool_free( sem );
+
+    if (is_destroyed( objs, count ))
+        return destroyed_wait( end );
+
+    server_remove_wait( msgh_id, objs, count );
+
+    switch (kr) {
+        case KERN_SUCCESS:
+            return STATUS_SUCCESS;
+        case KERN_OPERATION_TIMED_OUT:
+            if (check_shm_contention( objs, count, tid ))
+                return STATUS_PENDING;
+            return STATUS_TIMEOUT;
+        case KERN_TERMINATED:
+            return destroyed_wait( end );
+        default:
+            ERR("Unexpected kernel return code: %#x %s\n", kr, mach_error_string( kr ));
+            return STATUS_PENDING;
+    }
+}
+
+int do_msync(void)
+{
+#ifdef __APPLE__
+    static int do_msync_cached = -1;
+
+    if (do_msync_cached == -1)
+        do_msync_cached = getenv("WINEMSYNC") && atoi(getenv("WINEMSYNC"));
+
+    return do_msync_cached;
+#else
+    static int once;
+    if (!once++)
+        FIXME("mach semaphores not supported on this platform.\n");
+    return 0;
+#endif
+}
+
+struct semaphore
+{
+    int count;
+    int max;
+};
+C_ASSERT(sizeof(struct semaphore) == 8);
+
+struct event
+{
+    int signaled;
+    int unused;
+};
+C_ASSERT(sizeof(struct event) == 8);
+
+struct mutex
+{
+    int tid;
+    int count;  /* recursion count */
+};
+C_ASSERT(sizeof(struct mutex) == 8);
+
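+/* Each object occupies a 16-byte slot in the shared memory section: two ints
+ * of object data as defined above, an alive flag at index 2 (see
+ * is_destroyed()), and a count of waits registered with the server at index 3
+ * (see server_register_wait()). */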
+static char shm_name[29];
+static int shm_fd;
+static void **shm_addrs;
+static int shm_addrs_size;  /* length of the allocated shm_addrs array */
+static long pagesize;
+
+static os_unfair_lock shm_addrs_lock = OS_UNFAIR_LOCK_INIT;
+
+static void *get_shm( unsigned int idx )
+{
+    int entry  = (idx * 16) / pagesize;
+    int offset = (idx * 16) % pagesize;
+    void *ret;
+
+    os_unfair_lock_lock( &shm_addrs_lock );
+
+    if (entry >= shm_addrs_size)
+    {
+        int new_size = max(shm_addrs_size * 2, entry + 1);
+
+        if (!(shm_addrs = realloc( shm_addrs, new_size * sizeof(shm_addrs[0]) )))
+            ERR("Failed to grow shm_addrs array to size %d.\n", shm_addrs_size);
+        memset( shm_addrs + shm_addrs_size, 0, (new_size - shm_addrs_size) * sizeof(shm_addrs[0]) );
+        shm_addrs_size = new_size;
+    }
+
+    if (!shm_addrs[entry])
+    {
+        void *addr = mmap( NULL, pagesize, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, entry * pagesize );
+        if (addr == (void *)-1)
+            ERR("Failed to map page %d (offset %#lx).\n", entry, entry * pagesize);
+
+        TRACE("Mapping page %d at %p.\n", entry, addr);
+
+        if (__sync_val_compare_and_swap( &shm_addrs[entry], 0, addr ))
+            munmap( addr, pagesize ); /* someone beat us to it */
+    }
+
+    ret = (void *)((unsigned long)shm_addrs[entry] + offset);
+
+    os_unfair_lock_unlock( &shm_addrs_lock );
+
+    return ret;
+}
+
+/* We'd like lookup to be fast. To that end, we use a static list indexed by handle.
+ * This is copied and adapted from the fd cache code. */
+
+#define MSYNC_LIST_BLOCK_SIZE  (65536 / sizeof(struct msync))
+#define MSYNC_LIST_ENTRIES     256
+
+static struct msync *msync_list[MSYNC_LIST_ENTRIES];
+static struct msync msync_list_initial_block[MSYNC_LIST_BLOCK_SIZE];
+
+static inline UINT_PTR handle_to_index( HANDLE handle, UINT_PTR *entry )
+{
+    UINT_PTR idx = (((UINT_PTR)handle) >> 2) - 1;
+    *entry = idx / MSYNC_LIST_BLOCK_SIZE;
+    return idx % MSYNC_LIST_BLOCK_SIZE;
+}
+
+static struct msync *add_to_list( HANDLE handle, enum msync_type type, unsigned int shm_idx )
+{
+    UINT_PTR entry, idx = handle_to_index( handle, &entry );
+    void *shm = get_shm( shm_idx );
+
+    if (entry >= MSYNC_LIST_ENTRIES)
+    {
+        FIXME( "too many allocated handles, not caching %p\n", handle );
+        return FALSE;
+    }
+
+    if (!msync_list[entry])  /* do we need to allocate a new block of entries? */
+    {
+        if (!entry) msync_list[0] = msync_list_initial_block;
+        else
+        {
+            void *ptr = anon_mmap_alloc( MSYNC_LIST_BLOCK_SIZE * sizeof(struct msync),
+                                         PROT_READ | PROT_WRITE );
+            if (ptr == MAP_FAILED) return FALSE;
+            msync_list[entry] = ptr;
+        }
+    }
+
+    if (!__sync_val_compare_and_swap((int *)&msync_list[entry][idx].type, 0, type ))
+    {
+        msync_list[entry][idx].shm = shm;
+        msync_list[entry][idx].shm_idx = shm_idx;
+    }
+
+    return &msync_list[entry][idx];
+}
+
+static struct msync *get_cached_object( HANDLE handle )
+{
+    UINT_PTR entry, idx = handle_to_index( handle, &entry );
+
+    if (entry >= MSYNC_LIST_ENTRIES || !msync_list[entry]) return NULL;
+    if (!msync_list[entry][idx].type) return NULL;
+
+    return &msync_list[entry][idx];
+}
+
+/* Gets an object. This is either a proper msync object (i.e. an event,
+ * semaphore, etc. created using create_msync) or a generic synchronizable
+ * server-side object which the server will signal (e.g. a process, thread,
+ * message queue, etc.) */
+static NTSTATUS get_object( HANDLE handle, struct msync **obj )
+{
+    NTSTATUS ret = STATUS_SUCCESS;
+    unsigned int shm_idx = 0;
+    enum msync_type type;
+
+    if ((*obj = get_cached_object( handle ))) return STATUS_SUCCESS;
+
+    if ((INT_PTR)handle < 0)
+    {
+        /* We can deal with pseudo-handles, but it's just easier this way */
+        return STATUS_NOT_IMPLEMENTED;
+    }
+
+    /* We need to try grabbing it from the server. */
+    SERVER_START_REQ( get_msync_idx )
+    {
+        req->handle = wine_server_obj_handle( handle );
+        if (!(ret = wine_server_call( req )))
+        {
+            shm_idx = reply->shm_idx;
+            type    = reply->type;
+        }
+    }
+    SERVER_END_REQ;
+
+    if (ret)
+    {
+        WARN("Failed to retrieve shm index for handle %p, status %#x.\n", handle, ret);
+        *obj = NULL;
+        return ret;
+    }
+
+    TRACE("Got shm index %d for handle %p.\n", shm_idx, handle);
+    *obj = add_to_list( handle, type, shm_idx );
+    return ret;
+}
+
+NTSTATUS msync_close( HANDLE handle )
+{
+    UINT_PTR entry, idx = handle_to_index( handle, &entry );
+
+    TRACE("%p.\n", handle);
+
+    if (entry < MSYNC_LIST_ENTRIES && msync_list[entry])
+    {
+        if (__atomic_exchange_n( &msync_list[entry][idx].type, 0, __ATOMIC_SEQ_CST ))
+            return STATUS_SUCCESS;
+    }
+
+    return STATUS_INVALID_HANDLE;
+}
+
+static NTSTATUS create_msync( enum msync_type type, HANDLE *handle,
+    ACCESS_MASK access, const OBJECT_ATTRIBUTES *attr, int low, int high )
+{
+    NTSTATUS ret;
+    data_size_t len;
+    struct object_attributes *objattr;
+    unsigned int shm_idx;
+
+    if ((ret = alloc_object_attributes( attr, &objattr, &len ))) return ret;
+
+    SERVER_START_REQ( create_msync )
+    {
+        req->access = access;
+        req->low    = low;
+        req->high   = high;
+        req->type   = type;
+        wine_server_add_data( req, objattr, len );
+        ret = wine_server_call( req );
+        if (!ret || ret == STATUS_OBJECT_NAME_EXISTS)
+        {
+            *handle = wine_server_ptr_handle( reply->handle );
+            shm_idx = reply->shm_idx;
+            type    = reply->type;
+        }
+    }
+    SERVER_END_REQ;
+
+    if (!ret || ret == STATUS_OBJECT_NAME_EXISTS)
+    {
+        add_to_list( *handle, type, shm_idx );
+        TRACE("-> handle %p, shm index %d.\n", *handle, shm_idx);
+    }
+
+    free( objattr );
+    return ret;
+}
+
+static NTSTATUS open_msync( enum msync_type type, HANDLE *handle,
+    ACCESS_MASK access, const OBJECT_ATTRIBUTES *attr )
+{
+    NTSTATUS ret;
+    unsigned int shm_idx;
+
+    SERVER_START_REQ( open_msync )
+    {
+        req->access     = access;
+        req->attributes = attr->Attributes;
+        req->rootdir    = wine_server_obj_handle( attr->RootDirectory );
+        req->type       = type;
+        if (attr->ObjectName)
+            wine_server_add_data( req, attr->ObjectName->Buffer, attr->ObjectName->Length );
+        if (!(ret = wine_server_call( req )))
+        {
+            *handle = wine_server_ptr_handle( reply->handle );
+            type = reply->type;
+            shm_idx = reply->shm_idx;
+        }
+    }
+    SERVER_END_REQ;
+
+    if (!ret)
+    {
+        add_to_list( *handle, type, shm_idx );
+        TRACE("-> handle %p, shm index %u.\n", *handle, shm_idx);
+    }
+    return ret;
+}
+
+void msync_init(void)
+{
+    struct stat st;
+    mach_port_t bootstrap_port;
+    void *dlhandle = dlopen( NULL, RTLD_NOW );
+
+    if (!do_msync())
+    {
+        /* make sure the server isn't running with WINEMSYNC */
+        HANDLE handle;
+        NTSTATUS ret;
+
+        ret = create_msync( 0, &handle, 0, NULL, 0, 0 );
+        if (ret != STATUS_NOT_IMPLEMENTED)
+        {
+            ERR("Server is running with WINEMSYNC but this process is not, please enable WINEMSYNC or restart wineserver.\n");
+            exit(1);
+        }
+
+        dlclose( dlhandle );
+        return;
+    }
+
+    if (stat( config_dir, &st ) == -1)
+        ERR("Cannot stat %s\n", config_dir);
+
+    if (st.st_ino != (unsigned long)st.st_ino)
+        sprintf( shm_name, "/wine-%lx%08lx-msync", (unsigned long)((unsigned long long)st.st_ino >> 32), (unsigned long)st.st_ino );
+    else
+        sprintf( shm_name, "/wine-%lx-msync", (unsigned long)st.st_ino );
+
+    if ((shm_fd = shm_open( shm_name, O_RDWR, 0644 )) == -1)
+    {
+        /* the server probably isn't running with WINEMSYNC; tell the user and bail */
+        if (errno == ENOENT)
+            ERR("Failed to open msync shared memory file; make sure no stale wineserver instances are running without WINEMSYNC.\n");
+        else
+            ERR("Failed to initialize shared memory: %s\n", strerror( errno ));
+        exit(1);
+    }
+
+    pagesize = sysconf( _SC_PAGESIZE );
+
+    shm_addrs = calloc( 128, sizeof(shm_addrs[0]) );
+    shm_addrs_size = 128;
+
+    semaphore_pool_init();
+
+    __ulock_wait2 = (__ulock_wait2_ptr_t)dlsym( dlhandle, "__ulock_wait2" );
+    if (!__ulock_wait2)
+        WARN("__ulock_wait2 not available, performance will be lower\n");
+
+    /* Bootstrap mach wineserver communication */
+
+    mach_msg2_trap = (mach_msg2_trap_ptr_t)dlsym( dlhandle, "mach_msg2_trap" );
+    if (!mach_msg2_trap)
+        WARN("Using mach_msg_overwrite instead of mach_msg2\n");
+    dlclose( dlhandle );
+
+    if (task_get_special_port(mach_task_self(), TASK_BOOTSTRAP_PORT, &bootstrap_port) != KERN_SUCCESS)
+    {
+        ERR("Failed task_get_special_port\n");
+        exit(1);
+    }
+
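+    /* Look up the wineserver's Mach port, registered with the bootstrap
+     * server under the shared memory name minus the leading slash. */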
+    if (bootstrap_look_up(bootstrap_port, shm_name + 1, &server_port) != KERN_SUCCESS)
+    {
+        ERR("Failed bootstrap_look_up for %s\n", shm_name + 1);
+        exit(1);
+    }
+}
+
+NTSTATUS msync_create_semaphore( HANDLE *handle, ACCESS_MASK access,
+    const OBJECT_ATTRIBUTES *attr, LONG initial, LONG max )
+{
+    TRACE("name %s, initial %d, max %d.\n",
+        attr ? debugstr_us(attr->ObjectName) : "<no name>", initial, max);
+
+    return create_msync( MSYNC_SEMAPHORE, handle, access, attr, initial, max );
+}
+
+NTSTATUS msync_open_semaphore( HANDLE *handle, ACCESS_MASK access,
+    const OBJECT_ATTRIBUTES *attr )
+{
+    TRACE("name %s.\n", debugstr_us(attr->ObjectName));
+
+    return open_msync( MSYNC_SEMAPHORE, handle, access, attr );
+}
+
+static inline void signal_all( struct msync *obj )
+{
+    __thread static mach_msg_header_t send_header;
+
+    __ulock_wake( UL_COMPARE_AND_WAIT_SHARED | ULF_WAKE_ALL, obj->shm, 0 );
+
+    if (!__atomic_load_n( (int *)obj->shm + 3, __ATOMIC_ACQUIRE ))
+        return;
+
+    send_header.msgh_bits = msgh_bits_send;
+    send_header.msgh_id = obj->shm_idx;
+    send_header.msgh_size = sizeof(send_header);
+    send_header.msgh_remote_port = server_port;
+
+    mach_msg2( &send_header, MACH_SEND_MSG, send_header.msgh_size, 0,
+               MACH_PORT_NULL, MACH_MSG_TIMEOUT_NONE, 0 );
+}
+
+NTSTATUS msync_release_semaphore( HANDLE handle, ULONG count, ULONG *prev )
+{
+    struct msync *obj;
+    struct semaphore *semaphore;
+    ULONG current;
+    NTSTATUS ret;
+
+    TRACE("%p, %d, %p.\n", handle, count, prev);
+
+    if ((ret = get_object( handle, &obj ))) return ret;
+    semaphore = obj->shm;
+
+    do
+    {
+        current = semaphore->count;
+        if (count + current > semaphore->max)
+            return STATUS_SEMAPHORE_LIMIT_EXCEEDED;
+    } while (__sync_val_compare_and_swap( &semaphore->count, current, count + current ) != current);
+
+    if (prev) *prev = current;
+
+    signal_all( obj );
+
+    return STATUS_SUCCESS;
+}
+
+NTSTATUS msync_query_semaphore( HANDLE handle, void *info, ULONG *ret_len )
+{
+    struct msync *obj;
+    struct semaphore *semaphore;
+    SEMAPHORE_BASIC_INFORMATION *out = info;
+    NTSTATUS ret;
+
+    TRACE("handle %p, info %p, ret_len %p.\n", handle, info, ret_len);
+
+    if ((ret = get_object( handle, &obj ))) return ret;
+    semaphore = obj->shm;
+
+    out->CurrentCount = semaphore->count;
+    out->MaximumCount = semaphore->max;
+    if (ret_len) *ret_len = sizeof(*out);
+
+    return STATUS_SUCCESS;
+}
+
+NTSTATUS msync_create_event( HANDLE *handle, ACCESS_MASK access,
+    const OBJECT_ATTRIBUTES *attr, EVENT_TYPE event_type, BOOLEAN initial )
+{
+    enum msync_type type = (event_type == SynchronizationEvent ? MSYNC_AUTO_EVENT : MSYNC_MANUAL_EVENT);
+
+    TRACE("name %s, %s-reset, initial %d.\n",
+        attr ? debugstr_us(attr->ObjectName) : "<no name>",
+        event_type == NotificationEvent ? "manual" : "auto", initial);
+
+    return create_msync( type, handle, access, attr, initial, 0xdeadbeef );
+}
+
+NTSTATUS msync_open_event( HANDLE *handle, ACCESS_MASK access,
+    const OBJECT_ATTRIBUTES *attr )
+{
+    TRACE("name %s.\n", debugstr_us(attr->ObjectName));
+
+    return open_msync( MSYNC_AUTO_EVENT, handle, access, attr );
+}
+
+NTSTATUS msync_set_event( HANDLE handle, LONG *prev )
+{
+    struct event *event;
+    struct msync *obj;
+    LONG current;
+    NTSTATUS ret;
+
+    TRACE("%p.\n", handle);
+
+    if ((ret = get_object( handle, &obj ))) return ret;
+    event = obj->shm;
+
+    if (obj->type != MSYNC_MANUAL_EVENT && obj->type != MSYNC_AUTO_EVENT)
+        return STATUS_OBJECT_TYPE_MISMATCH;
+
+    if (!(current = __atomic_exchange_n( &event->signaled, 1, __ATOMIC_SEQ_CST )))
+        signal_all( obj );
+
+    if (prev) *prev = current;
+
+    return STATUS_SUCCESS;
+}
+
+NTSTATUS msync_reset_event( HANDLE handle, LONG *prev )
+{
+    struct event *event;
+    struct msync *obj;
+    LONG current;
+    NTSTATUS ret;
+
+    TRACE("%p.\n", handle);
+
+    if ((ret = get_object( handle, &obj ))) return ret;
+    event = obj->shm;
+
+    if (obj->type != MSYNC_MANUAL_EVENT && obj->type != MSYNC_AUTO_EVENT)
+        return STATUS_OBJECT_TYPE_MISMATCH;
+
+    current = __atomic_exchange_n( &event->signaled, 0, __ATOMIC_SEQ_CST );
+
+    if (prev) *prev = current;
+
+    return STATUS_SUCCESS;
+}
+
+NTSTATUS msync_pulse_event( HANDLE handle, LONG *prev )
+{
+    struct event *event;
+    struct msync *obj;
+    LONG current;
+    NTSTATUS ret;
+
+    TRACE("%p.\n", handle);
+
+    if ((ret = get_object( handle, &obj ))) return ret;
+    event = obj->shm;
+
+    if (obj->type != MSYNC_MANUAL_EVENT && obj->type != MSYNC_AUTO_EVENT)
+        return STATUS_OBJECT_TYPE_MISMATCH;
+
+    /* This isn't really correct; an application could miss the write.
+     * Unfortunately we can't really do much better. Fortunately this is rarely
+     * used (and publicly deprecated). */
+    if (!(current = __atomic_exchange_n( &event->signaled, 1, __ATOMIC_SEQ_CST )))
+        signal_all( obj );
+
+    /* Try to give other threads a chance to wake up. Hopefully erring on this
+     * side is the better thing to do... */
+    sched_yield();
+
+    __atomic_store_n( &event->signaled, 0, __ATOMIC_SEQ_CST );
+
+    if (prev) *prev = current;
+
+    return STATUS_SUCCESS;
+}
+
+NTSTATUS msync_query_event( HANDLE handle, void *info, ULONG *ret_len )
+{
+    struct event *event;
+    struct msync *obj;
+    EVENT_BASIC_INFORMATION *out = info;
+    NTSTATUS ret;
+
+    TRACE("handle %p, info %p, ret_len %p.\n", handle, info, ret_len);
+
+    if ((ret = get_object( handle, &obj ))) return ret;
+    event = obj->shm;
+
+    out->EventState = event->signaled;
+    out->EventType = (obj->type == MSYNC_AUTO_EVENT ? SynchronizationEvent : NotificationEvent);
+    if (ret_len) *ret_len = sizeof(*out);
+
+    return STATUS_SUCCESS;
+}
+
+NTSTATUS msync_create_mutex( HANDLE *handle, ACCESS_MASK access,
+    const OBJECT_ATTRIBUTES *attr, BOOLEAN initial )
+{
+    TRACE("name %s, initial %d.\n",
+        attr ? debugstr_us(attr->ObjectName) : "<no name>", initial);
+
+    return create_msync( MSYNC_MUTEX, handle, access, attr,
+        initial ? GetCurrentThreadId() : 0, initial ? 1 : 0 );
+}
+
+NTSTATUS msync_open_mutex( HANDLE *handle, ACCESS_MASK access,
+    const OBJECT_ATTRIBUTES *attr )
+{
+    TRACE("name %s.\n", debugstr_us(attr->ObjectName));
+
+    return open_msync( MSYNC_MUTEX, handle, access, attr );
+}
+
+NTSTATUS msync_release_mutex( HANDLE handle, LONG *prev )
+{
+    struct mutex *mutex;
+    struct msync *obj;
+    NTSTATUS ret;
+
+    TRACE("%p, %p.\n", handle, prev);
+
+    if ((ret = get_object( handle, &obj ))) return ret;
+    mutex = obj->shm;
+
+    if (mutex->tid != GetCurrentThreadId()) return STATUS_MUTANT_NOT_OWNED;
+
+    if (prev) *prev = mutex->count;
+
+    if (!--mutex->count)
+    {
+        __atomic_store_n( &mutex->tid, 0, __ATOMIC_SEQ_CST );
+        signal_all( obj );
+    }
+
+    return STATUS_SUCCESS;
+}
+
+NTSTATUS msync_query_mutex( HANDLE handle, void *info, ULONG *ret_len )
+{
+    struct msync *obj;
+    struct mutex *mutex;
+    MUTANT_BASIC_INFORMATION *out = info;
+    NTSTATUS ret;
+
+    TRACE("handle %p, info %p, ret_len %p.\n", handle, info, ret_len);
+
+    if ((ret = get_object( handle, &obj ))) return ret;
+    mutex = obj->shm;
+
+    out->CurrentCount = 1 - mutex->count;
+    out->OwnedByCaller = (mutex->tid == GetCurrentThreadId());
+    out->AbandonedState = (mutex->tid == ~0);
+    if (ret_len) *ret_len = sizeof(*out);
+
+    return STATUS_SUCCESS;
+}
+
+static NTSTATUS do_single_wait( struct msync *obj, ULONGLONG *end, BOOLEAN alertable, int tid )
+{
+    NTSTATUS status;
+    struct msync *wait_objs[2];
+
+    wait_objs[0] = obj;
+
+    if (alertable)
+    {
+        struct msync apc_obj;
+        int *apc_addr = ntdll_get_thread_data()->msync_apc_addr;
+
+        apc_obj.type = MSYNC_AUTO_EVENT;
+        apc_obj.shm = (void *)apc_addr;
+        apc_obj.shm_idx = ntdll_get_thread_data()->msync_apc_idx;
+
+        if (__atomic_load_n( apc_addr, __ATOMIC_SEQ_CST ))
+            return STATUS_USER_APC;
+
+        wait_objs[1] = &apc_obj;
+
+        status = msync_wait_multiple( wait_objs, 2, end, tid );
+
+        if (__atomic_load_n( apc_addr, __ATOMIC_SEQ_CST ))
+            return STATUS_USER_APC;
+    }
+    else
+    {
+        status = msync_wait_multiple( wait_objs, 1, end, tid );
+    }
+    return status;
+}
+
+static NTSTATUS __msync_wait_objects( DWORD count, const HANDLE *handles,
+    BOOLEAN wait_any, BOOLEAN alertable, const LARGE_INTEGER *timeout )
+{
+    static const LARGE_INTEGER zero = {0};
+
+    __thread static int current_tid = 0;
+    __thread static struct msync *objs[MAXIMUM_WAIT_OBJECTS + 1];
+    struct msync apc_obj;
+    int has_msync = 0, has_server = 0;
+    BOOL msgwait = FALSE;
+    LONGLONG timeleft;
+    LARGE_INTEGER now;
+    DWORD waitcount;
+    ULONGLONG end;
+    int i, ret;
+
+    current_tid = current_tid ? current_tid : GetCurrentThreadId();
+
+    /* Grab the APC idx if we don't already have it. */
+    if (alertable && !ntdll_get_thread_data()->msync_apc_addr)
+    {
+        unsigned int idx = 0;
+        SERVER_START_REQ( get_msync_apc_idx )
+        {
+            if (!(ret = wine_server_call( req )))
+                idx = reply->shm_idx;
+        }
+        SERVER_END_REQ;
+
+        if (idx)
+        {
+            struct event *apc_event = get_shm( idx );
+            ntdll_get_thread_data()->msync_apc_addr = &apc_event->signaled;
+            ntdll_get_thread_data()->msync_apc_idx = idx;
+        }
+    }
+
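+    /* NT timeouts: positive values are absolute times, negative values are
+     * intervals relative to now; convert both to an absolute end time. */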
+    NtQuerySystemTime( &now );
+    if (timeout)
+    {
+        if (timeout->QuadPart == TIMEOUT_INFINITE)
+            timeout = NULL;
+        else if (timeout->QuadPart > 0)
+            end = timeout->QuadPart;
+        else
+            end = now.QuadPart - timeout->QuadPart;
+    }
+
+    for (i = 0; i < count; i++)
+    {
+        ret = get_object( handles[i], &objs[i] );
+        if (ret == STATUS_SUCCESS)
+            has_msync = 1;
+        else if (ret == STATUS_NOT_IMPLEMENTED)
+            has_server = 1;
+        else
+            return ret;
+    }
+
+    if (count && objs[count - 1] && objs[count - 1]->type == MSYNC_QUEUE)
+        msgwait = TRUE;
+
+    if (has_msync && has_server)
+        FIXME("Can't wait on msync and server objects at the same time!\n");
+    else if (has_server)
+        return STATUS_NOT_IMPLEMENTED;
+
+    if (TRACE_ON(msync))
+    {
+        TRACE("Waiting for %s of %d handles:", wait_any ? "any" : "all", count);
+        for (i = 0; i < count; i++)
+            TRACE(" %p", handles[i]);
+
+        if (msgwait)
+            TRACE(" or driver events");
+        if (alertable)
+            TRACE(", alertable");
+
+        if (!timeout)
+            TRACE(", timeout = INFINITE.\n");
+        else
+        {
+            timeleft = update_timeout( end );
+            TRACE(", timeout = %ld.%07ld sec.\n",
+                (long) (timeleft / TICKSPERSEC), (long) (timeleft % TICKSPERSEC));
+        }
+    }
+
+    if (wait_any || count <= 1)
+    {
+        while (1)
+        {
+            /* Try to grab anything. */
+
+            if (alertable)
+            {
+                apc_obj.type = MSYNC_AUTO_EVENT;
+                /* We must check this first! The server may set an event that
+                 * we're waiting on, but we need to return STATUS_USER_APC. */
+                if (__atomic_load_n( ntdll_get_thread_data()->msync_apc_addr, __ATOMIC_SEQ_CST ))
+                    goto userapc;
+            }
+
+            for (i = 0; i < count; i++)
+            {
+                struct msync *obj = objs[i];
+
+                if (obj)
+                {
+                    if (!obj->type) /* gcc complains if we put this in the switch */
+                    {
+                        /* Someone probably closed an object while waiting on it. */
+                        WARN("Handle %p has type 0; was it closed?\n", handles[i]);
+                        return STATUS_INVALID_HANDLE;
+                    }
+
+                    switch (obj->type)
+                    {
+                    case MSYNC_SEMAPHORE:
+                    {
+                        struct semaphore *semaphore = obj->shm;
+                        int current;
+
+                        current = __atomic_load_n(&semaphore->count, __ATOMIC_ACQUIRE);
+                        if (current && __atomic_compare_exchange_n(&semaphore->count, &current, current - 1, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED))
+                        {
+                            TRACE("Woken up by handle %p [%d].\n", handles[i], i);
+                            return i;
+                        }
+                        break;
+                    }
+                    case MSYNC_MUTEX:
+                    {
+                        struct mutex *mutex = obj->shm;
+                        int tid;
+
+                        if (mutex->tid == current_tid)
+                        {
+                            TRACE("Woken up by handle %p [%d].\n", handles[i], i);
+                            mutex->count++;
+                            return i;
+                        }
+
+                        tid = 0;
+                        if (__atomic_compare_exchange_n(&mutex->tid, &tid, current_tid, 0, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED))
+                        {
+                            TRACE("Woken up by handle %p [%d].\n", handles[i], i);
+                            mutex->count = 1;
+                            return i;
+                        }
+                        else if (tid == ~0 && __atomic_compare_exchange_n(&mutex->tid, &tid, current_tid, 0, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED))
+                        {
+                            TRACE("Woken up by abandoned mutex %p [%d].\n", handles[i], i);
+                            mutex->count = 1;
+                            return STATUS_ABANDONED_WAIT_0 + i;
+                        }
+
+                        break;
+                    }
+                    case MSYNC_AUTO_EVENT:
+                    case MSYNC_AUTO_SERVER:
+                    {
+                        struct event *event = obj->shm;
+                        int signaled = 1;
+
+                        if (__atomic_compare_exchange_n(&event->signaled, &signaled, 0, 0, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED))
+                        {
+                            TRACE("Woken up by handle %p [%d].\n", handles[i], i);
+                            return i;
+                        }
+
+                        break;
+                    }
+                    case MSYNC_MANUAL_EVENT:
+                    case MSYNC_MANUAL_SERVER:
+                    case MSYNC_QUEUE:
+                    {
+                        struct event *event = obj->shm;
+
+                        if (__atomic_load_n(&event->signaled, __ATOMIC_ACQUIRE))
+                        {
+                            TRACE("Woken up by handle %p [%d].\n", handles[i], i);
+                            return i;
+                        }
+                        break;
+                    }
+                    default:
+                        ERR("Invalid type %#x for handle %p.\n", obj->type, handles[i]);
+                        assert(0);
+                    }
+                }
+            }
+
+            if (alertable)
+            {
+                /* We already checked if it was signaled; don't bother doing it again. */
+                apc_obj.shm = (void *)ntdll_get_thread_data()->msync_apc_addr;
+                apc_obj.shm_idx = ntdll_get_thread_data()->msync_apc_idx;
+                objs[i] = &apc_obj;
+                i++;
+            }
+            waitcount = i;
+
+            /* Looks like everything is contended, so wait. */
+
+            if (timeout && !timeout->QuadPart)
+            {
+                /* Unlike esync, we already know that we've timed out, so we
+                 * can avoid a syscall. */
+                TRACE("Wait timed out.\n");
+                return STATUS_TIMEOUT;
+            }
+
+            ret = msync_wait_multiple( objs, waitcount, timeout ? &end : NULL, current_tid );
+
+            if (ret == STATUS_TIMEOUT)
+            {
+                TRACE("Wait timed out.\n");
+                return STATUS_TIMEOUT;
+            }
+        } /* while (1) */
+    }
+    else
+    {
+        /* Wait-all is a little trickier to implement correctly. Fortunately,
+         * it's not as common.
+         *
+         * The idea is basically just to wait in sequence on every object in the
+         * set. Then when we're done, try to grab them all in a tight loop. If
+         * that fails, release any resources we've grabbed (and yes, we can
+         * reliably do this—it's just mutexes and semaphores that we have to
+         * put back, and in both cases we just put back 1), and if any of that
+         * fails we start over.
+         *
+         * What makes this inherently bad is that we might temporarily grab a
+         * resource incorrectly. Hopefully it'll be quick (and hey, it won't
+         * block on wineserver) so nobody will notice. Besides, consider: if
+         * object A becomes signaled but someone grabs it before we can grab it
+         * and everything else, then they could just as well have grabbed it
+         * before it became signaled. Similarly if object A was signaled and we
+         * were blocking on object B, then B becomes available and someone grabs
+         * A before we can, then they might have grabbed A before B became
+         * signaled. In either case anyone who tries to wait on A or B will be
+         * waiting for an instant while we put things back. */
+
+        NTSTATUS status = STATUS_SUCCESS;
+
+        while (1)
+        {
+            BOOL abandoned;
+
+tryagain:
+            abandoned = FALSE;
+
+            /* First step: try to wait on each object in sequence. */
+
+            for (i = 0; i < count; i++)
+            {
+                struct msync *obj = objs[i];
+
+                if (obj && obj->type == MSYNC_MUTEX)
+                {
+                    struct mutex *mutex = obj->shm;
+
+                    if (mutex->tid == current_tid)
+                        continue;
+
+                    while (__atomic_load_n( &mutex->tid, __ATOMIC_SEQ_CST ))
+                    {
+                        status = do_single_wait( obj, timeout ? &end : NULL, alertable, current_tid );
+                        if (status != STATUS_PENDING)
+                            break;
+                    }
+                }
+                else if (obj)
+                {
+                    /* this works for semaphores too */
+                    struct event *event = obj->shm;
+
+                    while (!__atomic_load_n( &event->signaled, __ATOMIC_SEQ_CST ))
+                    {
+                        status = do_single_wait( obj, timeout ? &end : NULL, alertable, current_tid );
+                        if (status != STATUS_PENDING)
+                            break;
+                    }
+                }
+
+                if (status == STATUS_TIMEOUT)
+                {
+                    TRACE("Wait timed out.\n");
+                    return STATUS_TIMEOUT;
+                }
+                else if (status == STATUS_USER_APC)
+                    goto userapc;
+            }
+
+            /* If we got here and we haven't timed out, that means all of the
+             * handles were signaled. Check to make sure they still are. */
+            for (i = 0; i < count; i++)
+            {
+                struct msync *obj = objs[i];
+
+                if (obj && obj->type == MSYNC_MUTEX)
+                {
+                    struct mutex *mutex = obj->shm;
+                    int tid = __atomic_load_n( &mutex->tid, __ATOMIC_SEQ_CST );
+
+                    if (tid && tid != ~0 && tid != current_tid)
+                        goto tryagain;
+                }
+                else if (obj)
+                {
+                    struct event *event = obj->shm;
+
+                    if (!__atomic_load_n( &event->signaled, __ATOMIC_SEQ_CST ))
+                        goto tryagain;
+                }
+            }
+
+            /* Yep, still signaled. Now quick, grab everything. */
+            for (i = 0; i < count; i++)
+            {
+                struct msync *obj = objs[i];
+                if (!obj) continue;
+                switch (obj->type)
+                {
+                case MSYNC_MUTEX:
+                {
+                    struct mutex *mutex = obj->shm;
+                    int tid = __atomic_load_n( &mutex->tid, __ATOMIC_SEQ_CST );
+                    if (tid == current_tid)
+                        break;
+                    if (tid && tid != ~0)
+                        goto tooslow;
+                    if (__sync_val_compare_and_swap( &mutex->tid, tid, current_tid ) != tid)
+                        goto tooslow;
+                    if (tid == ~0)
+                        abandoned = TRUE;
+                    break;
+                }
+                case MSYNC_SEMAPHORE:
+                {
+                    struct semaphore *semaphore = obj->shm;
+                    int current;
+
+                    if (!(current = __atomic_load_n( &semaphore->count, __ATOMIC_SEQ_CST ))
+                            || __sync_val_compare_and_swap( &semaphore->count, current, current - 1 ) != current)
+                        goto tooslow;
+                    break;
+                }
+                case MSYNC_AUTO_EVENT:
+                case MSYNC_AUTO_SERVER:
+                {
+                    struct event *event = obj->shm;
+                    if (!__sync_val_compare_and_swap( &event->signaled, 1, 0 ))
+                        goto tooslow;
+                    break;
+                }
+                default:
+                    /* If a manual-reset event changed between the check above
+                     * and here, it shouldn't be a problem. */
+                    break;
+                }
+            }
+
+            /* If we got here, we successfully waited on every object.
+             * Make sure to let ourselves know that we grabbed the mutexes. */
+            for (i = 0; i < count; i++)
+            {
+                if (objs[i] && objs[i]->type == MSYNC_MUTEX)
+                {
+                    struct mutex *mutex = objs[i]->shm;
+                    mutex->count++;
+                }
+            }
+
+            if (abandoned)
+            {
+                TRACE("Wait successful, but some object(s) were abandoned.\n");
+                return STATUS_ABANDONED;
+            }
+            TRACE("Wait successful.\n");
+            return STATUS_SUCCESS;
+
+tooslow:
+            for (--i; i >= 0; i--)
+            {
+                struct msync *obj = objs[i];
+                if (!obj) continue;
+                switch (obj->type)
+                {
+                case MSYNC_MUTEX:
+                {
+                    struct mutex *mutex = obj->shm;
+                    /* HACK: This won't do the right thing with abandoned
+                     * mutexes, but fixing it is probably more trouble than
+                     * it's worth. */
+                    __atomic_store_n( &mutex->tid, 0, __ATOMIC_SEQ_CST );
+                    break;
+                }
+                case MSYNC_SEMAPHORE:
+                {
+                    struct semaphore *semaphore = obj->shm;
+                    __sync_fetch_and_add( &semaphore->count, 1 );
+                    break;
+                }
+                case MSYNC_AUTO_EVENT:
+                case MSYNC_AUTO_SERVER:
+                {
+                    struct event *event = obj->shm;
+                    __atomic_store_n( &event->signaled, 1, __ATOMIC_SEQ_CST );
+                    break;
+                }
+                default:
+                    /* doesn't need to be put back */
+                    break;
+                }
+            }
+        } /* while (1) */
+    } /* else (wait-all) */
+
+    assert(0);  /* shouldn't reach here... */
+
+userapc:
+    TRACE("Woken up by user APC.\n");
+
+    /* We have to make a server call anyway to get the APC to execute, so just
+     * delegate down to server_wait(). */
+    ret = server_wait( NULL, 0, SELECT_INTERRUPTIBLE | SELECT_ALERTABLE, &zero );
+
+    /* This can happen if we received a system APC, and the APC fd was woken up
+     * before we got SIGUSR1. poll() doesn't return EINTR in that case. The
+     * right thing to do seems to be to return STATUS_USER_APC anyway. */
+    if (ret == STATUS_TIMEOUT) ret = STATUS_USER_APC;
+    return ret;
+}
+
+/* Like esync, we need to let the server know when we are doing a message wait,
+ * and when we are done with one, so that all of the code surrounding hung
+ * queues works, and we also need this for WaitForInputIdle().
+ *
+ * Unlike esync, we can't wait on the queue fd itself locally. Instead we let
+ * the server do that for us, the way it normally does. This could actually
+ * work for esync too, and that might be better. */
+static void server_set_msgwait( int in_msgwait )
+{
+    SERVER_START_REQ( msync_msgwait )
+    {
+        req->in_msgwait = in_msgwait;
+        wine_server_call( req );
+    }
+    SERVER_END_REQ;
+}
+
+/* This is a very thin wrapper around the proper implementation above. The
+ * purpose is to make sure the server knows when we are doing a message wait.
+ * This is separated into a wrapper function since there are at least a dozen
+ * exit paths from msync_wait_objects(). */
+NTSTATUS msync_wait_objects( DWORD count, const HANDLE *handles, BOOLEAN wait_any,
+                             BOOLEAN alertable, const LARGE_INTEGER *timeout )
+{
+    BOOL msgwait = FALSE;
+    struct msync *obj;
+    NTSTATUS ret;
+
+    if (count && !get_object( handles[count - 1], &obj ) && obj->type == MSYNC_QUEUE)
+    {
+        msgwait = TRUE;
+        server_set_msgwait( 1 );
+    }
+
+    ret = __msync_wait_objects( count, handles, wait_any, alertable, timeout );
+
+    if (msgwait)
+        server_set_msgwait( 0 );
+
+    return ret;
+}
+
+NTSTATUS msync_signal_and_wait( HANDLE signal, HANDLE wait, BOOLEAN alertable,
+    const LARGE_INTEGER *timeout )
+{
+    struct msync *obj;
+    NTSTATUS ret;
+
+    if ((ret = get_object( signal, &obj ))) return ret;
+
+    switch (obj->type)
+    {
+    case MSYNC_SEMAPHORE:
+        ret = msync_release_semaphore( signal, 1, NULL );
+        break;
+    case MSYNC_AUTO_EVENT:
+    case MSYNC_MANUAL_EVENT:
+        ret = msync_set_event( signal, NULL );
+        break;
+    case MSYNC_MUTEX:
+        ret = msync_release_mutex( signal, NULL );
+        break;
+    default:
+        return STATUS_OBJECT_TYPE_MISMATCH;
+    }
+    if (ret) return ret;
+
+    return msync_wait_objects( 1, &wait, TRUE, alertable, timeout );
+}
diff --git a/dlls/ntdll/unix/msync.h b/dlls/ntdll/unix/msync.h
new file mode 100644
index 00000000000..3e6eb2c8b1e
--- /dev/null
+++ b/dlls/ntdll/unix/msync.h
@@ -0,0 +1,50 @@
+/*
+ * mach semaphore-based synchronization objects
+ *
+ * Copyright (C) 2018 Zebediah Figura
+ * Copyright (C) 2023 Marc-Aurel Zent
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+extern int do_msync(void);
+extern void msync_init(void);
+extern NTSTATUS msync_close( HANDLE handle );
+
+extern NTSTATUS msync_create_semaphore( HANDLE *handle, ACCESS_MASK access,
+    const OBJECT_ATTRIBUTES *attr, LONG initial, LONG max );
+extern NTSTATUS msync_release_semaphore( HANDLE handle, ULONG count, ULONG *prev );
+extern NTSTATUS msync_open_semaphore( HANDLE *handle, ACCESS_MASK access,
+    const OBJECT_ATTRIBUTES *attr );
+extern NTSTATUS msync_query_semaphore( HANDLE handle, void *info, ULONG *ret_len );
+extern NTSTATUS msync_create_event( HANDLE *handle, ACCESS_MASK access,
+    const OBJECT_ATTRIBUTES *attr, EVENT_TYPE type, BOOLEAN initial );
+extern NTSTATUS msync_open_event( HANDLE *handle, ACCESS_MASK access,
+    const OBJECT_ATTRIBUTES *attr );
+extern NTSTATUS msync_set_event( HANDLE handle, LONG *prev );
+extern NTSTATUS msync_reset_event( HANDLE handle, LONG *prev );
+extern NTSTATUS msync_pulse_event( HANDLE handle, LONG *prev );
+extern NTSTATUS msync_query_event( HANDLE handle, void *info, ULONG *ret_len );
+extern NTSTATUS msync_create_mutex( HANDLE *handle, ACCESS_MASK access,
+    const OBJECT_ATTRIBUTES *attr, BOOLEAN initial );
+extern NTSTATUS msync_open_mutex( HANDLE *handle, ACCESS_MASK access,
+    const OBJECT_ATTRIBUTES *attr );
+extern NTSTATUS msync_release_mutex( HANDLE handle, LONG *prev );
+extern NTSTATUS msync_query_mutex( HANDLE handle, void *info, ULONG *ret_len );
+
+extern NTSTATUS msync_wait_objects( DWORD count, const HANDLE *handles, BOOLEAN wait_any,
+                                    BOOLEAN alertable, const LARGE_INTEGER *timeout );
+extern NTSTATUS msync_signal_and_wait( HANDLE signal, HANDLE wait,
+    BOOLEAN alertable, const LARGE_INTEGER *timeout );
diff --git a/dlls/ntdll/unix/server.c b/dlls/ntdll/unix/server.c
index f03d008e2ae..27ffbdf3001 100644
--- a/dlls/ntdll/unix/server.c
+++ b/dlls/ntdll/unix/server.c
@@ -79,6 +79,7 @@
 #include "wine/server.h"
 #include "wine/debug.h"
 #include "unix_private.h"
+#include "msync.h"
 #include "ddk/wdm.h"
 
 WINE_DEFAULT_DEBUG_CHANNEL(server);
@@ -1855,6 +1856,9 @@ NTSTATUS WINAPI NtClose( HANDLE handle )
      * retrieve it again */
     fd = remove_fd_from_cache( handle );
 
+    if (do_msync())
+        msync_close( handle );
+
     SERVER_START_REQ( close_handle )
     {
         req->handle = wine_server_obj_handle( handle );
diff --git a/dlls/ntdll/unix/sync.c b/dlls/ntdll/unix/sync.c
index 80f82c18730..2781384229e 100644
--- a/dlls/ntdll/unix/sync.c
+++ b/dlls/ntdll/unix/sync.c
@@ -66,6 +66,7 @@
 #include "wine/server.h"
 #include "wine/debug.h"
 #include "unix_private.h"
+#include "msync.h"
 
 WINE_DEFAULT_DEBUG_CHANNEL(sync);
 
@@ -315,6 +316,9 @@ NTSTATUS WINAPI NtCreateSemaphore( HANDLE *handle, ACCESS_MASK access, const OBJ
     if (max <= 0 || initial < 0 || initial > max) return STATUS_INVALID_PARAMETER;
     if ((ret = alloc_object_attributes( attr, &objattr, &len ))) return ret;
 
+    if (do_msync())
+        return msync_create_semaphore( handle, access, attr, initial, max );
+
     SERVER_START_REQ( create_semaphore )
     {
         req->access  = access;
@@ -339,6 +343,10 @@ NTSTATUS WINAPI NtOpenSemaphore( HANDLE *handle, ACCESS_MASK access, const OBJEC
     unsigned int ret;
 
     *handle = 0;
+
+    if (do_msync())
+        return msync_open_semaphore( handle, access, attr );
+
     if ((ret = validate_open_object_attributes( attr ))) return ret;
 
     SERVER_START_REQ( open_semaphore )
@@ -375,6 +383,9 @@ NTSTATUS WINAPI NtQuerySemaphore( HANDLE handle, SEMAPHORE_INFORMATION_CLASS cla
 
     if (len != sizeof(SEMAPHORE_BASIC_INFORMATION)) return STATUS_INFO_LENGTH_MISMATCH;
 
+    if (do_msync())
+        return msync_query_semaphore( handle, info, ret_len );
+
     SERVER_START_REQ( query_semaphore )
     {
         req->handle = wine_server_obj_handle( handle );
@@ -397,6 +408,9 @@ NTSTATUS WINAPI NtReleaseSemaphore( HANDLE handle, ULONG count, ULONG *previous
 {
     unsigned int ret;
 
+    if (do_msync())
+        return msync_release_semaphore( handle, count, previous );
+
     SERVER_START_REQ( release_semaphore )
     {
         req->handle = wine_server_obj_handle( handle );
@@ -423,6 +437,10 @@ NTSTATUS WINAPI NtCreateEvent( HANDLE *handle, ACCESS_MASK access, const OBJECT_
 
     *handle = 0;
     if (type != NotificationEvent && type != SynchronizationEvent) return STATUS_INVALID_PARAMETER;
+
+    if (do_msync())
+        return msync_create_event( handle, access, attr, type, state );
+
     if ((ret = alloc_object_attributes( attr, &objattr, &len ))) return ret;
 
     SERVER_START_REQ( create_event )
@@ -451,6 +469,9 @@ NTSTATUS WINAPI NtOpenEvent( HANDLE *handle, ACCESS_MASK access, const OBJECT_AT
     *handle = 0;
     if ((ret = validate_open_object_attributes( attr ))) return ret;
 
+    if (do_msync())
+        return msync_open_event( handle, access, attr );
+
     SERVER_START_REQ( open_event )
     {
         req->access     = access;
@@ -473,6 +494,9 @@ NTSTATUS WINAPI NtSetEvent( HANDLE handle, LONG *prev_state )
 {
     unsigned int ret;
 
+    if (do_msync())
+        return msync_set_event( handle, prev_state );
+
     SERVER_START_REQ( event_op )
     {
         req->handle = wine_server_obj_handle( handle );
@@ -492,6 +516,10 @@ NTSTATUS WINAPI NtResetEvent( HANDLE handle, LONG *prev_state )
 {
     unsigned int ret;
 
+    if (do_msync())
+        return msync_reset_event( handle, prev_state );
+
     SERVER_START_REQ( event_op )
     {
         req->handle = wine_server_obj_handle( handle );
@@ -521,6 +549,9 @@ NTSTATUS WINAPI NtPulseEvent( HANDLE handle, LONG *prev_state )
 {
     unsigned int ret;
 
+    if (do_msync())
+        return msync_pulse_event( handle, prev_state );
+
     SERVER_START_REQ( event_op )
     {
         req->handle = wine_server_obj_handle( handle );
@@ -552,6 +583,9 @@ NTSTATUS WINAPI NtQueryEvent( HANDLE handle, EVENT_INFORMATION_CLASS class,
 
     if (len != sizeof(EVENT_BASIC_INFORMATION)) return STATUS_INFO_LENGTH_MISMATCH;
 
+    if (do_msync())
+        return msync_query_event( handle, info, ret_len );
+
     SERVER_START_REQ( query_event )
     {
         req->handle = wine_server_obj_handle( handle );
@@ -578,6 +612,10 @@ NTSTATUS WINAPI NtCreateMutant( HANDLE *handle, ACCESS_MASK access, const OBJECT
     struct object_attributes *objattr;
 
     *handle = 0;
+
+    if (do_msync())
+        return msync_create_mutex( handle, access, attr, owned );
+
     if ((ret = alloc_object_attributes( attr, &objattr, &len ))) return ret;
 
     SERVER_START_REQ( create_mutex )
@@ -605,6 +643,9 @@ NTSTATUS WINAPI NtOpenMutant( HANDLE *handle, ACCESS_MASK access, const OBJECT_A
     *handle = 0;
     if ((ret = validate_open_object_attributes( attr ))) return ret;
 
+    if (do_msync())
+        return msync_open_mutex( handle, access, attr );
+
     SERVER_START_REQ( open_mutex )
     {
         req->access  = access;
@@ -627,6 +668,9 @@ NTSTATUS WINAPI NtReleaseMutant( HANDLE handle, LONG *prev_count )
 {
     unsigned int ret;
 
+    if (do_msync())
+        return msync_release_mutex( handle, prev_count );
+
     SERVER_START_REQ( release_mutex )
     {
         req->handle = wine_server_obj_handle( handle );
@@ -657,6 +701,9 @@ NTSTATUS WINAPI NtQueryMutant( HANDLE handle, MUTANT_INFORMATION_CLASS class,
 
     if (len != sizeof(MUTANT_BASIC_INFORMATION)) return STATUS_INFO_LENGTH_MISMATCH;
 
+    if (do_msync())
+        return msync_query_mutex( handle, info, ret_len );
+
     SERVER_START_REQ( query_mutex )
     {
         req->handle = wine_server_obj_handle( handle );
@@ -1577,6 +1624,13 @@ NTSTATUS WINAPI NtWaitForMultipleObjects( DWORD count, const HANDLE *handles, BO
 
     if (!count || count > MAXIMUM_WAIT_OBJECTS) return STATUS_INVALID_PARAMETER_1;
 
+    if (do_msync())
+    {
+        NTSTATUS ret = msync_wait_objects( count, handles, wait_any, alertable, timeout );
+        if (ret != STATUS_NOT_IMPLEMENTED)
+            return ret;
+    }
+
     if (alertable) flags |= SELECT_ALERTABLE;
     select_op.wait.op = wait_any ? SELECT_WAIT : SELECT_WAIT_ALL;
     for (i = 0; i < count; i++) select_op.wait.handles[i] = wine_server_obj_handle( handles[i] );
@@ -1602,6 +1656,9 @@ NTSTATUS WINAPI NtSignalAndWaitForSingleObject( HANDLE signal, HANDLE wait,
     select_op_t select_op;
     UINT flags = SELECT_INTERRUPTIBLE;
 
+    if (do_msync())
+        return msync_signal_and_wait( signal, wait, alertable, timeout );
+
     if (!signal) return STATUS_INVALID_HANDLE;
 
     if (alertable) flags |= SELECT_ALERTABLE;
@@ -1642,7 +1699,17 @@ NTSTATUS WINAPI NtYieldExecution(void)
 NTSTATUS WINAPI NtDelayExecution( BOOLEAN alertable, const LARGE_INTEGER *timeout )
 {
     /* if alertable, we need to query the server */
-    if (alertable) return server_wait( NULL, 0, SELECT_INTERRUPTIBLE | SELECT_ALERTABLE, timeout );
+    if (alertable)
+    {
+        if (do_msync())
+        {
+            NTSTATUS ret = msync_wait_objects( 0, NULL, TRUE, TRUE, timeout );
+            if (ret != STATUS_NOT_IMPLEMENTED)
+                return ret;
+        }
+
+        return server_wait( NULL, 0, SELECT_INTERRUPTIBLE | SELECT_ALERTABLE, timeout );
+    }
 
     if (!timeout || timeout->QuadPart == TIMEOUT_INFINITE)  /* sleep forever */
     {
diff --git a/dlls/ntdll/unix/unix_private.h b/dlls/ntdll/unix/unix_private.h
index 8815f174de9..bfdd188d828 100644
--- a/dlls/ntdll/unix/unix_private.h
+++ b/dlls/ntdll/unix/unix_private.h
@@ -103,6 +103,8 @@ struct ntdll_thread_data
 {
     void              *cpu_data[16];  /* reserved for CPU-specific data */
     void              *kernel_stack;  /* stack for thread startup and kernel syscalls */
+    int               *msync_apc_addr;
+    unsigned int       msync_apc_idx;
     int                request_fd;    /* fd for sending server requests */
     int                reply_fd;      /* fd for receiving server replies */
     int                wait_fd[2];    /* fd for sleeping server requests */
diff --git a/dlls/ntdll/unix/virtual.c b/dlls/ntdll/unix/virtual.c
index bbe82086085..1f958a0b4e3 100644
--- a/dlls/ntdll/unix/virtual.c
+++ b/dlls/ntdll/unix/virtual.c
@@ -3722,6 +3722,8 @@ static TEB *init_teb( void *ptr, BOOL is_wow )
     teb->StaticUnicodeString.Buffer = teb->StaticUnicodeBuffer;
     teb->StaticUnicodeString.MaximumLength = sizeof(teb->StaticUnicodeBuffer);
     thread_data = (struct ntdll_thread_data *)&teb->GdiTebBatch;
+    thread_data->msync_apc_addr = NULL;
+    thread_data->msync_apc_idx = 0;
     thread_data->request_fd = -1;
     thread_data->reply_fd   = -1;
     thread_data->wait_fd[0] = -1;
diff --git a/include/wine/server_protocol.h b/include/wine/server_protocol.h
index 0a9d9516f64..8d1b18367c1 100644
--- a/include/wine/server_protocol.h
+++ b/include/wine/server_protocol.h
@@ -5868,6 +5868,92 @@ struct set_keyboard_repeat_reply
     char __pad_12[4];
 };
 
+enum msync_type
+{
+    MSYNC_SEMAPHORE = 1,
+    MSYNC_AUTO_EVENT,
+    MSYNC_MANUAL_EVENT,
+    MSYNC_MUTEX,
+    MSYNC_AUTO_SERVER,
+    MSYNC_MANUAL_SERVER,
+    MSYNC_QUEUE,
+};
+
+
+struct create_msync_request
+{
+    struct request_header __header;
+    unsigned int access;
+    int low;
+    int high;
+    int type;
+    /* VARARG(objattr,object_attributes); */
+    char __pad_28[4];
+};
+struct create_msync_reply
+{
+    struct reply_header __header;
+    obj_handle_t handle;
+    int type;
+    unsigned int shm_idx;
+    char __pad_20[4];
+};
+
+
+struct open_msync_request
+{
+    struct request_header __header;
+    unsigned int access;
+    unsigned int attributes;
+    obj_handle_t rootdir;
+    int          type;
+    /* VARARG(name,unicode_str); */
+    char __pad_28[4];
+};
+struct open_msync_reply
+{
+    struct reply_header __header;
+    obj_handle_t handle;
+    int          type;
+    unsigned int shm_idx;
+    char __pad_20[4];
+};
+
+
+struct get_msync_idx_request
+{
+    struct request_header __header;
+    obj_handle_t handle;
+};
+struct get_msync_idx_reply
+{
+    struct reply_header __header;
+    int          type;
+    unsigned int shm_idx;
+};
+
+struct msync_msgwait_request
+{
+    struct request_header __header;
+    int          in_msgwait;
+};
+struct msync_msgwait_reply
+{
+    struct reply_header __header;
+};
+
+struct get_msync_apc_idx_request
+{
+    struct request_header __header;
+    char __pad_12[4];
+};
+struct get_msync_apc_idx_reply
+{
+    struct reply_header __header;
+    unsigned int shm_idx;
+    char __pad_12[4];
+};
+
 
 enum request
 {
@@ -6162,6 +6248,11 @@ enum request
     REQ_resume_process,
     REQ_get_next_thread,
     REQ_set_keyboard_repeat,
+    REQ_create_msync,
+    REQ_open_msync,
+    REQ_get_msync_idx,
+    REQ_msync_msgwait,
+    REQ_get_msync_apc_idx,
     REQ_NB_REQUESTS
 };
 
@@ -6460,6 +6551,11 @@ union generic_request
     struct resume_process_request resume_process_request;
     struct get_next_thread_request get_next_thread_request;
     struct set_keyboard_repeat_request set_keyboard_repeat_request;
+    struct create_msync_request create_msync_request;
+    struct open_msync_request open_msync_request;
+    struct get_msync_idx_request get_msync_idx_request;
+    struct msync_msgwait_request msync_msgwait_request;
+    struct get_msync_apc_idx_request get_msync_apc_idx_request;
 };
 union generic_reply
 {
@@ -6756,11 +6852,16 @@ union generic_reply
     struct resume_process_reply resume_process_reply;
     struct get_next_thread_reply get_next_thread_reply;
     struct set_keyboard_repeat_reply set_keyboard_repeat_reply;
+    struct create_msync_reply create_msync_reply;
+    struct open_msync_reply open_msync_reply;
+    struct get_msync_idx_reply get_msync_idx_reply;
+    struct msync_msgwait_reply msync_msgwait_reply;
+    struct get_msync_apc_idx_reply get_msync_apc_idx_reply;
 };
 
 /* ### protocol_version begin ### */
 
-#define SERVER_PROTOCOL_VERSION 847
+#define SERVER_PROTOCOL_VERSION 851
 
 /* ### protocol_version end ### */
 
diff --git a/server/Makefile.in b/server/Makefile.in
index 7e571ac2ba6..dabe3fe83a8 100644
--- a/server/Makefile.in
+++ b/server/Makefile.in
@@ -14,6 +14,7 @@ SOURCES = \
 	event.c \
 	fd.c \
 	file.c \
+	msync.c \
 	handle.c \
 	hook.c \
 	mach.c \
diff --git a/server/change.c b/server/change.c
index f42ce066340..7e94edf8eaa 100644
--- a/server/change.c
+++ b/server/change.c
@@ -125,7 +125,8 @@ static const struct object_ops dir_ops =
     no_open_file,             /* open_file */
     no_kernel_obj_list,       /* get_kernel_obj_list */
     dir_close_handle,         /* close_handle */
-    dir_destroy               /* destroy */
+    dir_destroy,              /* destroy */
+    default_fd_get_msync_idx  /* get_msync_idx */
 };
 
 static int dir_get_poll_events( struct fd *fd );
diff --git a/server/console.c b/server/console.c
index b64283baf4a..a9d9687dbf1 100644
--- a/server/console.c
+++ b/server/console.c
@@ -41,6 +41,7 @@
 #include "wincon.h"
 #include "winternl.h"
 #include "wine/condrv.h"
+#include "msync.h"
 
 struct screen_buffer;
 
@@ -139,11 +140,13 @@ struct console_server
     unsigned int          once_input : 1; /* flag if input thread has already been requested */
     int                   term_fd;        /* UNIX terminal fd */
     struct termios        termios;        /* original termios */
+    unsigned int          msync_idx;
 };
 
 static void console_server_dump( struct object *obj, int verbose );
 static void console_server_destroy( struct object *obj );
 static int console_server_signaled( struct object *obj, struct wait_queue_entry *entry );
+static unsigned int console_server_get_msync_idx( struct object *obj, enum msync_type *type );
 static struct fd *console_server_get_fd( struct object *obj );
 static struct object *console_server_lookup_name( struct object *obj, struct unicode_str *name,
                                                 unsigned int attr, struct object *root );
@@ -171,7 +174,8 @@ static const struct object_ops console_server_ops =
     console_server_open_file,         /* open_file */
     no_kernel_obj_list,               /* get_kernel_obj_list */
     no_close_handle,                  /* close_handle */
-    console_server_destroy            /* destroy */
+    console_server_destroy,           /* destroy */
+    console_server_get_msync_idx      /* get_msync_idx */
 };
 
 static void console_server_ioctl( struct fd *fd, ioctl_code_t code, struct async *async );
@@ -590,6 +594,10 @@ static void disconnect_console_server( struct console_server *server )
         list_remove( &call->entry );
         console_host_ioctl_terminate( call, STATUS_CANCELLED );
     }
+
+    if (do_msync())
+        msync_clear_shm( server->msync_idx );
+
     while (!list_empty( &server->read_queue ))
     {
         struct console_host_ioctl *call = LIST_ENTRY( list_head( &server->read_queue ), struct console_host_ioctl, entry );
@@ -872,6 +880,7 @@ static void console_server_destroy( struct object *obj )
     assert( obj->ops == &console_server_ops );
     disconnect_console_server( server );
     if (server->fd) release_object( server->fd );
+    if (do_msync()) msync_destroy_semaphore( server->msync_idx );
 }
 
 static struct object *console_server_lookup_name( struct object *obj, struct unicode_str *name,
@@ -913,6 +922,13 @@ static int console_server_signaled( struct object *obj, struct wait_queue_entry
     return !server->console || !list_empty( &server->queue );
 }
 
+static unsigned int console_server_get_msync_idx( struct object *obj, enum msync_type *type )
+{
+    struct console_server *server = (struct console_server*)obj;
+    *type = MSYNC_MANUAL_SERVER;
+    return server->msync_idx;
+}
+
 static struct fd *console_server_get_fd( struct object* obj )
 {
     struct console_server *server = (struct console_server*)obj;
@@ -945,6 +961,9 @@ static struct object *create_console_server( void )
     }
     allow_fd_caching(server->fd);
 
+    if (do_msync())
+        server->msync_idx = msync_alloc_shm( 0, 0 );
+
     return &server->obj;
 }
 
@@ -1557,6 +1576,10 @@ DECL_HANDLER(get_next_console_request)
         /* set result of previous ioctl */
         ioctl = LIST_ENTRY( list_head( &server->queue ), struct console_host_ioctl, entry );
         list_remove( &ioctl->entry );
+
+        if (do_msync() && list_empty( &server->queue ))
+            msync_clear_shm( server->msync_idx );
+
     }
 
     if (ioctl)
@@ -1643,5 +1666,9 @@ DECL_HANDLER(get_next_console_request)
         set_error( STATUS_PENDING );
     }
 
+    if (do_msync() && list_empty( &server->queue ))
+        msync_clear_shm( server->msync_idx );
+
     release_object( server );
 }
diff --git a/server/device.c b/server/device.c
index 436dac6bfe9..8274f3f0f83 100644
--- a/server/device.c
+++ b/server/device.c
@@ -38,6 +38,7 @@
 #include "handle.h"
 #include "request.h"
 #include "process.h"
+#include "msync.h"
 
 /* IRP object */
 
@@ -92,10 +93,12 @@ struct device_manager
     struct list            requests;       /* list of pending irps across all devices */
     struct irp_call       *current_call;   /* call currently executed on client side */
     struct wine_rb_tree    kernel_objects; /* map of objects that have client side pointer associated */
+    unsigned int           msync_idx;
 };
 
 static void device_manager_dump( struct object *obj, int verbose );
 static int device_manager_signaled( struct object *obj, struct wait_queue_entry *entry );
+static unsigned int device_manager_get_msync_idx( struct object *obj, enum msync_type *type );
 static void device_manager_destroy( struct object *obj );
 
 static const struct object_ops device_manager_ops =
@@ -119,7 +122,8 @@ static const struct object_ops device_manager_ops =
     no_open_file,                     /* open_file */
     no_kernel_obj_list,               /* get_kernel_obj_list */
     no_close_handle,                  /* close_handle */
-    device_manager_destroy            /* destroy */
+    device_manager_destroy,           /* destroy */
+    device_manager_get_msync_idx      /* get_msync_idx */
 };
 
 
@@ -747,6 +751,9 @@ static void delete_file( struct device_file *file )
     /* terminate all pending requests */
     LIST_FOR_EACH_ENTRY_SAFE( irp, next, &file->requests, struct irp_call, dev_entry )
     {
+        if (do_msync() && file->device->manager && list_empty( &file->device->manager->requests ))
+            msync_clear( &file->device->manager->obj );
+
         list_remove( &irp->mgr_entry );
         set_irp_result( irp, STATUS_FILE_DELETED, NULL, 0, 0 );
     }
@@ -782,6 +789,13 @@ static int device_manager_signaled( struct object *obj, struct wait_queue_entry
     return !list_empty( &manager->requests );
 }
 
+static unsigned int device_manager_get_msync_idx( struct object *obj, enum msync_type *type )
+{
+    struct device_manager *manager = (struct device_manager *)obj;
+    *type = MSYNC_MANUAL_SERVER;
+    return manager->msync_idx;
+}
+
 static void device_manager_destroy( struct object *obj )
 {
     struct device_manager *manager = (struct device_manager *)obj;
@@ -816,6 +830,9 @@ static void device_manager_destroy( struct object *obj )
         assert( !irp->file && !irp->async );
         release_object( irp );
     }
+
+    if (do_msync())
+        msync_destroy_semaphore( manager->msync_idx );
 }
 
 static struct device_manager *create_device_manager(void)
@@ -828,6 +845,9 @@ static struct device_manager *create_device_manager(void)
         list_init( &manager->devices );
         list_init( &manager->requests );
         wine_rb_init( &manager->kernel_objects, compare_kernel_object );
+
+        if (do_msync())
+            manager->msync_idx = msync_alloc_shm( 0, 0 );
     }
     return manager;
 }
@@ -1017,6 +1037,9 @@ DECL_HANDLER(get_next_device_request)
                 /* we already own the object if it's only on manager queue */
                 if (irp->file) grab_object( irp );
                 manager->current_call = irp;
+
+                if (do_msync() && list_empty( &manager->requests ))
+                    msync_clear( &manager->obj );
             }
             else close_handle( current->process, reply->next );
         }
diff --git a/server/event.c b/server/event.c
index f1b79b1b35e..8b941db7179 100644
--- a/server/event.c
+++ b/server/event.c
@@ -35,6 +35,7 @@
 #include "thread.h"
 #include "request.h"
 #include "security.h"
+#include "msync.h"
 
 static const WCHAR event_name[] = {'E','v','e','n','t'};
 
@@ -56,13 +57,16 @@ struct event
     struct list    kernel_object;   /* list of kernel object pointers */
     int            manual_reset;    /* is it a manual reset event? */
     int            signaled;        /* event has been signaled */
+    unsigned int   msync_idx;
 };
 
 static void event_dump( struct object *obj, int verbose );
 static int event_signaled( struct object *obj, struct wait_queue_entry *entry );
 static void event_satisfied( struct object *obj, struct wait_queue_entry *entry );
+static unsigned int event_get_msync_idx( struct object *obj, enum msync_type *type );
 static int event_signal( struct object *obj, unsigned int access);
 static struct list *event_get_kernel_obj_list( struct object *obj );
+static void event_destroy( struct object *obj );
 
 static const struct object_ops event_ops =
 {
@@ -85,7 +89,8 @@ static const struct object_ops event_ops =
     no_open_file,              /* open_file */
     event_get_kernel_obj_list, /* get_kernel_obj_list */
     no_close_handle,           /* close_handle */
-    no_destroy                 /* destroy */
+    event_destroy,             /* destroy */
+    event_get_msync_idx        /* get_msync_idx */
 };
 
 
@@ -150,6 +155,9 @@ struct event *create_event( struct object *root, const struct unicode_str *name,
             list_init( &event->kernel_object );
             event->manual_reset = manual_reset;
             event->signaled     = initial_state;
+
+            if (do_msync())
+                event->msync_idx = msync_alloc_shm( initial_state, 0 );
         }
     }
     return event;
@@ -157,6 +165,10 @@ struct event *create_event( struct object *root, const struct unicode_str *name,
 
 struct event *get_event_obj( struct process *process, obj_handle_t handle, unsigned int access )
 {
+    struct object *obj;
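+    /* An msync object may stand in for the event here; set_event() and
+     * reset_event() below check obj.ops == &msync_ops before touching any
+     * event fields. */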
+    if (do_msync() && (obj = get_handle_obj( process, handle, access, &msync_ops)))
+        return (struct event *)obj; /* even though it's not an event */
+
     return (struct event *)get_handle_obj( process, handle, access, &event_ops );
 }
 
@@ -166,10 +178,19 @@ static void pulse_event( struct event *event )
     /* wake up all waiters if manual reset, a single one otherwise */
     wake_up( &event->obj, !event->manual_reset );
     event->signaled = 0;
+
+    if (do_msync())
+        msync_clear( &event->obj );
 }
 
 void set_event( struct event *event )
 {
+    if (do_msync() && event->obj.ops == &msync_ops)
+    {
+        msync_set_event( (struct msync *)event );
+        return;
+    }
+
     event->signaled = 1;
     /* wake up all waiters if manual reset, a single one otherwise */
     wake_up( &event->obj, !event->manual_reset );
@@ -177,7 +198,16 @@ void set_event( struct event *event )
 
 void reset_event( struct event *event )
 {
+    if (do_msync() && event->obj.ops == &msync_ops)
+    {
+        msync_reset_event( (struct msync *)event );
+        return;
+    }
+
     event->signaled = 0;
+
+    if (do_msync())
+        msync_clear( &event->obj );
 }
 
 static void event_dump( struct object *obj, int verbose )
@@ -195,6 +225,13 @@ static int event_signaled( struct object *obj, struct wait_queue_entry *entry )
     return event->signaled;
 }
 
+static unsigned int event_get_msync_idx( struct object *obj, enum msync_type *type )
+{
+    struct event *event = (struct event *)obj;
+    *type = MSYNC_MANUAL_SERVER;
+    return event->msync_idx;
+}
+
 static void event_satisfied( struct object *obj, struct wait_queue_entry *entry )
 {
     struct event *event = (struct event *)obj;
@@ -223,6 +260,14 @@ static struct list *event_get_kernel_obj_list( struct object *obj )
     return &event->kernel_object;
 }
 
+static void event_destroy( struct object *obj )
+{
+    struct event *event = (struct event *)obj;
+
+    if (do_msync())
+        msync_destroy_semaphore( event->msync_idx );
+}
+
 struct keyed_event *create_keyed_event( struct object *root, const struct unicode_str *name,
                                         unsigned int attr, const struct security_descriptor *sd )
 {
diff --git a/server/fd.c b/server/fd.c
index 04688c5eb0d..83a042137f0 100644
--- a/server/fd.c
+++ b/server/fd.c
@@ -94,6 +94,7 @@
 #include "handle.h"
 #include "process.h"
 #include "request.h"
+#include "msync.h"
 
 #include "winternl.h"
 #include "winioctl.h"
@@ -154,6 +155,7 @@ struct fd
     struct completion   *completion;  /* completion object attached to this fd */
     apc_param_t          comp_key;    /* completion key to set in completion events */
     unsigned int         comp_flags;  /* completion flags */
+    unsigned int         msync_idx;   /* msync shm index */
 };
 
 static void fd_dump( struct object *obj, int verbose );
@@ -1563,6 +1565,9 @@ static void fd_destroy( struct object *obj )
         if (fd->unix_fd != -1) close( fd->unix_fd );
         free( fd->unix_name );
     }
+
+    if (do_msync())
+        msync_destroy_semaphore( fd->msync_idx );
 }
 
 /* check if the desired access is possible without violating */
@@ -1681,12 +1686,16 @@ static struct fd *alloc_fd_object(void)
     fd->poll_index = -1;
     fd->completion = NULL;
     fd->comp_flags = 0;
+    fd->msync_idx  = 0;
     init_async_queue( &fd->read_q );
     init_async_queue( &fd->write_q );
     init_async_queue( &fd->wait_q );
     list_init( &fd->inode_entry );
     list_init( &fd->locks );
 
+    if (do_msync())
+        fd->msync_idx = msync_alloc_shm( 1, 0 );
+
     if ((fd->poll_index = add_poll_user( fd )) == -1)
     {
         release_object( fd );
@@ -1722,11 +1731,15 @@ struct fd *alloc_pseudo_fd( const struct fd_ops *fd_user_ops, struct object *use
     fd->completion = NULL;
     fd->comp_flags = 0;
     fd->no_fd_status = STATUS_BAD_DEVICE_TYPE;
+    fd->msync_idx  = 0;
     init_async_queue( &fd->read_q );
     init_async_queue( &fd->write_q );
     init_async_queue( &fd->wait_q );
     list_init( &fd->inode_entry );
     list_init( &fd->locks );
+
+    if (do_msync())
+        fd->msync_idx = msync_alloc_shm( 0, 0 );
     return fd;
 }
 
@@ -2131,6 +2144,9 @@ void set_fd_signaled( struct fd *fd, int signaled )
     if (fd->comp_flags & FILE_SKIP_SET_EVENT_ON_HANDLE) return;
     fd->signaled = signaled;
     if (signaled) wake_up( fd->user, 0 );
+
+    if (do_msync() && !signaled)
+        msync_clear( fd->user );
 }
 
 /* check if events are pending and if yes return which one(s) */
@@ -2156,6 +2172,15 @@ int default_fd_signaled( struct object *obj, struct wait_queue_entry *entry )
     return ret;
 }
 
+unsigned int default_fd_get_msync_idx( struct object *obj, enum msync_type *type )
+{
+    struct fd *fd = get_obj_fd( obj );
+    unsigned int ret = fd->msync_idx;
+    *type = MSYNC_MANUAL_SERVER;
+    release_object( fd );
+    return ret;
+}
+
 int default_fd_get_poll_events( struct fd *fd )
 {
     int events = 0;
diff --git a/server/file.h b/server/file.h
index 3d7cdc460ff..db54555a883 100644
--- a/server/file.h
+++ b/server/file.h
@@ -108,6 +108,7 @@ extern char *dup_fd_name( struct fd *root, const char *name ) __WINE_DEALLOC(fre
 extern void get_nt_name( struct fd *fd, struct unicode_str *name );
 
 extern int default_fd_signaled( struct object *obj, struct wait_queue_entry *entry );
+extern unsigned int default_fd_get_msync_idx( struct object *obj, enum msync_type *type );
 extern int default_fd_get_poll_events( struct fd *fd );
 extern void default_poll_event( struct fd *fd, int event );
 extern void fd_cancel_async( struct fd *fd, struct async *async );
diff --git a/server/main.c b/server/main.c
index 1248b92f24d..f5dcebe8d87 100644
--- a/server/main.c
+++ b/server/main.c
@@ -34,6 +34,7 @@
 #include "thread.h"
 #include "request.h"
 #include "unicode.h"
+#include "msync.h"
 
 /* command-line options */
 int debug_level = 0;
@@ -229,6 +230,12 @@ int main( int argc, char *argv[] )
     sock_init();
     open_master_socket();
 
+    if (do_msync())
+        msync_init();
+
+    if (!do_msync() && debug_level)
+        fprintf( stderr, "wineserver: using server-side synchronization.\n" );
+
     if (debug_level) fprintf( stderr, "wineserver: starting (pid=%ld)\n", (long) getpid() );
     set_current_time();
     init_signals();
diff --git a/server/msync.c b/server/msync.c
new file mode 100644
index 00000000000..8715d38634f
--- /dev/null
+++ b/server/msync.c
@@ -0,0 +1,991 @@
+/*
+ * mach semaphore-based synchronization objects
+ *
+ * Copyright (C) 2018 Zebediah Figura
+ * Copyright (C) 2023 Marc-Aurel Zent
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#include "config.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <sys/mman.h>
+#ifdef HAVE_SYS_STAT_H
+# include <sys/stat.h>
+#endif
+#ifdef __APPLE__
+# include <mach/mach_init.h>
+# include <mach/mach_port.h>
+# include <mach/message.h>
+# include <mach/port.h>
+# include <mach/task.h>
+# include <mach/semaphore.h>
+# include <mach/mach_error.h>
+# include <mach/thread_act.h>
+# include <servers/bootstrap.h>
+#endif
+#include <sched.h>
+#include <dlfcn.h>
+#include <signal.h>
+#include <pthread.h>
+#include <unistd.h>
+
+#include "ntstatus.h"
+#define WIN32_NO_STATUS
+#include "windef.h"
+#include "winternl.h"
+
+#include "handle.h"
+#include "request.h"
+#include "msync.h"
+
+/*
+ * We need to set the maximum allowed shared memory size early, since on
+ * XNU a shared memory object can only be ftruncate()d once.
+ * This isn't a problem in practice, since the memory is only faulted in lazily.
+ */
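+/* One 16-byte slot per index (see shm_size and get_shm() below), i.e. 16 MiB in total. */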
+#define MAX_INDEX 0x100000
+
+#ifdef __APPLE__
+
+#define UL_COMPARE_AND_WAIT_SHARED  0x3
+#define ULF_WAKE_ALL                0x00000100
+extern int __ulock_wake( uint32_t operation, void *addr, uint64_t wake_value );
+
+
+#define MACH_CHECK_ERROR(ret, operation) \
+    if (ret != KERN_SUCCESS) \
+        fprintf(stderr, "msync: error: %s failed with %d: %s\n", \
+            operation, ret, mach_error_string(ret));
+
+/* Private API to register a mach port with the bootstrap server */
+extern kern_return_t bootstrap_register2( mach_port_t bp, name_t service_name, mach_port_t sp, int flags );
+
+/*
+ * Faster to directly do the syscall and inline everything, taken and slightly adapted
+ * from xnu/libsyscall/mach/mach_msg.c
+ */
+
+#define LIBMACH_OPTIONS64 (MACH_SEND_INTERRUPT|MACH_RCV_INTERRUPT)
+#define MACH64_SEND_MQ_CALL 0x0000000400000000ull
+
+typedef mach_msg_return_t (*mach_msg2_trap_ptr_t)( void *data, uint64_t options,
+    uint64_t msgh_bits_and_send_size, uint64_t msgh_remote_and_local_port,
+    uint64_t msgh_voucher_and_id, uint64_t desc_count_and_rcv_name,
+    uint64_t rcv_size_and_priority, uint64_t timeout );
+
+static mach_msg2_trap_ptr_t mach_msg2_trap;
+
+static inline mach_msg_return_t mach_msg2_internal( void *data, uint64_t option64, uint64_t msgh_bits_and_send_size,
+    uint64_t msgh_remote_and_local_port, uint64_t msgh_voucher_and_id, uint64_t desc_count_and_rcv_name,
+    uint64_t rcv_size_and_priority, uint64_t timeout)
+{
+    mach_msg_return_t mr;
+
+    mr = mach_msg2_trap( data, option64 & ~LIBMACH_OPTIONS64, msgh_bits_and_send_size,
+             msgh_remote_and_local_port, msgh_voucher_and_id, desc_count_and_rcv_name,
+             rcv_size_and_priority, timeout );
+
+    if (mr == MACH_MSG_SUCCESS)
+        return MACH_MSG_SUCCESS;
+
+    while (mr == MACH_SEND_INTERRUPTED)
+        mr = mach_msg2_trap( data, option64 & ~LIBMACH_OPTIONS64, msgh_bits_and_send_size,
+                 msgh_remote_and_local_port, msgh_voucher_and_id, desc_count_and_rcv_name,
+                 rcv_size_and_priority, timeout );
+
+    while (mr == MACH_RCV_INTERRUPTED)
+        mr = mach_msg2_trap( data, option64 & ~LIBMACH_OPTIONS64, msgh_bits_and_send_size & 0xffffffffull,
+                 msgh_remote_and_local_port, msgh_voucher_and_id, desc_count_and_rcv_name,
+                 rcv_size_and_priority, timeout);
+
+    return mr;
+}
+
+/* For older versions of macOS we need to provide a fallback in case mach_msg2 is not available. */
+extern mach_msg_return_t mach_msg_trap( mach_msg_header_t *msg, mach_msg_option_t option,
+        mach_msg_size_t send_size, mach_msg_size_t rcv_size, mach_port_name_t rcv_name, mach_msg_timeout_t timeout,
+        mach_port_name_t notify );
+
+static inline mach_msg_return_t mach_msg2( mach_msg_header_t *data, uint64_t option64,
+    mach_msg_size_t send_size, mach_msg_size_t rcv_size, mach_port_t rcv_name, uint64_t timeout,
+    uint32_t priority)
+{
+    mach_msg_base_t *base;
+    mach_msg_size_t descriptors;
+
+    if (!mach_msg2_trap)
+        return mach_msg_trap( data, (mach_msg_option_t)option64, send_size,
+                              rcv_size, rcv_name, timeout, priority );
+
+    base = (mach_msg_base_t *)data;
+
+    if ((option64 & MACH_SEND_MSG) &&
+        (base->header.msgh_bits & MACH_MSGH_BITS_COMPLEX))
+        descriptors = base->body.msgh_descriptor_count;
+    else
+        descriptors = 0;
+
+#define MACH_MSG2_SHIFT_ARGS(lo, hi) ((uint64_t)hi << 32 | (uint32_t)lo)
+    return mach_msg2_internal(data, option64 | MACH64_SEND_MQ_CALL,
+               MACH_MSG2_SHIFT_ARGS(data->msgh_bits, send_size),
+               MACH_MSG2_SHIFT_ARGS(data->msgh_remote_port, data->msgh_local_port),
+               MACH_MSG2_SHIFT_ARGS(data->msgh_voucher_port, data->msgh_id),
+               MACH_MSG2_SHIFT_ARGS(descriptors, rcv_name),
+               MACH_MSG2_SHIFT_ARGS(rcv_size, priority), timeout);
+#undef MACH_MSG2_SHIFT_ARGS
+}
+
+static mach_port_name_t receive_port;
+
+struct sem_node
+{
+    struct sem_node *next;
+    semaphore_t sem;
+    int tid;
+};
+
+#define MAX_POOL_NODES 0x80000
+
+struct node_memory_pool
+{
+    struct sem_node *nodes;
+    struct sem_node **free_nodes;
+    unsigned int count;
+};
+
+static struct node_memory_pool *pool;
+
+static void pool_init(void)
+{
+    unsigned int i;
+    pool = malloc( sizeof(struct node_memory_pool) );
+    pool->nodes = malloc( MAX_POOL_NODES * sizeof(struct sem_node) );
+    pool->free_nodes = malloc( MAX_POOL_NODES * sizeof(struct sem_node *) );
+    pool->count = MAX_POOL_NODES;
+
+    for (i = 0; i < MAX_POOL_NODES; i++)
+        pool->free_nodes[i] = &pool->nodes[i];
+}
+
+static inline struct sem_node *pool_alloc(void)
+{
+    if (pool->count == 0)
+    {
+        fprintf( stderr, "msync: warn: node memory pool exhausted\n" );
+        return malloc( sizeof(struct sem_node) );
+    }
+    return pool->free_nodes[--pool->count];
+}
+
+static inline void pool_free( struct sem_node *node )
+{
+    if (node < pool->nodes || node >= pool->nodes + MAX_POOL_NODES)
+    {
+        free(node);
+        return;
+    }
+    pool->free_nodes[pool->count++] = node;
+}
+
+struct sem_list
+{
+    struct sem_node *head;
+};
+
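+/* One list of registered waiter semaphores per shm index; with 8-byte list
+ * heads this static table is 8 MiB. */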
+static struct sem_list mach_semaphore_map[MAX_INDEX];
+
+static inline void add_sem( unsigned int shm_idx, semaphore_t sem, int tid )
+{
+    struct sem_node *new_node;
+    struct sem_list *list = mach_semaphore_map + shm_idx;
+
+    new_node = pool_alloc();
+    new_node->sem = sem;
+    new_node->tid = tid;
+
+    new_node->next = list->head;
+    list->head = new_node;
+}
+
+static inline void remove_sem( unsigned int shm_idx, int tid )
+{
+    struct sem_node *current, *prev = NULL;
+    struct sem_list *list = mach_semaphore_map + shm_idx;
+
+    current = list->head;
+    while (current != NULL)
+    {
+        if (current->tid == tid)
+        {
+            if (prev == NULL)
+                list->head = current->next;
+            else
+                prev->next = current->next;
+            pool_free(current);
+            break;
+        }
+        prev = current;
+        current = current->next;
+    }
+}
+
+static void *get_shm( unsigned int idx );
+
+static inline void destroy_all_internal( unsigned int shm_idx )
+{
+    struct sem_node *current, *temp;
+    struct sem_list *list = mach_semaphore_map + shm_idx;
+    int *shm = get_shm( shm_idx );
+
+    __atomic_store_n( shm + 2, 0, __ATOMIC_SEQ_CST );
+    __atomic_store_n( shm + 3, 0, __ATOMIC_SEQ_CST );
+    __ulock_wake( UL_COMPARE_AND_WAIT_SHARED | ULF_WAKE_ALL, (void *)shm, 0 );
+    current = list->head;
+    list->head = NULL;
+
+    while (current)
+    {
+        semaphore_destroy( mach_task_self(), current->sem );
+        temp = current;
+        current = current->next;
+        pool_free(temp);
+    }
+}
+
+static inline void signal_all_internal( unsigned int shm_idx )
+{
+    struct sem_node *current, *temp;
+    struct sem_list *list = mach_semaphore_map + shm_idx;
+
+    current = list->head;
+    list->head = NULL;
+
+    while (current)
+    {
+        semaphore_signal( current->sem );
+        semaphore_destroy( mach_task_self(), current->sem );
+        temp = current;
+        current = current->next;
+        pool_free(temp);
+    }
+}
+
+/*
+ * Thread-safety and sequentially consistent ordering relative to the
+ * client-side register/unregister operations are guaranteed by the mach
+ * message queue.
+ */
+static inline mach_msg_return_t destroy_all( unsigned int shm_idx )
+{
+    static mach_msg_header_t send_header;
+    send_header.msgh_bits = MACH_MSGH_BITS_REMOTE(MACH_MSG_TYPE_COPY_SEND);
+    send_header.msgh_id = shm_idx | (1 << 28);
+    send_header.msgh_size = sizeof(send_header);
+    send_header.msgh_remote_port = receive_port;
+
+    return mach_msg2( &send_header, MACH_SEND_MSG, send_header.msgh_size,
+                0, MACH_PORT_NULL, MACH_MSG_TIMEOUT_NONE, 0);
+}
+
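+/* __ulock_wake covers threads parked directly on the shm word (presumably the
+ * client-side fast path); the message pump is only messaged when shm[3]
+ * indicates a mach semaphore may be registered for this index. */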
+static inline mach_msg_return_t signal_all( unsigned int shm_idx, int *shm )
+{
+    static mach_msg_header_t send_header;
+
+    __ulock_wake( UL_COMPARE_AND_WAIT_SHARED | ULF_WAKE_ALL, (void *)shm, 0 );
+    if (!__atomic_load_n( shm + 3, __ATOMIC_ACQUIRE ))
+        return MACH_MSG_SUCCESS;
+
+    send_header.msgh_bits = MACH_MSGH_BITS_REMOTE(MACH_MSG_TYPE_COPY_SEND);
+    send_header.msgh_id = shm_idx;
+    send_header.msgh_size = sizeof(send_header);
+    send_header.msgh_remote_port = receive_port;
+
+    return mach_msg2( &send_header, MACH_SEND_MSG, send_header.msgh_size,
+                0, MACH_PORT_NULL, MACH_MSG_TIMEOUT_NONE, 0 );
+}
+
+typedef struct
+{
+    mach_msg_header_t header;
+    mach_msg_body_t body;
+    mach_msg_port_descriptor_t descriptor;
+    unsigned int shm_idx[MAXIMUM_WAIT_OBJECTS + 1];
+    mach_msg_trailer_t trailer;
+} mach_register_message_t;
+
+typedef struct
+{
+    mach_msg_header_t header;
+    unsigned int shm_idx[MAXIMUM_WAIT_OBJECTS + 1];
+    mach_msg_trailer_t trailer;
+} mach_unregister_message_t;
+
+static inline mach_msg_return_t receive_mach_msg( mach_register_message_t *buffer )
+{
+    return mach_msg2( (mach_msg_header_t *)buffer, MACH_RCV_MSG, 0,
+            sizeof(*buffer), receive_port, MACH_MSG_TIMEOUT_NONE, 0 );
+}
+
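+/* For register/unregister messages the msgh_id packs the sender's tid in the
+ * upper bits and the number of shm indices in the low 8 bits. */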
+static inline void decode_msgh_id( unsigned int msgh_id, unsigned int *tid, unsigned int *count )
+{
+    *tid = msgh_id >> 8;
+    *count = msgh_id & 0xFF;
+}
+
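+/* Bit 28 (the 29th bit) is an out-of-band flag: it selects destroy_all on
+ * bodiless messages and marks mutexes in registered shm indices; strip it
+ * before using the value as an index. */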
+static inline unsigned int check_bit_29( unsigned int *shm_idx )
+{
+    unsigned int bit_29 = (*shm_idx >> 28) & 1;
+    *shm_idx &= ~(1 << 28);
+    return bit_29;
+}
+
+static void *mach_message_pump( void *args )
+{
+    int i, val;
+    unsigned int tid, count, is_mutex;
+    int *addr;
+    mach_msg_return_t mr;
+    semaphore_t sem;
+    mach_register_message_t receive_message = { 0 };
+    mach_unregister_message_t *mach_unregister_message;
+    sigset_t set;
+
+    sigfillset( &set );
+    pthread_sigmask( SIG_BLOCK, &set, NULL );
+
+    for (;;)
+    {
+        mr = receive_mach_msg( &receive_message );
+        if (mr != MACH_MSG_SUCCESS)
+        {
+            fprintf( stderr, "msync: failed to receive message\n");
+            continue;
+        }
+
+        /*
+         * A message with no body is a signal_all or destroy_all operation: the
+         * shm_idx is carried in msgh_id and the type of operation is encoded in
+         * bit 28 (the shared memory index itself fits in at most 28 bits).
+         * See signal_all() and destroy_all() above.
+         */
+        if (receive_message.header.msgh_size == sizeof(mach_msg_header_t))
+        {
+            if (check_bit_29( (unsigned int *)&receive_message.header.msgh_id ))
+                destroy_all_internal( receive_message.header.msgh_id );
+            else
+                signal_all_internal( receive_message.header.msgh_id );
+            continue;
+        }
+
+        /*
+         * A message with a body which is not complex means this is a
+         * server_remove_wait operation
+         */
+        decode_msgh_id( receive_message.header.msgh_id, &tid, &count );
+        if (!MACH_MSGH_BITS_IS_COMPLEX(receive_message.header.msgh_bits))
+        {
+            mach_unregister_message = (mach_unregister_message_t *)&receive_message;
+            for (i = 0; i < count; i++)
+                remove_sem( mach_unregister_message->shm_idx[i], tid );
+
+            continue;
+        }
+
+        /*
+         * Finally server_register_wait
+         */
+        sem = receive_message.descriptor.name;
+        for (i = 0; i < count; i++)
+        {
+            is_mutex = check_bit_29( receive_message.shm_idx + i );
+            addr = (int *)get_shm( receive_message.shm_idx[i] );
+            val = __atomic_load_n( addr, __ATOMIC_SEQ_CST );
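+            /* Re-check the object now that we are serialized by the message
+             * queue: don't park the semaphore if the wait is already satisfied
+             * (object signaled, or mutex free, abandoned (~0) or already owned
+             * by this tid), or if the slot has been freed in the meantime. */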
+            if ((is_mutex && (val == 0 || val == ~0 || val == tid)) || (!is_mutex && val != 0)
+                || !__atomic_load_n( addr + 2, __ATOMIC_SEQ_CST ))
+            {
+                /* The client had a TOCTTOU we need to fix */
+                semaphore_signal( sem );
+                semaphore_destroy( mach_task_self(), sem );
+                continue;
+            }
+            add_sem( receive_message.shm_idx[i], sem, tid );
+        }
+    }
+
+    return NULL;
+}
+
+#endif
+
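+/* msync is enabled by setting WINEMSYNC to a non-zero integer in the
+ * wineserver's environment; the result is checked once and cached. */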
+int do_msync(void)
+{
+#ifdef __APPLE__
+    static int do_msync_cached = -1;
+
+    if (do_msync_cached == -1)
+    {
+        do_msync_cached = getenv("WINEMSYNC") && atoi(getenv("WINEMSYNC"));
+    }
+
+    return do_msync_cached;
+#else
+    return 0;
+#endif
+}
+
+static char shm_name[29];
+static int shm_fd;
+static const off_t shm_size = MAX_INDEX * 16;
+static void **shm_addrs;
+static int shm_addrs_size;  /* length of the allocated shm_addrs array */
+static long pagesize;
+static pthread_t message_thread;
+
+static int is_msync_initialized;
+
+static void cleanup(void)
+{
+    close( shm_fd );
+    if (shm_unlink( shm_name ) == -1)
+        perror( "shm_unlink" );
+}
+
+static void set_thread_policy_qos( mach_port_t mach_thread_id )
+{
+    thread_extended_policy_data_t extended_policy;
+    thread_precedence_policy_data_t precedence_policy;
+    int throughput_qos, latency_qos;
+    kern_return_t kr;
+
+    latency_qos = LATENCY_QOS_TIER_0;
+    kr = thread_policy_set( mach_thread_id, THREAD_LATENCY_QOS_POLICY,
+                            (thread_policy_t)&latency_qos,
+                            THREAD_LATENCY_QOS_POLICY_COUNT);
+    if (kr != KERN_SUCCESS)
+        fprintf( stderr, "msync: error setting thread latency QoS.\n" );
+
+    throughput_qos = THROUGHPUT_QOS_TIER_0;
+    kr = thread_policy_set( mach_thread_id, THREAD_THROUGHPUT_QOS_POLICY,
+                            (thread_policy_t)&throughput_qos,
+                            THREAD_THROUGHPUT_QOS_POLICY_COUNT);
+    if (kr != KERN_SUCCESS)
+        fprintf( stderr, "msync: error setting thread throughput QoS.\n" );
+
+    extended_policy.timeshare = 0;
+    kr = thread_policy_set( mach_thread_id, THREAD_EXTENDED_POLICY,
+                            (thread_policy_t)&extended_policy,
+                            THREAD_EXTENDED_POLICY_COUNT );
+    if (kr != KERN_SUCCESS)
+        fprintf( stderr, "msync: error setting extended policy\n" );
+
+    precedence_policy.importance = 63;
+    kr = thread_policy_set( mach_thread_id, THREAD_PRECEDENCE_POLICY,
+                            (thread_policy_t)&precedence_policy,
+                            THREAD_PRECEDENCE_POLICY_COUNT );
+    if (kr != KERN_SUCCESS)
+        fprintf( stderr, "msync: error setting precedence policy\n" );
+}
+
+void msync_init(void)
+{
+#ifdef __APPLE__
+    struct stat st;
+    mach_port_t bootstrap_port;
+    mach_port_limits_t limits;
+    void *dlhandle = dlopen( NULL, RTLD_NOW );
+    int *shm;
+
+    if (fstat( config_dir_fd, &st ) == -1)
+        fatal_error( "cannot stat config dir\n" );
+
+    if (st.st_ino != (unsigned long)st.st_ino)
+        sprintf( shm_name, "/wine-%lx%08lx-msync", (unsigned long)((unsigned long long)st.st_ino >> 32), (unsigned long)st.st_ino );
+    else
+        sprintf( shm_name, "/wine-%lx-msync", (unsigned long)st.st_ino );
+
+    if (!shm_unlink( shm_name ))
+        fprintf( stderr, "msync: warning: a previous shm file %s was not properly removed\n", shm_name );
+
+    shm_fd = shm_open( shm_name, O_RDWR | O_CREAT | O_EXCL, 0644 );
+    if (shm_fd == -1)
+        perror( "shm_open" );
+
+    pagesize = sysconf( _SC_PAGESIZE );
+
+    shm_addrs = calloc( 128, sizeof(shm_addrs[0]) );
+    shm_addrs_size = 128;
+
+    if (ftruncate( shm_fd, shm_size ) == -1)
+    {
+        perror( "ftruncate" );
+        fatal_error( "could not initialize shared memory\n" );
+    }
+
+    shm = get_shm( 0 );
+    __atomic_store_n( shm + 2, 1, __ATOMIC_SEQ_CST );
+
+    /* Bootstrap mach server message pump */
+
+    mach_msg2_trap = (mach_msg2_trap_ptr_t)dlsym( dlhandle, "mach_msg2_trap" );
+    if (!mach_msg2_trap)
+        fprintf( stderr, "msync: warning: using mach_msg_overwrite instead of mach_msg2\n");
+    dlclose( dlhandle );
+
+    MACH_CHECK_ERROR(mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &receive_port), "mach_port_allocate");
+
+    MACH_CHECK_ERROR(mach_port_insert_right(mach_task_self(), receive_port, receive_port, MACH_MSG_TYPE_MAKE_SEND), "mach_port_insert_right");
+
+    limits.mpl_qlimit = 50;
+
+    if (getenv("WINEMSYNC_QLIMIT"))
+        limits.mpl_qlimit = atoi(getenv("WINEMSYNC_QLIMIT"));
+
+    MACH_CHECK_ERROR(mach_port_set_attributes( mach_task_self(), receive_port, MACH_PORT_LIMITS_INFO,
+                                        (mach_port_info_t)&limits, MACH_PORT_LIMITS_INFO_COUNT), "mach_port_set_attributes");
+
+    MACH_CHECK_ERROR(task_get_special_port(mach_task_self(), TASK_BOOTSTRAP_PORT, &bootstrap_port), "task_get_special_port");
+
+    MACH_CHECK_ERROR(bootstrap_register2(bootstrap_port, shm_name + 1, receive_port, 0), "bootstrap_register2");
+
+    pool_init();
+
+    if (pthread_create( &message_thread, NULL, mach_message_pump, NULL ))
+    {
+        perror("pthread_create");
+        fatal_error( "could not create mach message pump thread\n" );
+    }
+
+    set_thread_policy_qos( pthread_mach_thread_np( message_thread ) );
+
+    fprintf( stderr, "msync: bootstrapped mach port on %s.\n", shm_name + 1 );
+
+    is_msync_initialized = 1;
+
+    fprintf( stderr, "msync: up and running.\n" );
+
+    atexit( cleanup );
+#endif
+}
+
+static struct list mutex_list = LIST_INIT(mutex_list);
+
+struct msync
+{
+    struct object  obj;
+    unsigned int   shm_idx;
+    enum msync_type type;
+    struct list     mutex_entry;
+};
+
+static void msync_dump( struct object *obj, int verbose );
+static unsigned int msync_get_msync_idx( struct object *obj, enum msync_type *type );
+static unsigned int msync_map_access( struct object *obj, unsigned int access );
+static void msync_destroy( struct object *obj );
+
+const struct object_ops msync_ops =
+{
+    sizeof(struct msync),      /* size */
+    &no_type,                  /* type */
+    msync_dump,                /* dump */
+    no_add_queue,              /* add_queue */
+    NULL,                      /* remove_queue */
+    NULL,                      /* signaled */
+    NULL,                      /* satisfied */
+    no_signal,                 /* signal */
+    no_get_fd,                 /* get_fd */
+    msync_map_access,          /* map_access */
+    default_get_sd,            /* get_sd */
+    default_set_sd,            /* set_sd */
+    no_get_full_name,          /* get_full_name */
+    no_lookup_name,            /* lookup_name */
+    directory_link_name,       /* link_name */
+    default_unlink_name,       /* unlink_name */
+    no_open_file,              /* open_file */
+    no_kernel_obj_list,        /* get_kernel_obj_list */
+    no_close_handle,           /* close_handle */
+    msync_destroy,             /* destroy */
+    msync_get_msync_idx        /* get_msync_idx */
+};
+
+static void msync_dump( struct object *obj, int verbose )
+{
+    struct msync *msync = (struct msync *)obj;
+    assert( obj->ops == &msync_ops );
+    fprintf( stderr, "msync idx=%d\n", msync->shm_idx );
+}
+
+static unsigned int msync_get_msync_idx( struct object *obj, enum msync_type *type)
+{
+    struct msync *msync = (struct msync *)obj;
+    *type = msync->type;
+    return msync->shm_idx;
+}
+
+static unsigned int msync_map_access( struct object *obj, unsigned int access )
+{
+    /* Sync objects have the same flags. */
+    if (access & GENERIC_READ)    access |= STANDARD_RIGHTS_READ | EVENT_QUERY_STATE;
+    if (access & GENERIC_WRITE)   access |= STANDARD_RIGHTS_WRITE | EVENT_MODIFY_STATE;
+    if (access & GENERIC_EXECUTE) access |= STANDARD_RIGHTS_EXECUTE | SYNCHRONIZE;
+    if (access & GENERIC_ALL)     access |= STANDARD_RIGHTS_ALL | EVENT_QUERY_STATE | EVENT_MODIFY_STATE;
+    return access & ~(GENERIC_READ | GENERIC_WRITE | GENERIC_EXECUTE | GENERIC_ALL);
+}
+
+static void msync_destroy( struct object *obj )
+{
+    struct msync *msync = (struct msync *)obj;
+    if (msync->type == MSYNC_MUTEX)
+        list_remove( &msync->mutex_entry );
+#ifdef __APPLE__
+    msync_destroy_semaphore( msync->shm_idx );
+#endif
+}
+
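+/* Each 16-byte slot holds four ints: [0]/[1] are the object state passed to
+ * msync_alloc_shm() as low/high (event signaled flag, mutex tid and recursion
+ * count, ...), [2] marks the slot as allocated, and [3] appears to be set
+ * client-side so that signal_all() knows a mach semaphore may be registered.
+ * Pages are mmap'ed lazily as indices are handed out. */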
+static void *get_shm( unsigned int idx )
+{
+    int entry  = (idx * 16) / pagesize;
+    int offset = (idx * 16) % pagesize;
+
+    if (entry >= shm_addrs_size)
+    {
+        int new_size = max(shm_addrs_size * 2, entry + 1);
+
+        if (!(shm_addrs = realloc( shm_addrs, new_size * sizeof(shm_addrs[0]) )))
+            fprintf( stderr, "msync: couldn't expand shm_addrs array to size %d\n", entry + 1 );
+
+        memset( shm_addrs + shm_addrs_size, 0, (new_size - shm_addrs_size) * sizeof(shm_addrs[0]) );
+
+        shm_addrs_size = new_size;
+    }
+
+    if (!shm_addrs[entry])
+    {
+        void *addr = mmap( NULL, pagesize, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, entry * pagesize );
+        if (addr == (void *)-1)
+        {
+            fprintf( stderr, "msync: failed to map page %d (offset %#lx): ", entry, entry * pagesize );
+            perror( "mmap" );
+        }
+
+        if (debug_level)
+            fprintf( stderr, "msync: Mapping page %d at %p.\n", entry, addr );
+
+        if (__sync_val_compare_and_swap( &shm_addrs[entry], 0, addr ))
+            munmap( addr, pagesize ); /* someone beat us to it */
+        else
+            memset( addr, 0, pagesize );
+    }
+
+    return (void *)((unsigned long)shm_addrs[entry] + offset);
+}
+
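+/* Index 0 is never handed out: callers treat a zero shm_idx as "no slot", and
+ * msync_init() marks slot 0 as allocated up front. */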
+static unsigned int shm_idx_counter = 1;
+
+unsigned int msync_alloc_shm( int low, int high )
+{
+#ifdef __APPLE__
+    int shm_idx, tries = 0;
+    int *shm;
+
+    /* this is arguably a bit of a hack, but we need some way to prevent
+     * allocating shm for the master socket */
+    if (!is_msync_initialized)
+        return 0;
+
+    shm_idx = shm_idx_counter;
+
+    for(;;)
+    {
+        shm = get_shm( shm_idx );
+        if (!__atomic_load_n( shm + 2, __ATOMIC_SEQ_CST ))
+            break;
+
+        shm_idx = (shm_idx + 1) % MAX_INDEX;
+        if (tries++ > MAX_INDEX)
+        {
+            /* With XNU, ftruncate() can only succeed with a non-zero length once
+             * per shared memory region, so the region cannot be grown and we have
+             * to terminate now. We already reserved far more slots than is
+             * reasonable anyway... */
+            fatal_error( "too many msync objects\n" );
+        }
+    }
+    __atomic_store_n( shm + 2, 1, __ATOMIC_SEQ_CST );
+    assert(mach_semaphore_map[shm_idx].head == NULL);
+    shm_idx_counter = (shm_idx + 1) % MAX_INDEX;
+
+    assert(shm);
+    shm[0] = low;
+    shm[1] = high;
+
+    return shm_idx;
+#else
+    return 0;
+#endif
+}
+
+static int type_matches( enum msync_type type1, enum msync_type type2 )
+{
+    return (type1 == type2) ||
+           ((type1 == MSYNC_AUTO_EVENT || type1 == MSYNC_MANUAL_EVENT) &&
+            (type2 == MSYNC_AUTO_EVENT || type2 == MSYNC_MANUAL_EVENT));
+}
+
+struct msync *create_msync( struct object *root, const struct unicode_str *name,
+    unsigned int attr, int low, int high, enum msync_type type,
+    const struct security_descriptor *sd )
+{
+#ifdef __APPLE__
+    struct msync *msync;
+
+    if ((msync = create_named_object( root, &msync_ops, name, attr, sd )))
+    {
+        if (get_error() != STATUS_OBJECT_NAME_EXISTS)
+        {
+            /* initialize it if it didn't already exist */
+
+            /* Initialize the shared memory portion. We want to do this on the
+             * server side to avoid a potential though unlikely race whereby
+             * the same object is opened and used between the time it's created
+             * and the time its shared memory portion is initialized. */
+
+            msync->shm_idx = msync_alloc_shm( low, high );
+            msync->type = type;
+            if (type == MSYNC_MUTEX)
+                list_add_tail( &mutex_list, &msync->mutex_entry );
+        }
+        else
+        {
+            /* validate the type */
+            if (!type_matches( type, msync->type ))
+            {
+                release_object( &msync->obj );
+                set_error( STATUS_OBJECT_TYPE_MISMATCH );
+                return NULL;
+            }
+        }
+    }
+
+    return msync;
+#else
+    set_error( STATUS_NOT_IMPLEMENTED );
+    return NULL;
+#endif
+}
+
+/* shm layout for events or event-like objects. */
+struct msync_event
+{
+    int signaled;
+    int unused;
+};
+
+void msync_signal_all( unsigned int shm_idx )
+{
+    struct msync_event *event;
+
+    if (debug_level)
+        fprintf( stderr, "msync_signal_all: index %u\n", shm_idx );
+
+    if (!shm_idx)
+        return;
+
+    event = get_shm( shm_idx );
+    if (!__atomic_exchange_n( &event->signaled, 1, __ATOMIC_SEQ_CST ))
+        signal_all( shm_idx, (int *)event );
+}
+
+void msync_wake_up( struct object *obj )
+{
+    enum msync_type type;
+
+    if (debug_level)
+        fprintf( stderr, "msync_wake_up: object %p\n", obj );
+
+    if (obj->ops->get_msync_idx)
+        msync_signal_all( obj->ops->get_msync_idx( obj, &type ) );
+}
+
+void msync_destroy_semaphore( unsigned int shm_idx )
+{
+    if (!shm_idx) return;
+
+    destroy_all( shm_idx );
+}
+
+void msync_clear_shm( unsigned int shm_idx )
+{
+    struct msync_event *event;
+
+    if (debug_level)
+        fprintf( stderr, "msync_clear_shm: index %u\n", shm_idx );
+
+    if (!shm_idx)
+        return;
+
+    event = get_shm( shm_idx );
+    __atomic_store_n( &event->signaled, 0, __ATOMIC_SEQ_CST );
+}
+
+void msync_clear( struct object *obj )
+{
+    enum msync_type type;
+
+    if (debug_level)
+        fprintf( stderr, "msync_clear: object %p\n", obj );
+
+    if (obj->ops->get_msync_idx)
+        msync_clear_shm( obj->ops->get_msync_idx( obj, &type ) );
+}
+
+void msync_set_event( struct msync *msync )
+{
+    struct msync_event *event = get_shm( msync->shm_idx );
+    assert( msync->obj.ops == &msync_ops );
+
+    if (!__atomic_exchange_n( &event->signaled, 1, __ATOMIC_SEQ_CST ))
+        signal_all( msync->shm_idx, (int *)event );
+}
+
+void msync_reset_event( struct msync *msync )
+{
+    struct msync_event *event = get_shm( msync->shm_idx );
+    assert( msync->obj.ops == &msync_ops );
+
+    __atomic_store_n( &event->signaled, 0, __ATOMIC_SEQ_CST );
+}
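msync_set_event() and msync_reset_event() above make the signaled field the single word both sides agree on. On the client, consuming an auto-reset event would then plausibly be a single compare-and-swap; a minimal sketch under that assumption (try_consume_auto_event is an illustrative name, not part of this patch):

    /* Sketch: try to consume an auto-reset event without a server round trip. */
    static int try_consume_auto_event( struct msync_event *event )
    {
        int expected = 1;
        return __atomic_compare_exchange_n( &event->signaled, &expected, 0, 0,
                                            __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST );
    }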
+
+struct mutex
+{
+    int tid;
+    int count;  /* recursion count */
+};
+
+void msync_abandon_mutexes( struct thread *thread )
+{
+    struct msync *msync;
+
+    LIST_FOR_EACH_ENTRY( msync, &mutex_list, struct msync, mutex_entry )
+    {
+        struct mutex *mutex = get_shm( msync->shm_idx );
+
+        if (mutex->tid == thread->id)
+        {
+            if (debug_level)
+                fprintf( stderr, "msync_abandon_mutexes() idx=%d\n", msync->shm_idx );
+            mutex->tid = ~0;
+            mutex->count = 0;
+            signal_all( msync->shm_idx, (int *)mutex );
+        }
+    }
+}
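The tid/count pair above is what a client-side fast path would operate on; msync_abandon_mutexes() marks an abandoned mutex by storing ~0 into tid. A minimal sketch of such a fast path, assuming the same layout and GCC atomic builtins (try_acquire_mutex and its tid argument are illustrative, not part of this patch):

    /* Sketch of a client-side acquire on the shared mutex block; a real
     * implementation would also report the wait as abandoned when it
     * claims a mutex whose tid was ~0. */
    static int try_acquire_mutex( struct mutex *mutex, int tid )
    {
        int expected = 0;

        if (__atomic_load_n( &mutex->tid, __ATOMIC_SEQ_CST ) == tid)
        {
            mutex->count++;    /* recursive acquisition by the owning thread */
            return 1;
        }
        if (__atomic_compare_exchange_n( &mutex->tid, &expected, tid, 0,
                                         __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST ))
        {
            mutex->count = 1;  /* took an unowned mutex */
            return 1;
        }
        expected = ~0;         /* claim an abandoned mutex */
        if (__atomic_compare_exchange_n( &mutex->tid, &expected, tid, 0,
                                         __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST ))
        {
            mutex->count = 1;
            return 1;
        }
        return 0;              /* contended: the caller has to wait */
    }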
+
+DECL_HANDLER(create_msync)
+{
+    struct msync *msync;
+    struct unicode_str name;
+    struct object *root;
+    const struct security_descriptor *sd;
+    const struct object_attributes *objattr = get_req_object_attributes( &sd, &name, &root );
+
+    if (!do_msync())
+    {
+        set_error( STATUS_NOT_IMPLEMENTED );
+        return;
+    }
+
+    if (!objattr) return;
+
+    if ((msync = create_msync( root, &name, objattr->attributes, req->low,
+                               req->high, req->type, sd )))
+    {
+        if (get_error() == STATUS_OBJECT_NAME_EXISTS)
+            reply->handle = alloc_handle( current->process, msync, req->access, objattr->attributes );
+        else
+            reply->handle = alloc_handle_no_access_check( current->process, msync,
+                                                          req->access, objattr->attributes );
+
+        reply->shm_idx = msync->shm_idx;
+        reply->type = msync->type;
+        release_object( msync );
+    }
+
+    if (root) release_object( root );
+}
+
+DECL_HANDLER(open_msync)
+{
+    struct unicode_str name = get_req_unicode_str();
+
+    reply->handle = open_object( current->process, req->rootdir, req->access,
+                                 &msync_ops, &name, req->attributes );
+
+    if (reply->handle)
+    {
+        struct msync *msync;
+
+        if (!(msync = (struct msync *)get_handle_obj( current->process, reply->handle,
+                                                      0, &msync_ops )))
+            return;
+
+        if (!type_matches( req->type, msync->type ))
+        {
+            set_error( STATUS_OBJECT_TYPE_MISMATCH );
+            release_object( msync );
+            return;
+        }
+
+        reply->type = msync->type;
+        reply->shm_idx = msync->shm_idx;
+        release_object( msync );
+    }
+}
+
+/* Retrieve the index of a shm section which will be signaled by the server. */
+DECL_HANDLER(get_msync_idx)
+{
+    struct object *obj;
+    enum msync_type type;
+
+    if (!(obj = get_handle_obj( current->process, req->handle, SYNCHRONIZE, NULL )))
+        return;
+
+    if (obj->ops->get_msync_idx)
+    {
+        reply->shm_idx = obj->ops->get_msync_idx( obj, &type );
+        reply->type = type;
+    }
+    else
+    {
+        if (debug_level)
+        {
+            fprintf( stderr, "%04x: msync: can't wait on object: ", current->id );
+            obj->ops->dump( obj, 0 );
+        }
+        set_error( STATUS_NOT_IMPLEMENTED );
+    }
+
+    release_object( obj );
+}
+
+DECL_HANDLER(get_msync_apc_idx)
+{
+    reply->shm_idx = current->msync_apc_idx;
+}
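The request handlers above have a client half in ntdll's Unix side. A minimal sketch of querying get_msync_idx with Wine's standard server request macros, assuming the usual ntdll/unix includes (unix_private.h, wine/server.h); get_object_msync_idx is an illustrative name, not part of this patch:

    /* Sketch: ask the server for an object's shm index and msync type. */
    static NTSTATUS get_object_msync_idx( HANDLE handle, enum msync_type *type,
                                          unsigned int *shm_idx )
    {
        NTSTATUS ret;

        SERVER_START_REQ( get_msync_idx )
        {
            req->handle = wine_server_obj_handle( handle );
            if (!(ret = wine_server_call( req )))
            {
                *type = reply->type;
                *shm_idx = reply->shm_idx;
            }
        }
        SERVER_END_REQ;
        return ret;
    }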
diff --git a/server/msync.h b/server/msync.h
new file mode 100644
index 00000000000..000aa48c53d
--- /dev/null
+++ b/server/msync.h
@@ -0,0 +1,36 @@
+/*
+ * mach semaphore-based synchronization objects
+ *
+ * Copyright (C) 2018 Zebediah Figura
+ * Copyright (C) 2023 Marc-Aurel Zent
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+extern int do_msync(void);
+extern void msync_init(void);
+extern unsigned int msync_alloc_shm( int low, int high );
+extern void msync_signal_all( unsigned int shm_idx );
+extern void msync_clear_shm( unsigned int shm_idx );
+extern void msync_destroy_semaphore( unsigned int shm_idx );
+extern void msync_wake_up( struct object *obj );
+extern void msync_clear( struct object *obj );
+
+struct msync;
+
+extern const struct object_ops msync_ops;
+extern void msync_set_event( struct msync *msync );
+extern void msync_reset_event( struct msync *msync );
+extern void msync_abandon_mutexes( struct thread *thread );
diff --git a/server/named_pipe.c b/server/named_pipe.c
index 59a90c36663..7110a3ddfcf 100644
--- a/server/named_pipe.c
+++ b/server/named_pipe.c
@@ -180,7 +180,8 @@ static const struct object_ops pipe_server_ops =
     pipe_server_open_file,        /* open_file */
     no_kernel_obj_list,           /* get_kernel_obj_list */
     async_close_obj_handle,       /* close_handle */
-    pipe_server_destroy           /* destroy */
+    pipe_server_destroy,          /* destroy */
+    default_fd_get_msync_idx      /* get_msync_idx */
 };
 
 static const struct fd_ops pipe_server_fd_ops =
@@ -224,7 +225,8 @@ static const struct object_ops pipe_client_ops =
     no_open_file,                 /* open_file */
     no_kernel_obj_list,           /* get_kernel_obj_list */
     async_close_obj_handle,       /* close_handle */
-    pipe_end_destroy              /* destroy */
+    pipe_end_destroy,             /* destroy */
+    default_fd_get_msync_idx      /* get_msync_idx */
 };
 
 static const struct fd_ops pipe_client_fd_ops =
diff --git a/server/object.h b/server/object.h
index 6222e3352ed..3d8b9e992ce 100644
--- a/server/object.h
+++ b/server/object.h
@@ -107,6 +107,8 @@ struct object_ops
     int (*close_handle)(struct object *,struct process *,obj_handle_t);
     /* destroy on refcount == 0 */
     void (*destroy)(struct object *);
+    /* return the msync shm idx for this object */
+    unsigned int (*get_msync_idx)(struct object *, enum msync_type *type);
 };
 
 struct object
diff --git a/server/process.c b/server/process.c
index 49f5c75005f..11d684746f0 100644
--- a/server/process.c
+++ b/server/process.c
@@ -63,6 +63,7 @@
 #include "request.h"
 #include "user.h"
 #include "security.h"
+#include "msync.h"
 
 /* process object */
 
@@ -95,6 +96,7 @@ static struct security_descriptor *process_get_sd( struct object *obj );
 static void process_poll_event( struct fd *fd, int event );
 static struct list *process_get_kernel_obj_list( struct object *obj );
 static void process_destroy( struct object *obj );
+static unsigned int process_get_msync_idx( struct object *obj, enum msync_type *type );
 static void terminate_process( struct process *process, struct thread *skip, int exit_code );
 
 static const struct object_ops process_ops =
@@ -118,7 +120,8 @@ static const struct object_ops process_ops =
     no_open_file,                /* open_file */
     process_get_kernel_obj_list, /* get_kernel_obj_list */
     no_close_handle,             /* close_handle */
-    process_destroy              /* destroy */
+    process_destroy,             /* destroy */
+    process_get_msync_idx        /* get_msync_idx */
 };
 
 static const struct fd_ops process_fd_ops =
@@ -684,6 +687,7 @@ struct process *create_process( int fd, struct process *parent, unsigned int fla
     process->rawinput_mouse  = NULL;
     process->rawinput_kbd    = NULL;
     memset( &process->image_info, 0, sizeof(process->image_info) );
+    process->msync_idx       = 0;
     list_init( &process->rawinput_entry );
     list_init( &process->kernel_object );
     list_init( &process->thread_list );
@@ -735,6 +739,9 @@ struct process *create_process( int fd, struct process *parent, unsigned int fla
     if (!process->handles || !process->token) goto error;
     process->session_id = token_get_session_id( process->token );
 
+    if (do_msync())
+        process->msync_idx = msync_alloc_shm( 0, 0 );
+
     set_fd_events( process->msg_fd, POLLIN );  /* start listening to events */
     return process;
 
@@ -783,6 +790,7 @@ static void process_destroy( struct object *obj )
     free( process->rawinput_devices );
     free( process->dir_cache );
     free( process->image );
+    if (do_msync()) msync_destroy_semaphore( process->msync_idx );
 }
 
 /* dump a process on stdout for debugging purposes */
@@ -800,6 +808,13 @@ static int process_signaled( struct object *obj, struct wait_queue_entry *entry
     return !process->running_threads;
 }
 
+static unsigned int process_get_msync_idx( struct object *obj, enum msync_type *type )
+{
+    struct process *process = (struct process *)obj;
+    *type = MSYNC_MANUAL_SERVER;
+    return process->msync_idx;
+}
+
 static unsigned int process_map_access( struct object *obj, unsigned int access )
 {
     access = default_map_access( obj, access );
diff --git a/server/process.h b/server/process.h
index 1e73e9d47dc..217d94aa63a 100644
--- a/server/process.h
+++ b/server/process.h
@@ -86,6 +86,7 @@ struct process
     struct list          rawinput_entry;  /* entry in the rawinput process list */
     struct list          kernel_object;   /* list of kernel object pointers */
     pe_image_info_t      image_info;      /* main exe image info */
+    unsigned int         msync_idx;       /* msync shm index */
 };
 
 /* process functions */
diff --git a/server/protocol.def b/server/protocol.def
index 9ecb14cbac4..09de9793118 100644
--- a/server/protocol.def
+++ b/server/protocol.def
@@ -4067,3 +4067,57 @@ typedef union
 @REPLY
     int enable;                /* previous state of auto-repeat enable */
 @END
+
+enum msync_type
+{
+    MSYNC_SEMAPHORE = 1,
+    MSYNC_AUTO_EVENT,
+    MSYNC_MANUAL_EVENT,
+    MSYNC_MUTEX,
+    MSYNC_AUTO_SERVER,
+    MSYNC_MANUAL_SERVER,
+    MSYNC_QUEUE,
+};
+
+/* Create a new mach-based synchronization object */
+@REQ(create_msync)
+    unsigned int access;        /* wanted access rights */
+    int low;                    /* initial value of low word */
+    int high;                   /* initial value of high word */
+    int type;                   /* type of msync object */
+    VARARG(objattr,object_attributes); /* object attributes */
+@REPLY
+    obj_handle_t handle;        /* handle to the object */
+    int type;                   /* type of msync object */
+    unsigned int shm_idx;       /* this object's index into the shm section */
+@END
+
+/* Open an msync object */
+@REQ(open_msync)
+    unsigned int access;        /* wanted access rights */
+    unsigned int attributes;    /* object attributes */
+    obj_handle_t rootdir;       /* root directory */
+    int          type;          /* type of msync object */
+    VARARG(name,unicode_str);   /* object name */
+@REPLY
+    obj_handle_t handle;        /* handle to the object */
+    int          type;          /* type of msync object */
+    unsigned int shm_idx;       /* this object's index into the shm section */
+@END
+
+/* Retrieve the shm index for an object. */
+@REQ(get_msync_idx)
+    obj_handle_t handle;        /* handle to the object */
+@REPLY
+    int          type;          /* type of msync object */
+    unsigned int shm_idx;       /* this object's index into the shm section */
+@END
+
+@REQ(msync_msgwait)
+    int          in_msgwait;    /* are we in a message wait? */
+@END
+
+@REQ(get_msync_apc_idx)
+@REPLY
+    unsigned int shm_idx;       /* this thread's APC index into the shm section */
+@END
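On the client, these requests are issued with the same server macros. A hedged sketch of creating an msync event over create_msync, assuming ntdll's alloc_object_attributes helper and the usual unix-side includes (create_msync_event is an illustrative name; mapping an initially signaled event to low = 1 is an assumption based on the msync_event layout):

    /* Sketch: create a manual-reset msync event on the server. */
    static NTSTATUS create_msync_event( HANDLE *handle, ACCESS_MASK access,
                                        const OBJECT_ATTRIBUTES *attr,
                                        BOOLEAN initial, unsigned int *shm_idx )
    {
        struct object_attributes *objattr;
        data_size_t len;
        NTSTATUS ret;

        if ((ret = alloc_object_attributes( attr, &objattr, &len ))) return ret;

        SERVER_START_REQ( create_msync )
        {
            req->access = access;
            req->low    = initial ? 1 : 0;   /* assumed: signaled state lives in the low word */
            req->high   = 0;
            req->type   = MSYNC_MANUAL_EVENT;
            wine_server_add_data( req, objattr, len );
            if (!(ret = wine_server_call( req )))
            {
                *handle  = wine_server_ptr_handle( reply->handle );
                *shm_idx = reply->shm_idx;
            }
        }
        SERVER_END_REQ;

        free( objattr );
        return ret;
    }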
diff --git a/server/queue.c b/server/queue.c
index 2641a9ba037..3911b58eea2 100644
--- a/server/queue.c
+++ b/server/queue.c
@@ -44,6 +44,7 @@
 #include "process.h"
 #include "request.h"
 #include "user.h"
+#include "msync.h"
 
 #define WM_NCMOUSEFIRST WM_NCMOUSEMOVE
 #define WM_NCMOUSELAST  (WM_NCMOUSEFIRST+(WM_MOUSELAST-WM_MOUSEFIRST))
@@ -133,6 +134,8 @@ struct msg_queue
     timeout_t              last_get_msg;    /* time of last get message call */
     int                    keystate_lock;   /* owns an input keystate lock */
     const queue_shm_t     *shared;          /* queue in session shared memory */
+    unsigned int           msync_idx;        /* msync shm index */
+    int                    msync_in_msgwait; /* our thread is currently waiting on us */
 };
 
 struct hotkey
@@ -149,6 +152,7 @@ static void msg_queue_dump( struct object *obj, int verbose );
 static int msg_queue_add_queue( struct object *obj, struct wait_queue_entry *entry );
 static void msg_queue_remove_queue( struct object *obj, struct wait_queue_entry *entry );
 static int msg_queue_signaled( struct object *obj, struct wait_queue_entry *entry );
+static unsigned int msg_queue_get_msync_idx( struct object *obj, enum msync_type *type );
 static void msg_queue_satisfied( struct object *obj, struct wait_queue_entry *entry );
 static void msg_queue_destroy( struct object *obj );
 static void msg_queue_poll_event( struct fd *fd, int event );
@@ -177,7 +181,8 @@ static const struct object_ops msg_queue_ops =
     no_open_file,              /* open_file */
     no_kernel_obj_list,        /* get_kernel_obj_list */
     no_close_handle,           /* close_handle */
-    msg_queue_destroy          /* destroy */
+    msg_queue_destroy,         /* destroy */
+    msg_queue_get_msync_idx    /* get_msync_idx */
 };
 
 static const struct fd_ops msg_queue_fd_ops =
@@ -312,6 +317,8 @@ static struct msg_queue *create_msg_queue( struct thread *thread, struct thread_
         queue->hooks           = NULL;
         queue->last_get_msg    = current_time;
         queue->keystate_lock   = 0;
+        queue->msync_idx       = 0;
+        queue->msync_in_msgwait = 0;
         list_init( &queue->send_result );
         list_init( &queue->callback_result );
         list_init( &queue->pending_timers );
@@ -334,6 +341,9 @@ static struct msg_queue *create_msg_queue( struct thread *thread, struct thread_
         }
         SHARED_WRITE_END;
 
+        if (do_msync())
+            queue->msync_idx = msync_alloc_shm( 0, 0 );
+
         thread->queue = queue;
 
         if ((desktop = get_thread_desktop( thread, 0 )))
@@ -1227,6 +1237,10 @@ static int is_queue_hung( struct msg_queue *queue )
         if (get_wait_queue_thread(entry)->queue == queue)
             return 0;  /* thread is waiting on queue -> not hung */
     }
+
+    if (do_msync() && queue->msync_in_msgwait)
+        return 0;   /* thread is waiting on queue in absentia -> not hung */
+
     return 1;
 }
 
@@ -1281,6 +1295,13 @@ static int msg_queue_signaled( struct object *obj, struct wait_queue_entry *entr
     return ret || is_signaled( queue );
 }
 
+static unsigned int msg_queue_get_msync_idx( struct object *obj, enum msync_type *type )
+{
+    struct msg_queue *queue = (struct msg_queue *)obj;
+    *type = MSYNC_QUEUE;
+    return queue->msync_idx;
+}
+
 static void msg_queue_satisfied( struct object *obj, struct wait_queue_entry *entry )
 {
     struct msg_queue *queue = (struct msg_queue *)obj;
@@ -1337,6 +1358,8 @@ static void msg_queue_destroy( struct object *obj )
     if (queue->hooks) release_object( queue->hooks );
     if (queue->fd) release_object( queue->fd );
     if (queue->shared) free_shared_object( queue->shared );
+    if (do_msync())
+        msync_destroy_semaphore( queue->msync_idx );
 }
 
 static void msg_queue_poll_event( struct fd *fd, int event )
@@ -3115,6 +3138,9 @@ DECL_HANDLER(set_queue_mask)
             }
             else wake_up( &queue->obj, 0 );
         }
+
+        if (do_msync() && !is_signaled( queue ))
+            msync_clear( &queue->obj );
     }
 }
 
@@ -3135,6 +3161,9 @@ DECL_HANDLER(get_queue_status)
             shared->changed_bits &= ~req->clear_bits;
         }
         SHARED_WRITE_END;
+
+        if (do_msync() && !is_signaled( queue ))
+            msync_clear( &queue->obj );
     }
     else reply->wake_bits = reply->changed_bits = 0;
 }
@@ -3392,6 +3421,11 @@ DECL_HANDLER(get_message)
     SHARED_WRITE_END;
 
     set_error( STATUS_PENDING );  /* FIXME */
+
+    if (do_msync() && !is_signaled( queue ))
+        msync_clear( &queue->obj );
+
+    return;
 }
 
 
@@ -4182,3 +4216,18 @@ DECL_HANDLER(set_keyboard_repeat)
 
     release_object( desktop );
 }
+
+DECL_HANDLER(msync_msgwait)
+{
+    struct msg_queue *queue = get_current_queue();
+
+    if (!queue) return;
+    queue->msync_in_msgwait = req->in_msgwait;
+
+    if (current->process->idle_event && !(queue->shared->wake_mask & QS_SMRESULT))
+        set_event( current->process->idle_event );
+
+    /* and start/stop waiting on the driver */
+    if (queue->fd)
+        set_fd_events( queue->fd, req->in_msgwait ? POLLIN : 0 );
+}
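The msync_msgwait handler above is the server half; the client flags entry to and exit from a message wait around its queue wait so that is_queue_hung() can see waits that happen entirely on the client side. A minimal sketch of that call (msync_set_msgwait is an illustrative name, not part of this patch):

    /* Sketch: tell the server whether this thread is currently in a message wait. */
    static void msync_set_msgwait( int in_msgwait )
    {
        SERVER_START_REQ( msync_msgwait )
        {
            req->in_msgwait = in_msgwait;
            wine_server_call( req );
        }
        SERVER_END_REQ;
    }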
diff --git a/server/request.h b/server/request.h
index 0ec527f2b4f..0144a9bbf4b 100644
--- a/server/request.h
+++ b/server/request.h
@@ -410,6 +410,11 @@ DECL_HANDLER(suspend_process);
 DECL_HANDLER(resume_process);
 DECL_HANDLER(get_next_thread);
 DECL_HANDLER(set_keyboard_repeat);
+DECL_HANDLER(create_msync);
+DECL_HANDLER(open_msync);
+DECL_HANDLER(get_msync_idx);
+DECL_HANDLER(msync_msgwait);
+DECL_HANDLER(get_msync_apc_idx);
 
 #ifdef WANT_REQUEST_HANDLERS
 
@@ -707,6 +712,11 @@ static const req_handler req_handlers[REQ_NB_REQUESTS] =
     (req_handler)req_resume_process,
     (req_handler)req_get_next_thread,
     (req_handler)req_set_keyboard_repeat,
+    (req_handler)req_create_msync,
+    (req_handler)req_open_msync,
+    (req_handler)req_get_msync_idx,
+    (req_handler)req_msync_msgwait,
+    (req_handler)req_get_msync_apc_idx,
 };
 
 C_ASSERT( sizeof(abstime_t) == 8 );
@@ -2374,6 +2384,34 @@ C_ASSERT( FIELD_OFFSET(struct set_keyboard_repeat_request, period) == 20 );
 C_ASSERT( sizeof(struct set_keyboard_repeat_request) == 24 );
 C_ASSERT( FIELD_OFFSET(struct set_keyboard_repeat_reply, enable) == 8 );
 C_ASSERT( sizeof(struct set_keyboard_repeat_reply) == 16 );
+C_ASSERT( FIELD_OFFSET(struct create_msync_request, access) == 12 );
+C_ASSERT( FIELD_OFFSET(struct create_msync_request, low) == 16 );
+C_ASSERT( FIELD_OFFSET(struct create_msync_request, high) == 20 );
+C_ASSERT( FIELD_OFFSET(struct create_msync_request, type) == 24 );
+C_ASSERT( sizeof(struct create_msync_request) == 32 );
+C_ASSERT( FIELD_OFFSET(struct create_msync_reply, handle) == 8 );
+C_ASSERT( FIELD_OFFSET(struct create_msync_reply, type) == 12 );
+C_ASSERT( FIELD_OFFSET(struct create_msync_reply, shm_idx) == 16 );
+C_ASSERT( sizeof(struct create_msync_reply) == 24 );
+C_ASSERT( FIELD_OFFSET(struct open_msync_request, access) == 12 );
+C_ASSERT( FIELD_OFFSET(struct open_msync_request, attributes) == 16 );
+C_ASSERT( FIELD_OFFSET(struct open_msync_request, rootdir) == 20 );
+C_ASSERT( FIELD_OFFSET(struct open_msync_request, type) == 24 );
+C_ASSERT( sizeof(struct open_msync_request) == 32 );
+C_ASSERT( FIELD_OFFSET(struct open_msync_reply, handle) == 8 );
+C_ASSERT( FIELD_OFFSET(struct open_msync_reply, type) == 12 );
+C_ASSERT( FIELD_OFFSET(struct open_msync_reply, shm_idx) == 16 );
+C_ASSERT( sizeof(struct open_msync_reply) == 24 );
+C_ASSERT( FIELD_OFFSET(struct get_msync_idx_request, handle) == 12 );
+C_ASSERT( sizeof(struct get_msync_idx_request) == 16 );
+C_ASSERT( FIELD_OFFSET(struct get_msync_idx_reply, type) == 8 );
+C_ASSERT( FIELD_OFFSET(struct get_msync_idx_reply, shm_idx) == 12 );
+C_ASSERT( sizeof(struct get_msync_idx_reply) == 16 );
+C_ASSERT( FIELD_OFFSET(struct msync_msgwait_request, in_msgwait) == 12 );
+C_ASSERT( sizeof(struct msync_msgwait_request) == 16 );
+C_ASSERT( sizeof(struct get_msync_apc_idx_request) == 16 );
+C_ASSERT( FIELD_OFFSET(struct get_msync_apc_idx_reply, shm_idx) == 8 );
+C_ASSERT( sizeof(struct get_msync_apc_idx_reply) == 16 );
 
 #endif  /* WANT_REQUEST_HANDLERS */
 
diff --git a/server/thread.c b/server/thread.c
index f3880eebedb..78c635fc524 100644
--- a/server/thread.c
+++ b/server/thread.c
@@ -50,6 +50,7 @@
 #include "request.h"
 #include "user.h"
 #include "security.h"
+#include "msync.h"
 
 
 /* thread queues */
@@ -174,6 +175,7 @@ struct type_descr thread_type =
 
 static void dump_thread( struct object *obj, int verbose );
 static int thread_signaled( struct object *obj, struct wait_queue_entry *entry );
+static unsigned int thread_get_msync_idx( struct object *obj, enum msync_type *type );
 static unsigned int thread_map_access( struct object *obj, unsigned int access );
 static void thread_poll_event( struct fd *fd, int event );
 static struct list *thread_get_kernel_obj_list( struct object *obj );
@@ -200,7 +202,8 @@ static const struct object_ops thread_ops =
     no_open_file,               /* open_file */
     thread_get_kernel_obj_list, /* get_kernel_obj_list */
     no_close_handle,            /* close_handle */
-    destroy_thread              /* destroy */
+    destroy_thread,             /* destroy */
+    thread_get_msync_idx        /* get_msync_idx */
 };
 
 static const struct fd_ops thread_fd_ops =
@@ -226,6 +229,8 @@ static inline void init_thread_structure( struct thread *thread )
     thread->context         = NULL;
     thread->teb             = 0;
     thread->entry_point     = 0;
+    thread->msync_idx       = 0;
+    thread->msync_apc_idx   = 0;
     thread->system_regs     = 0;
     thread->queue           = NULL;
     thread->wait            = NULL;
@@ -373,6 +378,12 @@ struct thread *create_thread( int fd, struct process *process, const struct secu
         }
     }
 
+    if (do_msync())
+    {
+        thread->msync_idx = msync_alloc_shm( 0, 0 );
+        thread->msync_apc_idx = msync_alloc_shm( 0, 0 );
+    }
+
     set_fd_events( thread->request_fd, POLLIN );  /* start listening to events */
     add_process_thread( thread->process, thread );
     return thread;
@@ -452,6 +463,12 @@ static void destroy_thread( struct object *obj )
     release_object( thread->process );
     if (thread->id) free_ptid( thread->id );
     if (thread->token) release_object( thread->token );
+
+    if (do_msync())
+    {
+        msync_destroy_semaphore( thread->msync_idx );
+        msync_destroy_semaphore( thread->msync_apc_idx );
+    }
 }
 
 /* dump a thread on stdout for debugging purposes */
@@ -470,6 +487,13 @@ static int thread_signaled( struct object *obj, struct wait_queue_entry *entry )
     return (mythread->state == TERMINATED);
 }
 
+static unsigned int thread_get_msync_idx( struct object *obj, enum msync_type *type )
+{
+    struct thread *thread = (struct thread *)obj;
+    *type = MSYNC_MANUAL_SERVER;
+    return thread->msync_idx;
+}
+
 static unsigned int thread_map_access( struct object *obj, unsigned int access )
 {
     access = default_map_access( obj, access );
@@ -1065,6 +1089,9 @@ void wake_up( struct object *obj, int max )
     struct list *ptr;
     int ret;
 
+    if (do_msync())
+        msync_wake_up( obj );
+
     LIST_FOR_EACH( ptr, &obj->wait_queue )
     {
         struct wait_queue_entry *entry = LIST_ENTRY( ptr, struct wait_queue_entry, entry );
@@ -1149,8 +1176,12 @@ static int queue_apc( struct process *process, struct thread *thread, struct thr
     grab_object( apc );
     list_add_tail( queue, &apc->entry );
     if (!list_prev( queue, &apc->entry ))  /* first one */
+    {
         wake_thread( thread );
 
+        if (do_msync() && queue == &thread->user_apc)
+            msync_signal_all( thread->msync_apc_idx );
+    }
     return 1;
 }
 
@@ -1196,6 +1227,10 @@ static struct thread_apc *thread_dequeue_apc( struct thread *thread, int system
         apc = LIST_ENTRY( ptr, struct thread_apc, entry );
         list_remove( ptr );
     }
+
+    if (do_msync() && list_empty( &thread->system_apc ) && list_empty( &thread->user_apc ))
+        msync_clear_shm( thread->msync_apc_idx );
+
     return apc;
 }
 
@@ -1291,6 +1326,8 @@ void kill_thread( struct thread *thread, int violent_death )
     }
     kill_console_processes( thread, 0 );
     abandon_mutexes( thread );
+    if (do_msync())
+        msync_abandon_mutexes( thread );
     wake_up( &thread->obj, 0 );
     if (violent_death) send_thread_signal( thread, SIGQUIT );
     cleanup_thread( thread );
diff --git a/server/thread.h b/server/thread.h
index 3448f332b0b..44a4bab1148 100644
--- a/server/thread.h
+++ b/server/thread.h
@@ -56,6 +56,8 @@ struct thread
     struct process        *process;
     thread_id_t            id;            /* thread id */
     struct list            mutex_list;    /* list of currently owned mutexes */
+    unsigned int           msync_idx;     /* msync shm index */
+    unsigned int           msync_apc_idx; /* msync shm index for user APCs */
     unsigned int           system_regs;   /* which system regs have been set */
     struct msg_queue      *queue;         /* message queue */
     struct thread_wait    *wait;          /* current wait condition if sleeping */
diff --git a/server/timer.c b/server/timer.c
index 96dc9d00ca1..2b3880dbb31 100644
--- a/server/timer.c
+++ b/server/timer.c
@@ -35,6 +35,7 @@
 #include "file.h"
 #include "handle.h"
 #include "request.h"
+#include "msync.h"
 
 static const WCHAR timer_name[] = {'T','i','m','e','r'};
 
@@ -61,10 +62,12 @@ struct timer
     struct thread       *thread;    /* thread that set the APC function */
     client_ptr_t         callback;  /* callback APC function */
     client_ptr_t         arg;       /* callback argument */
+    unsigned int         msync_idx; /* msync shm index */
 };
 
 static void timer_dump( struct object *obj, int verbose );
 static int timer_signaled( struct object *obj, struct wait_queue_entry *entry );
+static unsigned int timer_get_msync_idx( struct object *obj, enum msync_type *type );
 static void timer_satisfied( struct object *obj, struct wait_queue_entry *entry );
 static void timer_destroy( struct object *obj );
 
@@ -89,7 +92,8 @@ static const struct object_ops timer_ops =
     no_open_file,              /* open_file */
     no_kernel_obj_list,        /* get_kernel_obj_list */
     no_close_handle,           /* close_handle */
-    timer_destroy              /* destroy */
+    timer_destroy,             /* destroy */
+    timer_get_msync_idx        /* get_msync_idx */
 };
 
 
@@ -110,6 +114,10 @@ static struct timer *create_timer( struct object *root, const struct unicode_str
             timer->period   = 0;
             timer->timeout  = NULL;
             timer->thread   = NULL;
+
+            if (do_msync())
+                timer->msync_idx = msync_alloc_shm( 0, 0 );
+
         }
     }
     return timer;
@@ -181,6 +189,9 @@ static int set_timer( struct timer *timer, timeout_t expire, unsigned int period
     {
         period = 0;  /* period doesn't make any sense for a manual timer */
         timer->signaled = 0;
+
+        if (do_msync())
+            msync_clear( &timer->obj );
     }
     timer->when     = (expire <= 0) ? expire - monotonic_time : max( expire, current_time );
     timer->period   = period;
@@ -208,6 +219,13 @@ static int timer_signaled( struct object *obj, struct wait_queue_entry *entry )
     return timer->signaled;
 }
 
+static unsigned int timer_get_msync_idx( struct object *obj, enum msync_type *type )
+{
+    struct timer *timer = (struct timer *)obj;
+    *type = timer->manual ? MSYNC_MANUAL_SERVER : MSYNC_AUTO_SERVER;
+    return timer->msync_idx;
+}
+
 static void timer_satisfied( struct object *obj, struct wait_queue_entry *entry )
 {
     struct timer *timer = (struct timer *)obj;
@@ -222,6 +240,8 @@ static void timer_destroy( struct object *obj )
 
     if (timer->timeout) remove_timeout_user( timer->timeout );
     if (timer->thread) release_object( timer->thread );
+    if (do_msync())
+        msync_destroy_semaphore( timer->msync_idx );
 }
 
 /* create a timer */
diff --git a/server/trace.c b/server/trace.c
index 5fd69aa420b..8a6825b0b84 100644
--- a/server/trace.c
+++ b/server/trace.c
@@ -4808,6 +4808,63 @@ static void dump_set_keyboard_repeat_reply( const struct set_keyboard_repeat_rep
     fprintf( stderr, " enable=%d", req->enable );
 }
 
+static void dump_create_msync_request( const struct create_msync_request *req )
+{
+    fprintf( stderr, " access=%08x", req->access );
+    fprintf( stderr, ", low=%d", req->low );
+    fprintf( stderr, ", high=%d", req->high );
+    fprintf( stderr, ", type=%d", req->type );
+    dump_varargs_object_attributes( ", objattr=", cur_size );
+}
+
+static void dump_create_msync_reply( const struct create_msync_reply *req )
+{
+    fprintf( stderr, " handle=%04x", req->handle );
+    fprintf( stderr, ", type=%d", req->type );
+    fprintf( stderr, ", shm_idx=%08x", req->shm_idx );
+}
+
+static void dump_open_msync_request( const struct open_msync_request *req )
+{
+    fprintf( stderr, " access=%08x", req->access );
+    fprintf( stderr, ", attributes=%08x", req->attributes );
+    fprintf( stderr, ", rootdir=%04x", req->rootdir );
+    fprintf( stderr, ", type=%d", req->type );
+    dump_varargs_unicode_str( ", name=", cur_size );
+}
+
+static void dump_open_msync_reply( const struct open_msync_reply *req )
+{
+    fprintf( stderr, " handle=%04x", req->handle );
+    fprintf( stderr, ", type=%d", req->type );
+    fprintf( stderr, ", shm_idx=%08x", req->shm_idx );
+}
+
+static void dump_get_msync_idx_request( const struct get_msync_idx_request *req )
+{
+    fprintf( stderr, " handle=%04x", req->handle );
+}
+
+static void dump_get_msync_idx_reply( const struct get_msync_idx_reply *req )
+{
+    fprintf( stderr, " type=%d", req->type );
+    fprintf( stderr, ", shm_idx=%08x", req->shm_idx );
+}
+
+static void dump_msync_msgwait_request( const struct msync_msgwait_request *req )
+{
+    fprintf( stderr, " in_msgwait=%d", req->in_msgwait );
+}
+
+static void dump_get_msync_apc_idx_request( const struct get_msync_apc_idx_request *req )
+{
+}
+
+static void dump_get_msync_apc_idx_reply( const struct get_msync_apc_idx_reply *req )
+{
+    fprintf( stderr, " shm_idx=%08x", req->shm_idx );
+}
+
 static const dump_func req_dumpers[REQ_NB_REQUESTS] = {
     (dump_func)dump_new_process_request,
     (dump_func)dump_get_new_process_info_request,
@@ -5100,6 +5157,11 @@ static const dump_func req_dumpers[REQ_NB_REQUESTS] = {
     (dump_func)dump_resume_process_request,
     (dump_func)dump_get_next_thread_request,
     (dump_func)dump_set_keyboard_repeat_request,
+    (dump_func)dump_create_msync_request,
+    (dump_func)dump_open_msync_request,
+    (dump_func)dump_get_msync_idx_request,
+    (dump_func)dump_msync_msgwait_request,
+    (dump_func)dump_get_msync_apc_idx_request,
 };
 
 static const dump_func reply_dumpers[REQ_NB_REQUESTS] = {
@@ -5394,6 +5456,11 @@ static const dump_func reply_dumpers[REQ_NB_REQUESTS] = {
     NULL,
     (dump_func)dump_get_next_thread_reply,
     (dump_func)dump_set_keyboard_repeat_reply,
+    (dump_func)dump_create_msync_reply,
+    (dump_func)dump_open_msync_reply,
+    (dump_func)dump_get_msync_idx_reply,
+    NULL,
+    (dump_func)dump_get_msync_apc_idx_reply,
 };
 
 static const char * const req_names[REQ_NB_REQUESTS] = {
@@ -5688,6 +5755,11 @@ static const char * const req_names[REQ_NB_REQUESTS] = {
     "resume_process",
     "get_next_thread",
     "set_keyboard_repeat",
+    "create_msync",
+    "open_msync",
+    "get_msync_idx",
+    "msync_msgwait",
+    "get_msync_apc_idx",
 };
 
 static const struct
