diff --git a/envconfig/config.go b/envconfig/config.go
index fc702198fa3..09dce62d9d2 100644
--- a/envconfig/config.go
+++ b/envconfig/config.go
@@ -164,6 +164,8 @@ var (
 	SchedSpread = Bool("OLLAMA_SCHED_SPREAD")
 	// IntelGPU enables experimental Intel GPU detection.
 	IntelGPU = Bool("OLLAMA_INTEL_GPU")
+	// NoMMap disables memory mapping of the model file.
+	NoMMap = Bool("OLLAMA_NO_MMAP")
 	// MultiUserCache optimizes prompt caching for multi-user scenarios
 	MultiUserCache = Bool("OLLAMA_MULTIUSER_CACHE")
 	// Enable the new Ollama engine
@@ -251,6 +253,7 @@ func AsMap() map[string]EnvVar {
 		"OLLAMA_MODELS":            {"OLLAMA_MODELS", Models(), "The path to the models directory"},
 		"OLLAMA_NOHISTORY":         {"OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history"},
 		"OLLAMA_NOPRUNE":           {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
+		"OLLAMA_NO_MMAP":           {"OLLAMA_NO_MMAP", NoMMap(), "Disable memory mapping of the model file"},
 		"OLLAMA_NUM_PARALLEL":      {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
 		"OLLAMA_ORIGINS":           {"OLLAMA_ORIGINS", AllowedOrigins(), "A comma separated list of allowed origins"},
 		"OLLAMA_SCHED_SPREAD":      {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
diff --git a/llm/server.go b/llm/server.go
index e6046db608e..a7b8bda9f71 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -209,7 +209,8 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a
 	// Windows CUDA should not use mmap for best performance
 	// Linux  with a model larger than free space, mmap leads to thrashing
 	// For CPU loads we want the memory to be allocated, not FS cache
-	if (runtime.GOOS == "windows" && gpus[0].Library == "cuda" && opts.UseMMap == nil) ||
+	if envconfig.NoMMap() ||
+		(runtime.GOOS == "windows" && gpus[0].Library == "cuda" && opts.UseMMap == nil) ||
 		(runtime.GOOS == "linux" && systemFreeMemory < estimate.TotalSize && opts.UseMMap == nil) ||
 		(gpus[0].Library == "cpu" && opts.UseMMap == nil) ||
 		(opts.UseMMap != nil && !*opts.UseMMap) {
