Why Node.js Performance Optimization Matters
Node.js operates on a single-threaded event loop model. While it excels at I/O tasks, neglecting CPU-intensive work or memory management can cause severe performance degradation across the entire server. This article covers the core techniques for maximizing Node.js performance: cluster mode, worker threads, memory management, and profiling.
Understanding the Event Loop
The Node.js event loop consists of 6 phases. Not blocking the event loop is the key to performance.
const { performance, PerformanceObserver } = require("node:perf_hooks");
// Detect event loop blocking
/**
 * Starts a 1-second heartbeat that warns when the event loop was delayed
 * more than `thresholdMs` beyond the expected interval.
 *
 * @param {number} [thresholdMs=100] - Extra delay (ms) that triggers a warning.
 * @returns {NodeJS.Timeout} The interval handle so callers can stop the
 *   monitor with clearInterval(). The timer is unref()ed so the monitor
 *   alone does not keep the process alive.
 */
function detectEventLoopDelay(thresholdMs = 100) {
  const INTERVAL_MS = 1000;
  let lastCheck = performance.now();
  const timer = setInterval(() => {
    const now = performance.now();
    // Anything beyond the scheduled interval is time the loop was blocked
    const delay = now - lastCheck - INTERVAL_MS;
    if (delay > thresholdMs) {
      console.warn(`[Warning] Event loop delay: ${delay.toFixed(0)}ms`);
    }
    lastCheck = now;
  }, INTERVAL_MS);
  timer.unref?.();
  return timer;
}
// Bad example: synchronous operation that blocks the event loop
/**
 * Busy-waits synchronously, starving the event loop for the whole duration.
 * Kept as a deliberate anti-pattern demo; the duration is now a parameter
 * (default preserves the original 5-second block).
 *
 * @param {number} [durationMs=5000] - How long to block, in milliseconds.
 */
function badExample(durationMs = 5000) {
  const start = Date.now();
  // Blocks the event loop — all requests stall until the loop is free again
  while (Date.now() - start < durationMs) {
    // Heavy computation occupying the CPU
  }
}
// Good example: split into chunks and yield to the event loop
/**
 * Processes `items` in chunks, yielding to the event loop between chunks so
 * pending I/O callbacks can run. Generalized: the per-item transform and the
 * chunk size are now parameters (defaults preserve the original behavior).
 *
 * @param {Array} items - Input values to process.
 * @param {Function} [transform] - Per-item mapping (default: item * 2).
 * @param {number} [chunkSize=1000] - Items processed per event-loop turn.
 * @returns {Promise<Array>} The transformed results, in input order.
 */
async function goodExample(items, transform = (item) => item * 2, chunkSize = 1000) {
  const results = [];
  for (let i = 0; i < items.length; i += chunkSize) {
    const chunk = items.slice(i, i + chunkSize);
    const processed = chunk.map(transform); // Lightweight operation
    results.push(...processed);
    // Yield control to the event loop before the next chunk
    if (i + chunkSize < items.length) {
      await new Promise((resolve) => setImmediate(resolve));
    }
  }
  return results;
}
| Phase | Role | Example |
|---|---|---|
| timers | Execute setTimeout, setInterval callbacks | Delayed tasks |
| pending callbacks | System operation callbacks | TCP errors |
| idle, prepare | Used internally by Node.js | — |
| poll | Process I/O callbacks | File reads, network |
| check | Execute setImmediate callbacks | Immediate execution after poll |
| close callbacks | Close event callbacks | socket.on('close') |
Cluster Mode — Utilizing Multiple Cores
By default, Node.js only uses a single CPU core. The cluster module lets you spawn multiple processes to take advantage of multi-core CPUs.
const cluster = require("node:cluster");
const http = require("node:http");
const os = require("node:os");

// One worker per CPU core — a single Node.js process only uses one core.
const NUM_WORKERS = os.cpus().length;

if (!cluster.isPrimary) {
  // Worker: every worker listens on the same port; the primary hands
  // incoming connections out among them.
  const server = http.createServer((req, res) => {
    res.writeHead(200, { "Content-Type": "application/json" });
    const body = JSON.stringify({
      pid: process.pid,
      message: "Response complete",
    });
    res.end(body);
  });
  server.listen(3000);
  console.log(`Worker ${process.pid} started`);
} else {
  console.log(`Primary process ${process.pid} started`);
  console.log(`Spawning ${NUM_WORKERS} workers...`);

  // Spawn the worker pool, one per core.
  for (let i = 0; i < NUM_WORKERS; i += 1) {
    cluster.fork();
  }

  // Keep the pool at full strength: replace any worker that dies.
  cluster.on("exit", (worker, code, signal) => {
    console.warn(
      `Worker ${worker.process.pid} exited (code: ${code}). Restarting...`
    );
    cluster.fork();
  });
}
// Output:
// Primary process 12345 started
// Spawning 8 workers...
// Worker 12346 started
// Worker 12347 started
// ...
Worker Threads — Offloading CPU-Intensive Tasks
The worker_threads module runs CPU-intensive tasks in separate threads without blocking the main thread.
// worker.js — code that runs in the worker thread
const { parentPort, workerData } = require("node:worker_threads");
// Naive doubly-recursive Fibonacci — deliberately CPU-heavy so the demo
// has real work to offload onto a worker thread.
function fibonacci(n) {
  return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2);
}
// Send result back to the parent
// Runs once at worker startup: compute for the input passed in via
// workerData, then hand the result to the spawning thread over the
// message channel (parentPort is non-null only inside a worker thread).
const result = fibonacci(workerData.number);
parentPort.postMessage({ number: workerData.number, result });
// main.js — main thread
const { Worker } = require("node:worker_threads");
/**
 * Runs fibonacci(number) on a dedicated worker thread.
 *
 * @param {number} number - Input forwarded to the worker via workerData.
 * @returns {Promise<{number: number, result: number}>} Resolves with the
 *   worker's message; rejects on a worker error or a non-zero exit code.
 */
function runFibonacci(number) {
  return new Promise((resolve, reject) => {
    const worker = new Worker("./worker.js", { workerData: { number } });

    const onExit = (code) => {
      // A non-zero code means the worker died before posting a result.
      if (code !== 0) {
        reject(new Error(`Worker exit code: ${code}`));
      }
    };

    worker.on("message", resolve);
    worker.on("error", reject);
    worker.on("exit", onExit);
  });
}
// Run multiple Fibonacci calculations in parallel
async function main() {
  console.time("Parallel execution");

  // Three workers run concurrently; Promise.all gathers their results.
  const inputs = [40, 41, 42];
  const results = await Promise.all(inputs.map((n) => runFibonacci(n)));

  console.timeEnd("Parallel execution");

  for (const { number, result } of results) {
    console.log(`fibonacci(${number}) = ${result}`);
  }
}
main();
// Parallel execution: 2340ms (about 3x faster than sequential)
// fibonacci(40) = 102334155
// fibonacci(41) = 165580141
// fibonacci(42) = 267914296
| Approach | Best for | Memory sharing | Communication |
|---|---|---|---|
| Cluster | HTTP server scaling | Not possible | IPC |
| Worker threads | CPU-intensive computation | SharedArrayBuffer | postMessage |
Memory Management and Leak Detection
Here is how to monitor V8 heap memory and detect leaks.
// Monitor memory usage
/**
 * Logs a snapshot of the current process memory usage in megabytes.
 *
 * @param {string} [label=""] - Optional tag appended to the log prefix.
 */
function logMemoryUsage(label = "") {
  const toMB = (bytes) => `${(bytes / 1024 / 1024).toFixed(1)}MB`;
  const { rss, heapTotal, heapUsed, external } = process.memoryUsage();
  const prefix = `[Memory${label ? ` - ${label}` : ""}]`;
  console.log(prefix, {
    rss: toMB(rss), // Total memory
    heapTotal: toMB(heapTotal), // Total heap
    heapUsed: toMB(heapUsed), // Heap used
    external: toMB(external), // C++ objects
  });
}
// Memory leak example and solution
/**
 * Bounded LRU cache backed by a Map. A Map preserves insertion order, so
 * the first key is always the least recently used entry.
 */
class CacheManager {
  #cache = new Map();
  #maxSize;

  /**
   * @param {number} [maxSize=1000] - Maximum number of entries to retain.
   */
  constructor(maxSize = 1000) {
    this.#maxSize = maxSize;
  }

  /**
   * Inserts or updates an entry. Evicts the least recently used entry only
   * when a NEW key would push the cache past its limit — updating an
   * existing key never evicts (the original version wrongly dropped an
   * unrelated entry on every update at capacity), and an update now also
   * refreshes the key's recency.
   */
  set(key, value) {
    if (this.#cache.has(key)) {
      // Re-insert so the updated key counts as most recently used.
      this.#cache.delete(key);
    } else if (this.#cache.size >= this.#maxSize) {
      // LRU strategy: remove oldest entry when max size is exceeded
      const firstKey = this.#cache.keys().next().value;
      this.#cache.delete(firstKey);
    }
    this.#cache.set(key, value);
  }

  /**
   * Returns the cached value (marking it most recently used), or undefined.
   */
  get(key) {
    if (!this.#cache.has(key)) return undefined;
    // Move accessed entry to the end (LRU)
    const value = this.#cache.get(key);
    this.#cache.delete(key);
    this.#cache.set(key, value);
    return value;
  }

  /** Current number of cached entries. */
  get size() {
    return this.#cache.size;
  }
}
// Weak reference management with WeakRef/FinalizationRegistry
// Logs the held value when a registered object is reclaimed by the GC.
const registry = new FinalizationRegistry((heldValue) => {
  console.log(`[GC] ${heldValue} object was garbage collected`);
});
// Baseline snapshot at startup, for comparison against later measurements
logMemoryUsage("start");
// [Memory - start] { rss: '25.3MB', heapTotal: '6.2MB', heapUsed: '4.1MB', external: '0.4MB' }
Profiling — Finding Bottlenecks
const { performance } = require("node:perf_hooks");
// Utility for measuring function execution time
/**
 * Invokes `fn`, logs its wall-clock duration under `label`, and returns
 * whatever `fn` returned.
 *
 * @param {string} label - Tag used in the log line.
 * @param {Function} fn - Zero-argument function to time.
 * @returns {*} The value returned by `fn`.
 */
function measureTime(label, fn) {
  const startedAt = performance.now();
  const value = fn();
  const elapsed = performance.now() - startedAt;
  console.log(`[${label}] ${elapsed.toFixed(2)}ms`);
  return value;
}
// Processing large files with streams (memory-efficient)
const fs = require("node:fs");
const { createReadStream, createWriteStream } = fs;
const { pipeline } = require("node:stream/promises");
const { Transform } = require("node:stream");
const { StringDecoder } = require("node:string_decoder");
/**
 * Streams a file through an uppercasing transform into a new file, without
 * loading the whole file into memory.
 *
 * Fix: the original `chunk.toString()` corrupted multi-byte UTF-8 characters
 * that happened to be split across chunk boundaries; a StringDecoder buffers
 * incomplete sequences between chunks.
 *
 * @param {string} inputPath - Path of the file to read.
 * @param {string} outputPath - Path of the file to write.
 * @returns {Promise<void>} Resolves when the pipeline finishes.
 */
async function processLargeFile(inputPath, outputPath) {
  const decoder = new StringDecoder("utf8");
  const upperCaseTransform = new Transform({
    transform(chunk, encoding, callback) {
      // Process in chunks — no need to load the entire file into memory
      callback(null, decoder.write(chunk).toUpperCase());
    },
    flush(callback) {
      // Emit any bytes still buffered in the decoder at end of input.
      callback(null, decoder.end().toUpperCase());
    },
  });
  await pipeline(
    createReadStream(inputPath),
    upperCaseTransform,
    createWriteStream(outputPath)
  );
  console.log("File processing complete (stream-based)");
}
# Node.js built-in profiler
node --prof app.js
node --prof-process isolate-*.log > profile.txt
# Profiling with Chrome DevTools
node --inspect app.js
# Connect at chrome://inspect
Practical Tips
- Never block the event loop: Offload synchronous tasks that take more than 100ms to worker threads
- Use streams: Process large data with `createReadStream` instead of `readFile`
- Connection pooling: Manage DB connections via a pool instead of creating new ones each time
- Limit cache size: Unbounded caches are a leading cause of memory leaks. Apply an LRU strategy
- Cluster + PM2: In production, PM2’s cluster mode simplifies process management
- Heap snapshots: Use the `--inspect` flag to compare memory snapshots in Chrome DevTools
- HTTP Keep-Alive: Reuse connections to reduce TCP handshake overhead