Node.js Performance Optimization Guide — Clusters to Memory

Why Node.js Performance Optimization Matters

Node.js operates on a single-threaded event loop model. While it excels at I/O tasks, neglecting CPU-intensive work or memory management can cause severe performance degradation across the entire server. This article covers the core techniques for maximizing Node.js performance: cluster mode, worker threads, memory management, and profiling.

Understanding the Event Loop

The Node.js event loop consists of 6 phases. Not blocking the event loop is the key to performance.

const { performance, PerformanceObserver } = require("node:perf_hooks");

// Detect event loop blocking by measuring timer drift.
// A setInterval callback should fire every `intervalMs`; any extra time is
// delay introduced by work blocking the event loop.
// intervalMs: sampling period (default 1000ms, matching the original behavior)
// thresholdMs: minimum drift before a warning is logged (default 100ms)
// Returns the interval timer so callers can stop monitoring via clearInterval.
function detectEventLoopDelay(intervalMs = 1000, thresholdMs = 100) {
  let lastCheck = performance.now();

  const timer = setInterval(() => {
    const now = performance.now();
    // Time beyond the scheduled interval is event-loop delay.
    const delay = now - lastCheck - intervalMs;
    if (delay > thresholdMs) {
      console.warn(`[Warning] Event loop delay: ${delay.toFixed(0)}ms`);
    }
    lastCheck = now;
  }, intervalMs);

  return timer;
}

// Bad example: a synchronous busy-wait that blocks the event loop.
// While this loop spins, NO other callback (timers, I/O, HTTP requests) can run.
// durationMs is parameterized (default keeps the original 5-second stall) so
// the anti-pattern can be demonstrated without freezing the process for 5s.
function badExample(durationMs = 5000) {
  const start = Date.now();
  // Busy loop: occupies the CPU until the deadline passes — all requests stall
  while (Date.now() - start < durationMs) {
    // Heavy computation occupying the CPU
  }
}

// Good example: process the array in fixed-size batches, yielding control to
// the event loop between batches so pending I/O callbacks can still run.
// Returns a new array with every item doubled.
async function goodExample(items) {
  const BATCH_SIZE = 1000;
  const results = [];
  let offset = 0;

  while (offset < items.length) {
    const batch = items.slice(offset, offset + BATCH_SIZE);
    for (const item of batch) {
      results.push(item * 2); // Lightweight operation
    }
    offset += BATCH_SIZE;

    // Hand control back to the event loop before the next batch.
    if (offset < items.length) {
      await new Promise((resolve) => setImmediate(resolve));
    }
  }
  return results;
}
| Phase | Role | Example |
| --- | --- | --- |
| timers | Execute `setTimeout` / `setInterval` callbacks | Delayed tasks |
| pending callbacks | System operation callbacks | TCP errors |
| idle, prepare | Used internally by Node.js | — |
| poll | Process I/O callbacks | File reads, network |
| check | Execute `setImmediate` callbacks | Immediate execution after poll |
| close callbacks | Close event callbacks | `socket.on('close')` |

Cluster Mode — Utilizing Multiple Cores

By default, Node.js only uses a single CPU core. The cluster module lets you spawn multiple processes to take advantage of multi-core CPUs.

const cluster = require("node:cluster");
const http = require("node:http");
const os = require("node:os");

// Spawn one worker per CPU core — a single Node.js process uses only one core.
const workerCount = os.cpus().length;

if (!cluster.isPrimary) {
  // Worker: every worker listens on the same port; the primary process
  // distributes incoming connections among them.
  const server = http.createServer((req, res) => {
    res.writeHead(200, { "Content-Type": "application/json" });
    const body = JSON.stringify({
      pid: process.pid,
      message: "Response complete",
    });
    res.end(body);
  });
  server.listen(3000);

  console.log(`Worker ${process.pid} started`);
} else {
  console.log(`Primary process ${process.pid} started`);
  console.log(`Spawning ${workerCount} workers...`);

  // One worker per core.
  for (let i = 0; i < workerCount; i += 1) {
    cluster.fork();
  }

  // Keep the pool at full strength: replace any worker that dies.
  cluster.on("exit", (worker, code, signal) => {
    console.warn(
      `Worker ${worker.process.pid} exited (code: ${code}). Restarting...`
    );
    cluster.fork();
  });
}

// Output:
// Primary process 12345 started
// Spawning 8 workers...
// Worker 12346 started
// Worker 12347 started
// ...

Worker Threads — Offloading CPU-Intensive Tasks

The worker_threads module runs CPU-intensive tasks in separate threads without blocking the main thread.

// worker.js — code that runs in the worker thread
const { parentPort, workerData } = require("node:worker_threads");

// Naive exponential-time Fibonacci — deliberately CPU-heavy, which is what
// makes it worth offloading to a worker thread.
function fibonacci(n) {
  if (n <= 1) {
    return n;
  }
  const prev = fibonacci(n - 1);
  const prevPrev = fibonacci(n - 2);
  return prev + prevPrev;
}

// Compute the requested Fibonacci number and send the result back to the
// parent thread via the message channel.
// NOTE(review): parentPort and workerData are only populated when this file
// runs inside a Worker; loading it directly on the main thread would throw
// here — confirm it is only ever started via `new Worker("./worker.js")`.
const result = fibonacci(workerData.number);
parentPort.postMessage({ number: workerData.number, result });

// main.js — main thread
const { Worker } = require("node:worker_threads");

// Run ./worker.js on a dedicated thread and resolve with the message it
// posts back. Rejects on a worker error or a non-zero exit code.
function runFibonacci(number) {
  return new Promise((resolve, reject) => {
    const thread = new Worker("./worker.js", {
      workerData: { number },
    });

    thread.on("message", resolve);
    thread.on("error", reject);
    thread.on("exit", (code) => {
      if (code === 0) return;
      reject(new Error(`Worker exit code: ${code}`));
    });
  });
}

// Run multiple Fibonacci calculations in parallel
// Run several Fibonacci calculations in parallel, one worker thread each,
// and print the timing plus every result.
async function main() {
  console.time("Parallel execution");
  const inputs = [40, 41, 42];
  const results = await Promise.all(inputs.map((n) => runFibonacci(n)));
  console.timeEnd("Parallel execution");

  for (const { number, result } of results) {
    console.log(`fibonacci(${number}) = ${result}`);
  }
}

main();
// Parallel execution: 2340ms (about 3x faster than sequential)
// fibonacci(40) = 102334155
// fibonacci(41) = 165580141
// fibonacci(42) = 267914296
| Approach | Best for | Memory sharing | Communication |
| --- | --- | --- | --- |
| Cluster | HTTP server scaling | Not possible | IPC |
| Worker threads | CPU-intensive computation | `SharedArrayBuffer` | `postMessage` |

Memory Management and Leak Detection

Here is how to monitor V8 heap memory and detect leaks.

// Monitor memory usage: log the current process memory stats, formatted in MB.
// label: optional tag appended to the log prefix, e.g. "[Memory - start]".
function logMemoryUsage(label = "") {
  const toMB = (bytes) => `${(bytes / 1024 / 1024).toFixed(1)}MB`;
  const { rss, heapTotal, heapUsed, external } = process.memoryUsage();
  const suffix = label ? ` - ${label}` : "";

  console.log(`[Memory${suffix}]`, {
    rss: toMB(rss),             // Total memory held by the process
    heapTotal: toMB(heapTotal), // Total heap
    heapUsed: toMB(heapUsed),   // Heap used
    external: toMB(external),   // C++ objects
  });
}

// Memory leak example and solution: a size-bounded LRU cache.
// An unbounded Map is a classic leak; capping the size with least-recently-used
// eviction keeps memory flat.
class CacheManager {
  #cache = new Map(); // Map preserves insertion order: first key = least recent
  #maxSize;

  constructor(maxSize = 1000) {
    this.#maxSize = maxSize;
  }

  // Insert or update an entry.
  // Bug fix: the original evicted the oldest entry whenever size >= maxSize,
  // even when `key` already existed — updating a key then silently dropped an
  // unrelated entry although the map would not have grown. Evict only when
  // inserting a genuinely new key; updating an existing key now also moves it
  // to the most-recently-used position.
  set(key, value) {
    if (this.#cache.has(key)) {
      // Refresh recency: delete + re-set moves the key to the end.
      this.#cache.delete(key);
    } else if (this.#cache.size >= this.#maxSize) {
      // LRU strategy: remove the oldest entry when max size is exceeded.
      const oldestKey = this.#cache.keys().next().value;
      this.#cache.delete(oldestKey);
    }
    this.#cache.set(key, value);
  }

  // Return the cached value, or undefined on a miss.
  get(key) {
    if (!this.#cache.has(key)) return undefined;
    // Move accessed entry to the end (LRU)
    const value = this.#cache.get(key);
    this.#cache.delete(key);
    this.#cache.set(key, value);
    return value;
  }

  // Current number of cached entries.
  get size() {
    return this.#cache.size;
  }
}

// Weak reference management with FinalizationRegistry: logs the registered
// token whenever a tracked object is reclaimed by the garbage collector.
const registry = new FinalizationRegistry(function onReclaimed(key) {
  console.log(`[GC] ${key} object was garbage collected`);
});

// Baseline snapshot at startup — compare against later calls to spot growth.
logMemoryUsage("start");
// [Memory - start] { rss: '25.3MB', heapTotal: '6.2MB', heapUsed: '4.1MB', external: '0.4MB' }

Profiling — Finding Bottlenecks

const { performance } = require("node:perf_hooks");

// Utility for measuring function execution time.
// Runs fn once, logs the elapsed wall-clock time under `label`, and returns
// whatever fn returned.
function measureTime(label, fn) {
  const startedAt = performance.now();
  const value = fn();
  const tookMs = performance.now() - startedAt;
  console.log(`[${label}] ${tookMs.toFixed(2)}ms`);
  return value;
}

// Processing large files with streams (memory-efficient)
const fs = require("node:fs");
const { createReadStream, createWriteStream } = fs;
const { pipeline } = require("node:stream/promises");
const { Transform } = require("node:stream");

// Uppercase a file chunk-by-chunk via streams (memory-efficient: the whole
// file is never held in memory at once).
// Bug fix: the original read raw Buffer chunks, so chunk.toString() could
// split a multi-byte UTF-8 character across a chunk boundary and corrupt it.
// Reading with an explicit encoding makes Node's string decoder emit only
// complete characters per chunk.
async function processLargeFile(inputPath, outputPath) {
  const upperCaseTransform = new Transform({
    transform(chunk, encoding, callback) {
      // Process in chunks — no need to load the entire file into memory
      callback(null, chunk.toString().toUpperCase());
    },
  });

  await pipeline(
    // utf8 encoding: chunks arrive as strings with character boundaries intact
    createReadStream(inputPath, { encoding: "utf8" }),
    upperCaseTransform,
    createWriteStream(outputPath)
  );
  console.log("File processing complete (stream-based)");
}
# Node.js built-in profiler
node --prof app.js
node --prof-process isolate-*.log > profile.txt

# Profiling with Chrome DevTools
node --inspect app.js
# Connect at chrome://inspect

Practical Tips

  • Never block the event loop: Offload synchronous tasks that take more than 100ms to worker threads
  • Use streams: Process large data with createReadStream instead of readFile
  • Connection pooling: Manage DB connections via a pool instead of creating new ones each time
  • Limit cache size: Unbounded caches are a leading cause of memory leaks. Apply an LRU strategy
  • Cluster + PM2: In production, PM2’s cluster mode simplifies process management
  • Heap snapshots: Use the --inspect flag to compare memory snapshots in Chrome DevTools
  • HTTP Keep-Alive: Reuse connections to reduce TCP handshake overhead

Was this article helpful?