Skip to main content

🟥 Find Median from Data Stream (#295)

📋 Problem Statement

The median is the middle value in an ordered integer list. If the size of the list is even, there is no middle value, and the median is the mean of the two middle values.

Implement the MedianFinder class:

  • MedianFinder() initializes the MedianFinder object.
  • void addNum(int num) adds the integer num from the data stream to the data structure.
  • double findMedian() returns the median of all elements so far.

💡 Examples

Example 1

Input:
["MedianFinder", "addNum", "addNum", "findMedian", "addNum", "findMedian"]
[[], [1], [2], [], [3], []]
Output:
[null, null, null, 1.5, null, 2.0]

🔑 Key Insights & Approach

Core Observation: Use two heaps - max heap for smaller half, min heap for larger half. Balance heaps to maintain median access in O(1).

Why Two Heaps?

  • addNum: O(log n)
  • findMedian: O(1)
  • Space: O(n)
  • Optimal for streaming data

Approaches:

  1. Two heaps (max + min): O(log n) add, O(1) median - optimal
  2. Sorted array: O(n) add, O(1) median
  3. Binary search insert: O(n) add, O(1) median

Pattern: "Two Heaps" pattern for median/balance problems.

🐍 Solution: Python

Approach: Two Heaps

Time Complexity: addNum O(log n), findMedian O(1) | Space Complexity: O(n)

import heapq

class MedianFinder:
def __init__(self):
# Max heap for smaller half (invert values for max heap)
self.small = [] # max heap (negate values)
# Min heap for larger half
self.large = [] # min heap

def addNum(self, num: int) -> None:
# Add to small (max heap)
heapq.heappush(self.small, -num)

# Balance: ensure every element in small <= every element in large
if self.small and self.large and (-self.small[0] > self.large[0]):
val = -heapq.heappop(self.small)
heapq.heappush(self.large, val)

# Balance sizes: small can have at most 1 more element than large
if len(self.small) > len(self.large) + 1:
val = -heapq.heappop(self.small)
heapq.heappush(self.large, val)

if len(self.large) > len(self.small):
val = heapq.heappop(self.large)
heapq.heappush(self.small, -val)

def findMedian(self) -> float:
if len(self.small) > len(self.large):
return float(-self.small[0])

return (-self.small[0] + self.large[0]) / 2.0

🔵 Solution: Golang

Approach: Two Heaps

Time Complexity: addNum O(log n), findMedian O(1) | Space Complexity: O(n)

import "container/heap"

type MaxHeap []int
func (h MaxHeap) Len() int { return len(h) }
func (h MaxHeap) Less(i, j int) bool { return h[i] > h[j] }
func (h MaxHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] }
func (h *MaxHeap) Push(x interface{}) { *h = append(*h, x.(int)) }
func (h *MaxHeap) Pop() interface{} {
old := *h
n := len(old)
x := old[n-1]
*h = old[0 : n-1]
return x
}

type MinHeap []int
func (h MinHeap) Len() int { return len(h) }
func (h MinHeap) Less(i, j int) bool { return h[i] < h[j] }
func (h MinHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] }
func (h *MinHeap) Push(x interface{}) { *h = append(*h, x.(int)) }
func (h *MinHeap) Pop() interface{} {
old := *h
n := len(old)
x := old[n-1]
*h = old[0 : n-1]
return x
}

type MedianFinder struct {
small *MaxHeap
large *MinHeap
}

func Constructor() MedianFinder {
small := &MaxHeap{}
large := &MinHeap{}
heap.Init(small)
heap.Init(large)
return MedianFinder{small, large}
}

func (this *MedianFinder) AddNum(num int) {
heap.Push(this.small, num)

// Balance
if this.small.Len() > 0 && this.large.Len() > 0 && (*this.small)[0] > (*this.large)[0] {
val := heap.Pop(this.small).(int)
heap.Push(this.large, val)
}

// Balance sizes
if this.small.Len() > this.large.Len()+1 {
val := heap.Pop(this.small).(int)
heap.Push(this.large, val)
}
if this.large.Len() > this.small.Len() {
val := heap.Pop(this.large).(int)
heap.Push(this.small, val)
}
}

func (this *MedianFinder) FindMedian() float64 {
if this.small.Len() > this.large.Len() {
return float64((*this.small)[0])
}
return float64((*this.small)[0]+(*this.large)[0]) / 2.0
}

📚 References