|
|
@@ -0,0 +1,129 @@
|
|
|
+# Heap queue algorithm (a.k.a. priority queue)
|
|
|
+
|
|
|
+__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'merge',
|
|
|
+ 'nlargest', 'nsmallest', 'heappushpop']
|
|
|
+
|
|
|
+def heappush(heap, item):
|
|
|
+ """Push item onto heap, maintaining the heap invariant."""
|
|
|
+ heap.append(item)
|
|
|
+ _siftdown(heap, 0, len(heap)-1)
|
|
|
+
|
|
|
+def heappop(heap):
|
|
|
+ """Pop the smallest item off the heap, maintaining the heap invariant."""
|
|
|
+ lastelt = heap.pop() # raises appropriate IndexError if heap is empty
|
|
|
+ if heap:
|
|
|
+ returnitem = heap[0]
|
|
|
+ heap[0] = lastelt
|
|
|
+ _siftup(heap, 0)
|
|
|
+ return returnitem
|
|
|
+ return lastelt
|
|
|
+
|
|
|
+def heapreplace(heap, item):
|
|
|
+ """Pop and return the current smallest value, and add the new item.
|
|
|
+
|
|
|
+ This is more efficient than heappop() followed by heappush(), and can be
|
|
|
+ more appropriate when using a fixed-size heap. Note that the value
|
|
|
+ returned may be larger than item! That constrains reasonable uses of
|
|
|
+ this routine unless written as part of a conditional replacement:
|
|
|
+
|
|
|
+ if item > heap[0]:
|
|
|
+ item = heapreplace(heap, item)
|
|
|
+ """
|
|
|
+ returnitem = heap[0] # raises appropriate IndexError if heap is empty
|
|
|
+ heap[0] = item
|
|
|
+ _siftup(heap, 0)
|
|
|
+ return returnitem
|
|
|
+
|
|
|
+def heappushpop(heap, item):
|
|
|
+ """Fast version of a heappush followed by a heappop."""
|
|
|
+ if heap and heap[0] < item:
|
|
|
+ item, heap[0] = heap[0], item
|
|
|
+ _siftup(heap, 0)
|
|
|
+ return item
|
|
|
+
|
|
|
+def heapify(x):
|
|
|
+ """Transform list into a heap, in-place, in O(len(x)) time."""
|
|
|
+ n = len(x)
|
|
|
+ # Transform bottom-up. The largest index there's any point to looking at
|
|
|
+ # is the largest with a child index in-range, so must have 2*i + 1 < n,
|
|
|
+ # or i < (n-1)/2. If n is even = 2*j, this is (2*j-1)/2 = j-1/2 so
|
|
|
+ # j-1 is the largest, which is n//2 - 1. If n is odd = 2*j+1, this is
|
|
|
+ # (2*j+1-1)/2 = j so j-1 is the largest, and that's again n//2-1.
|
|
|
+ for i in reversed(range(n//2)):
|
|
|
+ _siftup(x, i)
|
|
|
+
|
|
|
+# 'heap' is a heap at all indices >= startpos, except possibly for pos. pos
|
|
|
+# is the index of a leaf with a possibly out-of-order value. Restore the
|
|
|
+# heap invariant.
|
|
|
+def _siftdown(heap, startpos, pos):
|
|
|
+ newitem = heap[pos]
|
|
|
+ # Follow the path to the root, moving parents down until finding a place
|
|
|
+ # newitem fits.
|
|
|
+ while pos > startpos:
|
|
|
+ parentpos = (pos - 1) >> 1
|
|
|
+ parent = heap[parentpos]
|
|
|
+ if newitem < parent:
|
|
|
+ heap[pos] = parent
|
|
|
+ pos = parentpos
|
|
|
+ continue
|
|
|
+ break
|
|
|
+ heap[pos] = newitem
|
|
|
+
|
|
|
+# The child indices of heap index pos are already heaps, and we want to make
|
|
|
+# a heap at index pos too. We do this by bubbling the smaller child of
|
|
|
+# pos up (and so on with that child's children, etc) until hitting a leaf,
|
|
|
+# then using _siftdown to move the oddball originally at index pos into place.
|
|
|
+#
|
|
|
+# We *could* break out of the loop as soon as we find a pos where newitem <=
|
|
|
+# both its children, but turns out that's not a good idea, and despite that
|
|
|
+# many books write the algorithm that way. During a heap pop, the last array
|
|
|
+# element is sifted in, and that tends to be large, so that comparing it
|
|
|
+# against values starting from the root usually doesn't pay (= usually doesn't
|
|
|
+# get us out of the loop early). See Knuth, Volume 3, where this is
|
|
|
+# explained and quantified in an exercise.
|
|
|
+#
|
|
|
+# Cutting the # of comparisons is important, since these routines have no
|
|
|
+# way to extract "the priority" from an array element, so that intelligence
|
|
|
+# is likely to be hiding in custom comparison methods, or in array elements
|
|
|
+# storing (priority, record) tuples. Comparisons are thus potentially
|
|
|
+# expensive.
|
|
|
+#
|
|
|
+# On random arrays of length 1000, making this change cut the number of
|
|
|
+# comparisons made by heapify() a little, and those made by exhaustive
|
|
|
+# heappop() a lot, in accord with theory. Here are typical results from 3
|
|
|
+# runs (3 just to demonstrate how small the variance is):
|
|
|
+#
|
|
|
+# Compares needed by heapify Compares needed by 1000 heappops
|
|
|
+# -------------------------- --------------------------------
|
|
|
+# 1837 cut to 1663 14996 cut to 8680
|
|
|
+# 1855 cut to 1659 14966 cut to 8678
|
|
|
+# 1847 cut to 1660 15024 cut to 8703
|
|
|
+#
|
|
|
+# Building the heap by using heappush() 1000 times instead required
|
|
|
+# 2198, 2148, and 2219 compares: heapify() is more efficient, when
|
|
|
+# you can use it.
|
|
|
+#
|
|
|
+# The total compares needed by list.sort() on the same lists were 8627,
|
|
|
+# 8627, and 8632 (this should be compared to the sum of heapify() and
|
|
|
+# heappop() compares): list.sort() is (unsurprisingly!) more efficient
|
|
|
+# for sorting.
|
|
|
+
|
|
|
+def _siftup(heap, pos):
|
|
|
+ endpos = len(heap)
|
|
|
+ startpos = pos
|
|
|
+ newitem = heap[pos]
|
|
|
+ # Bubble up the smaller child until hitting a leaf.
|
|
|
+ childpos = 2*pos + 1 # leftmost child position
|
|
|
+ while childpos < endpos:
|
|
|
+ # Set childpos to index of smaller child.
|
|
|
+ rightpos = childpos + 1
|
|
|
+ if rightpos < endpos and not heap[childpos] < heap[rightpos]:
|
|
|
+ childpos = rightpos
|
|
|
+ # Move the smaller child up.
|
|
|
+ heap[pos] = heap[childpos]
|
|
|
+ pos = childpos
|
|
|
+ childpos = 2*pos + 1
|
|
|
+ # The leaf at pos is empty now. Put newitem there, and bubble it up
|
|
|
+ # to its final resting place (by sifting its parents down).
|
|
|
+ heap[pos] = newitem
|
|
|
+ _siftdown(heap, startpos, pos)
|