heapq.py 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129
  1. # Heap queue algorithm (a.k.a. priority queue)
  2. __all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'merge',
  3. 'nlargest', 'nsmallest', 'heappushpop']
  4. def heappush(heap, item):
  5. """Push item onto heap, maintaining the heap invariant."""
  6. heap.append(item)
  7. _siftdown(heap, 0, len(heap)-1)
  8. def heappop(heap):
  9. """Pop the smallest item off the heap, maintaining the heap invariant."""
  10. lastelt = heap.pop() # raises appropriate IndexError if heap is empty
  11. if heap:
  12. returnitem = heap[0]
  13. heap[0] = lastelt
  14. _siftup(heap, 0)
  15. return returnitem
  16. return lastelt
  17. def heapreplace(heap, item):
  18. """Pop and return the current smallest value, and add the new item.
  19. This is more efficient than heappop() followed by heappush(), and can be
  20. more appropriate when using a fixed-size heap. Note that the value
  21. returned may be larger than item! That constrains reasonable uses of
  22. this routine unless written as part of a conditional replacement:
  23. if item > heap[0]:
  24. item = heapreplace(heap, item)
  25. """
  26. returnitem = heap[0] # raises appropriate IndexError if heap is empty
  27. heap[0] = item
  28. _siftup(heap, 0)
  29. return returnitem
  30. def heappushpop(heap, item):
  31. """Fast version of a heappush followed by a heappop."""
  32. if heap and heap[0] < item:
  33. item, heap[0] = heap[0], item
  34. _siftup(heap, 0)
  35. return item
  36. def heapify(x):
  37. """Transform list into a heap, in-place, in O(len(x)) time."""
  38. n = len(x)
  39. # Transform bottom-up. The largest index there's any point to looking at
  40. # is the largest with a child index in-range, so must have 2*i + 1 < n,
  41. # or i < (n-1)/2. If n is even = 2*j, this is (2*j-1)/2 = j-1/2 so
  42. # j-1 is the largest, which is n//2 - 1. If n is odd = 2*j+1, this is
  43. # (2*j+1-1)/2 = j so j-1 is the largest, and that's again n//2-1.
  44. for i in reversed(range(n//2)):
  45. _siftup(x, i)
  46. # 'heap' is a heap at all indices >= startpos, except possibly for pos. pos
  47. # is the index of a leaf with a possibly out-of-order value. Restore the
  48. # heap invariant.
  49. def _siftdown(heap, startpos, pos):
  50. newitem = heap[pos]
  51. # Follow the path to the root, moving parents down until finding a place
  52. # newitem fits.
  53. while pos > startpos:
  54. parentpos = (pos - 1) >> 1
  55. parent = heap[parentpos]
  56. if newitem < parent:
  57. heap[pos] = parent
  58. pos = parentpos
  59. continue
  60. break
  61. heap[pos] = newitem
  62. # The child indices of heap index pos are already heaps, and we want to make
  63. # a heap at index pos too. We do this by bubbling the smaller child of
  64. # pos up (and so on with that child's children, etc) until hitting a leaf,
  65. # then using _siftdown to move the oddball originally at index pos into place.
  66. #
  67. # We *could* break out of the loop as soon as we find a pos where newitem <=
  68. # both its children, but turns out that's not a good idea, and despite that
  69. # many books write the algorithm that way. During a heap pop, the last array
  70. # element is sifted in, and that tends to be large, so that comparing it
  71. # against values starting from the root usually doesn't pay (= usually doesn't
  72. # get us out of the loop early). See Knuth, Volume 3, where this is
  73. # explained and quantified in an exercise.
  74. #
  75. # Cutting the # of comparisons is important, since these routines have no
  76. # way to extract "the priority" from an array element, so that intelligence
  77. # is likely to be hiding in custom comparison methods, or in array elements
  78. # storing (priority, record) tuples. Comparisons are thus potentially
  79. # expensive.
  80. #
  81. # On random arrays of length 1000, making this change cut the number of
  82. # comparisons made by heapify() a little, and those made by exhaustive
  83. # heappop() a lot, in accord with theory. Here are typical results from 3
  84. # runs (3 just to demonstrate how small the variance is):
  85. #
  86. # Compares needed by heapify Compares needed by 1000 heappops
  87. # -------------------------- --------------------------------
  88. # 1837 cut to 1663 14996 cut to 8680
  89. # 1855 cut to 1659 14966 cut to 8678
  90. # 1847 cut to 1660 15024 cut to 8703
  91. #
  92. # Building the heap by using heappush() 1000 times instead required
  93. # 2198, 2148, and 2219 compares: heapify() is more efficient, when
  94. # you can use it.
  95. #
  96. # The total compares needed by list.sort() on the same lists were 8627,
  97. # 8627, and 8632 (this should be compared to the sum of heapify() and
  98. # heappop() compares): list.sort() is (unsurprisingly!) more efficient
  99. # for sorting.
  100. def _siftup(heap, pos):
  101. endpos = len(heap)
  102. startpos = pos
  103. newitem = heap[pos]
  104. # Bubble up the smaller child until hitting a leaf.
  105. childpos = 2*pos + 1 # leftmost child position
  106. while childpos < endpos:
  107. # Set childpos to index of smaller child.
  108. rightpos = childpos + 1
  109. if rightpos < endpos and not heap[childpos] < heap[rightpos]:
  110. childpos = rightpos
  111. # Move the smaller child up.
  112. heap[pos] = heap[childpos]
  113. pos = childpos
  114. childpos = 2*pos + 1
  115. # The leaf at pos is empty now. Put newitem there, and bubble it up
  116. # to its final resting place (by sifting its parents down).
  117. heap[pos] = newitem
  118. _siftdown(heap, startpos, pos)