Common Algorithms
Merge Sort
Large-scale data processing requires sorting algorithms with predictable, efficient performance. Merge sort guarantees O(n log n) time complexity regardless of input order, making it the algorithm of choice for sorting large datasets, external files, and situations where consistent performance is critical.
In-Place Variation
basic.py
# Basic merge sort
def merge_sort(arr):
"""Merge sort implementation"""
# Base case: single element is sorted
if len(arr) <= 1:
return arr
# Divide
mid = len(arr) // 2
left = arr[:mid]
right = arr[mid:]
# Recursively sort
left = merge_sort(left)
right = merge_sort(right)
# Merge sorted halves
return merge(left, right)
def merge(left, right):
"""Merge two sorted lists"""
result = []
i = j = 0
# Compare and merge
while i < len(left) and j < len(right):
if left[i] <= right[j]:
result.append(left[i])
i += 1
else:
result.append(right[j])
j += 1
# Add remaining elements
result.extend(left[i:])
result.extend(right[j:])
return result
# Test merge sort
numbers =
print("Before:", numbers)
sorted_numbers = merge_sort(numbers)
print("After: ", sorted_numbers)
# Basic merge sort
def merge_sort(arr):
"""Merge sort implementation"""
# Base case: single element is sorted
if len(arr) <= 1:
return arr
# Divide
mid = len(arr) // 2
left = arr[:mid]
right = arr[mid:]
# Recursively sort
left = merge_sort(left)
right = merge_sort(right)
# Merge sorted halves
return merge(left, right)
def merge(left, right):
"""Merge two sorted lists"""
result = []
i = j = 0
# Compare and merge
while i < len(left) and j < len(right):
if left[i] <= right[j]:
result.append(left[i])
i += 1
else:
result.append(right[j])
j += 1
# Add remaining elements
result.extend(left[i:])
result.extend(right[j:])
return result
# Test merge sort
numbers =
print("Before:", numbers)
sorted_numbers = merge_sort(numbers)
print("After: ", sorted_numbers)
# Basic merge sort
def merge_sort(arr):
"""Merge sort implementation"""
# Base case: single element is sorted
if len(arr) <= 1:
return arr
# Divide
mid = len(arr) // 2
left = arr[:mid]
right = arr[mid:]
# Recursively sort
left = merge_sort(left)
right = merge_sort(right)
# Merge sorted halves
return merge(left, right)
def merge(left, right):
"""Merge two sorted lists"""
result = []
i = j = 0
# Compare and merge
while i < len(left) and j < len(right):
if left[i] <= right[j]:
result.append(left[i])
i += 1
else:
result.append(right[j])
j += 1
# Add remaining elements
result.extend(left[i:])
result.extend(right[j:])
return result
# Test merge sort
numbers =
print("Before:", numbers)
sorted_numbers = merge_sort(numbers)
print("After: ", sorted_numbers)
merge_only.py
# Merge two sorted lists
def merge_two_sorted(left, right):
"""Merge two already sorted lists"""
result = []
i = j = 0
# Compare and merge
while i < len(left) and j < len(right):
if left[i] <= right[j]:
result.append(left[i])
i += 1
else:
result.append(right[j])
j += 1
# Add remaining elements
result.extend(left[i:])
result.extend(right[j:])
return result
# Test merging sorted lists
left = [1, 3, 5, 7]
right = [2, 4, 6, 8]
print("Left: ", left)
print("Right:", right)
merged = merge_two_sorted(left, right)
print("Merged:", merged)
# Different sizes
arr1 = [1, 5, 9]
arr2 = [2, 3, 4, 6, 7]
print("\nLeft: ", arr1)
print("Right:", arr2)
merged2 = merge_two_sorted(arr1, arr2)
print("Merged:", merged2)
trace.py
# Merge sort with trace
depth = 0
def merge_sort_trace(arr):
"""Merge sort with step-by-step trace"""
global depth
if len(arr) <= 1:
return arr
# Trace divide
print(" " * depth + f"Divide: {arr}")
depth += 1
mid = len(arr) // 2
left = merge_sort_trace(arr[:mid])
right = merge_sort_trace(arr[mid:])
result = merge_trace(left, right)
depth -= 1
print(" " * depth + f"Merged: {result}")
return result
def merge_trace(left, right):
"""Merge with trace"""
global depth
print(" " * depth + f" Merge {left} and {right}")
result = []
i = j = 0
while i < len(left) and j < len(right):
if left[i] <= right[j]:
result.append(left[i])
i += 1
else:
result.append(right[j])
j += 1
result.extend(left[i:])
result.extend(right[j:])
return result
# Test with trace
numbers = [5, 2, 8, 1, 9]
print("Initial:", numbers)
print()
sorted_numbers = merge_sort_trace(numbers)
print()
print("Final: ", sorted_numbers)
inplace.py
# In-place merge sort
def merge_sort_inplace(arr, left=0, right=None):
"""Merge sort that modifies array in place"""
if right is None:
right = len(arr) - 1
# Divide and conquer
if left < right:
mid = left + (right - left) // 2
merge_sort_inplace(arr, left, mid)
merge_sort_inplace(arr, mid + 1, right)
merge_inplace(arr, left, mid, right)
def merge_inplace(arr, left, mid, right):
"""Merge in place using temporary list"""
# Create temporary arrays
left_arr = arr[left:mid + 1]
right_arr = arr[mid + 1:right + 1]
# Merge back into original
i = j = 0
k = left
while i < len(left_arr) and j < len(right_arr):
if left_arr[i] <= right_arr[j]:
arr[k] = left_arr[i]
i += 1
else:
arr[k] = right_arr[j]
j += 1
k += 1
# Copy remaining
while i < len(left_arr):
arr[k] = left_arr[i]
i += 1
k += 1
while j < len(right_arr):
arr[k] = right_arr[j]
j += 1
k += 1
# Test in-place version
numbers = [38, 27, 43, 3, 9, 82, 10]
print("Before:", numbers)
merge_sort_inplace(numbers)
print("After: ", numbers)
Characteristics
- Time complexity: O(n log n) - always
- Space complexity: O(n) - requires temporary lists
- Stable: maintains relative order of equal elements
- Not in-place: requires additional memory
- Predictable: always same performance
When to Use
- Need guaranteed O(n log n) performance
- Stability matters
- Large datasets
- External sorting
divide_conquer_sort
Split the problem into smaller parts, solve each, then combine the solutions
merge_operation
Combine two sorted lists into one sorted list by comparing front elements
guaranteed_performance
O(n log n) in all cases - best, average, and worst
Exercise: practical.py
Implement merge sort to sort a list of log entries by timestamp, then merge two sorted log files into one