Collections
Sets
Unique Collections
You're tracking which pages a user has visited. You don't want duplicates, and
you need fast "have they seen this?" checks. Sets automatically ignore duplicates
and provide average O(1) membership testing with the `in` operator.
Collect unique tags
Add items to a set - duplicates are ignored.
def main():
    """Show that a set stores each blog tag once, however often it is added."""
    # An empty set must be spelled set(); {} would create a dict instead.
    post_tags = set()

    print("=== Adding Blog Tags ===")

    # Insert three distinct tags, printing the set after each insertion.
    for label in ("python", "programming", "tutorial"):
        post_tags.add(label)
        print(f"Added '{label}': {post_tags}")

    # Adding an element that is already present leaves the set unchanged.
    post_tags.add("python")
    print(f"Added 'python' again: {post_tags}")
    print("Duplicate ignored!")

    # A second batch that repeats "python" once more.
    for label in ("beginner", "coding", "python"):
        post_tags.add(label)
    print(f"\nWith more tags: {post_tags}")

    print("\n=== Blog Post Tags ===")
    tag_count = len(post_tags)
    print(f"Total unique tags: {tag_count}")
    print(f"Tags: {post_tags}")

    # Render every tag as a hashtag on a single line.
    print("\n=== Formatted ===")
    print(" ".join("#" + label for label in post_tags))

    # A non-empty set can also be written as a literal.
    print("\n=== Set Literal ===")
    palette = {"red", "green", "blue"}
    print(f"Colors: {palette}")


main()
Use add() to add elements. Duplicates are silently ignored.
Fast membership check
Test if an item is in the set.
def main(current_user="alice"):
    """Demonstrate set membership tests with an allowlist and a blocklist.

    Args:
        current_user: Username checked against the premium and blocked
            sets. Defaults to "alice", a member of the demo allowlist, so
            the snippet runs without any input.
    """
    # Create an allowlist of premium users
    premium_users = {"alice", "bob", "carol", "david"}

    print("=== Premium Membership ===")
    print(f"Premium users: {premium_users}")

    # Check if the user has access
    print("\n=== Access Check ===")
    print(f"User: {current_user}")

    if current_user in premium_users:
        print("✓ Premium access granted!")
        print("Welcome to exclusive content.")
    else:
        print("✗ Not a premium user.")
        print("Upgrade to access premium features!")

    # Blocked users check
    blocked_users = {"spammer", "troll", "eve"}

    print("\n=== Security Check ===")
    if current_user in blocked_users:
        print("⛔ User is blocked!")
    else:
        print("✓ User is not blocked.")

    # Check several users against the same allowlist
    print("\n=== Batch Check ===")
    users_to_check = ["alice", "eve", "bob", "frank"]
    for user in users_to_check:
        status = "Premium" if user in premium_users else "Regular"
        print(f"{user}: {status}")

    # Speed comparison note
    print("\n=== Performance Note ===")
    print("'in' check for set: O(1) - instant!")
    print("'in' check for list: O(n) - slower")


main()
def main(current_user="alice"):
    """Run premium-access and blocklist checks using set membership.

    Args:
        current_user: Username to look up in the premium and blocked sets.
            Defaults to "alice", who is in the demo premium set, so the
            snippet is runnable as written.
    """
    # Create an allowlist of premium users
    premium_users = {"alice", "bob", "carol", "david"}

    print("=== Premium Membership ===")
    print(f"Premium users: {premium_users}")

    # Check if the user has access
    print("\n=== Access Check ===")
    print(f"User: {current_user}")

    if current_user in premium_users:
        print("✓ Premium access granted!")
        print("Welcome to exclusive content.")
    else:
        print("✗ Not a premium user.")
        print("Upgrade to access premium features!")

    # Blocked users check
    blocked_users = {"spammer", "troll", "eve"}

    print("\n=== Security Check ===")
    if current_user in blocked_users:
        print("⛔ User is blocked!")
    else:
        print("✓ User is not blocked.")

    # Check several users against the same allowlist
    print("\n=== Batch Check ===")
    users_to_check = ["alice", "eve", "bob", "frank"]
    for user in users_to_check:
        status = "Premium" if user in premium_users else "Regular"
        print(f"{user}: {status}")

    # Speed comparison note
    print("\n=== Performance Note ===")
    print("'in' check for set: O(1) - instant!")
    print("'in' check for list: O(n) - slower")


main()
`item in s` is O(1) on average for a set - much faster than a list's O(n) linear search.
Remove an item
Remove elements with remove() or discard().
def main():
    """Walk through remove(), discard(), clear() and pop() on a set."""
    basket = {"Laptop", "Mouse", "Keyboard", "Monitor", "Headphones"}

    print("=== Shopping Cart ===")
    print(f"Items: {basket}")
    print(f"Count: {len(basket)}")

    print("\n=== Removing Items ===")

    # remove() requires the element to be present (KeyError otherwise).
    present_item = "Mouse"
    basket.remove(present_item)
    print(f"Removed '{present_item}': {basket}")

    # discard() is a silent no-op when the element is absent.
    absent_item = "Tablet"
    basket.discard(absent_item)
    print(f"Discard '{absent_item}' (not in cart): no error!")
    print(f"Cart unchanged: {basket}")

    # By contrast, basket.remove("Tablet") would raise KeyError here.

    # Drop two more items that are present.
    for extra in ("Keyboard", "Monitor"):
        basket.discard(extra)
    print(f"\nAfter more removals: {basket}")

    # clear() empties the set in place.
    print("\n=== Clearing Cart ===")
    print(f"Before clear: {len(basket)} items")
    basket.clear()
    print(f"After clear: {len(basket)} items")
    print(f"Cart is empty: {not basket}")

    # pop() removes and returns one arbitrary element.
    numbers = {1, 2, 3, 4, 5}
    print("\n=== pop() Demo ===")
    print(f"Set: {numbers}")
    taken = numbers.pop()
    print(f"Popped: {taken}")
    print(f"Remaining: {numbers}")


main()
remove() raises KeyError if missing. discard() doesn't.
Combine two sets
Create a union of two sets.
def main():
    """Combine sets with the | operator, union() and in-place |=."""
    # Skills known by each of two teams.
    team_a = {"Java", "Python", "SQL", "Git"}
    team_b = {"JavaScript", "Python", "CSS", "Git"}

    print("=== Team Skills ===")
    print(f"Team A: {team_a}")
    print(f"Team B: {team_b}")

    # Operator form of union: every skill held by either team.
    combined = team_a | team_b
    print("\n=== Union (All Skills) ===")
    print(f"Combined: {combined}")
    print("(Duplicates automatically removed)")

    # Method form of the same operation.
    combined_via_method = team_a.union(team_b)
    print(f"Using .union(): {combined_via_method}")

    # Sizes; the overlap follows from inclusion-exclusion.
    size_a = len(team_a)
    size_b = len(team_b)
    size_all = len(combined)
    print("\n=== Statistics ===")
    print(f"Team A skills: {size_a}")
    print(f"Team B skills: {size_b}")
    print(f"Unique combined: {size_all}")
    shared = size_a + size_b - size_all
    print(f"Overlap count: {shared}")

    # Practical use: merging permission sets by role.
    print("\n=== Permission Merge ===")
    basic_perms = {"read", "comment"}
    editor_perms = {"read", "write", "edit", "comment"}
    admin_perms = {"read", "write", "edit", "delete", "manage"}

    # Editors get the basic and editor permissions together.
    editor_total = basic_perms | editor_perms
    print(f"Editor has: {editor_total}")

    # Admins get every permission from all three tiers.
    admin_total = basic_perms | editor_perms | admin_perms
    print(f"Admin has: {admin_total}")

    # |= merges new elements into the existing set in place.
    granted = {"read"}
    granted |= {"comment", "vote"}
    print(f"\nUser perms after |=: {granted}")


main()
Use | operator or union() method. Result has all unique elements.
Remove duplicates from list
Convert a list to set to eliminate duplicates.
def main():
    """Remove duplicates from a list via set(), with and without keeping order."""
    from collections import Counter

    # Sample data containing repeated addresses.
    emails = [
        "alice@example.com",
        "bob@example.com",
        "alice@example.com",  # duplicate
        "carol@example.com",
        "bob@example.com",  # duplicate
        "david@example.com",
        "alice@example.com",  # duplicate
    ]

    print("=== Email List (with duplicates) ===")
    print(f"Emails: {emails}")
    print(f"Count: {len(emails)}")

    # Converting to a set keeps one copy of each address.
    distinct = set(emails)
    print("\n=== After Removing Duplicates ===")
    print(f"Unique: {distinct}")
    print(f"Count: {len(distinct)}")
    dropped = len(emails) - len(distinct)
    print(f"Removed {dropped} duplicates")

    # Back to a list when list operations are needed afterwards.
    as_list = list(distinct)
    print(f"\nAs list: {as_list}")

    # dict keys are unique and keep insertion order, so this dedups in order.
    print("\n=== Preserving Order ===")
    in_order = list(dict.fromkeys(emails))
    print(f"Order preserved: {in_order}")
    print("(dict maintains insertion order in Python 3.7+)")

    # The compact list(set(...)) idiom, which does not keep order.
    print("\n=== One-liner Dedup ===")
    nums = [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5]
    distinct_nums = list(set(nums))
    print(f"Original: {nums}")
    print(f"Unique (order lost): {distinct_nums}")

    # Report every address that occurs more than once.
    print("\n=== Duplicate Analysis ===")
    repeated = {
        address: times
        for address, times in Counter(emails).items()
        if times > 1
    }
    for address, times in repeated.items():
        print(f"{address} appeared {times} times")


main()
`set(some_list)` builds a set from the list's elements, automatically removing duplicates (the original order is not preserved).
Exercise: set_operations.py
Explore intersection, difference, and symmetric difference