Python中的defaultdict详解
defaultdict是Python标准库collections模块中提供的一个非常有用的字典子类,它自动为不存在的键提供默认值,避免了处理缺失键时的KeyError异常。
defaultdict基本概念
普通字典(dict)在访问不存在的键时会抛出KeyError,而defaultdict在通过d[key]访问不存在的键时,会调用默认工厂函数自动创建并保存默认值。
from collections import defaultdict
# 普通字典
normal_dict = {}
# print(normal_dict['missing_key']) # 抛出KeyError
# defaultdict
default_dict = defaultdict(int)
print(default_dict['missing_key']) # 输出: 0
构造方式: defaultdict(default_factory=None, /, ...)
default_factory: 一个无参数的可调用对象(函数、类等),用于为不存在的键提供默认值;若为None,访问缺失键时仍会抛出KeyError
其余参数: 与dict()构造函数的参数相同,用于提供可选的初始键值对(可以是映射、键值对的可迭代对象,或关键字参数)
defaultdict的常见用法
from collections import defaultdict
# 默认值为0
int_default = defaultdict(int)
print(int_default['a']) # 输出: 0
# 默认值为空列表
list_default = defaultdict(list)
print(list_default['b']) # 输出: []
# 默认值为空集合
set_default = defaultdict(set)
print(set_default['c']) # 输出: set()
# 默认值为空字符串
str_default = defaultdict(str)
print(str_default['d']) # 输出: ''
# 默认值为False
bool_default = defaultdict(bool)
print(bool_default['e']) # 输出: False
from collections import defaultdict
# 自定义默认值函数
def default_value():
    """Return the placeholder string used as the default for missing keys."""
    return "Unknown"
custom_default = defaultdict(default_value)
print(custom_default['name']) # 输出: "Unknown"
# 使用lambda表达式
lambda_default = defaultdict(lambda: 100)
print(lambda_default['score']) # 输出: 100
from collections import defaultdict
# Nested dictionaries: every missing outer key gets its own defaultdict(int),
# so a counter two levels deep can be incremented without any setup.
nested_dict = defaultdict(lambda: defaultdict(int))
nested_dict['group1']['item1'] += 1
print(nested_dict) # Output: defaultdict(<function <lambda> at ...>, {'group1': defaultdict(<class 'int'>, {'item1': 1})})
# 字典的字典
graph = defaultdict(dict)
graph['A']['B'] = 5
graph['A']['C'] = 3
print(graph) # 输出: defaultdict(<class 'dict'>, {'A': {'B': 5, 'C': 3}})
from collections import defaultdict
# Count how often each word occurs; a missing word starts from int() == 0,
# so "+= 1" works on first sight of a word without an explicit existence check.
words = ["apple", "banana", "apple", "orange", "banana", "apple"]
word_count = defaultdict(int)
for word in words:
    word_count[word] += 1
print(word_count) # 输出: defaultdict(<class 'int'>, {'apple': 3, 'banana': 2, 'orange': 1})
from collections import defaultdict
# (student name, subject) pairs to be grouped by subject.
students = [
    ('Alice', 'Math'),
    ('Bob', 'Physics'),
    ('Charlie', 'Math'),
    ('David', 'Chemistry'),
    ('Eve', 'Physics')
]
# Each new subject starts with an empty list, so append() is always safe.
subject_students = defaultdict(list)
for name, subject in students:
    subject_students[subject].append(name)
print(subject_students)
# 输出: defaultdict(<class 'list'>, {
# 'Math': ['Alice', 'Charlie'],
# 'Physics': ['Bob', 'Eve'],
# 'Chemistry': ['David']
# })
from collections import defaultdict
# Adjacency-list representation of a graph: vertex -> list of neighbours.
graph = defaultdict(list)
edges = [(1, 2), (1, 3), (2, 4), (3, 4), (4, 5)]
for u, v in edges:
    graph[u].append(v)
    graph[v].append(u)  # undirected graph: record the edge in both directions
print(graph)
# 输出: defaultdict(<class 'list'>, {
# 1: [2, 3],
# 2: [1, 4],
# 3: [1, 4],
# 4: [2, 3, 5],
# 5: [4]
# })
from collections import defaultdict
# One key mapped to several values; a set drops duplicates such as ('b', 2).
multi_dict = defaultdict(set)
data = [('a', 1), ('b', 2), ('a', 3), ('c', 4), ('b', 2)]
for key, value in data:
    multi_dict[key].add(value)
print(multi_dict)
# 输出: defaultdict(<class 'set'>, {'a': {1, 3}, 'b': {2}, 'c': {4}})
defaultdict的高级用法
from collections import defaultdict
class Person:
    """Minimal record type that can be constructed without arguments.

    Because ``name`` is optional, the class itself can be passed as a
    ``default_factory``, which is invoked with no arguments.
    """

    def __init__(self, name=None):
        """Store ``name``; it defaults to ``None`` when omitted."""
        self.name = name

    def __repr__(self):
        """Return a ``Person(name=...)`` representation of this instance."""
        return f"Person(name={self.name!r})"
person_dict = defaultdict(Person)
person_dict['alice'].name = "Alice Smith"
person_dict['bob'].name = "Bob Johnson"
print(person_dict['alice']) # 输出: Person(name='Alice Smith')
print(person_dict['charlie']) # 输出: Person(name=None)
from collections import defaultdict
import random
def random_color():
    """Return a random color as a ``#`` followed by six lowercase hex digits."""
    return "#{:06x}".format(random.randint(0, 0xFFFFFF))
color_dict = defaultdict(random_color)
print(color_dict['background']) # 输出随机颜色如: "#3a7b9f"
print(color_dict['text']) # 输出另一个随机颜色
from collections import defaultdict
def make_default(value):
    """Return a zero-argument callable that always returns ``value``.

    The returned callable hands back the very same object on every call, so
    a mutable ``value`` would be shared by every key that receives it.
    """
    return lambda: value
config = defaultdict(make_default("N/A"))
print(config['timeout']) # 输出: "N/A"
config['timeout'] = 30
print(config['timeout']) # 输出: 30
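print(config['retries']) # 输出: "N/A"
defaultdict只有在访问缺失键时才需要调用默认工厂函数,此时会比普通字典稍慢;访问已存在的键时,开销与普通字典基本相同。
用d[key]读取不存在的键会调用默认工厂并把结果插入字典(即使只是读取);而key in d和d.get(key)不会调用默认工厂,也不会创建新键。
d = defaultdict(list)
if 'key' in d: # 成员检查不会创建'key',因此条件为False,下面的print不会执行
    print(d['key'])
defaultdict在序列化(pickle)时会连同默认工厂函数一起保存,因此默认工厂必须是可被pickle的对象;使用lambda或局部函数作为默认工厂时,pickle会失败。
dict.setdefault()
# 使用setdefault实现类似功能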
normal_dict = {}
for word in words:
    normal_dict.setdefault(word, 0)
    normal_dict[word] += 1
dict.get() with default
# 使用get方法
normal_dict = {}
for word in words:
    normal_dict[word] = normal_dict.get(word, 0) + 1
# 3.9+版本可以使用合并运算符
normal_dict = {}
for word in words:
    normal_dict |= {word: normal_dict.get(word, 0) + 1}
defaultdict是Python中一个强大而灵活的工具,特别适合以下场景:计数统计(如词频统计)、按键分组、构建图的邻接表、一个键对应多个值的映射,以及嵌套字典。
选择defaultdict而非普通字典的主要优势在于代码的简洁性和可读性,但要注意它的特性可能带来的副作用。在性能关键的场景中,可能需要对比测试defaultdict与普通字典加setdefault()或get()的性能差异。