Redis源码阅读(八) 快速列表

快速列表quicklist是一个由压缩列表ziplist组成的双端链表,链表中每个结点都有一个ziplist,每个ziplist有多个entry,主要用于列表键的底层实现,替换了原有的双端链表adlist。

1. 定义

  • 快速列表结点

    结点进行压缩后,zl指向的是压缩后的LZF结构体,其中compressd中存储的压缩的内容

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
typedef struct quicklistNode {                                                                                  
// 快速列表前驱结点
struct quicklistNode *prev;
// 快速列表后继节点
struct quicklistNode *next;
// 指向ziplist或者quicklistLZF(LZF压缩后)
unsigned char *zl;
// ziplist使用内存字节数
unsigned int sz; /* ziplist size in bytes */
// ziplist中的结点个数,占16bits,不超过32k
unsigned int count : 16; /* count of items in ziplist */
// 2bits, 表示是否采用LZF压缩算法压缩结点,1表示不压缩,2表示压缩
unsigned int encoding : 2; /* RAW==1 or LZF==2 */
// 2bits, 表示是否采用ziplist结构保存结点,1表示不采用,2表示使用
unsigned int container : 2; /* NONE==1 or ZIPLIST==2 */
// 1bit, true时表示曾经临时解压过,需要被压缩
unsigned int recompress : 1; /* was this node previous compressed? */
// 测试时使用,结点太小时不能压缩
unsigned int attempted_compress : 1; /* node can't compress; too small */
// 额外扩展位,10bits
unsigned int extra : 10; /* more bits to steal for future usage */
} quicklistNode;
  • 快速列表LZF算法压缩ziplist
1
2
3
4
5
6
typedef struct quicklistLZF {                                                                                  
// LZF压缩过后的ziplist大小
unsigned int sz; /* LZF size in bytes*/
// 保存压缩后的ziplist
char compressed[];
} quicklistLZF;
  • 快速列表

    • head,tail:头尾结点

    • count:所有结点中ziplist中entry的总和

    • len:quicklistNode结点个数

    • fill:16bit,表示ziplist的大小限制【redis.conf中list-max-ziplist-size可配置】

      • 负数表示每个快速列表结点中ziplist的大小限制

        -1: 4KB、-2: 8KB(默认) 、-3: 16KB 、-4: 32KB -5: 64KB

      • 正数表示每个快速列表结点中ziplist包含最多entry的个数,最大2^15

    • compress:16bit,表示列表两端不需要压缩的结点数【redis.conf中list-compress-depth可配置】

      • 0表示不压缩
      • 1表示列表前后两端各有1个结点不压缩
      • 2表示列表前后两端各有2个结点不压缩
      • ……
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
typedef struct quicklist {
// 头结点
quicklistNode *head;
// 尾结点
quicklistNode *tail;
// ziplist所有条目的数量
unsigned long count; /* total count of all entries in all ziplists */
// quicklist结点个数
unsigned long len; /* number of quicklistNodes */
// 16bits,用户设置(或默认),单个ziplist的大小限制
// 小于0时表示ziplist的大小限制,大于0时表示最多结点个数
int fill : 16; /* fill factor for individual nodes */
// 列表两端不被要是的结点数
unsigned int compress : 16; /* depth of end nodes not to compress;0=off */
} quicklist;
  • 快速列表迭代器
1
2
3
4
5
6
7
8
9
10
11
12
typedef struct quicklistIter {                                                                      
// 迭代器指向的列表
const quicklist *quicklist;
// 迭代器指向列表的当前结点
quicklistNode *current;
// 迭代器指向列表的ziplist中的结点
unsigned char *zi;
// ziplist的偏移量
long offset; /* offset in current ziplist */
// 迭代器方向
int direction;
} quicklistIter;
  • 快速列表结点entry
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
typedef struct quicklistEntry {                                                                     
// 所属的快速列表
const quicklist *quicklist;
// quicklistNode结点
quicklistNode *node;
// 指向的ziplist
unsigned char *zi;
// ziplist结点的字符串value
unsigned char *value;
// ziplist结点的整数value
long long longval;
// 当前ziplist结构的字节数大小
unsigned int sz;
// ziplist的相对偏移量
int offset;
} quicklistEntry;

2. 源码剖析

  • entry插入

    由于每个结点中的ziplist有大小限制,所以在node结点满的情况下,会做特殊处理

    1. 插入结点未满则插入结点的ziplist中
    2. 插入结点已满,则根据插入的位置判断前驱或后继是否已满,若未满则插入前驱或后继的ziplist
    3. 插入结点已满,前驱和后继也已满,则创建新的结点,插入到新结点的ziplist,并将该结点插入到快速列表中
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
REDIS_STATIC void _quicklistInsert(quicklist *quicklist, quicklistEntry *entry,
void *value, const size_t sz, int after) {
int full = 0, at_tail = 0, at_head = 0, full_next = 0, full_prev = 0;
int fill = quicklist->fill;

// entry的quicklistNode结点
quicklistNode *node = entry->node;
quicklistNode *new_node = NULL;

// 若entry没有quicklistNode结点,则需要创建新的
if (!node) {
/* we have no reference node, so let's create only node in the list */
D("No node given!");

// 创建新的结点
new_node = quicklistCreateNode();

// value插入到ziplist的头部
new_node->zl = ziplistPush(ziplistNew(), value, sz, ZIPLIST_HEAD);

// 结点插入到quicklist中
__quicklistInsertNode(quicklist, NULL, new_node, after);
new_node->count++;
quicklist->count++;
return;
}

/* Populate accounting flags for easier boolean checks later */

// node结点已经没有空间
if (!_quicklistNodeAllowInsert(node, fill, sz)) {
D("Current node is full with count %d with requested fill %lu",
node->count, fill);
full = 1;
}

// 插入的位置是tail
if (after && (entry->offset == node->count)) {
D("At Tail of current ziplist");
at_tail = 1;
if (!_quicklistNodeAllowInsert(node->next, fill, sz)) {
D("Next node is full too.");
full_next = 1;
}
}

// 插入的位置是head
if (!after && (entry->offset == 0)) {
D("At Head");
at_head = 1;
if (!_quicklistNodeAllowInsert(node->prev, fill, sz)) {
D("Prev node is full too.");
full_prev = 1;
}
}

/* Now determine where and how to insert the new element */
if (!full && after) { // 结点可插入,并且是结点之后
D("Not full, inserting after current position.");

// node临时解压
quicklistDecompressNodeForUse(node);

// 计算要插入的位置
unsigned char *next = ziplistNext(node->zl, entry->zi);

if (next == NULL) { // 插入尾部
node->zl = ziplistPush(node->zl, value, sz, ZIPLIST_TAIL);
} else { // 插入next位置
node->zl = ziplistInsert(node->zl, next, value, sz);
}
node->count++;
quicklistNodeUpdateSz(node);
quicklistRecompressOnly(quicklist, node);
} else if (!full && !after) { // 结点可插入,并且是结点之前
D("Not full, inserting before current position.");

// 临时解压
quicklistDecompressNodeForUse(node);

// 插入结点到ziplist
node->zl = ziplistInsert(node->zl, entry->zi, value, sz);

// 更新计数
node->count++;

// 更新内存大小
quicklistNodeUpdateSz(node);

// 重新压缩
quicklistRecompressOnly(quicklist, node);
} else if (full && at_tail && node->next && !full_next && after) {
// node结点已满,要插入尾部,并且node的后继未满,则插入后继的头部
/* If we are: at tail, next has free space, and inserting after:
* - insert entry at head of next node. */
D("Full and tail, but next isn't full; inserting next node head");
// 后继节点
new_node = node->next;

// 临时解压node->next结点
quicklistDecompressNodeForUse(new_node);

// 插入到node->next的头部位置
new_node->zl = ziplistPush(new_node->zl, value, sz, ZIPLIST_HEAD);

// 更改计数
new_node->count++;

// 更新quicklist大小
quicklistNodeUpdateSz(new_node);

// 重新压缩
quicklistRecompressOnly(quicklist, new_node);
} else if (full && at_head && node->prev && !full_prev && !after) {
// node结点已满,要插入头部,并且node的前驱未满,则插入前驱的尾部
/* If we are: at head, previous has free space, and inserting before:
* - insert entry at tail of previous node. */
D("Full and head, but prev isn't full, inserting prev node tail");

// 前驱结点
new_node = node->prev;

// 临时解药
quicklistDecompressNodeForUse(new_node);

// 插入到前驱的尾部
new_node->zl = ziplistPush(new_node->zl, value, sz, ZIPLIST_TAIL);

// 更新计数
new_node->count++;

// 更新内存大小
quicklistNodeUpdateSz(new_node);

// 重新压缩
quicklistRecompressOnly(quicklist, new_node);
} else if (full && ((at_tail && node->next && full_next && after) ||
(at_head && node->prev && full_prev && !after))) {
// node结点已满
// 1. 插入尾部,但后继已满; 2. 插入头部,但前驱已满
// 所以创建新结点
/* If we are: full, and our prev/next is full, then:
* - create new node and attach to quicklist */
D("\tprovisioning new node...");

// 创建新结点
new_node = quicklistCreateNode();

// 插入到新结点ziplist的头部
new_node->zl = ziplistPush(ziplistNew(), value, sz, ZIPLIST_HEAD);

// 更新计数
new_node->count++;

// 更新内存大小
quicklistNodeUpdateSz(new_node);

// 插入新结点到quicklist
__quicklistInsertNode(quicklist, node, new_node, after);
} else if (full) {
// node结点已满,需要分割结点
/* else, node is full we need to split it. */
/* covers both after and !after cases */
D("\tsplitting node...");

// 临时解压
quicklistDecompressNodeForUse(node);

// 分割结点
new_node = _quicklistSplitNode(node, entry->offset, after);

// 插入到新结点
new_node->zl = ziplistPush(new_node->zl, value, sz,
after ? ZIPLIST_HEAD : ZIPLIST_TAIL);
// 更新计数
new_node->count++;

// 更新内存大小
quicklistNodeUpdateSz(new_node);

// 插入分割后的新结点
__quicklistInsertNode(quicklist, node, new_node, after);

// 合并
_quicklistMergeNodes(quicklist, node);
}

// 更新quicklist计数
quicklist->count++;
}
  • entry删除

    根据范围进行删除,若结点中ziplist全部删除,则需要删除该结点

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
int quicklistDelRange(quicklist *quicklist, const long start,
const long count) {
if (count <= 0)
return 0;

// 要删除的个数
unsigned long extent = count; /* range is inclusive of start position */

if (start >= 0 && extent > (quicklist->count - start)) {
// start 是正数,正向删除
/* if requesting delete more elements than exist, limit to list size. */
extent = quicklist->count - start;
} else if (start < 0 && extent > (unsigned long)(-start)) {
// start 是负数,反向删除
/* else, if at negative offset, limit max size to rest of list. */
extent = -start; /* c.f. LREM -29 29; just delete until end. */
}

quicklistEntry entry;
if (!quicklistIndex(quicklist, start, &entry))
return 0;

D("Quicklist delete request for start %ld, count %ld, extent: %ld", start,
count, extent);
quicklistNode *node = entry.node;

/* iterate over next nodes until everything is deleted. */
// 遍历要删除的个数
while (extent) {
// 备份下一结点
quicklistNode *next = node->next;

unsigned long del;
int delete_entire_node = 0;
if (entry.offset == 0 && extent >= node->count) {
// 偏移量为0,删除个数大于总个数,表示全部删除
/* If we are deleting more than the count of this node, we
* can just delete the entire node without ziplist math. */
delete_entire_node = 1;
del = node->count;
} else if (entry.offset >= 0 && extent >= node->count) {
// 偏移量不为0,删除个数大于总个数,表示总偏移量开始删除所有
/* If deleting more nodes after this one, calculate delete based
* on size of current node. */
del = node->count - entry.offset;
} else if (entry.offset < 0) {
// 偏移量为负数,从尾结点向前删除
/* If offset is negative, we are in the first run of this loop
* and we are deleting the entire range
* from this start offset to end of list. Since the Negative
* offset is the number of elements until the tail of the list,
* just use it directly as the deletion count. */
del = -entry.offset;

/* If the positive offset is greater than the remaining extent,
* we only delete the remaining extent, not the entire offset.
*/
if (del > extent)
del = extent;
} else {
// 删除的结点在中间
/* else, we are deleting less than the extent of this node, so
* use extent directly. */
del = extent;
}

D("[%ld]: asking to del: %ld because offset: %d; (ENTIRE NODE: %d), "
"node count: %u",
extent, del, entry.offset, delete_entire_node, node->count);

// 表示该结点的全部删除
if (delete_entire_node) {
__quicklistDelNode(quicklist, node);
} else {
// 临时解压
quicklistDecompressNodeForUse(node);
// 删除ziplist的一段范围
node->zl = ziplistDeleteRange(node->zl, entry.offset, del);
// 更新计数
quicklistNodeUpdateSz(node);
node->count -= del;
quicklist->count -= del;
// 删除为空的node
quicklistDeleteIfEmpty(quicklist, node);
if (node)
// node未删除,则重新压缩
quicklistRecompressOnly(quicklist, node);
}

extent -= del;
node = next;
entry.offset = 0;
}
return 1;
}