Redis源码剖析 - Redis内置数据结构之字符串sds

Posted Fred^_^

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Redis源码剖析 - Redis内置数据结构之字符串sds相关的知识,希望对你有一定的参考价值。

原创作品,转载请标明:http://blog.csdn.net/Xiejingfa/article/details/50972592

今天花了一个晚上的时间分析了Redis中字符串操作的实现,源文件为sds.h和sds.c。总结如下:

1、在C语言中,字符串是以字符数组的形式体现的(以'\0'为结束符),Redis中的字符串定义如下:

/* Header stored in front of every sds string (a string is a char array). */
struct sdshdr {
    /* Current length of the string held in buf. */
    unsigned int len;
    /* Number of unused bytes still available in buf. */
    unsigned int free;
    /* Flexible array member where the string bytes are stored. */
    char buf[];
};

在实际使用时,Redis没有直接使用sdshdr结构,而是定义了sds类型来操作sdshdr结构:

/* Define sds as an alias for char *. */
typedef char *sds;

其中sds指向sdshdr结构的buf[],通过如下操作来获得整个sdshdr结构:

//  sizeof(struct sdshdr) is 8 (assuming a 4-byte unsigned int)
/*  Why does s-(sizeof(struct sdshdr)) give the sdshdr pointer?
    As shown later, an sds points at the buf[] character array of the
    sdshdr structure, so s-(sizeof(struct sdshdr)) is the address of
    the sdshdr structure itself.
*/
struct sdshdr *sh = (void*)(s-(sizeof(struct sdshdr)));

2、Redis的字符替换操作由sdsmapchars函数实现,其原型如下:

sds sdsmapchars(sds s, const char *from, const char *to, size_t setlen)

sdsmapchars执行的字符替换操作与我们常规理解的字符替换有些不同,它将sds中出现在from中的字符替换为to对应的字符。

比如:
from = “ho”
to = “01”
s = “hello”
经过sdsmapchars处理后s = “0ell1”

3、Redis中定义了sdscatvprintf来将格式化输出的字符串连接到源字符串中,其原型如下:

sds sdscatvprintf(sds s, const char *fmt, va_list ap)

该方法比较有意思,采用了一种类似启发式的方法不断进行尝试以确定一个合适大小的缓冲区来存放格式化输出的字符串。具体可以参考后面源码。

Redis中定义的字符串操作函数都比较简单,我已经做了比较详细的注释。

sds.h头文件定义如下:

/* SDSLib, A C dynamic strings library
 *
 * Copyright (c) 2006-2010, Salvatore Sanfilippo <antirez at gmail dot com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *   * Neither the name of Redis nor the names of its contributors may be used
 *     to endorse or promote products derived from this software without
 *     specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __SDS_H
#define __SDS_H

/* 最大的可分配内存1024*1024bytes = 1M */
#define SDS_MAX_PREALLOC (1024*1024)

#include <sys/types.h>
#include <stdarg.h>

/* Define sds as an alias for char *. */
typedef char *sds;

/* Header stored in front of every sds string (a string is a char array). */
struct sdshdr {
    /* Current length of the string held in buf. */
    unsigned int len;
    /* Number of unused bytes still available in buf. */
    unsigned int free;
    /* Flexible array member where the string bytes are stored. */
    char buf[];
};

/*  下面两个是static函数,仅在本文件可见 */

/* 获取字符串长度 */
static inline size_t sdslen(const sds s) 
    //  sizeof(struct sdshdr)的值为8
    /*  这里为什么用s-(sizeof(struct sdshdr))就得到sdshdr *指针?
        从后面我们可以看到sds指向sdshdr结构的buf[]字符数组,所以
        s-(sizeof(struct sdshdr))就是sdshdr结构的地址。
    */
    struct sdshdr *sh = (void*)(s-(sizeof(struct sdshdr)));
    return sh->len;


/* 获取字符数组中的可用空间 */
static inline size_t sdsavail(const sds s) 
    //  sizeof(struct sdshdr)的值为8
    struct sdshdr *sh = (void*)(s-(sizeof(struct sdshdr)));
    return sh->free;


/* 下面是字符串的操作函数,从其实现上看sds指向sdshdr结构的buf[]字符数组。所以
    “创建sds”和“创建sdshdr结构”是一致的。
*/


/* Create a new string from the initializer 'init' and the given length. */
sds sdsnewlen(const void *init, size_t initlen);
/* Create an sds from the given C string. */
sds sdsnew(const char *init);
/* Create an empty string. */
sds sdsempty(void);
/* Get the length of an sds. */
size_t sdslen(const sds s);
/* Duplicate an sds. */
sds sdsdup(const sds s);
/* Release the memory of an sds. */
void sdsfree(sds s);
/* Get the available (unused) space of an sds. */
size_t sdsavail(const sds s);
/* Grow the string to the specified length. */
sds sdsgrowzero(sds s, size_t len);
/* Concatenation: append 'len' bytes from 't'. */
sds sdscatlen(sds s, const void *t, size_t len);
/* Concatenation: append the C string 't'. */
sds sdscat(sds s, const char *t);
/* Concatenation: append the sds 't'. */
sds sdscatsds(sds s, const sds t);
/* Copy 'len' bytes from 't' into the sds. */
sds sdscpylen(sds s, const char *t, size_t len);
/* Copy the C string 't' into the sds. */
sds sdscpy(sds s, const char *t);

/* Formatted-output concatenation. */
sds sdscatvprintf(sds s, const char *fmt, va_list ap);
#ifdef __GNUC__
sds sdscatprintf(sds s, const char *fmt, ...)
    __attribute__((format(printf, 2, 3)));
#else
 /* Formatted output, similar to sprintf in C. */
sds sdscatprintf(sds s, const char *fmt, ...);
#endif

/* Formatted output using the sds-specific format specifiers. */
sds sdscatfmt(sds s, char const *fmt, ...);
/* Trim operation, as commonly offered by higher-level languages. */
sds sdstrim(sds s, const char *cset);
/* Keep only a sub-range of the string. */
void sdsrange(sds s, int start, int end);
/* Update the string length. Consider this case:
    s = sdsnew("foobar");
    s[2] = '\0';
    Here sdsupdatelen(s) must be called to refresh the length; the
    implementation computes the length with strlen.
 */
void sdsupdatelen(sds s);
/* Clear the string. */
void sdsclear(sds s);
/* Compare two strings. */
int sdscmp(const sds s1, const sds s2);
/* Split a string. */
sds *sdssplitlen(const char *s, int len, const char *sep, int seplen, int *count);
/* Free the sds array returned by sdssplitlen. */
void sdsfreesplitres(sds *tokens, int count);
/* Convert every character to lower case. */
void sdstolower(sds s);
/* Convert every character to upper case. */
void sdstoupper(sds s);
/* Convert a long long number to a string. */
sds sdsfromlonglong(long long value);
/* Append a quoted (escaped) representation of a string. */
sds sdscatrepr(sds s, const char *p, size_t len);
/* Argument parsing. */
sds *sdssplitargs(const char *line, int *argc);
/* Character substitution. */
sds sdsmapchars(sds s, const char *from, const char *to, size_t setlen);
/* Join an array of C strings into one string using the given separator. */
sds sdsjoin(char **argv, int argc, char *sep);

/* Low level functions exposed to the user API */

/* Ensure the free space in the sds is at least addlen; reallocate if the
 * current free space is insufficient. */
sds sdsMakeRoomFor(sds s, size_t addlen);
/* Adjust the current length and the free space by 'incr'. */
void sdsIncrLen(sds s, int incr);
/* Release the spare space in buf so it holds exactly the current content. */
sds sdsRemoveFreeSpace(sds s);
/* Get the size actually allocated for the sds (including the trailing '\0'). */
size_t sdsAllocSize(sds s);

#endif

sds.c源文件定义如下:

/* SDSLib, A C dynamic strings library
 *
 * Copyright (c) 2006-2012, Salvatore Sanfilippo <antirez at gmail dot com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *   * Neither the name of Redis nor the names of its contributors may be used
 *     to endorse or promote products derived from this software without
 *     specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <assert.h>
#include "sds.h"
#include "zmalloc.h"

/* Create a new sds string with the content specified by the 'init' pointer
 * and 'initlen'.
 * If NULL is used for 'init' the string is initialized with zero bytes.
 *
 * The string is always null-termined (all the sds strings are, always) so
 * even if you create an sds string with:
 *
 * mystring = sdsnewlen("abc",3);
 *
 * You can print the string with printf() as there is an implicit \\0 at the
 * end of the string. However the string is binary safe and can contain
 * \\0 characters in the middle, as the length is stored in the sds header. */
sds sdsnewlen(const void *init, size_t initlen) 
    struct sdshdr *sh;

    // 注意下面调用的是两个不同的空间分配函数(以后我们再分析)
    // +1是因为字符串需要额外一个位置存放结束符‘\\0’
    if (init) 
        sh = zmalloc(sizeof(struct sdshdr)+initlen+1);
     else 
        sh = zcalloc(sizeof(struct sdshdr)+initlen+1);
    
    // 分配失败直接返回
    if (sh == NULL) return NULL;
    // 设置长度和可用空间
    sh->len = initlen;
    sh->free = 0;
    // 如果提供了字符串的初始值则复制一份
    if (initlen && init)
        memcpy(sh->buf, init, initlen);
    sh->buf[initlen] = '\\0';
    // 注意返回值,返回的是sdshdr结构的buf[]
    return (char*)sh->buf;


/* Create an empty (zero length) sds string. Even in this case the string
 * always has an implicit null term. Implemented by calling sdsnewlen(). */
sds sdsempty(void) {
    return sdsnewlen("",0);
}


/* Create a new sds string starting from a null terminated C string.
 * A NULL 'init' is treated as length 0. Implemented via sdsnewlen(). */
sds sdsnew(const char *init) {
    size_t initlen = (init == NULL) ? 0 : strlen(init);
    return sdsnewlen(init, initlen);
}


/* Duplicate an sds string: a new sds is created from the content and
 * length of the existing one. */
sds sdsdup(const sds s) {
    return sdsnewlen(s, sdslen(s));
}


/* Free an sds string. No operation is performed if 's' is NULL. */
void sdsfree(sds s) {
    if (s == NULL) return;
    /* The allocation starts at the header, which precedes buf[]. */
    zfree(s-sizeof(struct sdshdr));
}


/* Set the sds string length to the length as obtained with strlen(), so
 * considering as content only up to the first null term character.
 *
 * This function is useful when the sds string is hacked manually in some
 * way, like in the following example:
 *
 * s = sdsnew("foobar");
 * s[2] = '\0';
 * sdsupdatelen(s);
 * printf("%d\n", sdslen(s));
 *
 * The output will be "2", but if we comment out the call to sdsupdatelen()
 * the output will be "6" as the string was modified but the logical length
 * remains 6 bytes. */
void sdsupdatelen(sds s) {
    struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
    int reallen = strlen(s);
    /* Bytes no longer part of the string become free space. */
    sh->free += (sh->len-reallen);
    sh->len = reallen;
}


/* Modify an sds string in-place to make it empty (zero length).
 * However all the existing buffer is not discarded but set as free space
 * so that next append operations will not require allocations up to the
 * number of bytes previously available. */
void sdsclear(sds s) {
    struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
    /* Move the used bytes over to the free count and reset the length. */
    sh->free += sh->len;
    sh->len = 0;
    sh->buf[0] = '\0';
}


/* Enlarge the free space at the end of the sds string so that the caller
 * is sure that after calling this function can overwrite up to addlen
 * bytes after the end of the string, plus one more byte for nul term.
 *
 * Note: this does not change the *length* of the sds string as returned
 * by sdslen(), but only the free buffer space we have.
 *
 * Returns the (possibly different) sds pointer, or NULL if the
 * reallocation fails. */
sds sdsMakeRoomFor(sds s, size_t addlen) {
    struct sdshdr *sh, *newsh;
    size_t avail = sdsavail(s);
    size_t len, newlen;

    /* Enough room already: nothing to do. */
    if (avail >= addlen) return s;
    len = sdslen(s);
    sh = (void*) (s-(sizeof(struct sdshdr)));
    /* Over-allocate rather than growing to the exact size: double the
     * required length while it is below SDS_MAX_PREALLOC, otherwise add
     * SDS_MAX_PREALLOC extra bytes. */
    newlen = (len+addlen);
    if (newlen < SDS_MAX_PREALLOC)
        newlen *= 2;
    else
        newlen += SDS_MAX_PREALLOC;
    /* The block may be returned at a new address, hence the new sds. */
    newsh = zrealloc(sh, sizeof(struct sdshdr)+newlen+1);
    if (newsh == NULL) return NULL;

    /* Only the free space changes; the length is left untouched. */
    newsh->free = newlen - len;
    return newsh->buf;
}


/* Reallocate the sds string so that it has no free space at the end. The
 * contained string remains not altered, but next concatenation operations
 * will require a reallocation.
 *
 * After the call, the passed sds string is no longer valid and all the
 * references must be substituted with the new pointer returned by the call.
 *
 * Returns NULL if the reallocation fails, mirroring sdsMakeRoomFor(). */
sds sdsRemoveFreeSpace(sds s) {
    struct sdshdr *sh, *newsh;

    sh = (void*) (s-(sizeof(struct sdshdr)));
    /* Shrink to exactly header + current length + terminating byte. */
    newsh = zrealloc(sh, sizeof(struct sdshdr)+sh->len+1);
    /* Do not dereference a failed reallocation. */
    if (newsh == NULL) return NULL;
    /* After shrinking there is no spare room left. */
    newsh->free = 0;
    return newsh->buf;
}


/* Return the total size of the allocation of the specified sds string,
 * including:
 * 1) The sds header before the pointer.
 * 2) The string.
 * 3) The free buffer at the end if any.
 * 4) The implicit null term.
 */
size_t sdsAllocSize(sds s) {
    struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));

    return sizeof(*sh)+sh->len+sh->free+1;
}


/* Increment the sds length and decrements the left free space at the
 * end of the string according to 'incr'. Also set the null term
 * in the new end of the string.
 *
 * This function is used in order to fix the string length after the
 * user calls sdsMakeRoomFor(), writes something after the end of
 * the current string, and finally needs to set the new length.
 *
 * Note: it is possible to use a negative increment in order to
 * right-trim the string.
 *
 * Usage example:
 *
 * Using sdsIncrLen() and sdsMakeRoomFor() it is possible to mount the
 * following schema, to cat bytes coming from the kernel to the end of an
 * sds string without copying into an intermediate buffer:
 *
 * oldlen = sdslen(s);
 * s = sdsMakeRoomFor(s, BUFFER_SIZE);
 * nread = read(fd, s+oldlen, BUFFER_SIZE);
 * ... check for nread <= 0 and handle it ...
 * sdsIncrLen(s, nread);
 */
void sdsIncrLen(sds s, int incr) {
    struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));

    /* Validate 'incr': growing needs that much free space, shrinking
     * cannot remove more than the current length. */
    if (incr >= 0)
        assert(sh->free >= (unsigned int)incr);
    else
        assert(sh->len >= (unsigned int)(-incr));
    /* The length grows by incr and the free space shrinks by incr. */
    sh->len += incr;
    sh->free -= incr;
    s[sh->len] = '\0';
}


/* Grow the sds to have the specified length. Bytes that were not part of
 * the original length of the sds will be set to zero.
 *
 * if the specified length is smaller than the current length, no operation
 * is performed.
 *
 * Returns the (possibly different) sds pointer, or NULL on allocation
 * failure. */
sds sdsgrowzero(sds s, size_t len) {
    struct sdshdr *sh = (void*)(s-(sizeof(struct sdshdr)));
    size_t totlen, curlen = sh->len;

    /* Already long enough: nothing to do. */
    if (len <= curlen) return s;
    s = sdsMakeRoomFor(s,len-curlen);
    if (s == NULL) return NULL;

    /* Make sure added region doesn't contain garbage */
    sh = (void*)(s-(sizeof(struct sdshdr)));
    memset(s+curlen,0,(len-curlen+1)); /* also set trailing \0 byte */
    /* Redistribute the total buffer between length and free space. */
    totlen = sh->len+sh->free;
    sh->len = len;
    sh->free = totlen-sh->len;
    return s;
}


/* Append the specified binary-safe string pointed by 't' of 'len' bytes to the
 * end of the specified sds string 's'.
 *
 * After the call, the passed sds string is no longer valid and all the
 * references must be substituted with the new pointer returned by the call.
 * Returns NULL if room for the new bytes could not be obtained. */
sds sdscatlen(sds s, const void *t, size_t len) {
    struct sdshdr *sh;
    size_t curlen = sdslen(s);

    /* Make sure there is room for 'len' more bytes. */
    s = sdsMakeRoomFor(s,len);
    if (s == NULL) return NULL;
    sh = (void*) (s-(sizeof(struct sdshdr)));
    /* Copy 't' right after the current content. */
    memcpy(s+curlen, t, len);
    /* Update the length and the free space, then terminate. */
    sh->len = curlen+len;
    sh->free = sh->free-len;
    s[curlen+len] = '\0';
    return s;
}


/* Append the specified null terminated C string to the sds string 's'.
 * Implemented by calling sdscatlen() with strlen(t).
 *
 * After the call, the passed sds string is no longer valid and all the
 * references must be substituted with the new pointer returned by the call. */
sds sdscat(sds s, const char *t) {
    return sdscatlen(s, t, strlen(t));
}


/* Append the specified sds 't' to the existing sds 's'.
 * Implemented by calling sdscatlen() with sdslen(t).
 *
 * After the call, the modified sds string is no longer valid and all the
 * references must be substituted with the new pointer returned by the call. */
sds sdscatsds(sds s, const sds t) {
    return sdscatlen(s, t, sdslen(t));
}


/* Destructively modify the sds string 's' to hold the specified binary
 * safe string pointed by 't' of length 'len' bytes.
 * Returns the (possibly different) sds pointer, or NULL if the buffer
 * could not be enlarged. */
sds sdscpylen(sds s, const char *t, size_t len) {
    struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
    size_t totlen = sh->free+sh->len; /* Total usable size of the buffer. */

    /* Enlarge the buffer when it cannot hold 'len' bytes. */
    if (totlen < len) {
        s = sdsMakeRoomFor(s,len-sh->len);
        if (s == NULL) return NULL;
        sh = (void*) (s-(sizeof(struct sdshdr)));
        totlen = sh->free+sh->len;
    }
    /* Overwrite the content, terminate it, and update the accounting. */
    memcpy(s, t, len);
    s[len] = '\0';
    sh->len = len;
    sh->free = totlen-len;
    return s;
}


/* Like sdscpylen() but 't' must be a null-terminated string so that the
 * length of the string is obtained with strlen(). */
sds sdscpy(sds s, const char *t) {
    return sdscpylen(s, t, strlen(t));
}


/* Helper for sdscatlonglong() doing the actual number -> string
 * conversion. 's' must point to a string with room for at least
 * SDS_LLSTR_SIZE bytes.
 *
 * The function returns the length of the null-terminated string
 * representation stored at 's'. */
#define SDS_LLSTR_SIZE 21
 /* 将一个long long类型的数字转化为字符串 */
/* Write the decimal representation of 'value' into 's' as a
 * null-terminated string and return its length (terminator excluded).
 * 's' must have room for the digits, an optional '-' and the '\0'. */
int sdsll2str(char *s, long long value) {
    char *p, aux;
    unsigned long long v;
    size_t l;

    /* Generate the string representation, this method produces
     * an reversed string (e.g. 123 => "321").
     * The magnitude is computed in unsigned arithmetic: negating a signed
     * long long overflows (undefined behavior) when value is LLONG_MIN. */
    v = (value < 0) ? 0ULL - (unsigned long long)value
                    : (unsigned long long)value;
    p = s;
    do {
        *p++ = '0'+(v%10);
        v /= 10;
    } while(v);
    if (value < 0) *p++ = '-';

    /* Compute length and add null term. */
    l = p-s;
    *p = '\0';

    /* Reverse the string in place with two converging pointers. */
    p--;
    while(s < p) {
        aux = *s;
        *s = *p;
        *p = aux;
        s++;
        p--;
    }
    return l;
}


/* Identical sdsll2str(), but for unsigned long long type: writes the
 * decimal representation of 'v' into 's' and returns its length. */
int sdsull2str(char *s, unsigned long long v) {
    char *p, aux;
    size_t l;

    /* Generate the string representation, this method produces
     * an reversed string. No sign handling is needed for unsigned input. */
    p = s;
    do {
        *p++ = '0'+(v%10);
        v /= 10;
    } while(v);

    /* Compute length and add null term. */
    l = p-s;
    *p = '\0';

    /* Reverse the string. */
    p--;
    while(s < p) {
        aux = *s;
        *s = *p;
        *p = aux;
        s++;
        p--;
    }
    return l;
}


/* Create an sds string from a long long value. It is much faster than:
 *
 * sdscatprintf(sdsempty(),"%lld\n", value);
 */
sds sdsfromlonglong(long long value) {
    /* Convert the number into a local char array first, then build the
     * sds from that array. */
    char buf[SDS_LLSTR_SIZE];
    int len = sdsll2str(buf,value);

    return sdsnewlen(buf,len);
}



/* 下面三个为格式化输出函数,类似C语言中的sprintf函数。*/

/* Like sdscatprintf() but gets va_list instead of being variadic.
 * Appends the formatted output to 's'. The buffer size is found by trial:
 * format into a buffer, and if it turns out too small, retry with one twice
 * as large. Returns NULL if a temporary buffer cannot be allocated. */
sds sdscatvprintf(sds s, const char *fmt, va_list ap) {
    va_list cpy;
    char staticbuf[1024], *buf = staticbuf, *t;
    size_t buflen = strlen(fmt)*2;

    /* We try to start using a static buffer for speed.
     * If not possible we revert to heap allocation. */
    if (buflen > sizeof(staticbuf)) {
        buf = zmalloc(buflen);
        if (buf == NULL) return NULL;
    } else {
        buflen = sizeof(staticbuf);
    }

    /* Try with buffers two times bigger every time we fail to
     * fit the string in the current buffer size. */
    while(1) {
        /* Plant a sentinel in the second-to-last byte of the buffer. */
        buf[buflen-2] = '\0';
        va_copy(cpy,ap);
        vsnprintf(buf, buflen, fmt, cpy);
        va_end(cpy);
        /* A sentinel that got overwritten means the output reached the end
         * of the buffer, so the buffer was too small: double it and retry. */
        if (buf[buflen-2] != '\0') {
            if (buf != staticbuf) zfree(buf);
            buflen *= 2;
            buf = zmalloc(buflen);
            if (buf == NULL) return NULL;
            continue;
        }
        break;
    }

    /* Finally concat the obtained string to the SDS string and return it. */
    t = sdscat(s, buf);
    if (buf != staticbuf) zfree(buf);
    return t;
}


/* Append to the sds string 's' a string obtained using printf-alike format
 * specifier. Variadic front end for sdscatvprintf().
 *
 * After the call, the modified sds string is no longer valid and all the
 * references must be substituted with the new pointer returned by the call.
 *
 * Example:
 *
 * s = sdsnew("Sum is: ");
 * s = sdscatprintf(s,"%d+%d = %d",a,b,a+b).
 *
 * Often you need to create a string from scratch with the printf-alike
 * format. When this is the need, just use sdsempty() as the target string:
 *
 * s = sdscatprintf(sdsempty(), "... your format ...", args);
 */
sds sdscatprintf(sds s, const char *fmt, ...) {
    va_list ap;
    char *t;
    va_start(ap, fmt);
    t = sdscatvprintf(s,fmt,ap);
    va_end(ap);
    return t;
}


/* This function is similar to sdscatprintf, but much faster as it does
 * not rely on sprintf() family functions implemented by the libc that
 * are often very slow. Moreover directly handling the sds string as
 * new data is concatenated provides a performance improvement.
 *
 * However this function only handles an incompatible subset of printf-alike
 * format specifiers:
 *
 * %s - C String
 * %S - SDS string
 * %i - signed int
 * %I - 64 bit signed integer (long long, int64_t)
 * %u - unsigned int
 * %U - 64 bit unsigned integer (unsigned long long, uint64_t)
 * %% - Verbatim "%" character.
 *
 * A lone '%' at the very end of 'fmt' is ignored.
 */
sds sdscatfmt(sds s, char const *fmt, ...) {
    struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
    size_t initlen = sdslen(s);
    const char *f = fmt;
    int i;
    va_list ap;

    va_start(ap,fmt);
    f = fmt;    /* Next format specifier byte to process. */
    i = initlen; /* Position of the next byte to write to dest str. */
    /* Scan the format string from left to right. */
    while(*f) {
        char next, *str;
        unsigned int l;
        long long num;
        unsigned long long unum;

        /* Make sure there is always space for at least 1 char. */
        if (sh->free == 0) {
            s = sdsMakeRoomFor(s,1);
            sh = (void*) (s-(sizeof(struct sdshdr)));
        }

        switch(*f) {
        case '%':
            next = *(f+1);
            /* A '%' that is the last byte of the format has no specifier.
             * Stop here so that 'f' is never advanced past the terminator. */
            if (next == '\0') break;
            f++;
            switch(next) {
            case 's':
            case 'S':
                str = va_arg(ap,char*);
                l = (next == 's') ? strlen(str) : sdslen(str);
                if (sh->free < l) {
                    s = sdsMakeRoomFor(s,l);
                    sh = (void*) (s-(sizeof(struct sdshdr)));
                }
                memcpy(s+i,str,l);
                sh->len += l;
                sh->free -= l;
                i += l;
                break;
            case 'i':
            case 'I':
                if (next == 'i')
                    num = va_arg(ap,int);
                else
                    num = va_arg(ap,long long);
                {
                    char buf[SDS_LLSTR_SIZE];
                    l = sdsll2str(buf,num);
                    if (sh->free < l) {
                        s = sdsMakeRoomFor(s,l);
                        sh = (void*) (s-(sizeof(struct sdshdr)));
                    }
                    memcpy(s+i,buf,l);
                    sh->len += l;
                    sh->free -= l;
                    i += l;
                }
                break;
            case 'u':
            case 'U':
                if (next == 'u')
                    unum = va_arg(ap,unsigned int);
                else
                    unum = va_arg(ap,unsigned long long);
                {
                    char buf[SDS_LLSTR_SIZE];
                    l = sdsull2str(buf,unum);
                    if (sh->free < l) {
                        s = sdsMakeRoomFor(s,l);
                        sh = (void*) (s-(sizeof(struct sdshdr)));
                    }
                    memcpy(s+i,buf,l);
                    sh->len += l;
                    sh->free -= l;
                    i += l;
                }
                break;
            default: /* Handle %% and generally %<unknown>. */
                s[i++] = next;
                sh->len += 1;
                sh->free -= 1;
                break;
            }
            break;
        default:
            /* Ordinary byte: copy it verbatim. */
            s[i++] = *f;
            sh->len += 1;
            sh->free -= 1;
            break;
        }
        f++;
    }
    va_end(ap);

    /* Add null-term */
    s[i] = '\0';
    return s;
}


/* Remove the part of the string from left and from right composed just of
 * contiguous characters found in 'cset', that is a null terminated C string.
 *
 * After the call, the modified sds string is no longer valid and all the
 * references must be substituted with the new pointer returned by the call.
 *
 * Example:
 *
 * s = sdsnew("AA...AA.a.aa.aHelloWorld     :::");
 * s = sdstrim(s,"Aa. :");
 * printf("%s\n", s);
 *
 * Output will be just "HelloWorld".
 */
sds sdstrim(sds s, const char *cset) {
    struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
    char *start, *end, *sp, *ep;
    size_t len;

    sp = start = s;
    ep = end = s+sdslen(s)-1;
    /* From the left, find the first character that is not in cset. */
    while(sp <= end && strchr(cset, *sp)) sp++;
    /* From the right, find the first character that is not in cset. */
    while(ep > start && strchr(cset, *ep)) ep--;
    /* Length of what remains after trimming both sides. */
    len = (sp > ep) ? 0 : ((ep-sp)+1);
    /* Shift the remaining substring to the start of the buffer. */
    if (sh->buf != sp) memmove(sh->buf, sp, len);
    /* Terminate and update the length / free space accounting. */
    sh->buf[len] = '\0';
    sh->free = sh->free+(sh->len-len);
    sh->len = len;
    return s;
}


/* Turn the string into a smaller (or equal) string containing only the
 * substring specified by the 'start' and 'end' indexes.
 *
 * start and end can be negative, where -1 means the last character of the
 * string, -2 the penultimate character, and so forth.
 *
 * The interval is inclusive, so the start and end characters will be part
 * of the resulting string.
 *
 * The string is modified in-place.
 *
 * Example:
 *
 * s = sdsnew("Hello World");
 * sdsrange(s,1,-1); => "ello World"
 */
void sdsrange(sds s, int start, int end) {
    struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
    size_t newlen, len = sdslen(s);

    if (len == 0) return;
    /* Negative indexes count from the end; clamp them at 0. */
    if (start < 0) {
        start = len+start;
        if (start < 0) start = 0;
    }
    if (end < 0) {
        end = len+end;
        if (end < 0) end = 0;
    }
    /* Length of the requested (inclusive) range. */
    newlen = (start > end) ? 0 : (end-start)+1;
    /* Clip the range against the actual string length. */
    if (newlen != 0) {
        if (start >= (signed)len) {
            newlen = 0;
        } else if (end >= (signed)len) {
            end = len-1;
            newlen = (start > end) ? 0 : (end-start)+1;
        }
    } else {
        start = 0;
    }
    /* Move the substring to the front when it does not already start there. */
    if (start && newlen) memmove(sh->buf, sh->buf+start, newlen);
    /* Terminate and update the length / free space accounting. */
    sh->buf[newlen] = 0;
    sh->free = sh->free+(sh->len-newlen);
    sh->len = newlen;
}


/* Apply tolower() to every character of the sds string 's'. */
void sdstolower(sds s) {
    int len = sdslen(s), j;

    /* <ctype.h> functions require a value representable as unsigned char
     * (or EOF); passing a negative plain char is undefined behavior. */
    for (j = 0; j < len; j++) s[j] = tolower((unsigned char)s[j]);
}


/* Apply toupper() to every character of the sds string 's'. */
void sdstoupper(sds s) {
    int len = sdslen(s), j;

    /* <ctype.h> functions require a value representable as unsigned char
     * (or EOF); passing a negative plain char is undefined behavior. */
    for (j = 0; j < len; j++) s[j] = toupper((unsigned char)s[j]);
}


/* Compare two sds strings s1 and s2 with memcmp().
 *
 * Return value:
 *
 *     positive if s1 > s2.
 *     negative if s1 < s2.
 *     0 if s1 and s2 are exactly the same binary string.
 *
 * If two strings share exactly the same prefix, but one of the two has
 * additional characters, the longer string is considered to be greater than
 * the smaller one. */
int sdscmp(const sds s1, const sds s2) {
    size_t l1, l2, minlen;
    int cmp;

    l1 = sdslen(s1);
    l2 = sdslen(s2);
    minlen = (l1 < l2) ? l1 : l2;
    cmp = memcmp(s1,s2,minlen);
    /* Equal common prefix: the lengths decide. Compare them explicitly
     * instead of returning l1-l2, because a size_t difference narrowed to
     * int can come out with the wrong sign. */
    if (cmp == 0) return l1>l2? 1: (l1<l2? -1: 0);
    return cmp;
}


/* Split 's' with separator in 'sep'. An array
 * of sds strings is returned. *count will be set
 * by reference to the number of tokens returned.
 *
 * On out of memory, zero length string, zero length
 * separator, NULL is returned.
 *
 * Note that 'sep' is able to split a string using
 * a multi-character separator. For example
 * sdssplit("foo_-_bar","_-_"); will return two
 * elements "foo" and "bar".
 *
 * This version of the function is binary-safe but
 * requires length arguments. sdssplit() is just the
 * same function but for zero-terminated strings.
 */
 /* 字符串分割函数,根据参数sep分割源串,返回sds数组,其中:参数count存放分割后子串数量。
    类似java.lang.String的spilt方法
 */
sds *sdssplitlen(const char *s, int len, const char *sep, int seplen, int *count) 
    int elements = 0, slots = 5, start = 0, j;
    // 保存分割后的结果
    sds *tokens;    

    if (seplen < 1 || len < 0) return NULL;

    // 分配空间,分割的子串初始值只有5组,如果不够再重新分配
    tokens = zmalloc(sizeof(sds)*slots);
    // 空间分配失败直接返回
    if (tokens == NULL) return NULL;

    // 源串长度为0,无须分割
    if (len == 0) 
        *count = 0;
        return tokens;
    
    // 从左往右遍历源串。显然最后一个能匹配的分割符sep的位置为len-(seplen-1)
    for (j = 0; j < (len-(seplen-1)); j++) 
        /* make sure there is room for the next element and the final one */
        // 结果字符串数组的长度少于当前已存字符串长度+2的时,重新分配
        if (slots < elements+2) 
            sds *newtokens;

            // 以原长度的两倍重新分配
            slots *= 2;
            newtokens = zrealloc(tokens,sizeof(sds)*slots);
            // 分配失败,进行相关的内存回收工作
            if (newtokens == NULL) goto cleanup;

以上是关于Redis源码剖析 - Redis内置数据结构之字符串sds的主要内容,如果未能解决你的问题,请参考以下文章

Redis源码剖析 - Redis内置数据结构之压缩字典zipmap

Redis源码剖析 - Redis内置数据结构之双向链表list

Redis源码剖析 - Redis内置数据结构之整数集合intset

Redis源码剖析 - Redis内置数据结构之整数集合intset

Redis源码剖析 - Redis内置数据结构之字符串sds

Redis源码剖析 - Redis数据类型之redisObject