从 K&R 书中解释 malloc 的这种实现

Posted 2023-02-21

技术标签:

【中文标题】从 K&R 书中解释 malloc 的这种实现【英文标题】：Explain this implementation of malloc from the K&R book 【发布时间】：2012-10-21 00:09:44 【问题描述】：

这是Kernighan 和 Ritchie 所著的 C 书籍的节选。它展示了如何实现malloc 的一个版本。虽然评论很好，但我很难理解它。谁能解释一下？

#include <stddef.h> /* NULL */
#include <stdint.h> /* intptr_t */

typedef long Align; /* for alignment to long boundary */

union header { /* block header */
    struct {
        union header *ptr; /* next block if on free list */
        unsigned size;     /* size of this block, counted in Header units */
    } s;
    Align x; /* force alignment of blocks; never accessed */
};
typedef union header Header;

static Header base;          /* empty list to get started */
static Header *freep = NULL; /* start of free list */

/* File-scope prototypes: malloc calls morecore and morecore calls free
 * before either is defined. */
static Header *morecore(unsigned nu);
void free(void *ap);

/*
 * malloc: general-purpose storage allocator.
 *
 * Walks the circular free list starting after freep and returns the first
 * block that is large enough (first fit).  If the block is larger than
 * needed, the tail end is split off and returned.  When the search wraps
 * around to freep, morecore() is asked for more memory.
 *
 * Returns a pointer to the storage just past the block header, or NULL if
 * no more memory can be obtained.
 */
void *malloc(unsigned nbytes)
{
    Header *p, *prevp;
    unsigned nunits;

    /* round nbytes up to whole Header units, plus one unit for the header */
    nunits = (nbytes + sizeof(Header) - 1) / sizeof(Header) + 1;
    if ((prevp = freep) == NULL) { /* no free list yet */
        base.s.ptr = freep = prevp = &base;
        base.s.size = 0;
    }
    for (p = prevp->s.ptr; ; prevp = p, p = p->s.ptr) {
        if (p->s.size >= nunits) { /* big enough */
            if (p->s.size == nunits) { /* exactly */
                prevp->s.ptr = p->s.ptr;
            } else { /* allocate tail end */
                p->s.size -= nunits;
                p += p->s.size;
                p->s.size = nunits;
            }
            freep = prevp;
            return (void *)(p + 1);
        }
        if (p == freep) { /* wrapped around free list */
            if ((p = morecore(nunits)) == NULL) {
                return NULL; /* none left */
            }
        }
    }
}

#define NALLOC 1024 /* minimum #units to request */

/*
 * morecore: ask system for more memory.
 *
 * Requests at least NALLOC units from sbrk, stamps a header on the new
 * region and hands it to free() so that it joins the free list.  Returns
 * freep (as updated by free), or NULL if sbrk fails.
 */
static Header *morecore(unsigned nu)
{
    /* Declared locally with the glibc/Linux prototype; where <unistd.h>
     * declares sbrk, include that header instead. */
    void *sbrk(intptr_t);
    char *cp;
    Header *up;

    if (nu < NALLOC) {
        nu = NALLOC;
    }

    cp = sbrk(nu * sizeof(Header));
    if (cp == (char *)-1) { /* no space at all */
        return NULL;
    }

    up = (Header *)cp;
    up->s.size = nu;
    free((void *)(up + 1));

    return freep;
}

/*
 * free: put block ap in free list.
 *
 * The free list is kept in order of increasing address.  The block is
 * inserted at its place and merged with the neighbouring free block on
 * either side when they are adjacent.  free(NULL) does nothing.
 */
void free(void *ap)
{
    Header *bp, *p;

    if (ap == NULL) {
        return;
    }

    bp = (Header *)ap - 1; /* point to block header */
    for (p = freep; !(bp > p && bp < p->s.ptr); p = p->s.ptr) {
        if (p >= p->s.ptr && (bp > p || bp < p->s.ptr)) {
            break; /* freed block at start or end of arena */
        }
    }

    if (bp + bp->s.size == p->s.ptr) { /* join to upper nbr */
        bp->s.size += p->s.ptr->s.size;
        bp->s.ptr = p->s.ptr->s.ptr;
    } else {
        bp->s.ptr = p->s.ptr;
    }

    if (p + p->s.size == bp) { /* join to lower nbr */
        p->s.size += bp->s.size;
        p->s.ptr = bp->s.ptr;
    } else {
        p->s.ptr = bp;
    }
    freep = p;
}

【问题讨论】：

我面前有我的 K&R 第 2 版——我想这是相当早的印刷品——它不包含公认答案所指的一些问题。请问你用的是哪个版本的，代码是不是手写的？也许提出具体的问题（例如，为什么以及如何准确地对齐块？）会产生更有帮助的答案？我在我的 K&R 第 2 版副本中看到了这一点。国际标准书号 0-13-110362-8。该代码在第 8.7 节中的 pg.185 中提供：示例-A 存储分配器，并分为几段解释。 【参考方案1】：

好的，我们这里有一大段写得很糟糕的代码。我将在这篇文章中所做的最好的描述是软件考古学。

第 1 步：修正格式。

这种缩进和过于紧凑的格式对谁都没有好处。需要插入适当的空格和空行，注释也可以写得更易读。我将从修复这些问题开始。

同时，我把大括号风格从 K&R 风格改成了另一种——请注意 K&R 大括号风格本身是可以接受的，这只是我的个人喜好。另一个个人偏好是把指针的 * 写在所指向的类型旁边。我不会在这里争论这些（主观的）风格问题。

另外，Header 的类型定义完全不可读，需要彻底修复。

我还发现了一处非常晦涩的写法：他们似乎在函数内部声明了一个函数原型 Header* morecore(unsigned);。这是一种非常古老且非常糟糕的风格，我甚至不确定 C 现在是否还允许这样写。让我们删除这一行；无论该函数做什么，它都必须在别处定义。

typedef long Align;                      /* for alignment to long boundary */

typedef union header                     /* block header */

  struct
  
    union header *ptr;                   /* next block if on free list */
    unsigned size;                       /* size of this block */
   s;

  Align x;                               /* force alignment of blocks */

 Header;


static Header base;                      /* empty list to get started */
static Header* freep = NULL;             /* start of free list */


/* malloc: general-purpose storage allocator */
void* malloc (unsigned nbytes)

  Header*   p;
  Header*   prevp;
  unsigned  nunits;

  nunits = (nbytes + sizeof(Header) - 1) / sizeof(header) + 1;

  if ((prevp = freep) == NULL)           /* no free list yet */
  
    base.s.ptr = freeptr = prevptr = &base;
    base.s.size = 0;
  

  for (p = prevp->s.ptr; ; prevp = p, p = p->s.ptr)
  
    if (p->s.size >= nunits)             /* big enough */
    
      if (p->s.size == nunits)           /* exactly */
        prevp->s.ptr = p->s.ptr;
      else                               /* allocate tail end */
      
        p->s.size -= nunits;
        p += p->s.size;
        p->s.size = nunits
      

      freep = prevp;
      return (void *)(p+1);
    

    if (p == freep)                      /* wrapped around free list */
      if ((p = morecore(nunits)) == NULL)
        return NULL;                     /* none left */

好的，现在我们实际上可以阅读代码了。

第 2 步：清除广为人知的不良做法。

这段代码充满了现在被认为是不好的做法。它们需要被删除，因为它们会危害代码的安全性、可读性和维护性。如果您想参考宣扬与我相同做法的权威，请查看广受认可的编码标准MISRA-C。

我发现并删除了以下不良做法：

1) 只是在代码中输入unsigned 可能会导致混淆：这是程序员的错字还是写unsigned int 的意图？我们应该用unsigned int 替换所有unsigned。但是当我们这样做时，我们发现它在这种情况下被用来给出各种二进制数据的大小。用于此类事务的正确类型是 C 标准类型size_t。这本质上也只是一个无符号整数，但保证对于特定平台来说“足够大”。 sizeof 运算符返回 size_t 类型的结果，如果我们查看 C 标准对真正 malloc 的定义，它是 void *malloc(size_t size);。所以size_t 是最正确的类型。

2) 为我们自己的 malloc 函数使用与位于 stdlib.h 中的函数相同的名称是一个坏主意。如果我们需要包含 stdlib.h，事情就会变得一团糟。根据经验，切勿在您自己的代码中使用 C 标准库函数的标识符名称。我把名字改成 kr_malloc。

3) 代码滥用了所有静态变量都保证初始化为零的事实。这是由 C 标准明确定义的，但这是一个相当微妙的规则。让我们显式初始化所有静态变量，以表明我们没有忘记初始化它们。

4) 条件内的赋值是危险的且难以阅读。如果可能，应该避免这种情况，因为它也可能导致错误，例如经典的 = vs == 错误。

5) 由于求值的顺序，同一行上的多个赋值很难阅读，而且还可能很危险。

6) 同一行上的多个声明很难阅读，而且很危险，因为在混合数据和指针声明时可能会导致错误。始终在自己的一行中声明每个变量。

7) 每个控制语句（if、else、for 等）之后都要使用大括号。不这样做会引入 bug。

8) 切勿将特定指针类型强制转换为 void*。在 C 语言中它是不必要的，并且可以隐藏编译器本来会检测到的错误。

9) 避免在函数中使用多个返回语句。有时它们会导致代码更清晰，但在大多数情况下它们会导致意大利面条。就代码而言，我们无法在不重写循环的情况下更改它，所以我稍后会修复它。

10) 保持 for 循环简单。它们应该包含一个 init 语句、一个循环条件和一个迭代，仅此而已。这个带有逗号运算符和所有内容的 for 循环非常晦涩难懂。同样，我们发现需要将这个循环重写为合理的东西。接下来我会这样做，但现在我们有：

typedef long Align;                      /* for alignment to long boundary */

typedef union header                     /* block header */

  struct
  
    union header *ptr;                   /* next block if on free list */
    size_t size;                         /* size of this block */
   s;

  Align x;                               /* force alignment of blocks */

 Header;


static Header base = 0;                /* empty list to get started */
static Header* freep = NULL;             /* start of free list */


/* malloc: general-purpose storage allocator */
void* kr_malloc (size_t nbytes)

  Header*  p;
  Header*  prevp;
  size_t   nunits;

  nunits = (nbytes + sizeof(Header) - 1) / sizeof(header) + 1;

  prevp = freep;
  if (prevp == NULL)                     /* no free list yet */
  
    base.s.ptr  = &base;
    freeptr     = &base;
    prevptr     = &base;
    base.s.size = 0;
  

  for (p = prevp->s.ptr; ; prevp = p, p = p->s.ptr)
  
    if (p->s.size >= nunits)             /* big enough */
    
      if (p->s.size == nunits)           /* exactly */
      
        prevp->s.ptr = p->s.ptr;
      
      else                               /* allocate tail end */
      
        p->s.size -= nunits;
        p += p->s.size;
        p->s.size = nunits
      

      freep = prevp;
      return p+1;
    

    if (p == freep)                      /* wrapped around free list */
    
      p = morecore(nunits);
      if (p == NULL)
      
        return NULL;                     /* none left */
      
    
   /* for */

第 3 步：重写晦涩的循环。

出于前面提到的原因。我们可以看到这个循环永远持续下去，它通过从函数返回终止，无论是在分配完成时，还是在没有剩余内存时。因此，让我们将其创建为循环条件，并将 return 提升到应该在的函数末尾。让我们摆脱那个丑陋的逗号运算符。

我将介绍两个新变量：一个结果变量用于保存结果指针，另一个用于跟踪循环是否应该继续。我将使用 bool 类型让 K&R 大吃一惊，这是自 1999 年以来 C 语言的一部分。

（我希望我没有用这个改变改变算法，我相信我没有）

#include <stdbool.h>

typedef long Align;                      /* for alignment to long boundary */

typedef union header                     /* block header */

  struct
  
    union header *ptr;                   /* next block if on free list */
    size_t size;                         /* size of this block */
   s;

  Align x;                               /* force alignment of blocks */

 Header;


static Header base = 0;                /* empty list to get started */
static Header* freep = NULL;             /* start of free list */


/* malloc: general-purpose storage allocator */
void* kr_malloc (size_t nbytes)

  Header*  p;
  Header*  prevp;
  size_t   nunits;
  void*    result;
  bool     is_allocating;

  nunits = (nbytes + sizeof(Header) - 1) / sizeof(header) + 1;

  prevp = freep;
  if (prevp == NULL)                     /* no free list yet */
  
    base.s.ptr  = &base;
    freeptr     = &base;
    prevptr     = &base;
    base.s.size = 0;
  

  is_allocating = true;
  for (p = prevp->s.ptr; is_allocating; p = p->s.ptr)
  
    if (p->s.size >= nunits)             /* big enough */
    
      if (p->s.size == nunits)           /* exactly */
      
        prevp->s.ptr = p->s.ptr;
      
      else                               /* allocate tail end */
      
        p->s.size -= nunits;
        p += p->s.size;
        p->s.size = nunits
      

      freep = prevp;
      result = p+1;
      is_allocating = false;             /* we are done */
    

    if (p == freep)                      /* wrapped around free list */
    
      p = morecore(nunits);
      if (p == NULL)
      
        result = NULL;                   /* none left */
        is_allocating = false;
      
    
    prevp = p;
   /* for */

  return result;

第 4 步：编译这个废话。

由于这是来自 K&R，因此充满了拼写错误。 sizeof(header) 应该是 sizeof(Header)。缺少分号。它们使用不同的名称 freep、prevp 与 freeptr、prevptr，但显然意味着相同的变量。我相信后者实际上是更好的名字，所以让我们使用它们。

#include <stdbool.h>
#include <stddef.h>                      /* size_t, NULL */

typedef long Align;                      /* for alignment to long boundary */

typedef union header                     /* block header */
{
  struct
  {
    union header *ptr;                   /* next block if on free list */
    size_t size;                         /* size of this block */
  } s;

  Align x;                               /* force alignment of blocks */

} Header;


static Header base = {0};                /* empty list to get started */
static Header* freeptr = NULL;           /* start of free list */

/* Not part of this listing; it has to be provided elsewhere. */
Header* morecore (size_t nunits);


/* malloc: general-purpose storage allocator
 *
 * Searches the circular free list, starting after freeptr, for the first
 * block of at least nunits header-sized units. An exact fit is unlinked
 * from the list; a larger block has its tail end split off. When the
 * search wraps around to freeptr, morecore() is asked for more memory.
 *
 * Returns a pointer just past the block header, or NULL if morecore()
 * fails.
 */
void* kr_malloc (size_t nbytes)
{
  Header*  p;
  Header*  prevptr;
  Header*  more;
  size_t   nunits;
  void*    result;
  bool     is_allocating;

  nunits = (nbytes + sizeof(Header) - 1) / sizeof(Header) + 1;

  prevptr = freeptr;
  if (prevptr == NULL)                   /* no free list yet */
  {
    base.s.ptr  = &base;
    freeptr     = &base;
    prevptr     = &base;
    base.s.size = 0;
  }

  result = NULL;
  is_allocating = true;
  for (p = prevptr->s.ptr; is_allocating; p = p->s.ptr)
  {
    if (p->s.size >= nunits)             /* big enough */
    {
      if (p->s.size == nunits)           /* exactly */
      {
        prevptr->s.ptr = p->s.ptr;
      }
      else                               /* allocate tail end */
      {
        p->s.size -= nunits;
        p += p->s.size;
        p->s.size = nunits;
      }

      freeptr = prevptr;
      result = p+1;
      is_allocating = false;             /* we are done */
    }
    else if (p == freeptr)               /* wrapped around free list */
    {
      /* Keep p untouched on failure: the loop increment still evaluates
         p->s.ptr once more before the condition is tested, so p must
         never become NULL here. */
      more = morecore(nunits);
      if (more == NULL)
      {
        result = NULL;                   /* none left */
        is_allocating = false;
      }
      else
      {
        p = more;
      }
    }
    prevptr = p;
  } /* for */

  return result;
}

现在我们有了一些可读、可维护的代码，没有许多危险的做法，甚至可以编译！所以现在我们可以真正开始思考代码实际上在做什么了。

你可能已经猜到，结构“Header”是链表中节点的声明。每个这样的节点都包含一个指向下一个节点的指针。不是很懂morecore函数，也不是“wrap-around”，我没用过这个函数，也没有sbrk。但我假设它分配了这个结构中指定的标头，以及该标头后面的一些原始数据块。如果是这样，这就解释了为什么没有实际的数据指针：假设数据跟在标头之后，在内存中相邻。所以对于每个节点，我们得到了header，我们在header之后得到了一大块原始数据。

迭代本身非常简单，它们通过一个单链表，一次一个节点。

在循环结束时，他们将指针设置为指向“块”末尾之后的位置，然后将其存储在静态变量中，这样程序就会记住它之前分配内存的位置，下次函数被调用。

他们使用了一个技巧，使每个块的头部（header）最终位于对齐的内存地址上：把所有开销信息与一个足够大的变量一起放进一个联合（union）中，以满足平台的对齐要求。因此，如果“ptr”的大小加上“size”的大小不足以保证正确的对齐，联合也保证至少分配 sizeof(Align) 个字节。我相信这整个技巧在今天已经过时了，因为 C 标准要求编译器自动对结构/联合进行填充。

【讨论】：

您提到的大多数不良做法都不是，它们是语言功能。我有点同意＃1； #2 无关紧要，其余的都是风格问题。在我 25 多年的编码生涯中，这是我第一次听到 K&R 被称为“令人难以置信的炒作”和有缺陷。 @Rob 你还在使用 25 岁以上的编译器吗？ 25 岁以上的操作系统？在 25 岁以上的计算机上？如果你只是环顾四周，就会有很多针对这本书的完全有效的批评。如果你仅仅因为我告诉你太阳是太阳系的中心而不是地球而对我投了反对票，那么至少提供一些你认为我错了的理由。我很想听听您对原始代码为何如此出色的逻辑推理。它甚至会强制你对这本书发表自己的看法，而不是随波逐流。 @Cupidvogel：在完全主观的情况下将信息作为事实传播对我来说是一个足够好的理由。我们从未解释过代码的实际工作原理【参考方案2】：

我正在学习 K&R，想必和 OP 提出这个问题时的处境一样；我来到这里，是因为我也觉得这些实现令人困惑。虽然已接受的答案非常详细且很有帮助，但我尝试采用另一种方法来理解最初编写的代码——我通读了代码，并在对我来说难以理解的部分加上了注释。这也包括该节中其他例程的代码（即函数 free 和 morecore；我已将 malloc 和 free 重命名为 kandr_malloc 和 kandr_free，以避免与 stdlib 冲突）。我想把它留在这里，作为已接受答案的补充，供其他可能觉得有帮助的学生参考。

我承认这段代码中的注释过多。请注意，我只是把它当作一个学习练习，并不是建议实际编写代码时应该这样做。

我冒昧地将一些变量名称更改为对我来说更直观的名称；除此之外，代码基本上保持不变。尽管 valgrind 对某些应用程序有抱怨，但对于我使用的测试程序，它似乎编译和运行良好。

另外：注释中的一些文字直接取自 K&R 或手册页——这些部分并非我的原创，我无意居功。

#include <unistd.h>  // sbrk

#define NALLOC 1024  // Number of block sizes to allocate on call to sbrk
#ifdef NULL
#undef NULL
#endif
#define NULL 0


// long is chosen as an instance of the most restrictive alignment type
typedef long Align;

/* Construct Header data structure.  To ensure that the storage returned by
 * kandr_malloc is aligned properly for the objects that are stored in it, all
 * blocks are multiples of the header size, and the header itself is aligned
 * properly.  This is achieved through the use of a union; this data type is big
 * enough to hold the "widest" member, and the alignment is appropriate for all
 * of the types in the union.  Thus by including a member of type Align, which
 * is an instance of the most restrictive type, we guarantee that the size of
 * Header is aligned to the worst-case boundary.  The Align field is never used;
 * it just forces each header to the desired alignment.
 */
union header 
  struct 
    union header *next;
    unsigned size;
   s;

  Align x;
;
typedef union header Header;


static Header base;           // Used to get an initial member for free list
static Header *freep = NULL;  // Free list starting point


static Header *morecore(unsigned nblocks);
void kandr_free(void *ptr);




void *kandr_malloc(unsigned nbytes) 

  Header *currp;
  Header *prevp;
  unsigned nunits;

  /* Calculate the number of memory units needed to provide at least nbytes of
   * memory.
   *
   * Suppose that we need n >= 0 bytes and that the memory unit sizes are b > 0
   * bytes.  Then n / b (using integer division) yields one less than the number
   * of units needed to provide n bytes of memory, except in the case that n is
   * a multiple of b; then it provides exactly the number of units needed.  It
   * can be verified that (n - 1) / b provides one less than the number of units
   * needed to provide n bytes of memory for all values of n > 0.  Thus ((n - 1)
   * / b) + 1 provides exactly the number of units needed for n > 0.
   *
   * The extra sizeof(Header) in the numerator is to include the unit of memory
   * needed for the header itself.
   */
  nunits = ((nbytes + sizeof(Header) - 1) / sizeof(Header)) + 1;

  // case: no free list yet exists; we have to initialize.
  if (freep == NULL) 

    // Create degenerate free list; base points to itself and has size 0
    base.s.next = &base;
    base.s.size = 0;

    // Set free list starting point to base address
    freep = &base;
  

  /* Initialize pointers to two consecutive blocks in the free list, which we
   * call prevp (the previous block) and currp (the current block)
   */
  prevp = freep;
  currp = prevp->s.next;

  /* Step through the free list looking for a block of memory large enough to
   * fit nunits units of memory into.  If the whole list is traversed without
   * finding such a block, then morecore is called to request more memory from
   * the OS.
   */
  for (; ; prevp = currp, currp = currp->s.next) 

    /* case: found a block of memory in free list large enough to fit nunits
     * units of memory into.  Partition block if necessary, remove it from the
     * free list, and return the address of the block (after moving past the
     * header).
     */
    if (currp->s.size >= nunits) 

      /* case: block is exactly the right size; remove the block from the free
       * list by pointing the previous block to the next block.
       */
      if (currp->s.size == nunits) 
    /* Note that this line wouldn't work as intended if we were down to only
     * 1 block.  However, we would never make it here in that scenario
     * because the block at &base has size 0 and thus the conditional will
     * fail (note that nunits is always >= 1).  It is true that if the block
     * at &base had combined with another block, then previous statement
     * wouldn't apply - but presumably since base is a global variable and
     * future blocks are allocated on the heap, we can be sure that they
     * won't border each other.
     */
    prevp->s.next = currp->s.next;
      
      /* case: block is larger than the amount of memory asked for; allocate
       * tail end of the block to the user.
       */
      else 
    // Changes the memory stored at currp to reflect the reduced block size
    currp->s.size -= nunits;
    // Find location at which to create the block header for the new block
    currp += currp->s.size;
    // Store the block size in the new header
    currp->s.size = nunits;
      

      /* Set global starting position to the previous pointer.  Next call to
       * malloc will start either at the remaining part of the partitioned block
       * if a partition occurred, or at the block after the selected block if
       * not.
       */
      freep = prevp;

      /* Return the location of the start of the memory, i.e. after adding one
       * so as to move past the header
       */
      return (void *) (currp + 1);

     // end found a block of memory in free list case

    /* case: we've wrapped around the free list without finding a block large
     * enough to fit nunits units of memory into.  Call morecore to request that
     * at least nunits units of memory are allocated.
     */
    if (currp == freep) 
      /* morecore returns freep; the reason that we have to assign currp to it
       * again (since we just tested that they are equal), is that there is a
       * call to free inside of morecore that can potentially change the value
       * of freep.  Thus we reassign it so that we can be assured that the newly
       * added block is found before (currp == freep) again.
       */
      if ((currp = morecore(nunits)) == NULL) 
    return NULL;
      
     // end wrapped around free list case
   // end step through free list looking for memory loop





static Header *morecore(unsigned nunits) 

  void *freemem;    // The address of the newly created memory
  Header *insertp;  // Header ptr for integer arithmatic and constructing header

  /* Obtaining memory from OS is a comparatively expensive operation, so obtain
   * at least NALLOC blocks of memory and partition as needed
   */
  if (nunits < NALLOC) 
    nunits = NALLOC;
  

  /* Request that the OS increment the program's data space.  sbrk changes the
   * location of the program break, which defines the end of the process's data
   * segment (i.e., the program break is the first location after the end of the
   * uninitialized data segment).  Increasing the program break has the effect
   * of allocating memory to the process.  On success, brk returns the previous
   * break - so if the break was increased, then this value is a pointer to the
   * start of the newly allocated memory.
   */
  freemem = sbrk(nunits * sizeof(Header));
  // case: unable to allocate more memory; sbrk returns (void *) -1 on error
  if (freemem == (void *) -1) 
    return NULL;
  

  // Construct new block
  insertp = (Header *) freemem;
  insertp->s.size = nunits;

  /* Insert block into the free list so that it is available for malloc.  Note
   * that we add 1 to the address, effectively moving to the first position
   * after the header data, since of course we want the block header to be
   * transparent for the user's interactions with malloc and free.
   */
  kandr_free((void *) (insertp + 1));

  /* Returns the start of the free list; recall that freep has been set to the
   * block immediately preceeding the newly allocated memory (by free).  Thus by
   * returning this value the calling function can immediately find the new
   * memory by following the pointer to the next block.
   */
  return freep;





void kandr_free(void *ptr) 

  Header *insertp, *currp;

  // Find address of block header for the data to be inserted
  insertp = ((Header *) ptr) - 1;

  /* Step through the free list looking for the position in the list to place
   * the insertion block.  In the typical circumstances this would be the block
   * immediately to the left of the insertion block; this is checked for by
   * finding a block that is to the left of the insertion block and such that
   * the following block in the list is to the right of the insertion block.
   * However this check doesn't check for one such case, and misses another.  We
   * still have to check for the cases where either the insertion block is
   * either to the left of every other block owned by malloc (the case that is
   * missed), or to the right of every block owned by malloc (the case not
   * checked for).  These last two cases are what is checked for by the
   * condition inside of the body of the loop.
   */
  for (currp = freep; !((currp < insertp) && (insertp < currp->s.next)); currp = currp->s.next) 

    /* currp >= currp->s.ptr implies that the current block is the rightmost
     * block in the free list.  Then if the insertion block is to the right of
     * that block, then it is the new rightmost block; conversely if it is to
     * the left of the block that currp points to (which is the current leftmost
     * block), then the insertion block is the new leftmost block.  Note that
     * this conditional handles the case where we only have 1 block in the free
     * list (this case is the reason that we need >= in the first test rather
     * than just >).
     */
    if ((currp >= currp->s.next) && ((currp < insertp) || (insertp < currp->s.next))) 
      break;
    
  

  /* Having found the correct location in the free list to place the insertion
   * block, now we have to (i) link it to the next block, and (ii) link the
   * previous block to it.  These are the tasks of the next two if/else pairs.
   */

  /* case: the end of the insertion block is adjacent to the beginning of
   * another block of data owned by malloc.  Absorb the block on the right into
   * the block on the left (i.e. the previously existing block is absorbed into
   * the insertion block).
   */
  if ((insertp + insertp->s.size) == currp->s.next) 
    insertp->s.size += currp->s.next->s.size;
    insertp->s.next = currp->s.next->s.next;
  
  /* case: the insertion block is not left-adjacent to the beginning of another
   * block of data owned by malloc.  Set the insertion block member to point to
   * the next block in the list.
   */
  else 
    insertp->s.next = currp->s.next;
  

  /* case: the end of another block of data owned by malloc is adjacent to the
   * beginning of the insertion block.  Absorb the block on the right into the
   * block on the left (i.e. the insertion block is absorbed into the preceeding
   * block).
   */
  if ((currp + currp->s.size) == insertp) 
    currp->s.size += insertp->s.size;
    currp->s.next = insertp->s.next;
  
  /* case: the insertion block is not right-adjacent to the end of another block
   * of data owned by malloc.  Set the previous block in the list to point to
   * the insertion block.
   */
  else 
    currp->s.next = insertp;
  

  /* Set the free pointer list to start the block previous to the insertion
   * block.  This makes sense because calls to malloc start their search for
   * memory at the next block after freep, and the insertion block has as good a
   * chance as any of containing a reasonable amount of memory since we've just
   * added some to it.  It also coincides with calls to morecore from
   * kandr_malloc because the next search in the iteration looks at exactly the
   * right memory block.
   */
  freep = currp;

【讨论】：

天哪，这是一个如此彻底和详细的答案！谢谢！我现在破产了，但有一天我会变得富有（有 SO 学分），然后我会给你一份当之无愧的赏金.. :) 话虽如此，尽管评论很好，但我仍然对实用程序有问题Align 这个词的含义以及它的作用，以及对齐的含义。你能再解释一下吗？我现在只是在自己学习这些概念，所以我只能说我认为正在发生的事情。计算机体系结构对字进行操作，即 32 位或 64 位数据段。此处的malloc 例程对特定内存单元大小的倍数进行操作，定义为sizeof(Header)。当我们分配数据时，我们需要它在字边界处开始和结束。所以我认为他们选择了具有全字长的数据类型，这保证了sizeof(Header) 是字长的倍数，因此malloc 分配的数据在字边界上开始和结束。 @AttitudeMonger 作者还解释了 Align 并将其称为机器的“最严格的类型”，SO Question 中也讨论了这个术语。感谢@dpritch 的精彩说明！我很难理解nunits 对currp->s.size 的分配。当这样的答案出现在这里时真是太好了:) 当分配一个太大的块的尾部时，如何创建一个新的头部？ currp->s.size 从末尾增加到偏移量nunits 后不应该是NULL 吗？【参考方案3】：

malloc()的基础

在 Linux 中，有两种典型的内存请求方式：sbrk 和 mmap。这些系统调用对频繁的小分配有严格的限制。 malloc() 是解决此问题的库函数。它使用 sbrk/mmap 请求大块内存并返回大块内的小内存块。这比直接调用 sbrk/mmap 更加高效灵活。

K&R malloc()

在 K&R 实现中，core（通常称为 arena）是一大块内存。 morecore() 通过sbrk() 从系统请求一个核心。当您多次调用 malloc()/free() 时，核心中的一些块被使用/分配，而其他块则空闲。 K&R malloc 将空闲块的地址存储在一个循环单链表中。在这个列表中，每个节点都是一块空闲内存。第一个sizeof(Header) 字节保存块的大小和指向下一个空闲块的指针。空闲块中的其余字节未初始化。与教科书中的典型列表不同，空闲列表中的节点只是指向核心中一些未使用区域的指针；除了核心之外，您实际上并没有分配每个节点。这个列表是理解算法的关键。

下图显示了具有两个内核/竞技场的示例内存布局。在图中，每个字符占用sizeof(Header) 字节。 @ 是 Header，+ 标记分配的内存，- 标记内核内的空闲内存。在示例中，有 3 个已分配块和 3 个空闲块。三个空闲块存储在循环列表中。对于三个分配的块，只有它们的大小存储在Header中。

            This is core 1                             This is core 2

@---------@+++++++++@++++++++++++        @----------@+++++++++++++++++@------------
|                                        |                            |
p->ptr->ptr                              p = p->ptr->ptr->ptr         p->ptr

在您的代码中，freep 是空闲列表的入口点。如果你反复关注freep->ptr，你会回到freep——它是循环的。一旦你了解了循环单链表，剩下的就相对容易了。 malloc() 找到一个空闲块并可能将其拆分。 free() 将一个空闲块添加回列表，并可能将其合并到相邻的空闲块。他们都试图保持列表的结构。

关于实现的其他说明

malloc() 代码中的注释提到了“环绕”（wrapped around）。当您遍历完整个空闲列表却找不到不小于请求长度的空闲块时，就会执行到那一行。在这种情况下，必须用 morecore() 添加一个新的 core。

base 是一个大小为零的块，始终包含在空闲列表中。这是一种避免特殊情况处理（special-casing）的技巧，并不是绝对必要的。

free() 可能看起来有点复杂，因为它必须考虑四种不同的情况才能将新释放的块合并到列表中的其他空闲块。除非您想自己重新实现，否则这个细节并不重要。

This blog post 更详细地解释了 K&R malloc。

PS： 在我看来，K&R malloc 是最优雅的代码之一。当我第一次理解代码时，真是大开眼界。令我难过的是，一些现代程序员甚至不了解此实现的基本原理，仅根据其编码风格就将其称为废话。

【讨论】：

【参考方案4】：

我还发现这个练习很棒而且很有趣。

在我看来，可视化结构可能有助于理解逻辑 - 或者至少这对我有用。下面是我的代码，它尽可能多地打印出 K&R malloc 的流程。

我对 K&R malloc 所做的最重要的更改是修改了“free”，以确保旧指针不会再被使用。除此之外，我添加了注释并修复了一些小的拼写错误。

用 NALLOC、MAXMEM 和 'main' 中的测试变量进行实验也会有所帮助。

在我的计算机（Ubuntu 16.04.3）上编译时没有错误：

gcc -g -std=c99 -Wall -Wextra -pedantic-errors krmalloc.c

krmalloc.c：

#include <stdio.h>
#include <unistd.h>

typedef long Align;             /* for alignment to long boundary */
union header                   /* block header */
    struct 
        union header *ptr;      /* next block if on free list */
        size_t size;            /* size of this block */
                                /*      including the Header itself */
                                /*      measured in count of Header chunks */
                                /*      not less than NALLOC Header's */
     s;
    Align x;                    /* force alignment of blocks */
;
typedef union header Header;

static Header *morecore(size_t);
void *mmalloc(size_t);
void _mfree(void **);
void visualize(const char*);
size_t getfreem(void);
size_t totmem = 0;              /* total memory in chunks */

static Header base;             /* empty list to get started */
static Header *freep = NULL;    /* start of free list */

#define NALLOC 1                /* minimum chunks to request */
#define MAXMEM 2048             /* max memory available (in bytes) */

#define mfree(p) _mfree((void **)&p)

void *sbrk(__intptr_t incr);


int main(void)

    char *pc, *pcc, *pccc, *ps;
    long *pd, *pdd;
    int dlen = 100;
    int ddlen = 50;

    visualize("start");


    /* trying to fragment as much as possible to get a more interesting view */

    /* claim a char */
    if ((pc = (char *) mmalloc(sizeof(char))) == NULL)
        return -1;

    /* claim a string */
    if ((ps = (char *) mmalloc(dlen * sizeof(char))) == NULL)
        return -1;

    /* claim some long's */
    if ((pd = (long *) mmalloc(ddlen * sizeof(long))) == NULL)
        return -1;

    /* claim some more long's */
    if ((pdd = (long *) mmalloc(ddlen * 2 * sizeof(long))) == NULL)
        return -1;

    /* claim one more char */
    if ((pcc = (char *) mmalloc(sizeof(char))) == NULL)
        return -1;

    /* claim the last char */
    if ((pccc = (char *) mmalloc(sizeof(char))) == NULL)
        return -1;


    /* free and visualize */
    printf("\n");
    mfree(pccc);
    /*      bugged on purpose to test free(NULL) */
    mfree(pccc);
    visualize("free(the last char)");

    mfree(pdd);
    visualize("free(lot of long's)");

    mfree(ps);
    visualize("free(string)");

    mfree(pd);
    visualize("free(less long's)");

    mfree(pc);
    visualize("free(first char)");

    mfree(pcc);
    visualize("free(second char)");


    /* check memory condition */
    size_t freemem = getfreem();
    printf("\n");
    printf("--- Memory claimed  : %ld chunks (%ld bytes)\n",
                totmem, totmem * sizeof(Header));
    printf("    Free memory now : %ld chunks (%ld bytes)\n",
                freemem, freemem * sizeof(Header));
    if (freemem == totmem)
        printf("    No memory leaks detected.\n");
    else
        printf("    (!) Leaking memory: %ld chunks (%ld bytes).\n",
                    (totmem - freemem), (totmem - freemem) * sizeof(Header));

    printf("// Done.\n\n");
    return 0;



/* visualize: print the free list (educational purpose) */
void visualize(const char* msg)

    Header *tmp;

    printf("--- Free list after \"%s\":\n", msg);

    if (freep == NULL)                    /* does not exist */
        printf("\tList does not exist\n\n");
        return;
    

    if  (freep == freep->s.ptr)           /* self-pointing list = empty */
        printf("\tList is empty\n\n");
        return;
    

    printf("  ptr: %10p size: %-3lu -->  ", (void *) freep, freep->s.size);

    tmp = freep;                           /* find the start of the list */
    while (tmp->s.ptr > freep)            /* traverse the list */
        tmp = tmp->s.ptr;
        printf("ptr: %10p size: %-3lu -->  ", (void *) tmp, tmp->s.size);
    
    printf("end\n\n");



/* calculate the total amount of available free memory */
size_t getfreem(void)

    if (freep == NULL)
        return 0;

    Header *tmp;
    tmp = freep;
    size_t res = tmp->s.size;

    while (tmp->s.ptr > tmp) 
        tmp = tmp->s.ptr;
        res += tmp->s.size;
    

    return res;



/* mmalloc: general-purpose storage allocator */
void *mmalloc(size_t nbytes)

    Header *p, *prevp;
    size_t nunits;

    /* smallest count of Header-sized memory chunks */
    /*  (+1 additional chunk for the Header itself) needed to hold nbytes */
    nunits = (nbytes + sizeof(Header) - 1) / sizeof(Header) + 1;

    /* too much memory requested? */
    if (((nunits + totmem + getfreem())*sizeof(Header)) > MAXMEM) 
        printf("Memory limit overflow!\n");
        return NULL;
    

    if ((prevp = freep) == NULL)           /* no free list yet */
        /* set the list to point to itself */
        base.s.ptr = freep = prevp = &base;
        base.s.size = 0;
    

    /* traverse the circular list */
    for (p = prevp->s.ptr; ; prevp = p, p = p->s.ptr) 

        if (p->s.size >= nunits)           /* big enough */
            if (p->s.size == nunits)        /* exactly */
                prevp->s.ptr = p->s.ptr;
            else                           /* allocate tail end */
                /* adjust the size */
                p->s.size -= nunits;
                /* find the address to return */
                p += p->s.size;
                p->s.size = nunits;
            
            freep = prevp;
            return (void *)(p+1);
        

        /* back where we started and nothing found - we need to allocate */
        if (p == freep)                     /* wrapped around free list */
            if ((p = morecore(nunits)) == NULL)
                return NULL;                /* none left */
    



/* morecore: ask system for more memory */
/*      nu: count of Header-chunks needed */
static Header *morecore(size_t nu)

    char *cp;
    Header *up;

    /* get at least NALLOC Header-chunks from the OS */
    if (nu < NALLOC)
        nu = NALLOC;

    cp = (char *) sbrk(nu * sizeof(Header));

    if (cp == (char *) -1)                  /* no space at all */
        return NULL;

    printf("... (sbrk) claimed %ld chunks.\n", nu);
    totmem += nu;                           /* keep track of allocated memory */

    up = (Header *) cp;
    up->s.size = nu;

    /* add the free space to the circular list */
    void *n = (void *)(up+1);
    mfree(n);

    return freep;



/* mfree: put block ap in free list */
void _mfree(void **ap)

    if (*ap == NULL)
        return;

    Header *bp, *p;
    bp = (Header *)*ap - 1;                 /* point to block header */

    if (bp->s.size == 0 || bp->s.size > totmem) 
        printf("_mfree: impossible value for size\n");
        return;
    

    /* the free space is only marked as free, but 'ap' still points to it */
    /* to avoid reusing this address and corrupt our structure set it to '\0' */
    *ap = NULL;

    /* look where to insert the free space */

    /* (bp > p && bp < p->s.ptr)    => between two nodes */
    /* (p > p->s.ptr)               => this is the end of the list */
    /* (p == p->p.str)              => list is one element only */
    for (p = freep; !(bp > p && bp < p->s.ptr); p = p->s.ptr)
        if (p >= p->s.ptr && (bp > p || bp < p->s.ptr))
            /* freed block at start or end of arena */
            break;

    if (bp + bp->s.size == p->s.ptr)       /* join to upper nbr */
    /* the new block fits perfect up to the upper neighbor */

        /* merging up: adjust the size */
        bp->s.size += p->s.ptr->s.size;
        /* merging up: point to the second next */
        bp->s.ptr = p->s.ptr->s.ptr;

     else
        /* set the upper pointer */
        bp->s.ptr = p->s.ptr;

    if (p + p->s.size == bp)               /* join to lower nbr */
    /* the new block fits perfect on top of the lower neighbor */

        /* merging below: adjust the size */
        p->s.size += bp->s.size;
        /* merging below: point to the next */
        p->s.ptr = bp->s.ptr;

     else
        /* set the lower pointer */
        p->s.ptr = bp;

    /* reset the start of the free list */
    freep = p;

【讨论】：

以上是关于从 K&R 书中解释 malloc 的这种实现的主要内容，如果未能解决你的问题，请参考以下文章