Wyjaśnij tę implementację malloc z książki K & R

Question

Wyjaśnij tę implementację malloc z książki K & R

To fragment książki O C autorstwaKernighan i Ritchie . Pokazuje jak zaimplementować wersję malloc. Chociaż dobrze skomentowane, mam duże trudności w zrozumieniu tego. Czy ktoś może to wyjaśnić?

typedef long Align; /* for alignment to long boundary */
union header { /* block header */
struct {
union header *ptr; /* next block if on free list */
unsigned size; /* size of this block */
} s;
Align x; /* force alignment of blocks */
};
typedef union header Header;

static Header base; /* empty list to get started */
static Header *freep = NULL; /* start of free list */
/* malloc: general-purpose storage allocator */
void *malloc(unsigned nbytes)
{
   Header *p, *prevp;
   Header *morecore(unsigned);
   unsigned nunits;
   nunits = (nbytes+sizeof(Header)-1)/sizeof(header) + 1;
   if ((prevp = freep) == NULL) { /* no free list yet */
      base.s.ptr = freeptr = prevptr = &base;
      base.s.size = 0;
   }
   for (p = prevp->s.ptr; ; prevp = p, p = p->s.ptr) {
      if (p->s.size >= nunits) { /* big enough */
        if (p->s.size == nunits) /* exactly */
           prevp->s.ptr = p->s.ptr;
        else { /* allocate tail end */
           p->s.size -= nunits;
           p += p->s.size;
           p->s.size = nunits
             }
        freep = prevp;
        return (void *)(p+1);
      }
      if (p == freep) /* wrapped around free list */
         if ((p = morecore(nunits)) == NULL)
             return NULL; /* none left */
      }
}

#define NALLOC 1024 /* minimum #units to request */
/* morecore: ask system for more memory */

static Header *morecore(unsigned nu)
{

  char *cp, *sbrk(int);
  Header *up;

  if (nu < NALLOC)
    nu = NALLOC;

  cp = sbrk(nu * sizeof(Header));

  if (cp == (char *) -1) /* no space at all */
    return NULL;

  up = (Header *) cp;
  up->s.size = nu;
  free((void *)(up+1));

  return freep;
}

/* free: put block ap in free list */
void free(void *ap) {
  Header *bp, *p;
  bp = (Header *)ap - 1; /* point to block header */
  for (p = freep; !(bp > p && bp < p->s.ptr); p = p->s.ptr)
    if (p >= p->s.ptr && (bp > p || bp < p->s.ptr))
      break; /* freed block at start or end of arena */
  if (bp + bp->size == p->s.ptr) {
    bp->s.size += p->s.ptr->s.size;
    bp->s.ptr = p->s.ptr->s.ptr;
  } else
      bp->s.ptr = p->s.ptr;

  if (p + p->size == bp) {
    p->s.size += bp->s.size;
    p->s.ptr = bp->s.ptr;
  } else
    p->s.ptr = bp;
  freep = p;
}

28

memory-management c pointers malloc

Author: M.M, 2012-10-31

Source

4 answers

Studiuję K & R, jak wyobrażam sobie OP, kiedy zadał to pytanie, i przyszedłem tutaj, ponieważ również uważam te implementacje za mylące. Chociaż przyjęta odpowiedź jest bardzo szczegółowa i pomocna, starałem się przyjąć inną taktykę, która polegała na zrozumieniu kodu tak, jak został on pierwotnie napisany - przejrzałem kod i dodałem komentarze do sekcji kodu, które były dla mnie trudne. Obejmuje to kod dla innych procedur w sekcji (które są funkcjami free i memcore - nazwałem je kandr_malloc i kandr_free, Aby uniknąć konfliktów z stdlib). Pomyślałem, że zostawię to tutaj jako uzupełnienie zaakceptowanej odpowiedzi dla innych uczniów, którzy mogą uznać ją za pomocną.

Przyjmuję do wiadomości, że komentarze w tym Kodeksie są przesadne. Proszę wiedzieć, że robię to tylko jako ćwiczenie uczenia się i nie proponuję, aby był to dobry sposób na napisanie kodu.

Pozwoliłem sobie zmienić niektóre nazwy zmiennych na te, które wydawały się bardziej intuicyjne dla mnie; poza tym kod jest zasadniczo nienaruszony. Wydaje się, że kompiluje i działa dobrze dla programów testowych, których używałem, chociaż valgrind miał skargi na niektóre aplikacje.

Również: część tekstu w komentarzach jest usuwana bezpośrednio z K & R lub stron podręcznika-nie zamierzam brać żadnego uznania za te sekcje.

#include <unistd.h>  // sbrk

#define NALLOC 1024  // Number of block sizes to allocate on call to sbrk
#ifdef NULL
#undef NULL
#endif
#define NULL 0


// long is chosen as an instance of the most restrictive alignment type
typedef long Align;

/* Construct Header data structure.  To ensure that the storage returned by
 * kandr_malloc is aligned properly for the objects that are stored in it, all
 * blocks are multiples of the header size, and the header itself is aligned
 * properly.  This is achieved through the use of a union; this data type is big
 * enough to hold the "widest" member, and the alignment is appropriate for all
 * of the types in the union.  Thus by including a member of type Align, which
 * is an instance of the most restrictive type, we guarantee that the size of
 * Header is aligned to the worst-case boundary.  The Align field is never used;
 * it just forces each header to the desired alignment.
 */
union header {
  struct {
    union header *next;
    unsigned size;
  } s;

  Align x;
};
typedef union header Header;


static Header base;           // Used to get an initial member for free list
static Header *freep = NULL;  // Free list starting point


static Header *morecore(unsigned nblocks);
void kandr_free(void *ptr);




void *kandr_malloc(unsigned nbytes) {

  Header *currp;
  Header *prevp;
  unsigned nunits;

  /* Calculate the number of memory units needed to provide at least nbytes of
   * memory.
   *
   * Suppose that we need n >= 0 bytes and that the memory unit sizes are b > 0
   * bytes.  Then n / b (using integer division) yields one less than the number
   * of units needed to provide n bytes of memory, except in the case that n is
   * a multiple of b; then it provides exactly the number of units needed.  It
   * can be verified that (n - 1) / b provides one less than the number of units
   * needed to provide n bytes of memory for all values of n > 0.  Thus ((n - 1)
   * / b) + 1 provides exactly the number of units needed for n > 0.
   *
   * The extra sizeof(Header) in the numerator is to include the unit of memory
   * needed for the header itself.
   */
  nunits = ((nbytes + sizeof(Header) - 1) / sizeof(Header)) + 1;

  // case: no free list yet exists; we have to initialize.
  if (freep == NULL) {

    // Create degenerate free list; base points to itself and has size 0
    base.s.next = &base;
    base.s.size = 0;

    // Set free list starting point to base address
    freep = &base;
  }

  /* Initialize pointers to two consecutive blocks in the free list, which we
   * call prevp (the previous block) and currp (the current block)
   */
  prevp = freep;
  currp = prevp->s.next;

  /* Step through the free list looking for a block of memory large enough to
   * fit nunits units of memory into.  If the whole list is traversed without
   * finding such a block, then morecore is called to request more memory from
   * the OS.
   */
  for (; ; prevp = currp, currp = currp->s.next) {

    /* case: found a block of memory in free list large enough to fit nunits
     * units of memory into.  Partition block if necessary, remove it from the
     * free list, and return the address of the block (after moving past the
     * header).
     */
    if (currp->s.size >= nunits) {

      /* case: block is exactly the right size; remove the block from the free
       * list by pointing the previous block to the next block.
       */
      if (currp->s.size == nunits) {
    /* Note that this line wouldn't work as intended if we were down to only
     * 1 block.  However, we would never make it here in that scenario
     * because the block at &base has size 0 and thus the conditional will
     * fail (note that nunits is always >= 1).  It is true that if the block
     * at &base had combined with another block, then previous statement
     * wouldn't apply - but presumably since base is a global variable and
     * future blocks are allocated on the heap, we can be sure that they
     * won't border each other.
     */
    prevp->s.next = currp->s.next;
      }
      /* case: block is larger than the amount of memory asked for; allocate
       * tail end of the block to the user.
       */
      else {
    // Changes the memory stored at currp to reflect the reduced block size
    currp->s.size -= nunits;
    // Find location at which to create the block header for the new block
    currp += currp->s.size;
    // Store the block size in the new header
    currp->s.size = nunits;
      }

      /* Set global starting position to the previous pointer.  Next call to
       * malloc will start either at the remaining part of the partitioned block
       * if a partition occurred, or at the block after the selected block if
       * not.
       */
      freep = prevp;

      /* Return the location of the start of the memory, i.e. after adding one
       * so as to move past the header
       */
      return (void *) (currp + 1);

    } // end found a block of memory in free list case

    /* case: we've wrapped around the free list without finding a block large
     * enough to fit nunits units of memory into.  Call morecore to request that
     * at least nunits units of memory are allocated.
     */
    if (currp == freep) {
      /* morecore returns freep; the reason that we have to assign currp to it
       * again (since we just tested that they are equal), is that there is a
       * call to free inside of morecore that can potentially change the value
       * of freep.  Thus we reassign it so that we can be assured that the newly
       * added block is found before (currp == freep) again.
       */
      if ((currp = morecore(nunits)) == NULL) {
    return NULL;
      }
    } // end wrapped around free list case
  } // end step through free list looking for memory loop
}




static Header *morecore(unsigned nunits) {

  void *freemem;    // The address of the newly created memory
  Header *insertp;  // Header ptr for integer arithmatic and constructing header

  /* Obtaining memory from OS is a comparatively expensive operation, so obtain
   * at least NALLOC blocks of memory and partition as needed
   */
  if (nunits < NALLOC) {
    nunits = NALLOC;
  }

  /* Request that the OS increment the program's data space.  sbrk changes the
   * location of the program break, which defines the end of the process's data
   * segment (i.e., the program break is the first location after the end of the
   * uninitialized data segment).  Increasing the program break has the effect
   * of allocating memory to the process.  On success, brk returns the previous
   * break - so if the break was increased, then this value is a pointer to the
   * start of the newly allocated memory.
   */
  freemem = sbrk(nunits * sizeof(Header));
  // case: unable to allocate more memory; sbrk returns (void *) -1 on error
  if (freemem == (void *) -1) {
    return NULL;
  }

  // Construct new block
  insertp = (Header *) freemem;
  insertp->s.size = nunits;

  /* Insert block into the free list so that it is available for malloc.  Note
   * that we add 1 to the address, effectively moving to the first position
   * after the header data, since of course we want the block header to be
   * transparent for the user's interactions with malloc and free.
   */
  kandr_free((void *) (insertp + 1));

  /* Returns the start of the free list; recall that freep has been set to the
   * block immediately preceeding the newly allocated memory (by free).  Thus by
   * returning this value the calling function can immediately find the new
   * memory by following the pointer to the next block.
   */
  return freep;
}




void kandr_free(void *ptr) {

  Header *insertp, *currp;

  // Find address of block header for the data to be inserted
  insertp = ((Header *) ptr) - 1;

  /* Step through the free list looking for the position in the list to place
   * the insertion block.  In the typical circumstances this would be the block
   * immediately to the left of the insertion block; this is checked for by
   * finding a block that is to the left of the insertion block and such that
   * the following block in the list is to the right of the insertion block.
   * However this check doesn't check for one such case, and misses another.  We
   * still have to check for the cases where either the insertion block is
   * either to the left of every other block owned by malloc (the case that is
   * missed), or to the right of every block owned by malloc (the case not
   * checked for).  These last two cases are what is checked for by the
   * condition inside of the body of the loop.
   */
  for (currp = freep; !((currp < insertp) && (insertp < currp->s.next)); currp = currp->s.next) {

    /* currp >= currp->s.ptr implies that the current block is the rightmost
     * block in the free list.  Then if the insertion block is to the right of
     * that block, then it is the new rightmost block; conversely if it is to
     * the left of the block that currp points to (which is the current leftmost
     * block), then the insertion block is the new leftmost block.  Note that
     * this conditional handles the case where we only have 1 block in the free
     * list (this case is the reason that we need >= in the first test rather
     * than just >).
     */
    if ((currp >= currp->s.next) && ((currp < insertp) || (insertp < currp->s.next))) {
      break;
    }
  }

  /* Having found the correct location in the free list to place the insertion
   * block, now we have to (i) link it to the next block, and (ii) link the
   * previous block to it.  These are the tasks of the next two if/else pairs.
   */

  /* case: the end of the insertion block is adjacent to the beginning of
   * another block of data owned by malloc.  Absorb the block on the right into
   * the block on the left (i.e. the previously existing block is absorbed into
   * the insertion block).
   */
  if ((insertp + insertp->s.size) == currp->s.next) {
    insertp->s.size += currp->s.next->s.size;
    insertp->s.next = currp->s.next->s.next;
  }
  /* case: the insertion block is not left-adjacent to the beginning of another
   * block of data owned by malloc.  Set the insertion block member to point to
   * the next block in the list.
   */
  else {
    insertp->s.next = currp->s.next;
  }

  /* case: the end of another block of data owned by malloc is adjacent to the
   * beginning of the insertion block.  Absorb the block on the right into the
   * block on the left (i.e. the insertion block is absorbed into the preceeding
   * block).
   */
  if ((currp + currp->s.size) == insertp) {
    currp->s.size += insertp->s.size;
    currp->s.next = insertp->s.next;
  }
  /* case: the insertion block is not right-adjacent to the end of another block
   * of data owned by malloc.  Set the previous block in the list to point to
   * the insertion block.
   */
  else {
    currp->s.next = insertp;
  }

  /* Set the free pointer list to start the block previous to the insertion
   * block.  This makes sense because calls to malloc start their search for
   * memory at the next block after freep, and the insertion block has as good a
   * chance as any of containing a reasonable amount of memory since we've just
   * added some to it.  It also coincides with calls to morecore from
   * kandr_malloc because the next search in the iteration looks at exactly the
   * right memory block.
   */
  freep = currp;
}

25

Author: dpritch,
Warning: date(): Invalid date.timezone value 'Europe/Kyiv', we selected the timezone 'UTC' for now. in /var/www/agent_stack/data/www/doraprojects.net/template/agent.layouts/content.php on line 54
2016-04-09 02:40:06

The basic of malloc ()

W Linuksie istnieją dwa typowe sposoby żądania pamięci: sbrk i mmap . Te wywołania systemowe mają poważne ograniczenia dotyczące częstych małych przydziałów. malloc() jest funkcją biblioteczną, która rozwiązuje ten problem. Żąda dużych kawałków pamięci z sbrk / mmap i zwraca małe bloki pamięci wewnątrz dużych kawałków. Jest to o wiele bardziej wydajne i elastyczne niż bezpośrednie wywołanie sbrk / mmap.

K & R malloc ()

W K & R implementacja, rdzeń (częściej nazywany arena ) to duża część pamięci. morecore() żąda rdzenia z systemu poprzez sbrk(). Gdy wywołujesz malloc () / free () wiele razy, niektóre bloki w rdzeniach są używane / przydzielane, podczas gdy inne są wolne. K & r malloc przechowuje adresy wolnych bloków w circular single linked list. Na tej liście każdy węzeł jest blokiem wolnej pamięci. Pierwsze sizeof(Header) bajty zachowują rozmiar bloku i wskaźnik do następnego wolnego bloku. Na reszta bajtów w wolnym bloku jest niezinicjalizowana. W odróżnieniu od typowych list w podręcznikach, węzły na liście wolnej są po prostu wskaźnikami do niektórych nieużywanych obszarów w rdzeniach; w rzeczywistości nie przydzielasz każdego węzła z wyjątkiem rdzeni. Lista ta jest kluczem do zrozumienia algorytmu.

Poniższy schemat przedstawia przykładowy układ pamięci z dwoma rdzeniami / arenami. Na diagramie każdy znak zajmuje sizeof(Header) bajty. @ jest Header, + oznacza przydzieloną pamięć i - oznacza wolną pamięć wewnętrzne rdzenie. W przykładzie są trzy przydzielone bloki i trzy wolne bloki. Trzy wolne bloki są przechowywane na liście okrągłej. Dla trzech przydzielonych bloków w Header zapisywane są tylko ich rozmiary.

            This is core 1                             This is core 2

@---------@+++++++++@++++++++++++        @----------@+++++++++++++++++@------------
|                                        |                            |
p->ptr->ptr                              p = p->ptr->ptr->ptr         p->ptr

W Twoim kodzie, freep jest punktem wejścia do wolnej listy. Jeśli będziesz powtarzał freep->ptr, wrócisz do {–10]} - to jest okrągłe. Po zrozumieniu okrągłej listy z pojedynczymi linkami, reszta jest stosunkowo łatwa. malloc() znajduje wolny blok i ewentualnie dzieli to. free() dodaje wolny blok z powrotem do listy i może scalić go z sąsiednimi wolnymi blokami. Obaj starają się zachować strukturę listy.

Inne uwagi dotyczące realizacji

Komentarze do kodu wymienione" owinięte wokół " w malloc(). Ta linia ma miejsce, gdy przejedziesz całą wolną listę, ale nie możesz znaleźć wolnego bloku większego niż żądana długość. W tym przypadku musisz dodać nowy rdzeń za pomocą morecore().
base jest wielkości zerowej blok, który zawsze znajduje się na wolnej liście. To sztuczka, aby uniknąć specjalnej obudowy. Nie jest to bezwzględnie konieczne.
free() może wydawać się trochę skomplikowane, ponieważ musi brać pod uwagę cztery różne przypadki, aby scalić nowo uwolniony blok z innymi wolnymi blokami na liście. Ten szczegół nie jest tak ważny, chyba że chcesz ponownie wdrożyć samodzielnie.
Ten wpis na blogu wyjaśnia K & R malloc bardziej szczegółowo.

PS: K & R malloc jest moim zdaniem jednym z najbardziej eleganckich kawałków kodu. To było naprawdę otwierające oczy, kiedy po raz pierwszy zrozumiałam kod. Zasmuca mnie to, że niektórzy współcześni programiści, nawet nie rozumiejąc podstaw tej implementacji, nazywają arcydzieło bzdurą wyłącznie w oparciu o jego styl kodowania.

5

Author: user172818,
Warning: date(): Invalid date.timezone value 'Europe/Kyiv', we selected the timezone 'UTC' for now. in /var/www/agent_stack/data/www/doraprojects.net/template/agent.layouts/content.php on line 54
2017-09-19 04:17:23

Również uważam to ćwiczenie za świetne i interesujące.

Moim zdaniem wizualizacja struktury może bardzo pomóc w zrozumieniu logiki-a przynajmniej mi się to udało. Poniżej znajduje się mój kod, który drukuje jak najwięcej o przepływie K & R malloc.

Najbardziej znaczącą zmianą, jaką wprowadziłem w K & R malloc, jest zmiana "free", aby upewnić się, że jakiś stary wskaźnik nie zostanie użyty ponownie. Poza tym dodałem komentarze i poprawiłem kilka małych literówek.

Eksperymentowanie z NALLOC, MAXMEM i zmienne testowe w 'main' mogą być również pomocne.

Na moim komputerze (Ubuntu 16.04.3) to skompilowane bez błędów z:

gcc -g -std=c99 -Wall -Wextra -pedantic-errors krmalloc.c

Krmalloc.c:

#include <stdio.h>
#include <unistd.h>

typedef long Align;             /* for alignment to long boundary */
union header {                  /* block header */
    struct {
        union header *ptr;      /* next block if on free list */
        size_t size;            /* size of this block */
                                /*      including the Header itself */
                                /*      measured in count of Header chunks */
                                /*      not less than NALLOC Header's */
    } s;
    Align x;                    /* force alignment of blocks */
};
typedef union header Header;

static Header *morecore(size_t);
void *mmalloc(size_t);
void _mfree(void **);
void visualize(const char*);
size_t getfreem(void);
size_t totmem = 0;              /* total memory in chunks */

static Header base;             /* empty list to get started */
static Header *freep = NULL;    /* start of free list */

#define NALLOC 1                /* minimum chunks to request */
#define MAXMEM 2048             /* max memory available (in bytes) */

#define mfree(p) _mfree((void **)&p)

void *sbrk(__intptr_t incr);


int main(void)
{
    char *pc, *pcc, *pccc, *ps;
    long *pd, *pdd;
    int dlen = 100;
    int ddlen = 50;

    visualize("start");


    /* trying to fragment as much as possible to get a more interesting view */

    /* claim a char */
    if ((pc = (char *) mmalloc(sizeof(char))) == NULL)
        return -1;

    /* claim a string */
    if ((ps = (char *) mmalloc(dlen * sizeof(char))) == NULL)
        return -1;

    /* claim some long's */
    if ((pd = (long *) mmalloc(ddlen * sizeof(long))) == NULL)
        return -1;

    /* claim some more long's */
    if ((pdd = (long *) mmalloc(ddlen * 2 * sizeof(long))) == NULL)
        return -1;

    /* claim one more char */
    if ((pcc = (char *) mmalloc(sizeof(char))) == NULL)
        return -1;

    /* claim the last char */
    if ((pccc = (char *) mmalloc(sizeof(char))) == NULL)
        return -1;


    /* free and visualize */
    printf("\n");
    mfree(pccc);
    /*      bugged on purpose to test free(NULL) */
    mfree(pccc);
    visualize("free(the last char)");

    mfree(pdd);
    visualize("free(lot of long's)");

    mfree(ps);
    visualize("free(string)");

    mfree(pd);
    visualize("free(less long's)");

    mfree(pc);
    visualize("free(first char)");

    mfree(pcc);
    visualize("free(second char)");


    /* check memory condition */
    size_t freemem = getfreem();
    printf("\n");
    printf("--- Memory claimed  : %ld chunks (%ld bytes)\n",
                totmem, totmem * sizeof(Header));
    printf("    Free memory now : %ld chunks (%ld bytes)\n",
                freemem, freemem * sizeof(Header));
    if (freemem == totmem)
        printf("    No memory leaks detected.\n");
    else
        printf("    (!) Leaking memory: %ld chunks (%ld bytes).\n",
                    (totmem - freemem), (totmem - freemem) * sizeof(Header));

    printf("// Done.\n\n");
    return 0;
}


/* visualize: print the free list (educational purpose) */
void visualize(const char* msg)
{
    Header *tmp;

    printf("--- Free list after \"%s\":\n", msg);

    if (freep == NULL) {                   /* does not exist */
        printf("\tList does not exist\n\n");
        return;
    }

    if  (freep == freep->s.ptr) {          /* self-pointing list = empty */
        printf("\tList is empty\n\n");
        return;
    }

    printf("  ptr: %10p size: %-3lu -->  ", (void *) freep, freep->s.size);

    tmp = freep;                           /* find the start of the list */
    while (tmp->s.ptr > freep) {           /* traverse the list */
        tmp = tmp->s.ptr;
        printf("ptr: %10p size: %-3lu -->  ", (void *) tmp, tmp->s.size);
    }
    printf("end\n\n");
}


/* calculate the total amount of available free memory */
size_t getfreem(void)
{
    if (freep == NULL)
        return 0;

    Header *tmp;
    tmp = freep;
    size_t res = tmp->s.size;

    while (tmp->s.ptr > tmp) {
        tmp = tmp->s.ptr;
        res += tmp->s.size;
    }

    return res;
}


/* mmalloc: general-purpose storage allocator */
void *mmalloc(size_t nbytes)
{
    Header *p, *prevp;
    size_t nunits;

    /* smallest count of Header-sized memory chunks */
    /*  (+1 additional chunk for the Header itself) needed to hold nbytes */
    nunits = (nbytes + sizeof(Header) - 1) / sizeof(Header) + 1;

    /* too much memory requested? */
    if (((nunits + totmem + getfreem())*sizeof(Header)) > MAXMEM) {
        printf("Memory limit overflow!\n");
        return NULL;
    }

    if ((prevp = freep) == NULL) {          /* no free list yet */
        /* set the list to point to itself */
        base.s.ptr = freep = prevp = &base;
        base.s.size = 0;
    }

    /* traverse the circular list */
    for (p = prevp->s.ptr; ; prevp = p, p = p->s.ptr) {

        if (p->s.size >= nunits) {          /* big enough */
            if (p->s.size == nunits)        /* exactly */
                prevp->s.ptr = p->s.ptr;
            else {                          /* allocate tail end */
                /* adjust the size */
                p->s.size -= nunits;
                /* find the address to return */
                p += p->s.size;
                p->s.size = nunits;
            }
            freep = prevp;
            return (void *)(p+1);
        }

        /* back where we started and nothing found - we need to allocate */
        if (p == freep)                     /* wrapped around free list */
            if ((p = morecore(nunits)) == NULL)
                return NULL;                /* none left */
    }
}


/* morecore: ask system for more memory */
/*      nu: count of Header-chunks needed */
static Header *morecore(size_t nu)
{
    char *cp;
    Header *up;

    /* get at least NALLOC Header-chunks from the OS */
    if (nu < NALLOC)
        nu = NALLOC;

    cp = (char *) sbrk(nu * sizeof(Header));

    if (cp == (char *) -1)                  /* no space at all */
        return NULL;

    printf("... (sbrk) claimed %ld chunks.\n", nu);
    totmem += nu;                           /* keep track of allocated memory */

    up = (Header *) cp;
    up->s.size = nu;

    /* add the free space to the circular list */
    void *n = (void *)(up+1);
    mfree(n);

    return freep;
}


/* mfree: put block ap in free list */
void _mfree(void **ap)
{
    if (*ap == NULL)
        return;

    Header *bp, *p;
    bp = (Header *)*ap - 1;                 /* point to block header */

    if (bp->s.size == 0 || bp->s.size > totmem) {
        printf("_mfree: impossible value for size\n");
        return;
    }

    /* the free space is only marked as free, but 'ap' still points to it */
    /* to avoid reusing this address and corrupt our structure set it to '\0' */
    *ap = NULL;

    /* look where to insert the free space */

    /* (bp > p && bp < p->s.ptr)    => between two nodes */
    /* (p > p->s.ptr)               => this is the end of the list */
    /* (p == p->p.str)              => list is one element only */
    for (p = freep; !(bp > p && bp < p->s.ptr); p = p->s.ptr)
        if (p >= p->s.ptr && (bp > p || bp < p->s.ptr))
            /* freed block at start or end of arena */
            break;

    if (bp + bp->s.size == p->s.ptr) {      /* join to upper nbr */
    /* the new block fits perfect up to the upper neighbor */

        /* merging up: adjust the size */
        bp->s.size += p->s.ptr->s.size;
        /* merging up: point to the second next */
        bp->s.ptr = p->s.ptr->s.ptr;

    } else
        /* set the upper pointer */
        bp->s.ptr = p->s.ptr;

    if (p + p->s.size == bp) {              /* join to lower nbr */
    /* the new block fits perfect on top of the lower neighbor */

        /* merging below: adjust the size */
        p->s.size += bp->s.size;
        /* merging below: point to the next */
        p->s.ptr = bp->s.ptr;

    } else
        /* set the lower pointer */
        p->s.ptr = bp;

    /* reset the start of the free list */
    freep = p;
}

0

Author: Vladimir,
Warning: date(): Invalid date.timezone value 'Europe/Kyiv', we selected the timezone 'UTC' for now. in /var/www/agent_stack/data/www/doraprojects.net/template/agent.layouts/content.php on line 54
2018-01-01 08:49:08

score 56 · Accepted Answer

Ok, to co tu mamy to kawałek naprawdę źle napisanego kodu. To, co zrobię w tym poście, najlepiej opisać jako Archeologia oprogramowania.

Krok 1: napraw formatowanie.

Wcięcia i kompaktowy format nie robią nikomu nic dobrego. Należy wstawić różne spacje i puste wiersze. Komentarze mogą być napisane w bardziej czytelny sposób. Zacznę od naprawienia tego.

W tym samym czasie zmieniam styl klamry z K & R style-Uwaga że styl K & R brace jest akceptowalny, to tylko moje osobiste preferencje. Inną osobistą preferencją jest zapisywanie * dla wskaźników obok wskazywanego typu. Nie będę się tutaj spierał o (subiektywne) sprawy stylu.

Również definicja typu Header jest całkowicie nieczytelna, wymaga drastycznej poprawki.

I zauważyłem coś zupełnie niejasnego: wydaje się, że zadeklarowali prototyp funkcji wewnątrz funkcji. Header* morecore(unsigned);. To bardzo stary i bardzo kiepski styl, i nie jestem pewien, czy C w ogóle na to pozwala. Po prostu usuńmy tę linię, cokolwiek robi ta funkcja, będzie musiała być zdefiniowana gdzie indziej.

typedef long Align;                      /* for alignment to long boundary */

typedef union header                     /* block header */
{
  struct
  {
    union header *ptr;                   /* next block if on free list */
    unsigned size;                       /* size of this block */
  } s;

  Align x;                               /* force alignment of blocks */

} Header;


static Header base;                      /* empty list to get started */
static Header* freep = NULL;             /* start of free list */


/* malloc: general-purpose storage allocator */
void* malloc (unsigned nbytes)
{
  Header*   p;
  Header*   prevp;
  unsigned  nunits;

  nunits = (nbytes + sizeof(Header) - 1) / sizeof(header) + 1;

  if ((prevp = freep) == NULL)           /* no free list yet */
  {
    base.s.ptr = freeptr = prevptr = &base;
    base.s.size = 0;
  }

  for (p = prevp->s.ptr; ; prevp = p, p = p->s.ptr)
  {
    if (p->s.size >= nunits)             /* big enough */
    {
      if (p->s.size == nunits)           /* exactly */
        prevp->s.ptr = p->s.ptr;
      else                               /* allocate tail end */
      {
        p->s.size -= nunits;
        p += p->s.size;
        p->s.size = nunits
      }

      freep = prevp;
      return (void *)(p+1);
    }

    if (p == freep)                      /* wrapped around free list */
      if ((p = morecore(nunits)) == NULL)
        return NULL;                     /* none left */
  }
}

Ok teraz możemy rzeczywiście być w stanie odczytać kod.

Krok 2: pozbądź się powszechnie uznawanych złych praktyk.

Ten kod jest wypełniony rzeczami, które są obecnie uważane za złe praktyki. Należy je usunąć, ponieważ zagrażają bezpieczeństwu, czytelności i konserwacji kodu. Jeśli chcesz odniesienie do autorytetu głoszącego te same praktyki co ja, sprawdź powszechnie uznany standard kodowania MISRA-C .

Zauważyłem i usunąłem następujące złe praktyki:

1) samo wpisanie unsigned w kodzie może prowadzić do nieporozumień: czy była to literówka programisty, czy też intencja napisania unsigned int? Powinniśmy zastąpić wszystkie unsigned unsigned int. Ale kiedy to robimy, znajdujemy, że jest on używany w tym kontekście, aby podać rozmiar różnych danych binarnych. The correct typ używany w takich sprawach to typ standardowy C size_t. Jest to w zasadzie tylko niepodpisana liczba całkowita, ale gwarantowana jest "wystarczająco duża" dla danej platformy. Operator sizeof zwraca wynik typu {[10] } i jeśli przyjrzymy się definicji prawdziwego malloca w standardzie C, to jest to void *malloc(size_t size);. Więc size_t jest najbardziej poprawnym typem do użycia.

2) używanie tej samej nazwy dla naszej własnej funkcji malloc jako tej, która znajduje się w stdlib jest złym pomysłem.h. Czy musimy uwzględnić stdlib.H, będzie bałagan. Z reguły nigdy nie używaj nazw identyfikatorów funkcji biblioteki standardowej C we własnym kodzie. Zmienię nazwę na kr_malloc.

3) kod nadużywa faktu, że wszystkie zmienne statyczne są gwarantowane, że zostaną zainicjowane na zero. Jest to dobrze zdefiniowane przez standard C, ale raczej subtelna reguła. Pozwala zainicjalizować wszystkie statyki jawnie, aby pokazać, że nie zapomnieliśmy init je przez przypadek.

4) przypisanie wewnątrz warunków jest niebezpieczne i trudne do odczytania. Należy tego unikać, jeśli to możliwe, ponieważ może to również prowadzić do błędów, takich jak klasyczny błąd = vs==.

5) Wiele zadań w tym samym wierszu jest trudne do odczytania, a także być może niebezpieczne, ze względu na kolejność oceny.

6) wiele deklaracji w tym samym wierszu jest trudne do odczytania i niebezpieczne, ponieważ może prowadzić do błędów podczas mieszania deklaracji danych i wskaźników. Zawsze deklaruj każdą zmienną w swoim własnym wierszu.

7) Zawsze używa szelek po każdym oświadczeniu. Nie robienie tego doprowadzi do błędów błędy.

8) nigdy nie wpisuj cast od określonego typu wskaźnika do void*. Jest niepotrzebny w C i może ukrywać błędy, które w przeciwnym razie kompilator by wykrył.

9) Unikaj używania wielu zwrotów wewnątrz funkcji. Czasami prowadzą do wyraźniejszego kodu, ale w większości przypadków prowadzą do spaghetti. W obecnym stanie kodu nie możemy tego zmienić bez przepisania pętli, więc naprawię to później.

10) Zachowaj proste pętle. Powinny zawierać jedną instrukcję init, jeden warunek pętli i jedną iterację, nic więcej. To dla pętli, z operatorem przecinka i w ogóle, jest bardzo niejasne. Ponownie dostrzegamy potrzebę przepisania tej pętli na coś zdrowego. Zrobię to teraz, ale na razie mamy:

typedef long Align;                      /* for alignment to long boundary */

typedef union header                     /* block header */
{
  struct
  {
    union header *ptr;                   /* next block if on free list */
    size_t size;                         /* size of this block */
  } s;

  Align x;                               /* force alignment of blocks */

} Header;


static Header base = {0};                /* empty list to get started */
static Header* freep = NULL;             /* start of free list */


/* malloc: general-purpose storage allocator */
void* kr_malloc (size_t nbytes)
{
  Header*  p;
  Header*  prevp;
  size_t   nunits;

  nunits = (nbytes + sizeof(Header) - 1) / sizeof(header) + 1;

  prevp = freep;
  if (prevp == NULL)                     /* no free list yet */
  {
    base.s.ptr  = &base;
    freeptr     = &base;
    prevptr     = &base;
    base.s.size = 0;
  }

  for (p = prevp->s.ptr; ; prevp = p, p = p->s.ptr)
  {
    if (p->s.size >= nunits)             /* big enough */
    {
      if (p->s.size == nunits)           /* exactly */
      {
        prevp->s.ptr = p->s.ptr;
      }
      else                               /* allocate tail end */
      {
        p->s.size -= nunits;
        p += p->s.size;
        p->s.size = nunits
      }

      freep = prevp;
      return p+1;
    }

    if (p == freep)                      /* wrapped around free list */
    {
      p = morecore(nunits);
      if (p == NULL)
      {
        return NULL;                     /* none left */
      }
    }
  } /* for */
}

Krok 3: przepisać niejasne pętli.

Z powodów wymienionych wcześniej. Widzimy, że ta pętla trwa w nieskończoność, kończy się zwracając z funkcji, albo gdy alokacja jest wykonywana, albo gdy nie ma już pamięci. Więc stwórzmy to jako warunek pętli i wyciągnijmy powrót do końca funkcji tam, gdzie powinna być. I pozbądźmy się tego brzydkiego operatora przecinka.

Wprowadzę dwie nowe zmienne: jedną zmienną wynikową do przechowywania wskaźnika wynikowego, a drugą do śledzenia, czy pętla powinna być kontynuowana, czy nie. Rozwalę umysły K & R używając typu bool, który jest częścią języka C od 1999 roku.

(mam nadzieję, że nie zmieniłem algorytmu tą zmianą, wierzę, że nie)

#include <stdbool.h>

typedef long Align;                      /* for alignment to long boundary */

typedef union header                     /* block header */
{
  struct
  {
    union header *ptr;                   /* next block if on free list */
    size_t size;                         /* size of this block */
  } s;

  Align x;                               /* force alignment of blocks */

} Header;


static Header base = {0};                /* empty list to get started */
static Header* freep = NULL;             /* start of free list */


/* malloc: general-purpose storage allocator */
void* kr_malloc (size_t nbytes)
{
  Header*  p;
  Header*  prevp;
  size_t   nunits;
  void*    result;
  bool     is_allocating;

  nunits = (nbytes + sizeof(Header) - 1) / sizeof(header) + 1;

  prevp = freep;
  if (prevp == NULL)                     /* no free list yet */
  {
    base.s.ptr  = &base;
    freeptr     = &base;
    prevptr     = &base;
    base.s.size = 0;
  }

  is_allocating = true;
  for (p = prevp->s.ptr; is_allocating; p = p->s.ptr)
  {
    if (p->s.size >= nunits)             /* big enough */
    {
      if (p->s.size == nunits)           /* exactly */
      {
        prevp->s.ptr = p->s.ptr;
      }
      else                               /* allocate tail end */
      {
        p->s.size -= nunits;
        p += p->s.size;
        p->s.size = nunits
      }

      freep = prevp;
      result = p+1;
      is_allocating = false;             /* we are done */
    }

    if (p == freep)                      /* wrapped around free list */
    {
      p = morecore(nunits);
      if (p == NULL)
      {
        result = NULL;                   /* none left */
        is_allocating = false;
      }
    }
    prevp = p;
  } /* for */

  return result;
}

Krok 4: make this crap skompilować.

Ponieważ jest to z K & R, jest wypełniona literówkami. sizeof(header) powinno być sizeof(Header). Brakuje półkoloni. Używają różnych nazw freep, prevp versus freeptr, prevptr, ale wyraźnie oznaczają tę samą zmienną. Uważam, że te ostatnie były właściwie lepszymi nazwami, więc użyjmy ich.

#include <stdbool.h>

typedef long Align;                      /* for alignment to long boundary */

typedef union header                     /* block header */
{
  struct
  {
    union header *ptr;                   /* next block if on free list */
    size_t size;                         /* size of this block */
  } s;

  Align x;                               /* force alignment of blocks */

} Header;


static Header base = {0};                /* empty list to get started */
static Header* freeptr = NULL;           /* start of free list */


/* malloc: general-purpose storage allocator */
void* kr_malloc (size_t nbytes)
{
  Header*  p;
  Header*  prevptr;
  size_t   nunits;
  void*    result;
  bool     is_allocating;

  nunits = (nbytes + sizeof(Header) - 1) / sizeof(Header) + 1;

  prevptr = freeptr;
  if (prevptr == NULL)                   /* no free list yet */
  {
    base.s.ptr  = &base;
    freeptr     = &base;
    prevptr     = &base;
    base.s.size = 0;
  }

  is_allocating = true;
  for (p = prevptr->s.ptr; is_allocating; p = p->s.ptr)
  {
    if (p->s.size >= nunits)             /* big enough */
    {
      if (p->s.size == nunits)           /* exactly */
      {
        prevptr->s.ptr = p->s.ptr;
      }
      else                               /* allocate tail end */
      {
        p->s.size -= nunits;
        p += p->s.size;
        p->s.size = nunits;
      }

      freeptr = prevptr;
      result = p+1;
      is_allocating = false;             /* we are done */
    }

    if (p == freeptr)                    /* wrapped around free list */
    {
      p = morecore(nunits);
      if (p == NULL)
      {
        result = NULL;                   /* none left */
        is_allocating = false;
      }
    }
    prevptr = p;
  } /* for */

  return result;
}

A teraz mamy dość czytelny, łatwy do utrzymania kod, bez licznych niebezpiecznych praktyk, które nawet się skompilują! Więc teraz możemy zacząć zastanawiać się nad tym, co faktycznie robi kod.

Struktura "Header" jest, jak można się domyślić, deklaracją węzła na liście połączonej. Każdy taki węzeł zawiera wskaźnik do następnego. Nie do końca rozumiem funkcję morecore, ani "wrap-around", nigdy nie używałem tej funkcji, ani sbrk. Ale zakładam, że przydziela nagłówek jako określone w tej strukturze, a także część surowych danych podążających za tym nagłówkiem. Jeśli tak, to wyjaśnia, dlaczego nie ma rzeczywistego wskaźnika danych: zakłada się, że dane podążają za nagłówkiem, sąsiadująco w pamięci. Więc dla każdego węzła otrzymujemy nagłówek i otrzymujemy fragment surowych danych po nagłówku.

Iteracja sama w sobie jest dość prosta, przechodzą przez pojedynczą listę, jeden węzeł na raz.

Na końcu pętli ustawiają wskaźnik na punkt pierwszy koniec "kawałka", a następnie przechowywać go w zmiennej statycznej, tak, że program będzie pamiętać, gdzie wcześniej alokowane pamięci, następnym razem funkcja jest wywoływana.

Używają sztuczki, aby ich nagłówek wylądował na wyrównanym adresie pamięci: przechowują wszystkie informacje o napowietrznych w Unii razem ze zmienną na tyle dużą, aby odpowiadała wymaganiom wyrównania platformy. Jeśli więc rozmiar " PTR "plus Rozmiar" size " są zbyt małe, aby podać dokładne wyrównanie, to Unia gwarantuje, że zostaną przydzielone co najmniej sizeof(Align) bajty. Uważam, że ta cała sztuczka jest dzisiaj przestarzała, ponieważ standard C nakazuje automatyczne wypełnianie struct/union padding.