poolalloc.c

Go to the documentation of this file.
00001 /* $Id: poolalloc.c 50717 2011-08-11 23:30:38Z crab $ */
00002 #ifndef DISABLE_POOL_ALLOC
00003 /*
00004    Copyright (C) 2008 by David White <dave@whitevine.net>
00005    Part of the Battle for Wesnoth Project http://www.wesnoth.org/
00006 
00007    This program is free software; you can redistribute it and/or modify
00008    it under the terms of the GNU General Public License version 2
00009    or at your option any later version.
00010    This program is distributed in the hope that it will be useful,
00011    but WITHOUT ANY WARRANTY.
00012 
00013    See the COPYING file for more details.
00014 */
00015 
00016 /*
00017 This file defines a custom allocator that is optimized for doing memory
00018 allocations for Wesnoth. Its primary consideration is space, though it
00019 should be pretty fast too.
00020 
00021 The largest consideration is meta-data: Wesnoth allocates many small chunks,
00022 and so we want to minimize per-chunk meta-data. Typical general-purpose
00023 allocators have a per-chunk overhead of one or two pointers. This allocator
00024 has no per-chunk overhead, just memory overhead of less than 2%.
00025 
00026 The allocator is designed to handle small chunks. We include dlmalloc's source,
00027 and allocations that are not considered small are simply punted to dlmalloc
00028 to allocate.
00029 
00030 Some basic terminology:
00031 
00032 - chunk: a single allocation, allocated with malloc.
00033 - block: a block of memory from which we allocate chunks. A block has a header
00034 and then its data section. A block should be a multiple of the page size.
00035 A given block is dedicated to allocating chunks of a specific size. All blocks
00036 are the same size (4096 bytes by default, which should be the minimum).
00037 - superblock: we allocate one huge block from which all blocks are allocated.
00038 */
00039 
00040 #include <assert.h>
00041 #include <errno.h>
00042 #include <inttypes.h>
00043 #include <pthread.h>
00044 #include <stdio.h>
00045 #include <stdlib.h>
00046 #include <string.h>
00047 
00048 void* dlmalloc(size_t size);
00049 void* dlcalloc(size_t count, size_t size);
00050 void* dlvalloc(size_t size);
00051 void* dlmemalign(size_t alignment, size_t size);
00052 void* dlrealloc(void* ptr, size_t size);
00053 void dlfree(void* ptr);
00054 
00055 #define BLOCK_SIZE (4096)
00056 
00057 #define MAX_CHUNK_SIZE (256)
00058 #define CHUNK_SIZE_STEP (sizeof(void*))
00059 #define NUM_POOLS ((MAX_CHUNK_SIZE/CHUNK_SIZE_STEP) + 1)
00060 
00061 // find the index of the pool that a chunk of size n will be allocated from.
00062 #define GET_POOL_INDEX(n) ((n)/CHUNK_SIZE_STEP)
00063 #define ROUNDUP_SIZE(n) (((n)%CHUNK_SIZE_STEP) ? ((n) + CHUNK_SIZE_STEP - ((n)%CHUNK_SIZE_STEP)) : (n))
00064 
00065 #define CUSTOM_MEMORY_SIZE (1024*1024*40)
00066 uint8_t* begin_superblock_range = NULL;
00067 uint8_t* begin_superblock = NULL;
00068 uint8_t* end_superblock = NULL;
00069 #define IS_OUR_PTR(ptr) ((uint8_t*)(ptr) >= begin_superblock_range && (uint8_t*)(ptr) < end_superblock)
00070 
00071 pthread_t main_thread;
00072 
00073 void init_custom_malloc()
00074 {
00075     main_thread = pthread_self();
00076 
00077     // allocate the memory -- allocate an extra block at the end, so that
00078     // if the address we get back isn't block-aligned, we can advance
00079     // the pointer until it is.
00080     void* alloc = dlmalloc(CUSTOM_MEMORY_SIZE + BLOCK_SIZE);
00081     assert(alloc);
00082     begin_superblock = (uint8_t*)alloc;
00083     while(((uintptr_t)begin_superblock)%BLOCK_SIZE) {
00084         ++begin_superblock;
00085     }
00086 
00087     end_superblock = begin_superblock + CUSTOM_MEMORY_SIZE;
00088     begin_superblock_range = begin_superblock;
00089 }
00090 
00091 typedef struct BlockHeader {
00092     uint32_t check_a;
00093     struct Block* next;
00094     struct Block* prev;
00095     uint8_t* uninit;
00096     void* free_list;
00097     uint32_t chunk_size;
00098     uint32_t allocated_chunks;
00099     uint32_t check_b;
00100 } BlockHeader;
00101 
00102 typedef struct Block {
00103     BlockHeader header;
00104     char data[BLOCK_SIZE - sizeof(BlockHeader)];
00105 } Block;
00106 
00107 #define BLOCK_EMPTY(b) ((b)->header.allocated_chunks == 0)
00108 #define BLOCK_FULL(b) ((b)->header.uninit == NULL && (b)->header.free_list == NULL)
00109 
00110 void* allocate_chunk_from_block(Block* b)
00111 {
00112     b->header.allocated_chunks++;
00113     if(b->header.uninit) {
00114         void* result = b->header.uninit;
00115         b->header.uninit += b->header.chunk_size;
00116 
00117         //check if we've run out of uninitialized elements.
00118         if(b->header.uninit + b->header.chunk_size > (uint8_t*)(b+1)) {
00119             b->header.uninit = NULL;
00120         }
00121 
00122         return result;
00123     }
00124 
00125     assert(b->header.free_list);
00126     void* result = b->header.free_list;
00127     b->header.free_list = *(void**)result;
00128     return result;
00129 }
00130 
00131 // inline causes linker errors in debug builds with gcc 4.3.2(-std=c99 and -O0)
00132 #ifdef DEBUG
00133 #define inline
00134 #endif
00135 inline Block* get_block_from_chunk(void* chunk)
00136 #ifdef DEBUG
00137 #undef inline
00138 #endif
00139 {
00140     int8_t* block_ptr = ((int8_t*)chunk) - ((uintptr_t)chunk)%BLOCK_SIZE;
00141     return (Block*)block_ptr;
00142 }
00143 
00144 Block* free_chunk_from_block(void* chunk)
00145 {
00146     Block* block = get_block_from_chunk(chunk);
00147     block->header.allocated_chunks--;
00148     *(void**)chunk = block->header.free_list;
00149     block->header.free_list = chunk;
00150 
00151     return block;
00152 }
00153 
00154 Block* format_block(Block* ptr, int chunk_size)
00155 {
00156     BlockHeader* block = &ptr->header;
00157 
00158     block->check_a = 0xFFFFFFFF;
00159     block->check_b = 0xFFFFFFFF;
00160     
00161     block->next = NULL;
00162     block->prev = NULL;
00163     block->uninit = (uint8_t*)(block+1);
00164     block->free_list = NULL;
00165     block->chunk_size = chunk_size;
00166     block->allocated_chunks = 0;
00167     return ptr;
00168 }
00169 
00170 Block* block_free_list = NULL;
00171 
00172 Block* allocate_new_block(uint32_t chunk_size)
00173 {
00174     if(block_free_list == NULL && begin_superblock >= end_superblock) {
00175         return NULL;
00176     }
00177 
00178     Block* block;
00179 
00180     if(block_free_list != NULL) {
00181         block = block_free_list;
00182         block_free_list = block->header.next;
00183     } else {
00184         block = (Block*)begin_superblock;
00185         begin_superblock += sizeof(Block);
00186     }
00187 
00188     return format_block(block, chunk_size);
00189 }
00190 
00191 void return_block_to_free_list(Block* block)
00192 {
00193     block->header.next = block_free_list;
00194     block_free_list = block;
00195 }
00196 
00197 Block* block_pools[NUM_POOLS];
00198 
00199 #define IS_BLOCK_ORPHAN(block) ((block)->header.next == NULL && (block)->header.prev == NULL && block_pools[GET_POOL_INDEX(block->header.chunk_size)] != (block))
00200 
00201 void add_block_to_pool(Block* block)
00202 {
00203     assert(block->header.chunk_size > 0 && block->header.chunk_size <= MAX_CHUNK_SIZE);
00204     Block** target = &block_pools[GET_POOL_INDEX(block->header.chunk_size)];
00205     block->header.next = *target;
00206     block->header.prev = NULL;
00207     if(*target) {
00208         (*target)->header.prev = block;
00209     }
00210     *target = block;
00211 }
00212 
00213 void make_block_orphan(Block* block)
00214 {
00215     BlockHeader* header = &block->header;
00216     if(block_pools[GET_POOL_INDEX(header->chunk_size)] == block) {
00217         block_pools[GET_POOL_INDEX(header->chunk_size)] = header->next;
00218     }
00219 
00220     if(header->prev) {
00221         header->prev->header.next = header->next;
00222     }
00223 
00224     if(header->next) {
00225         header->next->header.prev = header->prev;
00226     }
00227 
00228     header->prev = NULL;
00229     header->next = NULL;
00230 }
00231 
00232 // A list of the chunks that were allocated in the main thread, but free()
00233 // was called in another thread. We can't deallocate them from another thread,
00234 // so we put them in this array. The main thread will free all these chunks,
00235 // whenever it can't immediately allocate memory.
00236 void** free_chunks;
00237 size_t nfree_chunks, capacity_free_chunks;
00238 pthread_mutex_t free_chunks_mutex = PTHREAD_MUTEX_INITIALIZER;
00239 
00240 //mutex to protect all calls to dlmalloc.
00241 pthread_mutex_t dlmalloc_mutex = PTHREAD_MUTEX_INITIALIZER;
00242 
00243 void free_memory(void* ptr);
00244 
00245 void collect_memory_from_other_threads()
00246 {
00247     pthread_mutex_lock(&free_chunks_mutex);
00248     int n;
00249     for(n = 0; n != nfree_chunks; ++n) {
00250         free_memory(free_chunks[n]);
00251     }
00252 
00253     nfree_chunks = 0;
00254     pthread_mutex_unlock(&free_chunks_mutex);
00255 }
00256 
00257 void free_memory_from_other_thread(void* ptr)
00258 {
00259     pthread_mutex_lock(&free_chunks_mutex);
00260     
00261     if(nfree_chunks == capacity_free_chunks) {
00262         capacity_free_chunks *= 2;
00263         if(capacity_free_chunks < 16) {
00264             capacity_free_chunks = 16;
00265         }
00266 
00267         pthread_mutex_lock(&dlmalloc_mutex);
00268         void** new_free_chunks = (void**)dlrealloc(free_chunks, sizeof(void*)*capacity_free_chunks);
00269         pthread_mutex_unlock(&dlmalloc_mutex);
00270         if(!new_free_chunks) {
00271             pthread_mutex_unlock(&free_chunks_mutex);
00272             fprintf(stderr, "DLREALLOC FAILED!\n");
00273             return;
00274         }
00275 
00276         free_chunks = new_free_chunks;
00277     }
00278 
00279     free_chunks[nfree_chunks++] = ptr;
00280     pthread_mutex_unlock(&free_chunks_mutex);
00281 }
00282 
00283 Block* get_block(uint32_t chunk_size)
00284 {
00285     const int index = GET_POOL_INDEX(chunk_size);
00286     assert(index >= 0 && index < sizeof(block_pools)/sizeof(*block_pools));
00287     if(block_pools[index]) {
00288         return block_pools[index];
00289     }
00290 
00291     // free memory from other threads and then try again. This requires a mutex
00292     // lock, but this code should be rarely reached.
00293     collect_memory_from_other_threads();
00294 
00295     if(block_pools[index]) {
00296         return block_pools[index];
00297     }
00298 
00299     Block* block = allocate_new_block(chunk_size);
00300     if(block == NULL) {
00301         return block;
00302     }
00303     add_block_to_pool(block);
00304     return block;
00305 }
00306 
00307 void* allocate_memory(int32_t size)
00308 {
00309     Block* block = get_block(size);
00310     if(block == NULL) {
00311         return NULL;
00312     }
00313 
00314     void* result = allocate_chunk_from_block(block);
00315     if(BLOCK_FULL(block)) {
00316         make_block_orphan(block);
00317     }
00318 
00319     return result;
00320 }
00321 
00322 void free_memory(void* ptr)
00323 {
00324     Block* block = free_chunk_from_block(ptr);
00325     if(IS_BLOCK_ORPHAN(block)) {
00326         add_block_to_pool(block);
00327     } else if(BLOCK_EMPTY(block)) {
00328         //since the block is empty, return it to the global free list of
00329         //blocks, so it can be moved to a different pool.
00330         make_block_orphan(block);
00331         return_block_to_free_list(block);
00332     }
00333 }
00334 
00335 void* malloc(size_t size)
00336 {
00337     if(pthread_equal(pthread_self(), main_thread) && size > 0 && size <= MAX_CHUNK_SIZE) {
00338         size = ROUNDUP_SIZE(size);
00339         void* result = allocate_memory(size);
00340         if(result != NULL) {
00341             return result;
00342         }
00343     }
00344 
00345     pthread_mutex_lock(&dlmalloc_mutex);
00346     void* result = dlmalloc(size);
00347     pthread_mutex_unlock(&dlmalloc_mutex);
00348     return result;
00349 }
00350 
00351 void* calloc(size_t count, size_t size)
00352 {
00353     pthread_mutex_lock(&dlmalloc_mutex);
00354     void* result = dlcalloc(count, size);
00355     pthread_mutex_unlock(&dlmalloc_mutex);
00356     return result;
00357 }
00358 
00359 void* valloc(size_t size)
00360 {
00361     pthread_mutex_lock(&dlmalloc_mutex);
00362     void* result = dlvalloc(size);
00363     pthread_mutex_unlock(&dlmalloc_mutex);
00364     return result;
00365 }
00366 
00367 void* memalign(size_t alignment, size_t size)
00368 {
00369     pthread_mutex_lock(&dlmalloc_mutex);
00370     void* result = dlmemalign(alignment, size);
00371     pthread_mutex_unlock(&dlmalloc_mutex);
00372     return result;
00373 }
00374 
00375 // Note this function might not be entirely POSIX compatible, but it seems to
00376 // work.
00377 int posix_memalign(void **memptr, size_t alignment, size_t size)
00378 {
00379     pthread_mutex_lock(&dlmalloc_mutex);
00380     *memptr = dlmemalign(alignment, size);
00381     pthread_mutex_unlock(&dlmalloc_mutex);
00382     return errno;
00383 }
00384 
00385 void* realloc(void* ptr, size_t size)
00386 {
00387     if(IS_OUR_PTR(ptr)) {
00388         if(size == 0) {
00389             free(ptr);
00390             return NULL;
00391         }
00392 
00393         void* new_memory = malloc(size);
00394         if(new_memory == NULL) {
00395             return NULL;
00396         }
00397 
00398         const int old_size = get_block_from_chunk(ptr)->header.chunk_size;
00399         const size_t nbytes = size < old_size ? size : old_size;
00400         memcpy(new_memory, ptr, nbytes);
00401         free(ptr);
00402         return new_memory;
00403     }
00404 
00405     pthread_mutex_lock(&dlmalloc_mutex);
00406     void* result = dlrealloc(ptr, size);
00407     pthread_mutex_unlock(&dlmalloc_mutex);
00408     return result;
00409 }
00410 
00411 void free(void* ptr)
00412 {
00413     if(IS_OUR_PTR(ptr)) {
00414         if(!pthread_equal(pthread_self(), main_thread)) {
00415             //this will queue up the free to be performed later in the
00416             //main thread when it wants more memory.
00417             free_memory_from_other_thread(ptr);
00418             return;
00419         }
00420 
00421         free_memory(ptr);
00422         return;
00423     }
00424     pthread_mutex_lock(&dlmalloc_mutex);
00425     dlfree(ptr);
00426     pthread_mutex_unlock(&dlmalloc_mutex);
00427 }
00428 
00429 #ifdef TEST_POOLED_ALLOC
00430 int main()
00431 {
00432     init_custom_malloc();
00433 
00434     void** items = NULL;
00435     int number_of_items = 0;
00436 
00437     while(number_of_items < 100000) {
00438         if(number_of_items) {
00439             int clear = rand()%number_of_items;
00440             while(--clear >= 0) {
00441                 int len = rand()%1000;
00442                 free(items[clear]);
00443                 items[clear] = malloc(len);
00444                 assert(items[clear]);
00445                 memset(items[clear], 10, len);
00446             }
00447         }
00448 
00449         int i = number_of_items;
00450         number_of_items += rand()%100;
00451         items = realloc(items, sizeof(*items)*number_of_items);
00452         while(i != number_of_items) {
00453             int len = rand()%1000;
00454             items[i] = malloc(len);
00455             assert(items[i]);
00456             memset(items[i], 10, len);
00457             ++i;
00458         }
00459     }
00460 
00461     while(number_of_items--) {
00462         free(items[number_of_items]);
00463     }
00464 
00465     return 0;
00466 }
00467 #endif
00468 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

Generated by doxygen 1.7.1 on Sun Feb 19 2012 01:02:19 for The Battle for Wesnoth
Gna! | Forum | Wiki | CIA | devdocs