veil_shmem.c

Go to the documentation of this file.
00001 /**
00002  * @file   veil_shmem.c
00003  * \code
00004  *     Author:       Marc Munro
00005  *     Copyright (c) 2005 - 2011 Marc Munro
00006  *     License:      BSD
00007  *
00008  * \endcode
00009  * @brief  
00010  * Functions for dealing with veil shared memory.
00011  *
00012  * This provides dynamic memory allocation, like malloc, from chunks of
00013  * shared memory allocated from the Postgres shared memory pool.  In
00014  * order to be able to reset and reload shared memory structures while
00015  * other backends continue to use the existing structures, a shared
00016  * memory reset creates a new context, or switches to an existing one
00017  * that is no longer in use.  No more than two separate contexts will be
00018  * created.
00019  *
00020  * Each context of veil shared memory is associated with a shared hash,
00021  * which is used to store veil's shared variables.  A specially named
00022  * variable, VEIL_SHMEMCTL appears only in context0 and contains a
00023  * reference to chunk0, and the ShmemCtl structure.  From this structure
00024  * we can identify the current context, the initial chunks for each
00025  * active context, and whether a context switch is in progress. 
00026  * 
00027  * A context switch takes place in 3 steps:
00028  * -  preparation, in which we determine if a context switch is allowed,
00029  *    initialise the new context and record the fact that we are in the
00030  *    process of switching.  All subsequent operations in the current
00031  *    backend will work in the new context, while other backends will
00032  *    continue to use the original context
00033  * -  initialisation of the new context, variables, etc.  This is done
00034  *    by the user-space function veil_init().
00035  * -  switchover, when all other processes gain access to the newly
00036  *    initialised context.  They may continue to use the previous
00037  *    context for the duration of their current transactions.
00038  *
00039  * To access shared variable "x" in a new session, the following steps
00040  * are taken:
00041  *  - We access the hash "VEIL_SHARED1_nnn" (where nnn is the oid of our
00042  *    database).  This gives us a reference to the ShmemCtl structure.
00043  *    We record hash0 and shared_meminfo on the way.
00044  *  - We access ShmemCtl to identify the current hash and current
00045  *    context. 
00046  *  - We look up variable "x" in the current hash, and if we have to
00047  *    allocate space for it, allocate it from the current context.
00048  *
00049  * Note that we use a dynamically allocated LWLock, VeilLWLock, to protect
00050  * our shared control structures.
00051  * 
00052  */
00053 
00054 #include "postgres.h"
00055 #include "utils/hsearch.h"
00056 #include "storage/pg_shmem.h"
00057 #include "storage/shmem.h"
00058 #include "storage/lwlock.h"
00059 #include "storage/procarray.h"
00060 #include "access/xact.h"
00061 #include "access/transam.h"
00062 #include "miscadmin.h"
00063 #include "veil_version.h"
00064 #include "veil_shmem.h"
00065 #include "veil_funcs.h"
00066 
00067 /**
00068  * shared_meminfo provides access to the ShmemCtl structure allocated in
00069  * context 0.
00070  */
00071 static ShmemCtl *shared_meminfo = NULL;
00072 
00073 /**
00074  * Whether the current backend is in the process of switching contexts.
00075  * If so, it will be setting up the non-current context in readiness for
00076  * making it available to all other backends.
00077  */
00078 static bool      prepared_for_switch = false;
00079 
00080 /**
00081  * The LWLock that Veil will use for managing concurrent access to
00082  * shared memory.  It is initialised to a lock id that is distinct
00083  * from any tha twill be dynamically allocated.
00084  */
00085 static LWLockId  VeilLWLock = AddinShmemInitLock;
00086 
00087 /**
00088  * The LWLock to be used while initially setting up shared memory and 
00089  * allocating a veil database-specific LWLock.
00090  */
00091 static LWLockId  InitialLWLock = AddinShmemInitLock;
00092 
/** 
 * Return the index of the other context from the one supplied.  There
 * are only two contexts, 0 and 1, so any non-zero argument maps to 0
 * and zero maps to 1.
 *
 * The argument is parenthesised so that expressions with low-precedence
 * operators (conditional, assignment, comma) are evaluated as a whole
 * before being tested.
 * 
 * @param x the context for which we want the other one.
 * 
 * @return the opposite context to that of x.
 */
#define OTHER_CONTEXT(x)    ((x) ? 0 : 1)
00101 
/** 
 * Library load hook for Veil.  Postgres runs this when the Veil shared
 * library is loaded.
 *
 * It defines Veil's GUCs and then reserves, for every veil-using
 * database in the cluster, enough shared memory for two contexts plus
 * one LWLock.
 * 
 * If shared_preload_libraries is not defined, Veil may still be run but
 * it will steal shared memory from postgres, potentially exhausting it.
 */
void
_PG_init()
{
    int db_count;

    /* Define the Veil GUCs first; presumably the sizing calls below
     * read them. */
    veil_config_init();
    db_count = veil_dbs_in_cluster();

    /* Two shared memory contexts are needed per database. */
    RequestAddinShmemSpace(2 * veil_shmem_context_size() * db_count);

    /* One LWLock per database, assigned later by the backends. */
    RequestAddinLWLocks(db_count);
}
00125 
00126 /** 
00127  * Create/attach to the shared hash identified by hashname.  Return a
00128  * pointer to an HTAB that references the shared hash.  All locking is
00129  * handled by the caller.
00130  * 
00131  * @param hashname 
00132  * 
00133  * @return Pointer to HTAB referencing the shared hash.
00134  */
00135 static HTAB *
00136 create_shared_hash(const char *hashname)
00137 {
00138     HASHCTL  hashctl;
00139     HTAB    *result;
00140     char    *db_hashname;
00141     int      hash_elems = veil_shared_hash_elems();
00142 
00143     /* Add the current database oid into the hashname so that it is
00144      * distinct from the shared hash for other databases in the
00145      * cluster. */
00146     db_hashname = (char *) vl_malloc(HASH_KEYLEN);
00147     (void) snprintf(db_hashname, HASH_KEYLEN - 1, "%s_%u", 
00148                     hashname, MyDatabaseId);
00149     hashctl.keysize = HASH_KEYLEN;
00150     hashctl.entrysize = sizeof(VarEntry);
00151 
00152     result = ShmemInitHash(db_hashname, hash_elems,
00153                            hash_elems, &hashctl, HASH_ELEM);
00154     pfree(db_hashname);
00155     return result;
00156 }
00157 
00158 /** 
00159  * Return reference to the HTAB for the shared hash associated with
00160  * context 0.
00161  * 
00162  * @return Pointer to HTAB referencing shared hash for context 0.
00163  */
00164 static HTAB *
00165 get_hash0()
00166 {
00167     static HTAB *hash0 = NULL;
00168 
00169     if (!hash0) {
00170         hash0 = create_shared_hash("VEIL_SHARED1");
00171     }
00172     return hash0;
00173 }
00174 
00175 /** 
00176  * Return reference to the HTAB for the shared hash associated with
00177  * context 1.
00178  * 
00179  * @return Pointer to HTAB referencing shared hash for context 1.
00180  */
00181 static HTAB *
00182 get_hash1()
00183 {
00184     static HTAB *hash1 = NULL;
00185 
00186     if (!hash1) {
00187         hash1 = create_shared_hash("VEIL_SHARED2");
00188     }
00189 
00190     return hash1;
00191 }
00192 
00193 
00194 /** 
00195  * Allocate or attach to, a new chunk of shared memory for a named
00196  * memory context.
00197  * 
00198  * @param name The name
00199  * @param size The size of the shared memory chunk to be allocated.
00200  * @param p_found Pointer to boolean that will identify whether this
00201  * chunk has already been initialised.
00202  * 
00203  * @return Pointer to chunk of shared memory.
00204  */
00205 static MemContext *
00206 get_shmem_context(char   *name,
00207                   size_t  size,
00208                   bool   *p_found)
00209 {
00210     int         i;
00211     MemContext *context;
00212     char       *uniqname  = (char *) vl_malloc(strlen(name) + 16);
00213     int         max_dbs = veil_dbs_in_cluster();
00214 
00215     for (i = 0; i < max_dbs; i++) {
00216         (void) sprintf(uniqname, "%s_%d", name, i);
00217         context = ShmemInitStruct(uniqname, size, p_found);;
00218         if (!context) {
00219             ereport(ERROR,
00220                     (errcode(ERRCODE_INTERNAL_ERROR),
00221                      errmsg("veil: cannot allocate shared memory(1)")));
00222         }
00223 
00224         if (*p_found) {
00225             /* Already exists.  Check database id. */
00226             if (context->db_id == MyDatabaseId) {
00227                 /* This context is the one for the current database, 
00228                  * nothing else to do. */
00229                 return context;
00230             }
00231         }
00232         else {
00233             /* We Just allocated our first context */
00234             context->db_id = MyDatabaseId;
00235             context->next = sizeof(MemContext);
00236             context->limit = size;
00237             context->lwlock = VeilLWLock;
00238             return context;
00239         }
00240     }
00241 
00242     /* We reach this point if no existing contexts are allocated to our
00243      * database.  Now we check those existing contexts to see whether
00244      * they are still in use.  If not, we will redeploy them. */
00245 
00246     for (i = 0; i < max_dbs; i++) {
00247         (void) sprintf(uniqname, "%s_%d", name, i);
00248         context = ShmemInitStruct(uniqname, size, p_found);;
00249 
00250         if (!context) {
00251             ereport(ERROR,
00252                     (errcode(ERRCODE_INTERNAL_ERROR),
00253                      errmsg("veil: cannot allocate shared memory(2)")));
00254         }
00255 
00256         if (*p_found) {
00257             /* Is this context for a still existant database? */
00258             if (!vl_db_exists(context->db_id)) {
00259                 /* We can re-use this context. */
00260                 context->db_id = MyDatabaseId;
00261                 context->next = sizeof(MemContext);
00262                 context->limit = size;
00263 
00264                 *p_found = false;  /* Tell the caller that init is
00265                                     * required */
00266                 return context;
00267             }
00268         }
00269         else {
00270             /* We didn't find an unused context, so now we have created 
00271              * a new one. */
00272 
00273             context->db_id = MyDatabaseId;
00274             context->next = sizeof(MemContext);
00275             context->limit = size;
00276             return context;
00277         }
00278     }
00279     ereport(ERROR,
00280             (errcode(ERRCODE_INTERNAL_ERROR),
00281              errmsg("veil: no more shared memory contexts allowed")));
00282     return NULL;
00283 }
00284 
00285 /* Forward ref, required by next function. */
00286 static void shmalloc_init(void);
00287 
00288 /** 
00289  * Return the id (index) of the current context for this session 
00290  * 
00291  * @return The current context id
00292  */
00293 static int
00294 get_cur_context_id()
00295 {
00296     static bool initialised = false;
00297     int context;
00298 
00299     if (!initialised) {
00300         shmalloc_init();
00301         initialised = true;
00302     }
00303         
00304     context = shared_meminfo->current_context;
00305     if (prepared_for_switch) {
00306         context = OTHER_CONTEXT(context);
00307     }
00308     else {
00309         /* Check for the default context being for a later transaction
00310          * than current and, if so, use the other one. */
00311         if (TransactionIdPrecedes(GetCurrentTransactionId(), 
00312                                   shared_meminfo->xid[context]))
00313         {
00314             context = OTHER_CONTEXT(context);
00315         }
00316     }
00317 
00318     return context;
00319 }
00320 
00321 /** 
00322  * Return pointer to shared memory allocated for the current context.
00323  * 
00324  * @return The current context. 
00325  */
00326 static MemContext *
00327 get_cur_context()
00328 {
00329     int context;
00330     context = get_cur_context_id();
00331     return shared_meminfo->context[context];
00332 }
00333 
00334 /** 
00335  * Dynamically allocate a piece of shared memory from the current
00336  * context, doing no locking.
00337  * 
00338  * @param context The context in which we are operating
00339  * @param size The size of the requested piece of memory.
00340  * 
00341  * @return Pointer to dynamically allocated memory.
00342  */
00343 static void *
00344 do_vl_shmalloc(MemContext *context,
00345                size_t size)
00346 {
00347     void *result = NULL;
00348     size_t amount = (size_t) MAXALIGN(size);
00349 
00350     if ((amount + context->next) <= context->limit) {
00351         result = (void *) ((char *) context + context->next);
00352         context->next += amount;
00353     }
00354     else {
00355         ereport(ERROR,
00356                 (ERROR,
00357                  (errcode(ERRCODE_INTERNAL_ERROR),
00358                   errmsg("veil: out of shared memory"))));
00359     }
00360     return result;
00361 }
00362 
00363 /** 
00364  * Dynamically allocate a piece of shared memory from the current context. 
00365  * 
00366  * @param size The size of the requested piece of memory.
00367  * 
00368  * @return Pointer to dynamically allocated memory.
00369  */
00370 void *
00371 vl_shmalloc(size_t size)
00372 {
00373     MemContext *context;
00374     void       *result;
00375 
00376     context = get_cur_context();
00377 
00378     LWLockAcquire(VeilLWLock, LW_EXCLUSIVE);
00379     result = do_vl_shmalloc(context, size);
00380     LWLockRelease(VeilLWLock);
00381 
00382     return result;
00383 }
00384 
/** 
 * Free a piece of shared memory within the current context.  This is
 * currently a no-op: freeing of shared memory has not been
 * implemented, so the memory stays allocated.
 * 
 * @param mem Pointer to the memory to be freed (ignored).
 * 
 */
void
vl_free(void *mem)
{
    (void) mem;     /* Deliberately unused. */
}
00398 
00399 
00400 /** 
00401  * Attach to, creating and initialising as necessary, the shared memory
00402  * control structure.  Record this for the session in shared_meminfo.
00403  */
00404 static void
00405 shmalloc_init(void)
00406 {
00407     if (!shared_meminfo) {
00408         VarEntry   *var;
00409         MemContext *context0;
00410         MemContext *context1;
00411         bool        found = false;
00412         HTAB       *hash0;
00413         HTAB       *hash1;
00414         size_t      size;
00415 
00416         size = veil_shmem_context_size();
00417 
00418         LWLockAcquire(InitialLWLock, LW_EXCLUSIVE);
00419         context0 = get_shmem_context("VEIL_SHMEM0", size, &found);
00420 
00421         if (found && context0->memctl) {
00422             shared_meminfo = context0->memctl;
00423             VeilLWLock = shared_meminfo->veil_lwlock;
00424             /* By aquiring and releasing this lock, we ensure that Veil
00425              * shared memory has been fully initialised, by a process
00426              * following the else clause of this code path. */
00427             LWLockAcquire(VeilLWLock, LW_EXCLUSIVE);
00428             LWLockRelease(InitialLWLock);
00429             LWLockRelease(VeilLWLock);
00430         }
00431         else {
00432             /* Do minimum amount of initialisation while holding
00433              * the initial lock.  We don't want to do anything that
00434              * may cause other locks to be aquired as this could lead
00435              * to deadlock with other add-ins.  Instead, we aquire the
00436              * Veil-specific lock before finishing the initialisation. */
00437 
00438             shared_meminfo = do_vl_shmalloc(context0, sizeof(ShmemCtl));
00439 
00440             if (context0->lwlock != InitialLWLock) {
00441                 /* Re-use the LWLock previously allocated to this memory 
00442                  * context */
00443                 VeilLWLock = context0->lwlock;
00444             }
00445             else {
00446                 /* Allocate new LWLock for this new shared memory
00447                  * context */
00448                 VeilLWLock = LWLockAssign(); 
00449             }
00450             /* Record the lock id in context0 (for possible re-use if
00451              * the current database is dropped and a new veil-using
00452              * database created), and in the shared_meminfo struct */
00453             context0->lwlock = VeilLWLock;
00454             shared_meminfo->veil_lwlock = VeilLWLock;
00455             
00456             /* Exchange the initial lock for our Veil-specific one. */
00457             LWLockAcquire(VeilLWLock, LW_EXCLUSIVE);
00458             LWLockRelease(InitialLWLock);
00459     
00460             /* Now do the rest of the Veil shared memory initialisation */
00461 
00462             /* Set up the other memory context */
00463             context1 = get_shmem_context("VEIL_SHMEM1", size, &found);
00464             
00465             /* Record location of shmemctl structure in each context */
00466             context0->memctl = shared_meminfo;
00467             context1->memctl = shared_meminfo;
00468 
00469             /* Finish initialising the shmemctl structure */
00470             shared_meminfo->type = OBJ_SHMEMCTL;
00471             shared_meminfo->current_context = 0;
00472             shared_meminfo->total_allocated[0] = size;
00473             shared_meminfo->total_allocated[1] = size;
00474             shared_meminfo->switching = false;
00475             shared_meminfo->context[0] = context0;
00476             shared_meminfo->context[1] = context1;
00477             shared_meminfo->xid[0] = GetCurrentTransactionId();
00478             shared_meminfo->xid[1] = shared_meminfo->xid[0];
00479             shared_meminfo->initialised = true;
00480 
00481             /* Set up both shared hashes */
00482             hash0 = get_hash0();
00483             hash1 = get_hash1();
00484 
00485             /* Record the shmemctl structure in hash0 */
00486             var = (VarEntry *) hash_search(hash0, (void *) "VEIL_SHMEMCTL",
00487                                            HASH_ENTER, &found);
00488 
00489             var->obj = (Object *) shared_meminfo;
00490             var->shared = true;
00491 
00492             var = (VarEntry *) hash_search(hash0, (void *) "VEIL_SHMEMCTL",
00493                                            HASH_ENTER, &found);
00494 
00495             LWLockRelease(VeilLWLock);
00496         }
00497     }
00498 }
00499 
00500 /** 
00501  * Return the shared hash for the current context.
00502  * 
00503  * @return Pointer to the HTAB for the current context's shared hash.
00504  */
00505 HTAB *
00506 vl_get_shared_hash()
00507 {
00508     int context;
00509     HTAB *hash;
00510     static bool initialised = false;
00511 
00512     if (!initialised) {
00513         (void) get_cur_context();  /* Ensure shared memory is set up. */
00514         initialised = true;
00515     }
00516 
00517     context = get_cur_context_id();
00518 
00519     if (context == 0) {
00520         hash = get_hash0();
00521     }
00522     else {
00523         hash = get_hash1();
00524     }
00525     
00526     return hash;
00527 }
00528 
00529 /** 
00530  * Reset one of the shared hashes.  This is one of the final steps in a
00531  * context switch.
00532  * 
00533  * @return hash The shared hash that is to be reset.
00534  */
00535 static void
00536 clear_hash(HTAB *hash)
00537 {
00538     static HASH_SEQ_STATUS status;
00539     VarEntry *var;
00540 
00541     hash_seq_init(&status, hash);
00542     while ((var = hash_seq_search(&status))) {
00543         if (strncmp("VEIL_SHMEMCTL", var->key, strlen("VEIL_SHMEMCTL")) != 0) {
00544             (void) hash_search(hash, var->key, HASH_REMOVE, NULL);
00545         }
00546     }
00547 }
00548 
00549 /** 
00550  * Prepare for a switch to the alternate context.  Switching will
00551  * only be allowed if there are no transactions that may still be using
00552  * the context to which we are switching, and there is no other
00553  * process attempting the switch.
00554  * 
00555  * @return true if the switch preparation was successful.
00556  */
00557 bool
00558 vl_prepare_context_switch()
00559 {
00560     int   context_curidx;
00561     int   context_newidx;
00562     HTAB *hash0 = get_hash0(); /* We must not attempt to create hashes
00563                                 * on the fly below as they also acquire
00564                                 * the lock */
00565     HTAB *hash1 = get_hash1(); 
00566     TransactionId oldest_xid;
00567     MemContext *context;
00568 
00569     (void) get_cur_context();  /* Ensure shared memory is set up */
00570 
00571     LWLockAcquire(VeilLWLock, LW_EXCLUSIVE);
00572 
00573     if (shared_meminfo->switching) {
00574         /* Another process is performing the switch */
00575         LWLockRelease(VeilLWLock);
00576         return false;
00577     }
00578 
00579     shared_meminfo->switching = true;
00580 
00581     /* We have claimed the switch.  If we decide that we cannot proceed,
00582      * we will return it to its previous state. */
00583 
00584     context_curidx = shared_meminfo->current_context;
00585     context_newidx = OTHER_CONTEXT(context_curidx);
00586 
00587     /* In case the alternate context has been used before, we must
00588      * clear it. */
00589 
00590     oldest_xid = GetOldestXmin(false, true);
00591     if (TransactionIdPrecedes(oldest_xid, 
00592                               shared_meminfo->xid[context_curidx])) 
00593     {
00594         /* There is a transaction running that precedes the time of
00595          * the last context switch.  That transaction may still be
00596          * using the chunk to which we wish to switch.  We cannot
00597          * allow the switch. */
00598         shared_meminfo->switching = false;
00599         LWLockRelease(VeilLWLock);
00600         return false;
00601     }
00602     else {
00603         /* It looks like we can safely make the switch.  Reset the
00604          * new context, and make it the current context for this
00605          * session only. */
00606         context = shared_meminfo->context[context_newidx];
00607         context->next = sizeof(MemContext);
00608 
00609         /* If we are switching to context 0, reset the next field of
00610          * the first chunk to leave space for the ShmemCtl struct. */
00611         if (context_newidx == 0) {
00612             context->next += sizeof(ShmemCtl);
00613             clear_hash(hash0);
00614         }
00615         else {
00616             clear_hash(hash1);
00617         }
00618     }
00619 
00620     LWLockRelease(VeilLWLock);
00621     prepared_for_switch = true;
00622     return true;
00623 }
00624 
00625 /** 
00626  * Complete the context switch started by vl_prepare_context_switch().
00627  * Raise an ERROR if the context switch cannot be completed.
00628  * 
00629  * @return true if the context switch is successfully completed.
00630  */
00631 bool
00632 vl_complete_context_switch()
00633 {
00634     int  context_curidx;
00635     int  context_newidx;
00636 
00637     if (!prepared_for_switch) {
00638         ereport(ERROR,
00639                 (errcode(ERRCODE_INTERNAL_ERROR),
00640                  errmsg("failed to complete context switch"),
00641                  errdetail("Not prepared for switch - "
00642                            "invalid state for operation")));
00643     }
00644 
00645     LWLockAcquire(VeilLWLock, LW_EXCLUSIVE);
00646     context_curidx = shared_meminfo->current_context;
00647     context_newidx = OTHER_CONTEXT(context_curidx);
00648 
00649     if (!shared_meminfo->switching) {
00650         /* We do not claim to be switching.  We should. */
00651         LWLockRelease(VeilLWLock);
00652 
00653         ereport(ERROR,
00654                 (errcode(ERRCODE_INTERNAL_ERROR),
00655                  errmsg("failed to complete context switch"),
00656                  errdetail("Session does not have switching set to true- "
00657                            "invalid state for operation")));
00658     }
00659 
00660     shared_meminfo->switching = false;
00661     shared_meminfo->current_context = context_newidx;
00662     shared_meminfo->xid[context_newidx] = GetCurrentTransactionId();
00663     LWLockRelease(VeilLWLock);
00664     prepared_for_switch = false;
00665     return true;
00666 }
00667 
00668 /** 
00669  * In desparation, if we are unable to complete a context switch, we
00670  * should use this function.
00671  */
00672 void
00673 vl_force_context_switch()
00674 {
00675     int  context_curidx;
00676     int  context_newidx;
00677     MemContext *context;
00678     HTAB *hash0 = get_hash0();
00679     HTAB *hash1 = get_hash1();
00680 
00681     (void) get_cur_context();
00682 
00683     LWLockAcquire(VeilLWLock, LW_EXCLUSIVE);
00684 
00685     context_curidx = shared_meminfo->current_context;
00686     context_newidx = OTHER_CONTEXT(context_curidx);
00687 
00688     /* Clear the alternate context. */
00689 
00690     context = shared_meminfo->context[context_newidx];
00691     context->next = sizeof(MemContext);
00692     
00693     /* If we are switching to context 0, reset the next field of
00694      * the first chunk to leave space for the ShmemCtl struct. */
00695     if (context_newidx == 0) {
00696         context->next += sizeof(ShmemCtl);
00697         clear_hash(hash0);
00698     }
00699     else {
00700         clear_hash(hash1);
00701     }
00702     
00703     shared_meminfo->switching = false;
00704     shared_meminfo->current_context = context_newidx;
00705     shared_meminfo->xid[context_newidx] = GetCurrentTransactionId();
00706     shared_meminfo->xid[0] = GetCurrentTransactionId();
00707     LWLockRelease(VeilLWLock);
00708     prepared_for_switch = false;
00709 }
00710 

Generated on Mon Sep 12 15:26:45 2011 for Veil by  doxygen 1.5.6