mirror of
https://github.com/containers/podman.git
synced 2025-12-05 21:32:22 +08:00
After a reboot, when we refresh Podman's state, we retrieved the lock from the fresh SHM instance, but we did not mark it as allocated to prevent it being handed out to other containers and pods. Provide a method for marking locks as in-use, and use it when we refresh Podman state after a reboot. Fixes #2900 Signed-off-by: Matthew Heon <matthew.heon@pm.me>
540 lines
14 KiB
C
540 lines
14 KiB
C
#include <errno.h>
|
|
#include <fcntl.h>
|
|
#include <pthread.h>
|
|
#include <stdbool.h>
|
|
#include <stdint.h>
|
|
#include <stdlib.h>
|
|
#include <sys/mman.h>
|
|
#include <sys/stat.h>
|
|
#include <sys/types.h>
|
|
#include <unistd.h>
|
|
|
|
#include "shm_lock.h"
|
|
|
|
// Compute the size of the SHM struct
|
|
static size_t compute_shm_size(uint32_t num_bitmaps) {
|
|
return sizeof(shm_struct_t) + (num_bitmaps * sizeof(lock_group_t));
|
|
}
|
|
|
|
// Acquire the given mutex.
// The lock call is retried for as long as it reports EAGAIN.
// If the lock call reports EOWNERDEAD (the previous owner died while holding
// the mutex), the mutex is marked consistent so we can take it for ourselves.
// Returns 0 on success, or positive errno on failure.
static int take_mutex(pthread_mutex_t *mutex) {
  int lock_result;

  for (;;) {
    lock_result = pthread_mutex_lock(mutex);
    if (lock_result != EAGAIN) {
      break;
    }
  }

  switch (lock_result) {
  case 0:
    return 0;
  case EOWNERDEAD:
    // The previous owner died holding the mutex; mark it consistent.
    // If that fails, someone else may have gotten here first, but the mutex
    // could also be invalid - so report the failure instead of looping back
    // to try locking again.
    return pthread_mutex_consistent(mutex);
  default:
    return lock_result;
  }
}
|
|
|
|
// Unlock the given mutex, retrying for as long as the unlock call reports
// EAGAIN.
// Returns 0 on success, or positive errno on failure.
static int release_mutex(pthread_mutex_t *mutex) {
  int unlock_result = pthread_mutex_unlock(mutex);

  while (unlock_result == EAGAIN) {
    unlock_result = pthread_mutex_unlock(mutex);
  }

  return unlock_result;
}
|
|
|
|
// Set up an SHM segment holding locks for libpod.
|
|
// num_locks must not be 0.
|
|
// Path is the path to the SHM segment. It must begin with a single / and
|
|
// container no other / characters, and be at most 255 characters including
|
|
// terminating NULL byte.
|
|
// Returns a valid pointer on success or NULL on error.
|
|
// If an error occurs, negative ERRNO values will be written to error_code.
|
|
shm_struct_t *setup_lock_shm(char *path, uint32_t num_locks, int *error_code) {
|
|
int shm_fd, i, j, ret_code;
|
|
uint32_t num_bitmaps;
|
|
size_t shm_size;
|
|
shm_struct_t *shm;
|
|
pthread_mutexattr_t attr;
|
|
|
|
// If error_code doesn't point to anything, we can't reasonably return errors
|
|
// So fail immediately
|
|
if (error_code == NULL) {
|
|
return NULL;
|
|
}
|
|
|
|
// We need a nonzero number of locks
|
|
if (num_locks == 0) {
|
|
*error_code = -1 * EINVAL;
|
|
return NULL;
|
|
}
|
|
|
|
if (path == NULL) {
|
|
*error_code = -1 * EINVAL;
|
|
return NULL;
|
|
}
|
|
|
|
// Calculate the number of bitmaps required
|
|
num_bitmaps = num_locks / BITMAP_SIZE;
|
|
if (num_locks % BITMAP_SIZE != 0) {
|
|
// The actual number given is not an even multiple of our bitmap size
|
|
// So round up
|
|
num_bitmaps += 1;
|
|
}
|
|
|
|
// Calculate size of the shm segment
|
|
shm_size = compute_shm_size(num_bitmaps);
|
|
|
|
// Create a new SHM segment for us
|
|
shm_fd = shm_open(path, O_RDWR | O_CREAT | O_EXCL, 0600);
|
|
if (shm_fd < 0) {
|
|
*error_code = -1 * errno;
|
|
return NULL;
|
|
}
|
|
|
|
// Increase its size to what we need
|
|
ret_code = ftruncate(shm_fd, shm_size);
|
|
if (ret_code < 0) {
|
|
*error_code = -1 * errno;
|
|
goto CLEANUP_UNLINK;
|
|
}
|
|
|
|
// Map the shared memory in
|
|
shm = mmap(NULL, shm_size, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0);
|
|
if (shm == MAP_FAILED) {
|
|
*error_code = -1 * errno;
|
|
goto CLEANUP_UNLINK;
|
|
}
|
|
|
|
// We have successfully mapped the memory, now initialize the region
|
|
shm->magic = MAGIC;
|
|
shm->unused = 0;
|
|
shm->num_locks = num_bitmaps * BITMAP_SIZE;
|
|
shm->num_bitmaps = num_bitmaps;
|
|
|
|
// Create an initializer for our pthread mutexes
|
|
ret_code = pthread_mutexattr_init(&attr);
|
|
if (ret_code != 0) {
|
|
*error_code = -1 * ret_code;
|
|
goto CLEANUP_UNMAP;
|
|
}
|
|
|
|
// Set mutexes to pshared - multiprocess-safe
|
|
ret_code = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
|
|
if (ret_code != 0) {
|
|
*error_code = -1 * ret_code;
|
|
goto CLEANUP_FREEATTR;
|
|
}
|
|
|
|
// Set mutexes to robust - if a process dies while holding a mutex, we'll get
|
|
// a special error code on the next attempt to lock it.
|
|
// This should prevent panicing processes from leaving the state unusable.
|
|
ret_code = pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST);
|
|
if (ret_code != 0) {
|
|
*error_code = -1 * ret_code;
|
|
goto CLEANUP_FREEATTR;
|
|
}
|
|
|
|
// Initialize the mutex that protects the bitmaps using the mutex attributes
|
|
ret_code = pthread_mutex_init(&(shm->segment_lock), &attr);
|
|
if (ret_code != 0) {
|
|
*error_code = -1 * ret_code;
|
|
goto CLEANUP_FREEATTR;
|
|
}
|
|
|
|
// Initialize all bitmaps to 0 initially
|
|
// And initialize all semaphores they use
|
|
for (i = 0; i < num_bitmaps; i++) {
|
|
shm->locks[i].bitmap = 0;
|
|
for (j = 0; j < BITMAP_SIZE; j++) {
|
|
// Initialize each mutex
|
|
ret_code = pthread_mutex_init(&(shm->locks[i].locks[j]), &attr);
|
|
if (ret_code != 0) {
|
|
*error_code = -1 * ret_code;
|
|
goto CLEANUP_FREEATTR;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Close the file descriptor, we're done with it
|
|
// Ignore errors, it's ok if we leak a single FD and this should only run once
|
|
close(shm_fd);
|
|
|
|
// Destroy the pthread initializer attribute.
|
|
// Again, ignore errors, this will only run once and we might leak a tiny bit
|
|
// of memory at worst.
|
|
pthread_mutexattr_destroy(&attr);
|
|
|
|
return shm;
|
|
|
|
// Cleanup after an error
|
|
CLEANUP_FREEATTR:
|
|
pthread_mutexattr_destroy(&attr);
|
|
CLEANUP_UNMAP:
|
|
munmap(shm, shm_size);
|
|
CLEANUP_UNLINK:
|
|
close(shm_fd);
|
|
shm_unlink(path);
|
|
return NULL;
|
|
}
|
|
|
|
// Open an existing SHM segment holding libpod locks.
|
|
// num_locks is the number of locks that will be configured in the SHM segment.
|
|
// num_locks cannot be 0.
|
|
// Path is the path to the SHM segment. It must begin with a single / and
|
|
// container no other / characters, and be at most 255 characters including
|
|
// terminating NULL byte.
|
|
// Returns a valid pointer on success or NULL on error.
|
|
// If an error occurs, negative ERRNO values will be written to error_code.
|
|
// ERANGE is returned for a mismatch between num_locks and the number of locks
|
|
// available in the the SHM lock struct.
|
|
shm_struct_t *open_lock_shm(char *path, uint32_t num_locks, int *error_code) {
|
|
int shm_fd;
|
|
shm_struct_t *shm;
|
|
size_t shm_size;
|
|
uint32_t num_bitmaps;
|
|
|
|
if (error_code == NULL) {
|
|
return NULL;
|
|
}
|
|
|
|
// We need a nonzero number of locks
|
|
if (num_locks == 0) {
|
|
*error_code = -1 * EINVAL;
|
|
return NULL;
|
|
}
|
|
|
|
if (path == NULL) {
|
|
*error_code = -1 * EINVAL;
|
|
return NULL;
|
|
}
|
|
|
|
// Calculate the number of bitmaps required
|
|
num_bitmaps = num_locks / BITMAP_SIZE;
|
|
if (num_locks % BITMAP_SIZE != 0) {
|
|
num_bitmaps += 1;
|
|
}
|
|
|
|
// Calculate size of the shm segment
|
|
shm_size = compute_shm_size(num_bitmaps);
|
|
|
|
shm_fd = shm_open(path, O_RDWR, 0600);
|
|
if (shm_fd < 0) {
|
|
*error_code = -1 * errno;
|
|
return NULL;
|
|
}
|
|
|
|
// Map the shared memory in
|
|
shm = mmap(NULL, shm_size, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0);
|
|
if (shm == MAP_FAILED) {
|
|
*error_code = -1 * errno;
|
|
}
|
|
|
|
// Ignore errors, it's ok if we leak a single FD since this only runs once
|
|
close(shm_fd);
|
|
|
|
// Check if we successfully mmap'd
|
|
if (shm == MAP_FAILED) {
|
|
return NULL;
|
|
}
|
|
|
|
// Need to check the SHM to see if it's actually our locks
|
|
if (shm->magic != MAGIC) {
|
|
*error_code = -1 * EBADF;
|
|
goto CLEANUP;
|
|
}
|
|
if (shm->num_locks != (num_bitmaps * BITMAP_SIZE)) {
|
|
*error_code = -1 * ERANGE;
|
|
goto CLEANUP;
|
|
}
|
|
|
|
return shm;
|
|
|
|
CLEANUP:
|
|
munmap(shm, shm_size);
|
|
return NULL;
|
|
}
|
|
|
|
// Close an open SHM lock struct, unmapping the backing memory.
|
|
// The given shm_struct_t will be rendered unusable as a result.
|
|
// On success, 0 is returned. On failure, negative ERRNO values are returned.
|
|
int32_t close_lock_shm(shm_struct_t *shm) {
|
|
int ret_code;
|
|
size_t shm_size;
|
|
|
|
// We can't unmap null...
|
|
if (shm == NULL) {
|
|
return -1 * EINVAL;
|
|
}
|
|
|
|
shm_size = compute_shm_size(shm->num_bitmaps);
|
|
|
|
ret_code = munmap(shm, shm_size);
|
|
|
|
if (ret_code != 0) {
|
|
return -1 * errno;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
// Allocate the first available semaphore
|
|
// Returns a positive integer guaranteed to be less than UINT32_MAX on success,
|
|
// or negative errno values on failure
|
|
// On sucess, the returned integer is the number of the semaphore allocated
|
|
int64_t allocate_semaphore(shm_struct_t *shm) {
|
|
int ret_code, i;
|
|
bitmap_t test_map;
|
|
int64_t sem_number, num_within_bitmap;
|
|
|
|
if (shm == NULL) {
|
|
return -1 * EINVAL;
|
|
}
|
|
|
|
// Lock the semaphore controlling access to our shared memory
|
|
ret_code = take_mutex(&(shm->segment_lock));
|
|
if (ret_code != 0) {
|
|
return -1 * ret_code;
|
|
}
|
|
|
|
// Loop through our bitmaps to search for one that is not full
|
|
for (i = 0; i < shm->num_bitmaps; i++) {
|
|
if (shm->locks[i].bitmap != 0xFFFFFFFF) {
|
|
test_map = 0x1;
|
|
num_within_bitmap = 0;
|
|
while (test_map != 0) {
|
|
if ((test_map & shm->locks[i].bitmap) == 0) {
|
|
// Compute the number of the semaphore we are allocating
|
|
sem_number = (BITMAP_SIZE * i) + num_within_bitmap;
|
|
// OR in the bitmap
|
|
shm->locks[i].bitmap = shm->locks[i].bitmap | test_map;
|
|
|
|
// Clear the mutex
|
|
ret_code = release_mutex(&(shm->segment_lock));
|
|
if (ret_code != 0) {
|
|
return -1 * ret_code;
|
|
}
|
|
|
|
// Return the semaphore we've allocated
|
|
return sem_number;
|
|
}
|
|
test_map = test_map << 1;
|
|
num_within_bitmap++;
|
|
}
|
|
// We should never fall through this loop
|
|
// TODO maybe an assert() here to panic if we do?
|
|
}
|
|
}
|
|
|
|
// Clear the mutex
|
|
ret_code = release_mutex(&(shm->segment_lock));
|
|
if (ret_code != 0) {
|
|
return -1 * ret_code;
|
|
}
|
|
|
|
// All bitmaps are full
|
|
// We have no available semaphores, report allocation failure
|
|
return -1 * ENOSPC;
|
|
}
|
|
|
|
// Allocate the semaphore with the given ID.
|
|
// Returns an error if the semaphore with this ID does not exist, or has already
|
|
// been allocated.
|
|
// Returns 0 on success, or negative errno values on failure.
|
|
int32_t allocate_given_semaphore(shm_struct_t *shm, uint32_t sem_index) {
|
|
int bitmap_index, index_in_bitmap, ret_code;
|
|
bitmap_t test_map;
|
|
|
|
if (shm == NULL) {
|
|
return -1 * EINVAL;
|
|
}
|
|
|
|
// Check if the lock index is valid
|
|
if (sem_index >= shm->num_locks) {
|
|
return -1 * EINVAL;
|
|
}
|
|
|
|
bitmap_index = sem_index / BITMAP_SIZE;
|
|
index_in_bitmap = sem_index % BITMAP_SIZE;
|
|
|
|
// This should never happen if the sem_index test above succeeded, but better
|
|
// safe than sorry
|
|
if (bitmap_index >= shm->num_bitmaps) {
|
|
return -1 * EFAULT;
|
|
}
|
|
|
|
test_map = 0x1 << index_in_bitmap;
|
|
|
|
// Lock the mutex controlling access to our shared memory
|
|
ret_code = take_mutex(&(shm->segment_lock));
|
|
if (ret_code != 0) {
|
|
return -1 * ret_code;
|
|
}
|
|
|
|
// Check if the semaphore is allocated
|
|
if ((test_map & shm->locks[bitmap_index].bitmap) != 0) {
|
|
ret_code = release_mutex(&(shm->segment_lock));
|
|
if (ret_code != 0) {
|
|
return -1 * ret_code;
|
|
}
|
|
|
|
return -1 * EEXIST;
|
|
}
|
|
|
|
// The semaphore is not allocated, allocate it
|
|
shm->locks[bitmap_index].bitmap = shm->locks[bitmap_index].bitmap | test_map;
|
|
|
|
ret_code = release_mutex(&(shm->segment_lock));
|
|
if (ret_code != 0) {
|
|
return -1 * ret_code;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
// Deallocate a given semaphore
|
|
// Returns 0 on success, negative ERRNO values on failure
|
|
int32_t deallocate_semaphore(shm_struct_t *shm, uint32_t sem_index) {
|
|
bitmap_t test_map;
|
|
int bitmap_index, index_in_bitmap, ret_code, i;
|
|
|
|
if (shm == NULL) {
|
|
return -1 * EINVAL;
|
|
}
|
|
|
|
// Check if the lock index is valid
|
|
if (sem_index >= shm->num_locks) {
|
|
return -1 * EINVAL;
|
|
}
|
|
|
|
bitmap_index = sem_index / BITMAP_SIZE;
|
|
index_in_bitmap = sem_index % BITMAP_SIZE;
|
|
|
|
// This should never happen if the sem_index test above succeeded, but better
|
|
// safe than sorry
|
|
if (bitmap_index >= shm->num_bitmaps) {
|
|
return -1 * EFAULT;
|
|
}
|
|
|
|
test_map = 0x1 << index_in_bitmap;
|
|
|
|
// Lock the mutex controlling access to our shared memory
|
|
ret_code = take_mutex(&(shm->segment_lock));
|
|
if (ret_code != 0) {
|
|
return -1 * ret_code;
|
|
}
|
|
|
|
// Check if the semaphore is allocated
|
|
if ((test_map & shm->locks[bitmap_index].bitmap) == 0) {
|
|
ret_code = release_mutex(&(shm->segment_lock));
|
|
if (ret_code != 0) {
|
|
return -1 * ret_code;
|
|
}
|
|
|
|
return -1 * ENOENT;
|
|
}
|
|
|
|
// The semaphore is allocated, clear it
|
|
// Invert the bitmask we used to test to clear the bit
|
|
test_map = ~test_map;
|
|
shm->locks[bitmap_index].bitmap = shm->locks[bitmap_index].bitmap & test_map;
|
|
|
|
ret_code = release_mutex(&(shm->segment_lock));
|
|
if (ret_code != 0) {
|
|
return -1 * ret_code;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
// Deallocate all semaphores unconditionally.
|
|
// Returns negative ERRNO values.
|
|
int32_t deallocate_all_semaphores(shm_struct_t *shm) {
|
|
int ret_code;
|
|
uint i;
|
|
|
|
if (shm == NULL) {
|
|
return -1 * EINVAL;
|
|
}
|
|
|
|
// Lock the mutex controlling access to our shared memory
|
|
ret_code = take_mutex(&(shm->segment_lock));
|
|
if (ret_code != 0) {
|
|
return -1 * ret_code;
|
|
}
|
|
|
|
// Iterate through all bitmaps and reset to unused
|
|
for (i = 0; i < shm->num_bitmaps; i++) {
|
|
shm->locks[i].bitmap = 0;
|
|
}
|
|
|
|
// Unlock the allocation control mutex
|
|
ret_code = release_mutex(&(shm->segment_lock));
|
|
if (ret_code != 0) {
|
|
return -1 * ret_code;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
// Lock a given semaphore
|
|
// Does not check if the semaphore is allocated - this ensures that, even for
|
|
// removed containers, we can still successfully lock to check status (and
|
|
// subsequently realize they have been removed).
|
|
// Returns 0 on success, -1 on failure
|
|
int32_t lock_semaphore(shm_struct_t *shm, uint32_t sem_index) {
|
|
int bitmap_index, index_in_bitmap, ret_code;
|
|
|
|
if (shm == NULL) {
|
|
return -1 * EINVAL;
|
|
}
|
|
|
|
if (sem_index >= shm->num_locks) {
|
|
return -1 * EINVAL;
|
|
}
|
|
|
|
bitmap_index = sem_index / BITMAP_SIZE;
|
|
index_in_bitmap = sem_index % BITMAP_SIZE;
|
|
|
|
return -1 * take_mutex(&(shm->locks[bitmap_index].locks[index_in_bitmap]));
|
|
}
|
|
|
|
// Unlock a given semaphore
|
|
// Does not check if the semaphore is allocated - this ensures that, even for
|
|
// removed containers, we can still successfully lock to check status (and
|
|
// subsequently realize they have been removed).
|
|
// Returns 0 on success, -1 on failure
|
|
int32_t unlock_semaphore(shm_struct_t *shm, uint32_t sem_index) {
|
|
int bitmap_index, index_in_bitmap, ret_code;
|
|
|
|
if (shm == NULL) {
|
|
return -1 * EINVAL;
|
|
}
|
|
|
|
if (sem_index >= shm->num_locks) {
|
|
return -1 * EINVAL;
|
|
}
|
|
|
|
bitmap_index = sem_index / BITMAP_SIZE;
|
|
index_in_bitmap = sem_index % BITMAP_SIZE;
|
|
|
|
return -1 * release_mutex(&(shm->locks[bitmap_index].locks[index_in_bitmap]));
|
|
}
|