Merge pull request #1235 from mheon/shm_locking

SHM locking for Libpod
This commit is contained in:
OpenShift Merge Robot
2019-01-04 10:41:05 -08:00
committed by GitHub
25 changed files with 2001 additions and 619 deletions

View File

@ -115,7 +115,7 @@ build_each_commit_task:
matrix:
image_name: "fedora-29-libpod-9afa57a9"
timeout_in: 20m
timeout_in: 30m
script:
- $SCRIPT_BASE/setup_environment.sh

View File

@ -178,7 +178,7 @@ localunit: test/goecho/goecho varlink_generate
ginkgo:
ginkgo -v -tags "$(BUILDTAGS)" -cover -flakeAttempts 3 -progress -trace -noColor test/e2e/.
localintegration: varlink_generate test-binaries clientintegration ginkgo
localintegration: varlink_generate test-binaries ginkgo
localsystem: .install.ginkgo .install.gomega
ginkgo -v -noColor test/system/

View File

@ -3,7 +3,6 @@ package libpod
import (
"bytes"
"encoding/json"
"path/filepath"
"runtime"
"strings"
@ -288,10 +287,9 @@ func (s *BoltState) getContainerFromDB(id []byte, ctr *Container, ctrsBkt *bolt.
}
// Get the lock
lockPath := filepath.Join(s.runtime.lockDir, string(id))
lock, err := storage.GetLockfile(lockPath)
lock, err := s.runtime.lockManager.RetrieveLock(ctr.config.LockID)
if err != nil {
return errors.Wrapf(err, "error retrieving lockfile for container %s", string(id))
return errors.Wrapf(err, "error retrieving lock for container %s", string(id))
}
ctr.lock = lock
@ -324,10 +322,9 @@ func (s *BoltState) getPodFromDB(id []byte, pod *Pod, podBkt *bolt.Bucket) error
}
// Get the lock
lockPath := filepath.Join(s.runtime.lockDir, string(id))
lock, err := storage.GetLockfile(lockPath)
lock, err := s.runtime.lockManager.RetrieveLock(pod.config.LockID)
if err != nil {
return errors.Wrapf(err, "error retrieving lockfile for pod %s", string(id))
return errors.Wrapf(err, "error retrieving lock for pod %s", string(id))
}
pod.lock = lock
@ -353,8 +350,7 @@ func (s *BoltState) getVolumeFromDB(name []byte, volume *Volume, volBkt *bolt.Bu
}
// Get the lock
lockPath := filepath.Join(s.runtime.lockDir, string(name))
lock, err := storage.GetLockfile(lockPath)
lock, err := s.runtime.lockManager.RetrieveLock(volume.config.LockID)
if err != nil {
return errors.Wrapf(err, "error retrieving lockfile for volume %s", string(name))
}

View File

@ -3,20 +3,19 @@ package libpod
import (
"encoding/json"
"net"
"path/filepath"
"reflect"
"strings"
"testing"
"time"
"github.com/containers/storage"
"github.com/containers/libpod/libpod/lock"
"github.com/cri-o/ocicni/pkg/ocicni"
"github.com/opencontainers/runtime-tools/generate"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func getTestContainer(id, name, locksDir string) (*Container, error) {
func getTestContainer(id, name string, manager lock.Manager) (*Container, error) {
ctr := &Container{
config: &Config{
ID: id,
@ -90,18 +89,18 @@ func getTestContainer(id, name, locksDir string) (*Container, error) {
ctr.config.Labels["test"] = "testing"
// Must make lockfile or container will error on being retrieved from DB
lockPath := filepath.Join(locksDir, id)
lock, err := storage.GetLockfile(lockPath)
// Allocate a lock for the container
lock, err := manager.AllocateLock()
if err != nil {
return nil, err
}
ctr.lock = lock
ctr.config.LockID = lock.ID()
return ctr, nil
}
func getTestPod(id, name, locksDir string) (*Pod, error) {
func getTestPod(id, name string, manager lock.Manager) (*Pod, error) {
pod := &Pod{
config: &PodConfig{
ID: id,
@ -115,38 +114,39 @@ func getTestPod(id, name, locksDir string) (*Pod, error) {
valid: true,
}
lockPath := filepath.Join(locksDir, id)
lock, err := storage.GetLockfile(lockPath)
// Allocate a lock for the pod
lock, err := manager.AllocateLock()
if err != nil {
return nil, err
}
pod.lock = lock
pod.config.LockID = lock.ID()
return pod, nil
}
func getTestCtrN(n, lockPath string) (*Container, error) {
return getTestContainer(strings.Repeat(n, 32), "test"+n, lockPath)
func getTestCtrN(n string, manager lock.Manager) (*Container, error) {
return getTestContainer(strings.Repeat(n, 32), "test"+n, manager)
}
func getTestCtr1(lockPath string) (*Container, error) {
return getTestCtrN("1", lockPath)
func getTestCtr1(manager lock.Manager) (*Container, error) {
return getTestCtrN("1", manager)
}
func getTestCtr2(lockPath string) (*Container, error) {
return getTestCtrN("2", lockPath)
func getTestCtr2(manager lock.Manager) (*Container, error) {
return getTestCtrN("2", manager)
}
func getTestPodN(n, lockPath string) (*Pod, error) {
return getTestPod(strings.Repeat(n, 32), "test"+n, lockPath)
func getTestPodN(n string, manager lock.Manager) (*Pod, error) {
return getTestPod(strings.Repeat(n, 32), "test"+n, manager)
}
func getTestPod1(lockPath string) (*Pod, error) {
return getTestPodN("1", lockPath)
func getTestPod1(manager lock.Manager) (*Pod, error) {
return getTestPodN("1", manager)
}
func getTestPod2(lockPath string) (*Pod, error) {
return getTestPodN("2", lockPath)
func getTestPod2(manager lock.Manager) (*Pod, error) {
return getTestPodN("2", manager)
}
// This horrible hack tests if containers are equal in a way that should handle
@ -174,6 +174,8 @@ func testContainersEqual(t *testing.T, a, b *Container, allowedEmpty bool) {
assert.Equal(t, a.valid, b.valid)
assert.Equal(t, a.lock.ID(), b.lock.ID())
aConfigJSON, err := json.Marshal(a.config)
assert.NoError(t, err)
err = json.Unmarshal(aConfigJSON, aConfig)
@ -223,6 +225,8 @@ func testPodsEqual(t *testing.T, a, b *Pod, allowedEmpty bool) {
assert.Equal(t, a.valid, b.valid)
assert.Equal(t, a.lock.ID(), b.lock.ID())
assert.EqualValues(t, a.config, b.config)
if allowedEmpty {

View File

@ -9,6 +9,7 @@ import (
"github.com/containernetworking/cni/pkg/types"
cnitypes "github.com/containernetworking/cni/pkg/types/current"
"github.com/containers/libpod/libpod/lock"
"github.com/containers/libpod/pkg/namespaces"
"github.com/containers/storage"
"github.com/cri-o/ocicni/pkg/ocicni"
@ -122,7 +123,7 @@ type Container struct {
batched bool
valid bool
lock storage.Locker
lock lock.Locker
runtime *Runtime
rootlessSlirpSyncR *os.File
@ -211,6 +212,8 @@ type Config struct {
Pod string `json:"pod,omitempty"`
// Namespace the container is in
Namespace string `json:"namespace,omitempty"`
// ID of this container's lock
LockID uint32 `json:"lockID"`
// TODO consider breaking these subsections up into smaller structs

View File

@ -1275,6 +1275,8 @@ func easyjson1dbef17bDecodeGithubComContainersLibpodLibpod2(in *jlexer.Lexer, ou
out.Pod = string(in.String())
case "namespace":
out.Namespace = string(in.String())
case "lockID":
out.LockID = uint32(in.Uint32())
case "idMappingsOptions":
easyjson1dbef17bDecodeGithubComContainersLibpodVendorGithubComContainersStorage(in, &out.IDMappings)
case "rootfsImageID":
@ -1778,6 +1780,16 @@ func easyjson1dbef17bEncodeGithubComContainersLibpodLibpod2(out *jwriter.Writer,
}
out.String(string(in.Namespace))
}
{
const prefix string = ",\"lockID\":"
if first {
first = false
out.RawString(prefix[1:])
} else {
out.RawString(prefix)
}
out.Uint32(uint32(in.LockID))
}
if true {
const prefix string = ",\"idMappingsOptions\":"
if first {

View File

@ -1,10 +1,9 @@
package libpod
import (
"io/ioutil"
"os"
"testing"
"github.com/containers/libpod/libpod/lock"
"github.com/stretchr/testify/assert"
)
@ -17,11 +16,12 @@ func TestBuildContainerGraphNoCtrsIsEmpty(t *testing.T) {
}
func TestBuildContainerGraphOneCtr(t *testing.T) {
tmpDir, err := ioutil.TempDir("", tmpDirPrefix)
assert.NoError(t, err)
defer os.RemoveAll(tmpDir)
manager, err := lock.NewInMemoryManager(16)
if err != nil {
t.Fatalf("Error setting up locks: %v", err)
}
ctr1, err := getTestCtr1(tmpDir)
ctr1, err := getTestCtr1(manager)
assert.NoError(t, err)
graph, err := buildContainerGraph([]*Container{ctr1})
@ -39,13 +39,14 @@ func TestBuildContainerGraphOneCtr(t *testing.T) {
}
func TestBuildContainerGraphTwoCtrNoEdge(t *testing.T) {
tmpDir, err := ioutil.TempDir("", tmpDirPrefix)
assert.NoError(t, err)
defer os.RemoveAll(tmpDir)
manager, err := lock.NewInMemoryManager(16)
if err != nil {
t.Fatalf("Error setting up locks: %v", err)
}
ctr1, err := getTestCtr1(tmpDir)
ctr1, err := getTestCtr1(manager)
assert.NoError(t, err)
ctr2, err := getTestCtr2(tmpDir)
ctr2, err := getTestCtr2(manager)
assert.NoError(t, err)
graph, err := buildContainerGraph([]*Container{ctr1, ctr2})
@ -64,13 +65,14 @@ func TestBuildContainerGraphTwoCtrNoEdge(t *testing.T) {
}
func TestBuildContainerGraphTwoCtrOneEdge(t *testing.T) {
tmpDir, err := ioutil.TempDir("", tmpDirPrefix)
assert.NoError(t, err)
defer os.RemoveAll(tmpDir)
manager, err := lock.NewInMemoryManager(16)
if err != nil {
t.Fatalf("Error setting up locks: %v", err)
}
ctr1, err := getTestCtr1(tmpDir)
ctr1, err := getTestCtr1(manager)
assert.NoError(t, err)
ctr2, err := getTestCtr2(tmpDir)
ctr2, err := getTestCtr2(manager)
assert.NoError(t, err)
ctr2.config.UserNsCtr = ctr1.config.ID
@ -85,13 +87,14 @@ func TestBuildContainerGraphTwoCtrOneEdge(t *testing.T) {
}
func TestBuildContainerGraphTwoCtrCycle(t *testing.T) {
tmpDir, err := ioutil.TempDir("", tmpDirPrefix)
assert.NoError(t, err)
defer os.RemoveAll(tmpDir)
manager, err := lock.NewInMemoryManager(16)
if err != nil {
t.Fatalf("Error setting up locks: %v", err)
}
ctr1, err := getTestCtr1(tmpDir)
ctr1, err := getTestCtr1(manager)
assert.NoError(t, err)
ctr2, err := getTestCtr2(tmpDir)
ctr2, err := getTestCtr2(manager)
assert.NoError(t, err)
ctr2.config.UserNsCtr = ctr1.config.ID
ctr1.config.NetNsCtr = ctr2.config.ID
@ -101,15 +104,16 @@ func TestBuildContainerGraphTwoCtrCycle(t *testing.T) {
}
func TestBuildContainerGraphThreeCtrNoEdges(t *testing.T) {
tmpDir, err := ioutil.TempDir("", tmpDirPrefix)
assert.NoError(t, err)
defer os.RemoveAll(tmpDir)
manager, err := lock.NewInMemoryManager(16)
if err != nil {
t.Fatalf("Error setting up locks: %v", err)
}
ctr1, err := getTestCtr1(tmpDir)
ctr1, err := getTestCtr1(manager)
assert.NoError(t, err)
ctr2, err := getTestCtr2(tmpDir)
ctr2, err := getTestCtr2(manager)
assert.NoError(t, err)
ctr3, err := getTestCtrN("3", tmpDir)
ctr3, err := getTestCtrN("3", manager)
assert.NoError(t, err)
graph, err := buildContainerGraph([]*Container{ctr1, ctr2, ctr3})
@ -132,15 +136,16 @@ func TestBuildContainerGraphThreeCtrNoEdges(t *testing.T) {
}
func TestBuildContainerGraphThreeContainersTwoInCycle(t *testing.T) {
tmpDir, err := ioutil.TempDir("", tmpDirPrefix)
assert.NoError(t, err)
defer os.RemoveAll(tmpDir)
manager, err := lock.NewInMemoryManager(16)
if err != nil {
t.Fatalf("Error setting up locks: %v", err)
}
ctr1, err := getTestCtr1(tmpDir)
ctr1, err := getTestCtr1(manager)
assert.NoError(t, err)
ctr2, err := getTestCtr2(tmpDir)
ctr2, err := getTestCtr2(manager)
assert.NoError(t, err)
ctr3, err := getTestCtrN("3", tmpDir)
ctr3, err := getTestCtrN("3", manager)
assert.NoError(t, err)
ctr1.config.UserNsCtr = ctr2.config.ID
ctr2.config.IPCNsCtr = ctr1.config.ID
@ -150,15 +155,16 @@ func TestBuildContainerGraphThreeContainersTwoInCycle(t *testing.T) {
}
func TestBuildContainerGraphThreeContainersCycle(t *testing.T) {
tmpDir, err := ioutil.TempDir("", tmpDirPrefix)
assert.NoError(t, err)
defer os.RemoveAll(tmpDir)
manager, err := lock.NewInMemoryManager(16)
if err != nil {
t.Fatalf("Error setting up locks: %v", err)
}
ctr1, err := getTestCtr1(tmpDir)
ctr1, err := getTestCtr1(manager)
assert.NoError(t, err)
ctr2, err := getTestCtr2(tmpDir)
ctr2, err := getTestCtr2(manager)
assert.NoError(t, err)
ctr3, err := getTestCtrN("3", tmpDir)
ctr3, err := getTestCtrN("3", manager)
assert.NoError(t, err)
ctr1.config.UserNsCtr = ctr2.config.ID
ctr2.config.IPCNsCtr = ctr3.config.ID
@ -169,15 +175,16 @@ func TestBuildContainerGraphThreeContainersCycle(t *testing.T) {
}
func TestBuildContainerGraphThreeContainersNoCycle(t *testing.T) {
tmpDir, err := ioutil.TempDir("", tmpDirPrefix)
assert.NoError(t, err)
defer os.RemoveAll(tmpDir)
manager, err := lock.NewInMemoryManager(16)
if err != nil {
t.Fatalf("Error setting up locks: %v", err)
}
ctr1, err := getTestCtr1(tmpDir)
ctr1, err := getTestCtr1(manager)
assert.NoError(t, err)
ctr2, err := getTestCtr2(tmpDir)
ctr2, err := getTestCtr2(manager)
assert.NoError(t, err)
ctr3, err := getTestCtrN("3", tmpDir)
ctr3, err := getTestCtrN("3", manager)
assert.NoError(t, err)
ctr1.config.UserNsCtr = ctr2.config.ID
ctr1.config.NetNsCtr = ctr3.config.ID
@ -194,17 +201,18 @@ func TestBuildContainerGraphThreeContainersNoCycle(t *testing.T) {
}
func TestBuildContainerGraphFourContainersNoEdges(t *testing.T) {
tmpDir, err := ioutil.TempDir("", tmpDirPrefix)
assert.NoError(t, err)
defer os.RemoveAll(tmpDir)
manager, err := lock.NewInMemoryManager(16)
if err != nil {
t.Fatalf("Error setting up locks: %v", err)
}
ctr1, err := getTestCtr1(tmpDir)
ctr1, err := getTestCtr1(manager)
assert.NoError(t, err)
ctr2, err := getTestCtr2(tmpDir)
ctr2, err := getTestCtr2(manager)
assert.NoError(t, err)
ctr3, err := getTestCtrN("3", tmpDir)
ctr3, err := getTestCtrN("3", manager)
assert.NoError(t, err)
ctr4, err := getTestCtrN("4", tmpDir)
ctr4, err := getTestCtrN("4", manager)
assert.NoError(t, err)
graph, err := buildContainerGraph([]*Container{ctr1, ctr2, ctr3, ctr4})
@ -231,18 +239,20 @@ func TestBuildContainerGraphFourContainersNoEdges(t *testing.T) {
}
func TestBuildContainerGraphFourContainersTwoInCycle(t *testing.T) {
tmpDir, err := ioutil.TempDir("", tmpDirPrefix)
assert.NoError(t, err)
defer os.RemoveAll(tmpDir)
manager, err := lock.NewInMemoryManager(16)
if err != nil {
t.Fatalf("Error setting up locks: %v", err)
}
ctr1, err := getTestCtr1(tmpDir)
ctr1, err := getTestCtr1(manager)
assert.NoError(t, err)
ctr2, err := getTestCtr2(tmpDir)
ctr2, err := getTestCtr2(manager)
assert.NoError(t, err)
ctr3, err := getTestCtrN("3", tmpDir)
ctr3, err := getTestCtrN("3", manager)
assert.NoError(t, err)
ctr4, err := getTestCtrN("4", tmpDir)
ctr4, err := getTestCtrN("4", manager)
assert.NoError(t, err)
ctr1.config.IPCNsCtr = ctr2.config.ID
ctr2.config.UserNsCtr = ctr1.config.ID
@ -251,18 +261,20 @@ func TestBuildContainerGraphFourContainersTwoInCycle(t *testing.T) {
}
func TestBuildContainerGraphFourContainersAllInCycle(t *testing.T) {
tmpDir, err := ioutil.TempDir("", tmpDirPrefix)
assert.NoError(t, err)
defer os.RemoveAll(tmpDir)
manager, err := lock.NewInMemoryManager(16)
if err != nil {
t.Fatalf("Error setting up locks: %v", err)
}
ctr1, err := getTestCtr1(tmpDir)
ctr1, err := getTestCtr1(manager)
assert.NoError(t, err)
ctr2, err := getTestCtr2(tmpDir)
ctr2, err := getTestCtr2(manager)
assert.NoError(t, err)
ctr3, err := getTestCtrN("3", tmpDir)
ctr3, err := getTestCtrN("3", manager)
assert.NoError(t, err)
ctr4, err := getTestCtrN("4", tmpDir)
ctr4, err := getTestCtrN("4", manager)
assert.NoError(t, err)
ctr1.config.IPCNsCtr = ctr2.config.ID
ctr2.config.UserNsCtr = ctr3.config.ID
ctr3.config.NetNsCtr = ctr4.config.ID
@ -273,18 +285,20 @@ func TestBuildContainerGraphFourContainersAllInCycle(t *testing.T) {
}
func TestBuildContainerGraphFourContainersNoneInCycle(t *testing.T) {
tmpDir, err := ioutil.TempDir("", tmpDirPrefix)
assert.NoError(t, err)
defer os.RemoveAll(tmpDir)
manager, err := lock.NewInMemoryManager(16)
if err != nil {
t.Fatalf("Error setting up locks: %v", err)
}
ctr1, err := getTestCtr1(tmpDir)
ctr1, err := getTestCtr1(manager)
assert.NoError(t, err)
ctr2, err := getTestCtr2(tmpDir)
ctr2, err := getTestCtr2(manager)
assert.NoError(t, err)
ctr3, err := getTestCtrN("3", tmpDir)
ctr3, err := getTestCtrN("3", manager)
assert.NoError(t, err)
ctr4, err := getTestCtrN("4", tmpDir)
ctr4, err := getTestCtrN("4", manager)
assert.NoError(t, err)
ctr1.config.IPCNsCtr = ctr2.config.ID
ctr1.config.NetNsCtr = ctr3.config.ID
ctr2.config.UserNsCtr = ctr3.config.ID

View File

@ -401,7 +401,10 @@ func resetState(state *containerState) error {
return nil
}
// Refresh refreshes the container's state after a restart
// Refresh refreshes the container's state after a restart.
// Refresh cannot perform any operations that would lock another container.
// We cannot guarantee any other container has a valid lock at the time it is
// running.
func (c *Container) refresh() error {
// Don't need a full sync, but we do need to update from the database to
// pick up potentially-missing container state
@ -447,6 +450,13 @@ func (c *Container) refresh() error {
c.state.DestinationRunDir = filepath.Join(c.state.UserNSRoot, "rundir")
}
// We need to pick up a new lock
lock, err := c.runtime.lockManager.RetrieveLock(c.config.LockID)
if err != nil {
return errors.Wrapf(err, "error acquiring lock for container %s", c.ID())
}
c.lock = lock
if err := c.save(); err != nil {
return errors.Wrapf(err, "error refreshing state for container %s", c.ID())
}

View File

@ -0,0 +1,91 @@
package lock
import (
"sync"
"github.com/pkg/errors"
)
// Mutex holds a single mutex and whether it has been allocated.
type Mutex struct {
	id        uint32
	lock      sync.Mutex
	allocated bool
	// manager is the InMemoryManager that created this mutex. Free takes
	// the manager's allocation lock so it cannot race with AllocateLock,
	// which reads the allocated flag.
	manager *InMemoryManager
}

// ID retrieves the ID of the mutex.
func (m *Mutex) ID() uint32 {
	return m.id
}

// Lock locks the mutex.
func (m *Mutex) Lock() {
	m.lock.Lock()
}

// Unlock unlocks the mutex.
func (m *Mutex) Unlock() {
	m.lock.Unlock()
}

// Free deallocates the mutex to allow its reuse.
// The mutex remains usable after being freed; Free only marks it available
// for reallocation by AllocateLock.
func (m *Mutex) Free() error {
	// Synchronize with AllocateLock, which scans allocated flags under the
	// manager's lock. A nil manager (mutex not created by
	// NewInMemoryManager) falls back to an unsynchronized write.
	if m.manager != nil {
		m.manager.localLock.Lock()
		defer m.manager.localLock.Unlock()
	}

	m.allocated = false

	return nil
}

// InMemoryManager is a lock manager that allocates and retrieves local-only
// locks - that is, they are not multiprocess. This lock manager is intended
// purely for unit and integration testing and should not be used in production
// deployments.
type InMemoryManager struct {
	locks     []*Mutex
	numLocks  uint32
	localLock sync.Mutex
}

// NewInMemoryManager creates a new in-memory lock manager with the given number
// of locks. numLocks must be greater than 0.
func NewInMemoryManager(numLocks uint32) (Manager, error) {
	if numLocks == 0 {
		return nil, errors.New("must provide a non-zero number of locks")
	}

	manager := new(InMemoryManager)
	manager.numLocks = numLocks
	manager.locks = make([]*Mutex, numLocks)

	var i uint32
	for i = 0; i < numLocks; i++ {
		lock := new(Mutex)
		lock.id = i
		lock.manager = manager
		manager.locks[i] = lock
	}

	return manager, nil
}

// AllocateLock allocates a lock from the manager.
// It returns an error once every lock has been allocated.
func (m *InMemoryManager) AllocateLock() (Locker, error) {
	m.localLock.Lock()
	defer m.localLock.Unlock()

	// Linear scan for the first unallocated lock. O(n), but this manager
	// is test-only and lock counts are small.
	for _, lock := range m.locks {
		if !lock.allocated {
			lock.allocated = true
			return lock, nil
		}
	}

	return nil, errors.New("all locks have been allocated")
}

// RetrieveLock retrieves a lock from the manager given its ID.
func (m *InMemoryManager) RetrieveLock(id uint32) (Locker, error) {
	if id >= m.numLocks {
		return nil, errors.Errorf("given lock ID %d is too large - this manager only supports lock indexes up to %d", id, m.numLocks-1)
	}

	return m.locks[id], nil
}

58
libpod/lock/lock.go Normal file
View File

@ -0,0 +1,58 @@
package lock
// Manager provides an interface for allocating multiprocess locks.
// Locks returned by Manager MUST be multiprocess - allocating a lock in
// process A and retrieving that lock's ID in process B must return handles for
// the same lock, and locking the lock in A should exclude B from the lock until
// it is unlocked in A.
// All locks must be identified by a unique uint32 ID (retrieved with Locker's
// ID() method). All locks with a given ID must refer to the same underlying
// lock, and it must be possible to retrieve the lock given its ID.
// Each ID should refer to a unique underlying lock.
// Calls to AllocateLock() must return a lock with a unique, unallocated ID.
// AllocateLock() must fail once all available locks have been allocated.
// Locks are returned for reuse by calls to Free(), and can subsequently be
// reallocated.
type Manager interface {
	// AllocateLock returns an unallocated lock.
	// It is guaranteed that the same lock will not be returned again by
	// AllocateLock until the returned lock has Free() called on it.
	// If all available locks are allocated, AllocateLock will return an
	// error.
	AllocateLock() (Locker, error)
	// RetrieveLock retrieves a lock given its ID.
	// The underlying lock MUST be the same as any other lock with the
	// same ID.
	RetrieveLock(id uint32) (Locker, error)
}

// Locker is similar to sync.Locker, but provides a method for freeing the lock
// to allow its reuse.
// All Locker implementations must maintain mutex semantics - the lock only
// allows one caller in the critical section at a time.
// All locks with the same ID must refer to the same underlying lock, even
// if they are within multiple processes.
type Locker interface {
	// ID retrieves the lock's ID.
	// ID is guaranteed to uniquely identify the lock within the
	// Manager - that is, calling RetrieveLock with this ID will return
	// another instance of the same lock.
	ID() uint32
	// Lock locks the lock.
	// This call MUST block until it successfully acquires the lock or
	// encounters a fatal error.
	// All errors must be handled internally, as they are not returned. For
	// the most part, panicking should be appropriate.
	Lock()
	// Unlock unlocks the lock.
	// All errors must be handled internally, as they are not returned. For
	// the most part, panicking should be appropriate.
	// This includes unlocking locks which are already unlocked.
	Unlock()
	// Free deallocates the underlying lock, allowing its reuse by other
	// pods and containers.
	// The lock MUST still be usable after a Free() - some libpod instances
	// may still retain Container structs with the old lock. This simply
	// advises the manager that the lock may be reallocated.
	Free() error
}

452
libpod/lock/shm/shm_lock.c Normal file
View File

@ -0,0 +1,452 @@
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include "shm_lock.h"
// Compute the total size in bytes of the SHM segment: the fixed-size header
// struct followed by one lock_group_t per bitmap.
static size_t compute_shm_size(uint32_t num_bitmaps) {
  size_t groups_size = num_bitmaps * sizeof(lock_group_t);

  return sizeof(shm_struct_t) + groups_size;
}
// Take the given mutex.
// Handles exceptional conditions, including a mutex locked by a process that
// died holding it (robust-mutex EOWNERDEAD recovery).
// Returns 0 on success, or positive errno on failure.
static int take_mutex(pthread_mutex_t *mutex) {
  int ret_code;

  // Retry as long as the lock attempt reports EAGAIN.
  // NOTE(review): per POSIX, EAGAIN from pthread_mutex_lock means the
  // recursive-lock limit was exceeded; treated here as transient - confirm.
  do {
    ret_code = pthread_mutex_lock(mutex);
  } while(ret_code == EAGAIN);

  if (ret_code == EOWNERDEAD) {
    // The previous owner of the mutex died while holding it.
    // EOWNERDEAD means we now hold the lock but its state is marked
    // inconsistent - mark it consistent and take it for ourselves.
    ret_code = pthread_mutex_consistent(mutex);
    if (ret_code != 0) {
      // Someone else may have gotten here first and marked the state consistent
      // However, the mutex could also be invalid.
      // Fail here instead of looping back to trying to lock the mutex.
      return ret_code;
    }
  } else if (ret_code != 0) {
    return ret_code;
  }

  return 0;
}
// Release the given mutex.
// Retries while pthread_mutex_unlock reports EAGAIN; any other nonzero
// result is passed through to the caller as a positive errno value.
// Returns 0 on success, or positive errno on failure.
static int release_mutex(pthread_mutex_t *mutex) {
  int rc;

  while ((rc = pthread_mutex_unlock(mutex)) == EAGAIN) {
    // Transient failure - retry the unlock.
  }

  return rc;
}
// Set up an SHM segment holding locks for libpod.
// num_locks must not be 0.
// Path is the path to the SHM segment. It must begin with a single / and
// contain no other / characters, and be at most 255 characters including
// terminating NULL byte.
// The segment is created with O_EXCL, so setup fails if it already exists.
// Returns a valid pointer on success or NULL on error.
// If an error occurs, negative ERRNO values will be written to error_code.
shm_struct_t *setup_lock_shm(char *path, uint32_t num_locks, int *error_code) {
  int shm_fd, i, j, ret_code;
  uint32_t num_bitmaps;
  size_t shm_size;
  shm_struct_t *shm;
  pthread_mutexattr_t attr;

  // If error_code doesn't point to anything, we can't reasonably return errors
  // So fail immediately
  if (error_code == NULL) {
    return NULL;
  }

  // We need a nonzero number of locks
  if (num_locks == 0) {
    *error_code = -1 * EINVAL;
    return NULL;
  }

  if (path == NULL) {
    *error_code = -1 * EINVAL;
    return NULL;
  }

  // Calculate the number of bitmaps required.
  // num_locks is rounded up to a whole number of bitmaps, so the segment may
  // contain more locks than requested; shm->num_locks records the real count.
  num_bitmaps = num_locks / BITMAP_SIZE;
  if (num_locks % BITMAP_SIZE != 0) {
    // The actual number given is not an even multiple of our bitmap size
    // So round up
    num_bitmaps += 1;
  }

  // Calculate size of the shm segment
  shm_size = compute_shm_size(num_bitmaps);

  // Create a new SHM segment for us
  shm_fd = shm_open(path, O_RDWR | O_CREAT | O_EXCL, 0600);
  if (shm_fd < 0) {
    *error_code = -1 * errno;
    return NULL;
  }

  // Increase its size to what we need
  ret_code = ftruncate(shm_fd, shm_size);
  if (ret_code < 0) {
    *error_code = -1 * errno;
    goto CLEANUP_UNLINK;
  }

  // Map the shared memory in
  shm = mmap(NULL, shm_size, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0);
  if (shm == MAP_FAILED) {
    *error_code = -1 * errno;
    goto CLEANUP_UNLINK;
  }

  // We have successfully mapped the memory, now initialize the region
  shm->magic = MAGIC;
  shm->unused = 0;
  shm->num_locks = num_bitmaps * BITMAP_SIZE;
  shm->num_bitmaps = num_bitmaps;

  // Create an initializer for our pthread mutexes
  ret_code = pthread_mutexattr_init(&attr);
  if (ret_code != 0) {
    // pthread functions return positive errno values; negate for callers
    *error_code = -1 * ret_code;
    goto CLEANUP_UNMAP;
  }

  // Set mutexes to pshared - multiprocess-safe
  ret_code = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
  if (ret_code != 0) {
    *error_code = -1 * ret_code;
    goto CLEANUP_FREEATTR;
  }

  // Set mutexes to robust - if a process dies while holding a mutex, we'll get
  // a special error code on the next attempt to lock it.
  // This should prevent panicing processes from leaving the state unusable.
  ret_code = pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST);
  if (ret_code != 0) {
    *error_code = -1 * ret_code;
    goto CLEANUP_FREEATTR;
  }

  // Initialize the mutex that protects the bitmaps using the mutex attributes
  ret_code = pthread_mutex_init(&(shm->segment_lock), &attr);
  if (ret_code != 0) {
    *error_code = -1 * ret_code;
    goto CLEANUP_FREEATTR;
  }

  // Initialize all bitmaps to 0 initially
  // And initialize all semaphores they use
  for (i = 0; i < num_bitmaps; i++) {
    shm->locks[i].bitmap = 0;
    for (j = 0; j < BITMAP_SIZE; j++) {
      // Initialize each mutex
      ret_code = pthread_mutex_init(&(shm->locks[i].locks[j]), &attr);
      if (ret_code != 0) {
        *error_code = -1 * ret_code;
        goto CLEANUP_FREEATTR;
      }
    }
  }

  // Close the file descriptor, we're done with it
  // Ignore errors, it's ok if we leak a single FD and this should only run once
  close(shm_fd);

  // Destroy the pthread initializer attribute.
  // Again, ignore errors, this will only run once and we might leak a tiny bit
  // of memory at worst.
  pthread_mutexattr_destroy(&attr);

  return shm;

  // Cleanup after an error.
  // The labels intentionally fall through: freeing the attribute also unmaps
  // the segment, and unmapping also closes the FD and unlinks the segment.
  CLEANUP_FREEATTR:
  pthread_mutexattr_destroy(&attr);

  CLEANUP_UNMAP:
  munmap(shm, shm_size);

  CLEANUP_UNLINK:
  close(shm_fd);
  shm_unlink(path);
  return NULL;
}
// Open an existing SHM segment holding libpod locks.
// num_locks is the number of locks that will be configured in the SHM segment.
// num_locks cannot be 0.
// Path is the path to the SHM segment. It must begin with a single / and
// contain no other / characters, and be at most 255 characters including
// terminating NULL byte.
// Returns a valid pointer on success or NULL on error.
// If an error occurs, negative ERRNO values will be written to error_code.
shm_struct_t *open_lock_shm(char *path, uint32_t num_locks, int *error_code) {
  int shm_fd;
  shm_struct_t *shm;
  size_t shm_size;
  uint32_t num_bitmaps;

  // Without somewhere to report errors we cannot reasonably proceed
  if (error_code == NULL) {
    return NULL;
  }

  // We need a nonzero number of locks
  if (num_locks == 0) {
    *error_code = -1 * EINVAL;
    return NULL;
  }

  if (path == NULL) {
    *error_code = -1 * EINVAL;
    return NULL;
  }

  // Calculate the number of bitmaps required, rounding up so the computed
  // size matches what setup_lock_shm created for the same num_locks.
  num_bitmaps = num_locks / BITMAP_SIZE;
  if (num_locks % BITMAP_SIZE != 0) {
    num_bitmaps += 1;
  }

  // Calculate size of the shm segment
  shm_size = compute_shm_size(num_bitmaps);

  shm_fd = shm_open(path, O_RDWR, 0600);
  if (shm_fd < 0) {
    *error_code = -1 * errno;
    return NULL;
  }

  // Map the shared memory in
  shm = mmap(NULL, shm_size, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0);
  if (shm == MAP_FAILED) {
    *error_code = -1 * errno;
  }

  // Ignore errors, it's ok if we leak a single FD since this only runs once
  close(shm_fd);

  // Check if we successfully mmap'd
  if (shm == MAP_FAILED) {
    return NULL;
  }

  // Need to check the SHM to see if it's actually our locks.
  // BUG FIX: these failures previously reported -1 * errno, but no preceding
  // call had failed, so errno was stale (possibly 0, which would silently
  // report success). Report EINVAL explicitly: the segment exists but does
  // not match what we expect.
  if (shm->magic != MAGIC) {
    *error_code = -1 * EINVAL;
    goto CLEANUP;
  }

  if (shm->num_locks != (num_bitmaps * BITMAP_SIZE)) {
    *error_code = -1 * EINVAL;
    goto CLEANUP;
  }

  return shm;

  CLEANUP:
  munmap(shm, shm_size);
  return NULL;
}
// Close an open SHM lock struct, unmapping the backing memory.
// The given shm_struct_t will be rendered unusable as a result.
// On success, 0 is returned. On failure, negative ERRNO values are returned.
int32_t close_lock_shm(shm_struct_t *shm) {
  size_t total_size;

  // Refuse to unmap a NULL mapping
  if (shm == NULL) {
    return -1 * EINVAL;
  }

  // The segment records its own bitmap count, so we can recompute its size
  total_size = compute_shm_size(shm->num_bitmaps);

  if (munmap(shm, total_size) != 0) {
    return -1 * errno;
  }

  return 0;
}
// Allocate the first available semaphore
// Returns a positive integer guaranteed to be less than UINT32_MAX on success,
// or negative errno values on failure
// On success, the returned integer is the number of the semaphore allocated
int64_t allocate_semaphore(shm_struct_t *shm) {
  int ret_code, i;
  bitmap_t test_map;
  int64_t sem_number, num_within_bitmap;

  if (shm == NULL) {
    return -1 * EINVAL;
  }

  // Lock the semaphore controlling access to our shared memory
  ret_code = take_mutex(&(shm->segment_lock));
  if (ret_code != 0) {
    return -1 * ret_code;
  }

  // Loop through our bitmaps to search for one that is not full
  // NOTE(review): the full-bitmap test assumes BITMAP_SIZE is 32 (a full
  // bitmap equals 0xFFFFFFFF) - confirm against shm_lock.h.
  for (i = 0; i < shm->num_bitmaps; i++) {
    if (shm->locks[i].bitmap != 0xFFFFFFFF) {
      // This bitmap has at least one free bit: walk a single-bit test mask
      // across it to find the first clear position.
      test_map = 0x1;
      num_within_bitmap = 0;
      while (test_map != 0) {
        if ((test_map & shm->locks[i].bitmap) == 0) {
          // Compute the number of the semaphore we are allocating
          sem_number = (BITMAP_SIZE * i) + num_within_bitmap;
          // OR in the bitmap
          shm->locks[i].bitmap = shm->locks[i].bitmap | test_map;
          // Clear the mutex
          ret_code = release_mutex(&(shm->segment_lock));
          if (ret_code != 0) {
            return -1 * ret_code;
          }
          // Return the semaphore we've allocated
          return sem_number;
        }
        test_map = test_map << 1;
        num_within_bitmap++;
      }
      // We should never fall through this loop
      // TODO maybe an assert() here to panic if we do?
    }
  }

  // Clear the mutex
  ret_code = release_mutex(&(shm->segment_lock));
  if (ret_code != 0) {
    return -1 * ret_code;
  }

  // All bitmaps are full
  // We have no available semaphores, report allocation failure
  return -1 * ENOSPC;
}
// Deallocate a given semaphore, marking it free for reallocation.
// Returns 0 on success, negative ERRNO values on failure
// (ENOENT if the semaphore was not allocated).
int32_t deallocate_semaphore(shm_struct_t *shm, uint32_t sem_index) {
  bitmap_t test_map;
  int bitmap_index, index_in_bitmap, ret_code;

  if (shm == NULL) {
    return -1 * EINVAL;
  }

  // Check if the lock index is valid
  if (sem_index >= shm->num_locks) {
    return -1 * EINVAL;
  }

  // Translate the flat semaphore index into (bitmap, bit) coordinates
  bitmap_index = sem_index / BITMAP_SIZE;
  index_in_bitmap = sem_index % BITMAP_SIZE;

  // This should never happen if the sem_index test above succeeded, but better
  // safe than sorry
  if (bitmap_index >= shm->num_bitmaps) {
    return -1 * EFAULT;
  }

  // Cast before shifting: shifting a plain int 0x1 into bit 31 is undefined
  // behavior for signed int; shifting in the bitmap's own (unsigned) type
  // is well defined.
  test_map = (bitmap_t)0x1 << index_in_bitmap;

  // Lock the mutex controlling access to our shared memory
  ret_code = take_mutex(&(shm->segment_lock));
  if (ret_code != 0) {
    return -1 * ret_code;
  }

  // Check if the semaphore is allocated
  if ((test_map & shm->locks[bitmap_index].bitmap) == 0) {
    // Not allocated - release the segment lock before reporting the error
    ret_code = release_mutex(&(shm->segment_lock));
    if (ret_code != 0) {
      return -1 * ret_code;
    }

    return -1 * ENOENT;
  }

  // The semaphore is allocated, clear it
  // Invert the bitmask we used to test to clear the bit
  test_map = ~test_map;
  shm->locks[bitmap_index].bitmap = shm->locks[bitmap_index].bitmap & test_map;

  ret_code = release_mutex(&(shm->segment_lock));
  if (ret_code != 0) {
    return -1 * ret_code;
  }

  return 0;
}
// Lock a given semaphore
// Does not check if the semaphore is allocated - this ensures that, even for
// removed containers, we can still successfully lock to check status (and
// subsequently realize they have been removed).
// Returns 0 on success, negative ERRNO values on failure.
int32_t lock_semaphore(shm_struct_t *shm, uint32_t sem_index) {
  int bitmap_index, index_in_bitmap;

  if (shm == NULL) {
    return -1 * EINVAL;
  }

  // Reject out-of-range semaphore indexes
  if (sem_index >= shm->num_locks) {
    return -1 * EINVAL;
  }

  // Translate the flat semaphore index into (bitmap, bit) coordinates
  bitmap_index = sem_index / BITMAP_SIZE;
  index_in_bitmap = sem_index % BITMAP_SIZE;

  // take_mutex returns positive errno values; negate them for our callers
  return -1 * take_mutex(&(shm->locks[bitmap_index].locks[index_in_bitmap]));
}
// Unlock a given semaphore
// Does not check if the semaphore is allocated - this ensures that, even for
// removed containers, we can still successfully unlock after checking status
// (and subsequently realize they have been removed).
// Returns 0 on success, negative ERRNO values on failure
int32_t unlock_semaphore(shm_struct_t *shm, uint32_t sem_index) {
  int bitmap_index, index_in_bitmap;

  if (shm == NULL) {
    return -1 * EINVAL;
  }

  if (sem_index >= shm->num_locks) {
    return -1 * EINVAL;
  }

  // Map the flat semaphore index onto its lock group and position within it
  bitmap_index = sem_index / BITMAP_SIZE;
  index_in_bitmap = sem_index % BITMAP_SIZE;

  return -1 * release_mutex(&(shm->locks[bitmap_index].locks[index_in_bitmap]));
}

216
libpod/lock/shm/shm_lock.go Normal file
View File

@ -0,0 +1,216 @@
package shm
// #cgo LDFLAGS: -lrt -lpthread
// #include <stdlib.h>
// #include "shm_lock.h"
// const uint32_t bitmap_size_c = BITMAP_SIZE;
import "C"
import (
"runtime"
"syscall"
"unsafe"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
var (
	// BitmapSize is the size of the bitmap used when managing SHM locks.
	// An SHM lock manager's max locks will be rounded up to a multiple of
	// this number.
	BitmapSize uint32 = uint32(C.bitmap_size_c)
)

// SHMLocks is a struct enabling POSIX semaphore locking in a shared memory
// segment.
type SHMLocks struct { // nolint
	// lockStruct points at the C-side shared memory segment
	lockStruct *C.shm_struct_t
	// maxLocks is the number of locks available in the segment
	maxLocks uint32
	// valid is set to false once Close() has been called; all operations
	// error out afterwards
	valid bool
}
// CreateSHMLock sets up a shared-memory segment holding a given number of POSIX
// semaphores, and returns a struct that can be used to operate on those locks.
// numLocks must not be 0, and may be rounded up to a multiple of the bitmap
// size used by the underlying implementation.
func CreateSHMLock(path string, numLocks uint32) (*SHMLocks, error) {
	if numLocks == 0 {
		return nil, errors.Wrapf(syscall.EINVAL, "number of locks must be greater than 0")
	}

	locks := new(SHMLocks)

	cPath := C.CString(path)
	defer C.free(unsafe.Pointer(cPath))

	var errCode C.int
	lockStruct := C.setup_lock_shm(cPath, C.uint32_t(numLocks), &errCode)
	if lockStruct == nil {
		// We got a null pointer, so something errored
		return nil, syscall.Errno(-1 * errCode)
	}

	locks.lockStruct = lockStruct
	// The C side may have rounded the lock count up, so record the actual
	// number of locks created rather than the number requested.
	locks.maxLocks = uint32(lockStruct.num_locks)
	locks.valid = true

	logrus.Debugf("Initialized SHM lock manager at path %s", path)

	return locks, nil
}
// OpenSHMLock opens an existing shared-memory segment holding a given number of
// POSIX semaphores. numLocks must match the number of locks the shared memory
// segment was created with.
func OpenSHMLock(path string, numLocks uint32) (*SHMLocks, error) {
	if numLocks == 0 {
		return nil, errors.Wrapf(syscall.EINVAL, "number of locks must be greater than 0")
	}

	locks := new(SHMLocks)

	cPath := C.CString(path)
	defer C.free(unsafe.Pointer(cPath))

	var errCode C.int
	lockStruct := C.open_lock_shm(cPath, C.uint32_t(numLocks), &errCode)
	if lockStruct == nil {
		// We got a null pointer, so something errored
		return nil, syscall.Errno(-1 * errCode)
	}

	locks.lockStruct = lockStruct
	// NOTE(review): this trusts the caller's count; CreateSHMLock records
	// the (possibly rounded-up) count from the C struct instead — confirm
	// open_lock_shm validates the count matches the segment.
	locks.maxLocks = numLocks
	locks.valid = true

	return locks, nil
}
// GetMaxLocks returns the maximum number of locks in the SHM.
// For segments made with CreateSHMLock this is the actual (possibly
// rounded-up) count; for OpenSHMLock it is the count the caller supplied.
func (locks *SHMLocks) GetMaxLocks() uint32 {
	return locks.maxLocks
}
// Close closes an existing shared-memory segment.
// The segment will be rendered unusable after closing.
// WARNING: If you Close() while there are still locks locked, these locks may
// fail to release, causing a program freeze.
// Close() is only intended to be used while testing the locks.
func (locks *SHMLocks) Close() error {
	if !locks.valid {
		return errors.Wrapf(syscall.EINVAL, "locks have already been closed")
	}

	// Invalidate the struct up front so no further operations are
	// attempted on the segment, even if the close below fails.
	locks.valid = false

	if retCode := C.close_lock_shm(locks.lockStruct); retCode < 0 {
		// Negative errno returned
		return syscall.Errno(-1 * retCode)
	}

	return nil
}
// AllocateSemaphore allocates a semaphore from a shared-memory segment for use
// by a container or pod.
// Returns the index of the semaphore that was allocated.
// Allocations past the maximum number of locks given when the SHM segment was
// created will result in an error, and no semaphore will be allocated.
func (locks *SHMLocks) AllocateSemaphore() (uint32, error) {
	if !locks.valid {
		return 0, errors.Wrapf(syscall.EINVAL, "locks have already been closed")
	}

	// The C call returns an int64, so the full uint32 range is available
	// for semaphore indexes while negative values signal errors.
	retCode := C.allocate_semaphore(locks.lockStruct)
	if retCode < 0 {
		// Negative errno returned
		return 0, syscall.Errno(-1 * retCode)
	}

	return uint32(retCode), nil
}
// DeallocateSemaphore frees a semaphore in a shared-memory segment so it can be
// reallocated to another container or pod.
// The given semaphore must be already allocated, or an error will be returned.
func (locks *SHMLocks) DeallocateSemaphore(sem uint32) error {
	if !locks.valid {
		return errors.Wrapf(syscall.EINVAL, "locks have already been closed")
	}

	// Valid semaphore indexes are 0 through maxLocks-1, so sem == maxLocks
	// is also out of range (the original check used > and let it through).
	if sem >= locks.maxLocks {
		return errors.Wrapf(syscall.EINVAL, "given semaphore %d is higher than maximum locks count %d", sem, locks.maxLocks)
	}

	retCode := C.deallocate_semaphore(locks.lockStruct, C.uint32_t(sem))
	if retCode < 0 {
		// Negative errno returned
		return syscall.Errno(-1 * retCode)
	}

	return nil
}
// LockSemaphore locks the given semaphore.
// If the semaphore is already locked, LockSemaphore will block until the lock
// can be acquired.
// There is no requirement that the given semaphore be allocated.
// This ensures that attempts to lock a container after it has been deleted,
// but before the caller has queried the database to determine this, will
// succeed.
func (locks *SHMLocks) LockSemaphore(sem uint32) error {
	if !locks.valid {
		return errors.Wrapf(syscall.EINVAL, "locks have already been closed")
	}

	// Valid semaphore indexes are 0 through maxLocks-1, so sem == maxLocks
	// is also out of range (the original check used > and let it through).
	if sem >= locks.maxLocks {
		return errors.Wrapf(syscall.EINVAL, "given semaphore %d is higher than maximum locks count %d", sem, locks.maxLocks)
	}

	// For pthread mutexes, we have to guarantee lock and unlock happen in
	// the same thread.
	runtime.LockOSThread()

	retCode := C.lock_semaphore(locks.lockStruct, C.uint32_t(sem))
	if retCode < 0 {
		// The lock was not acquired, so no UnlockSemaphore (and thus no
		// UnlockOSThread) will follow - release the OS thread pin here
		// to avoid permanently pinning this goroutine to its thread.
		runtime.UnlockOSThread()
		// Negative errno returned
		return syscall.Errno(-1 * retCode)
	}

	return nil
}
// UnlockSemaphore unlocks the given semaphore.
// Unlocking a semaphore that is already unlocked will return an error.
// There is no requirement that the given semaphore be allocated.
// This ensures that attempts to unlock a container's lock after it has been
// deleted, but before the caller has queried the database to determine this,
// will succeed.
func (locks *SHMLocks) UnlockSemaphore(sem uint32) error {
	if !locks.valid {
		return errors.Wrapf(syscall.EINVAL, "locks have already been closed")
	}

	// Valid semaphore indexes are 0 through maxLocks-1, so sem == maxLocks
	// is also out of range (the original check used > and let it through).
	if sem >= locks.maxLocks {
		return errors.Wrapf(syscall.EINVAL, "given semaphore %d is higher than maximum locks count %d", sem, locks.maxLocks)
	}

	retCode := C.unlock_semaphore(locks.lockStruct, C.uint32_t(sem))
	if retCode < 0 {
		// Negative errno returned.
		// Deliberately do not UnlockOSThread here: the mutex is still
		// held, so the thread must stay pinned.
		return syscall.Errno(-1 * retCode)
	}

	// For pthread mutexes, we have to guarantee lock and unlock happen in
	// the same thread.
	// OK if we take multiple locks - UnlockOSThread() won't actually unlock
	// until the number of calls equals the number of calls to
	// LockOSThread()
	runtime.UnlockOSThread()

	return nil
}

View File

@ -0,0 +1,46 @@
#ifndef shm_locks_h_
#define shm_locks_h_

#include <pthread.h>
#include <stdint.h>

// Magic number to ensure we open the right SHM segment
#define MAGIC 0x87D1

// Type for our bitmaps
typedef uint32_t bitmap_t;

// bitmap size, in bits - one bit (and one mutex) per lock in a group
#define BITMAP_SIZE (sizeof(bitmap_t) * 8)

// Struct to hold a single bitmap and associated locks.
// A set bit in the bitmap marks the corresponding mutex as allocated.
typedef struct lock_group {
  bitmap_t bitmap;
  pthread_mutex_t locks[BITMAP_SIZE];
} lock_group_t;

// Struct to hold our SHM locks.
// Unused is required to be 0 in the current implementation. If we ever make
// changes to this structure in the future, this will be repurposed as a version
// field.
typedef struct shm_struct {
  uint16_t magic;               // Must equal MAGIC for a valid segment
  uint16_t unused;              // Reserved, must be 0 (future version field)
  pthread_mutex_t segment_lock; // Guards access to the allocation bitmaps
  uint32_t num_bitmaps;         // Number of entries in locks[]
  uint32_t num_locks;           // Total number of locks in the segment
  lock_group_t locks[];         // Flexible array member of lock groups
} shm_struct_t;

// Internal helpers (implemented in shm_lock.c)
static size_t compute_shm_size(uint32_t num_bitmaps);
static int take_mutex(pthread_mutex_t *mutex);
static int release_mutex(pthread_mutex_t *mutex);

// Public API - all functions report failure via NULL return plus error_code,
// or via negative ERRNO return values.
shm_struct_t *setup_lock_shm(char *path, uint32_t num_locks, int *error_code);
shm_struct_t *open_lock_shm(char *path, uint32_t num_locks, int *error_code);
int32_t close_lock_shm(shm_struct_t *shm);
int64_t allocate_semaphore(shm_struct_t *shm);
int32_t deallocate_semaphore(shm_struct_t *shm, uint32_t sem_index);
int32_t lock_semaphore(shm_struct_t *shm, uint32_t sem_index);
int32_t unlock_semaphore(shm_struct_t *shm, uint32_t sem_index);

#endif

View File

@ -0,0 +1,278 @@
package shm
import (
"fmt"
"os"
"runtime"
"syscall"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// All tests here are in the same process, which somewhat limits their utility
// The big intent of this package is multiprocess locking, which is really hard
// to test without actually having multiple processes...
// We can at least verify that the locks work within the local process.
var (
// 4 * BITMAP_SIZE to ensure we have to traverse bitmaps
numLocks = 4 * BitmapSize
)
const lockPath = "/libpod_test"
// We need a test main to ensure that the SHM is created before the tests run
func TestMain(m *testing.M) {
	shmLock, err := CreateSHMLock(lockPath, numLocks)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error creating SHM for tests: %v\n", err)
		os.Exit(-1)
	}

	// Close the SHM - every subsequent test will reopen
	if err := shmLock.Close(); err != nil {
		fmt.Fprintf(os.Stderr, "Error closing SHM locks: %v\n", err)
		os.Exit(-1)
	}

	exitCode := m.Run()

	// We need to remove the SHM segment to clean up after ourselves.
	// POSIX SHM objects live under /dev/shm on Linux; derive the file name
	// from lockPath ("/libpod_test") - the old hard-coded
	// "/dev/shm/libpod_lock" did not match the segment we created, so the
	// segment was never removed.
	os.RemoveAll("/dev/shm" + lockPath)

	os.Exit(exitCode)
}
// runLockTest opens the shared test SHM segment, runs testFunc against it as
// a subtest, and guarantees cleanup afterwards: every semaphore is
// deallocated (ENOENT from unallocated ones is ignored) and the segment is
// closed.
func runLockTest(t *testing.T, testFunc func(*testing.T, *SHMLocks)) {
	locks, err := OpenSHMLock(lockPath, numLocks)
	if err != nil {
		t.Fatalf("Error opening locks: %v", err)
	}
	defer func() {
		// Deallocate every lock; ENOENT just means the test never
		// allocated that one.
		for i := uint32(0); i < numLocks; i++ {
			if err := locks.DeallocateSemaphore(i); err != nil && err != syscall.ENOENT {
				t.Fatalf("Error deallocating semaphore %d: %v", i, err)
			}
		}

		if err := locks.Close(); err != nil {
			t.Fatalf("Error closing locks: %v", err)
		}
	}()

	if ok := t.Run("locks", func(t *testing.T) {
		testFunc(t, locks)
	}); !ok {
		t.Fail()
	}
}
// Test that creating an SHM with a bad size rounds up to a good size
func TestCreateNewSHMBadSizeRoundsUp(t *testing.T) {
	// Odd number, not a power of 2, should never be a word size on a system
	lock, err := CreateSHMLock("/test1", 7)
	// require, not assert: continuing with a nil lock would panic below
	require.NoError(t, err)

	// testify's Equal takes (t, expected, actual)
	assert.Equal(t, BitmapSize, lock.GetMaxLocks())

	if err := lock.Close(); err != nil {
		t.Fatalf("Error closing locks: %v", err)
	}
}
// Test that creating an SHM with 0 size fails
func TestCreateNewSHMZeroSize(t *testing.T) {
	_, err := CreateSHMLock("/test2", 0)
	assert.Error(t, err)
}

// Test that deallocating an unallocated lock errors
func TestDeallocateUnallocatedLockErrors(t *testing.T) {
	runLockTest(t, func(t *testing.T, locks *SHMLocks) {
		assert.Error(t, locks.DeallocateSemaphore(0))
	})
}

// Test that unlocking an unlocked lock fails
func TestUnlockingUnlockedLockFails(t *testing.T) {
	runLockTest(t, func(t *testing.T, locks *SHMLocks) {
		assert.Error(t, locks.UnlockSemaphore(0))
	})
}

// Test that locking and double-unlocking fails
func TestDoubleUnlockFails(t *testing.T) {
	runLockTest(t, func(t *testing.T, locks *SHMLocks) {
		assert.NoError(t, locks.LockSemaphore(0))
		assert.NoError(t, locks.UnlockSemaphore(0))
		// A second unlock of the now-unlocked semaphore must error
		assert.Error(t, locks.UnlockSemaphore(0))
	})
}
// Test allocating - lock - unlock - deallocate cycle, single lock
func TestLockLifecycleSingleLock(t *testing.T) {
	runLockTest(t, func(t *testing.T, locks *SHMLocks) {
		sem, err := locks.AllocateSemaphore()
		require.NoError(t, err)

		assert.NoError(t, locks.LockSemaphore(sem))
		assert.NoError(t, locks.UnlockSemaphore(sem))
		assert.NoError(t, locks.DeallocateSemaphore(sem))
	})
}

// Test allocate two locks returns different locks
func TestAllocateTwoLocksGetsDifferentLocks(t *testing.T) {
	runLockTest(t, func(t *testing.T, locks *SHMLocks) {
		sem1, err := locks.AllocateSemaphore()
		assert.NoError(t, err)

		sem2, err := locks.AllocateSemaphore()
		assert.NoError(t, err)

		assert.NotEqual(t, sem1, sem2)
	})
}

// Test allocate all locks successful and all are unique
func TestAllocateAllLocksSucceeds(t *testing.T) {
	runLockTest(t, func(t *testing.T, locks *SHMLocks) {
		seen := make(map[uint32]bool)
		for i := uint32(0); i < numLocks; i++ {
			sem, err := locks.AllocateSemaphore()
			assert.NoError(t, err)

			// Each allocated semaphore index must be unique
			assert.False(t, seen[sem])
			seen[sem] = true
		}
	})
}

// Test allocating more than the given max fails
func TestAllocateTooManyLocksFails(t *testing.T) {
	runLockTest(t, func(t *testing.T, locks *SHMLocks) {
		// Exhaust the pool
		for i := uint32(0); i < numLocks; i++ {
			_, err := locks.AllocateSemaphore()
			assert.NoError(t, err)
		}

		// One more allocation must fail
		_, err := locks.AllocateSemaphore()
		assert.Error(t, err)
	})
}
// Test allocating max locks, deallocating one, and then allocating again succeeds
func TestAllocateDeallocateCycle(t *testing.T) {
	runLockTest(t, func(t *testing.T, locks *SHMLocks) {
		// Exhaust the pool
		for i := uint32(0); i < numLocks; i++ {
			_, err := locks.AllocateSemaphore()
			assert.NoError(t, err)
		}

		// Free each semaphore in turn and immediately reallocate; the
		// allocator should hand back exactly the index just freed.
		for j := uint32(0); j < numLocks; j++ {
			assert.NoError(t, locks.DeallocateSemaphore(j))

			newSem, err := locks.AllocateSemaphore()
			assert.NoError(t, err)
			assert.Equal(t, j, newSem)
		}
	})
}
// Test that locks actually lock
func TestLockSemaphoreActuallyLocks(t *testing.T) {
	runLockTest(t, func(t *testing.T, locks *SHMLocks) {
		// This entire test is very ugly - lots of sleeps to try and get
		// things to occur in the right order.
		// It also doesn't even exercise the multiprocess nature of the
		// locks.

		// Get the current time
		startTime := time.Now()

		// Start a goroutine to take the lock and then release it after
		// a second.
		go func() {
			err := locks.LockSemaphore(0)
			assert.NoError(t, err)

			time.Sleep(1 * time.Second)

			err = locks.UnlockSemaphore(0)
			assert.NoError(t, err)
		}()

		// Sleep for a quarter of a second to give the goroutine time
		// to kick off and grab the lock
		time.Sleep(250 * time.Millisecond)

		// Take the lock - this should block until the goroutine's
		// one-second hold on it is released
		err := locks.LockSemaphore(0)
		assert.NoError(t, err)

		// Get the current time
		endTime := time.Now()

		// Verify that at least 1 second has passed since start -
		// i.e. that we really did block on the held lock
		duration := endTime.Sub(startTime)
		assert.True(t, duration.Seconds() > 1.0)
	})
}

// Test that locking and unlocking two semaphores succeeds
// Ensures that runtime.LockOSThread() is doing its job
func TestLockAndUnlockTwoSemaphore(t *testing.T) {
	runLockTest(t, func(t *testing.T, locks *SHMLocks) {
		err := locks.LockSemaphore(0)
		assert.NoError(t, err)

		err = locks.LockSemaphore(1)
		assert.NoError(t, err)

		err = locks.UnlockSemaphore(1)
		assert.NoError(t, err)

		// Now yield scheduling
		// To try and get us on another OS thread
		runtime.Gosched()

		// And unlock the last semaphore
		// If we are in a different OS thread, this should fail.
		// However, runtime.UnlockOSThread() should guarantee we are not
		err = locks.UnlockSemaphore(0)
		assert.NoError(t, err)
	})
}

View File

@ -0,0 +1,94 @@
// +build linux
package lock
import (
"github.com/containers/libpod/libpod/lock/shm"
)
// SHMLockManager manages shared memory locks.
type SHMLockManager struct {
	// locks is the underlying shared-memory segment holding the locks
	locks *shm.SHMLocks
}
// NewSHMLockManager makes a new SHMLockManager with the given number of locks.
// Due to the underlying implementation, the exact number of locks created may
// be greater than the number given here.
func NewSHMLockManager(path string, numLocks uint32) (Manager, error) {
	shmLocks, err := shm.CreateSHMLock(path, numLocks)
	if err != nil {
		return nil, err
	}

	return &SHMLockManager{locks: shmLocks}, nil
}

// OpenSHMLockManager opens an existing SHMLockManager with the given number of
// locks.
func OpenSHMLockManager(path string, numLocks uint32) (Manager, error) {
	shmLocks, err := shm.OpenSHMLock(path, numLocks)
	if err != nil {
		return nil, err
	}

	return &SHMLockManager{locks: shmLocks}, nil
}
// AllocateLock allocates a new lock from the manager.
func (m *SHMLockManager) AllocateLock() (Locker, error) {
	semIndex, err := m.locks.AllocateSemaphore()
	if err != nil {
		return nil, err
	}

	return &SHMLock{lockID: semIndex, manager: m}, nil
}

// RetrieveLock retrieves a lock from the manager given its ID.
// No validation is performed here; invalid IDs surface as errors when the
// returned lock is used.
func (m *SHMLockManager) RetrieveLock(id uint32) (Locker, error) {
	return &SHMLock{lockID: id, manager: m}, nil
}
// SHMLock is an individual shared memory lock.
type SHMLock struct {
	// lockID is the index of this lock's semaphore in the SHM segment
	lockID uint32
	// manager is the SHMLockManager that owns the segment
	manager *SHMLockManager
}

// ID returns the ID of the lock.
func (l *SHMLock) ID() uint32 {
	return l.lockID
}

// Lock acquires the lock, blocking until it is available.
// NOTE(review): a failure here is treated as unrecoverable lock-state
// corruption and panics rather than returning an error.
func (l *SHMLock) Lock() {
	if err := l.manager.locks.LockSemaphore(l.lockID); err != nil {
		panic(err.Error())
	}
}

// Unlock releases the lock.
// NOTE(review): as with Lock, failure panics rather than returning an error.
func (l *SHMLock) Unlock() {
	if err := l.manager.locks.UnlockSemaphore(l.lockID); err != nil {
		panic(err.Error())
	}
}

// Free releases the lock, allowing it to be reused.
func (l *SHMLock) Free() error {
	return l.manager.locks.DeallocateSemaphore(l.lockID)
}

View File

@ -0,0 +1,29 @@
// +build !linux
package lock
import "fmt"
// SHMLockManager is a shared memory lock manager.
// It is not supported on non-Linux platforms (see the !linux build tag).
type SHMLockManager struct{}

// NewSHMLockManager is not supported on this platform.
// The signature mirrors the Linux implementation (path plus lock count) so
// cross-platform callers compile unchanged.
func NewSHMLockManager(path string, numLocks uint32) (Manager, error) {
	return nil, fmt.Errorf("not supported")
}

// OpenSHMLockManager is not supported on this platform.
func OpenSHMLockManager(path string, numLocks uint32) (Manager, error) {
	return nil, fmt.Errorf("not supported")
}

// AllocateLock is not supported on this platform.
func (m *SHMLockManager) AllocateLock() (Locker, error) {
	return nil, fmt.Errorf("not supported")
}

// RetrieveLock is not supported on this platform.
// The ID is a uint32 to match the Linux implementation and the Manager
// interface (the previous string parameter could never satisfy it).
func (m *SHMLockManager) RetrieveLock(id uint32) (Locker, error) {
	return nil, fmt.Errorf("not supported")
}

View File

@ -3,7 +3,7 @@ package libpod
import (
"time"
"github.com/containers/storage"
"github.com/containers/libpod/libpod/lock"
"github.com/cri-o/ocicni/pkg/ocicni"
"github.com/pkg/errors"
)
@ -26,7 +26,7 @@ type Pod struct {
valid bool
runtime *Runtime
lock storage.Locker
lock lock.Locker
}
// PodConfig represents a pod's static configuration
@ -60,6 +60,9 @@ type PodConfig struct {
// Time pod was created
CreatedTime time.Time `json:"created"`
// ID of the pod's lock
LockID uint32 `json:"lockID"`
}
// podState represents a pod's state

View File

@ -501,6 +501,8 @@ func easyjsonBe091417DecodeGithubComContainersLibpodLibpod4(in *jlexer.Lexer, ou
if data := in.Raw(); in.Ok() {
in.AddError((out.CreatedTime).UnmarshalJSON(data))
}
case "lockID":
out.LockID = uint32(in.Uint32())
default:
in.SkipRecursive()
}
@ -675,6 +677,16 @@ func easyjsonBe091417EncodeGithubComContainersLibpodLibpod4(out *jwriter.Writer,
}
out.Raw((in.CreatedTime).MarshalJSON())
}
{
const prefix string = ",\"lockID\":"
if first {
first = false
out.RawString(prefix[1:])
} else {
out.RawString(prefix)
}
out.Uint32(uint32(in.LockID))
}
out.RawByte('}')
}

View File

@ -7,14 +7,13 @@ import (
"strings"
"time"
"github.com/containers/storage"
"github.com/containers/storage/pkg/stringid"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
// Creates a new, empty pod
func newPod(lockDir string, runtime *Runtime) (*Pod, error) {
func newPod(runtime *Runtime) (*Pod, error) {
pod := new(Pod)
pod.config = new(PodConfig)
pod.config.ID = stringid.GenerateNonCryptoID()
@ -24,15 +23,6 @@ func newPod(lockDir string, runtime *Runtime) (*Pod, error) {
pod.state = new(podState)
pod.runtime = runtime
// Path our lock file will reside at
lockPath := filepath.Join(lockDir, pod.config.ID)
// Grab a lockfile at the given path
lock, err := storage.GetLockfile(lockPath)
if err != nil {
return nil, errors.Wrapf(err, "error creating lockfile for new pod")
}
pod.lock = lock
return pod, nil
}
@ -55,6 +45,8 @@ func (p *Pod) save() error {
}
// Refresh a pod's state after restart
// This cannot lock any other pod, but may lock individual containers, as those
// will have refreshed by the time pod refresh runs.
func (p *Pod) refresh() error {
// Need to do an update from the DB to pull potentially-missing state
if err := p.runtime.state.UpdatePod(p); err != nil {
@ -65,6 +57,13 @@ func (p *Pod) refresh() error {
return ErrPodRemoved
}
// Retrieve the pod's lock
lock, err := p.runtime.lockManager.RetrieveLock(p.config.LockID)
if err != nil {
return errors.Wrapf(err, "error retrieving lock for pod %s", p.ID())
}
p.lock = lock
// We need to recreate the pod's cgroup
if p.config.UsePodCgroup {
switch p.runtime.config.CgroupManager {

View File

@ -11,6 +11,7 @@ import (
is "github.com/containers/image/storage"
"github.com/containers/image/types"
"github.com/containers/libpod/libpod/image"
"github.com/containers/libpod/libpod/lock"
"github.com/containers/libpod/pkg/firewall"
sysreg "github.com/containers/libpod/pkg/registries"
"github.com/containers/libpod/pkg/rootless"
@ -64,6 +65,11 @@ const (
// DefaultInitPath is the default path to the container-init binary
DefaultInitPath = "/usr/libexec/podman/catatonit"
// DefaultSHMLockPath is the default path for SHM locks
DefaultSHMLockPath = "/libpod_lock"
// DefaultRootlessSHMLockPath is the default path for rootless SHM locks
DefaultRootlessSHMLockPath = "/libpod_rootless_lock"
)
// A RuntimeOption is a functional option which alters the Runtime created by
@ -78,7 +84,6 @@ type Runtime struct {
storageService *storageService
imageContext *types.SystemContext
ociRuntime *OCIRuntime
lockDir string
netPlugin ocicni.CNIPlugin
ociRuntimePath string
conmonPath string
@ -86,6 +91,7 @@ type Runtime struct {
lock sync.RWMutex
imageRuntime *image.Runtime
firewallBackend firewall.FirewallBackend
lockManager lock.Manager
configuredFrom *runtimeConfiguredFrom
}
@ -165,6 +171,7 @@ type RuntimeConfig struct {
// and all containers and pods will be visible.
// The default namespace is "".
Namespace string `toml:"namespace,omitempty"`
// InfraImage is the image a pod infra container will use to manage namespaces
InfraImage string `toml:"infra_image"`
// InfraCommand is the command run to start up a pod infra container
@ -179,6 +186,10 @@ type RuntimeConfig struct {
EnablePortReservation bool `toml:"enable_port_reservation"`
// EnableLabeling indicates wether libpod will support container labeling
EnableLabeling bool `toml:"label"`
// NumLocks is the number of locks to make available for containers and
// pods.
NumLocks uint32 `toml:"num_locks,omitempty"`
}
// runtimeConfiguredFrom is a struct used during early runtime init to help
@ -234,6 +245,7 @@ var (
InfraImage: DefaultInfraImage,
EnablePortReservation: true,
EnableLabeling: true,
NumLocks: 2048,
}
)
@ -616,17 +628,6 @@ func makeRuntime(runtime *Runtime) (err error) {
}
runtime.ociRuntime = ociRuntime
// Make a directory to hold container lockfiles
lockDir := filepath.Join(runtime.config.TmpDir, "lock")
if err := os.MkdirAll(lockDir, 0755); err != nil {
// The directory is allowed to exist
if !os.IsExist(err) {
return errors.Wrapf(err, "error creating runtime lockfiles directory %s",
lockDir)
}
}
runtime.lockDir = lockDir
// Make the per-boot files directory if it does not exist
if err := os.MkdirAll(runtime.config.TmpDir, 0755); err != nil {
// The directory is allowed to exist
@ -671,6 +672,7 @@ func makeRuntime(runtime *Runtime) (err error) {
// and use it to lock important operations
aliveLock.Lock()
locked := true
doRefresh := false
defer func() {
if locked {
aliveLock.Unlock()
@ -683,22 +685,52 @@ func makeRuntime(runtime *Runtime) (err error) {
// empty state only creates a single file
// As such, it's not really a performance concern
if os.IsNotExist(err) {
if os.Geteuid() != 0 {
aliveLock.Unlock()
locked = false
if err2 := runtime.refreshRootless(); err2 != nil {
return err2
}
} else {
if err2 := runtime.refresh(runtimeAliveFile); err2 != nil {
return err2
}
}
doRefresh = true
} else {
return errors.Wrapf(err, "error reading runtime status file %s", runtimeAliveFile)
}
}
// Set up the lock manager
var manager lock.Manager
lockPath := DefaultSHMLockPath
if rootless.IsRootless() {
lockPath = DefaultRootlessSHMLockPath
}
if doRefresh {
// If SHM locks already exist, delete them and reinitialize
if err := os.Remove(filepath.Join("/dev/shm", lockPath)); err != nil && !os.IsNotExist(err) {
return errors.Wrapf(err, "error deleting existing libpod SHM segment %s", lockPath)
}
manager, err = lock.NewSHMLockManager(lockPath, runtime.config.NumLocks)
if err != nil {
return errors.Wrapf(err, "error creating SHM locks for libpod")
}
} else {
manager, err = lock.OpenSHMLockManager(lockPath, runtime.config.NumLocks)
if err != nil {
return errors.Wrapf(err, "error opening libpod SHM locks")
}
}
runtime.lockManager = manager
// If we need to refresh the state, do it now - things are guaranteed to
// be set up by now.
if doRefresh {
if os.Geteuid() != 0 {
aliveLock.Unlock()
locked = false
if err2 := runtime.refreshRootless(); err2 != nil {
return err2
}
} else {
if err2 := runtime.refresh(runtimeAliveFile); err2 != nil {
return err2
}
}
}
// Mark the runtime as valid - ready to be used, cannot be modified
// further
runtime.valid = true
@ -794,19 +826,22 @@ func (r *Runtime) refresh(alivePath string) error {
if err != nil {
return errors.Wrapf(err, "error retrieving all pods from state")
}
// No locks are taken during pod and container refresh.
// Furthermore, the pod and container refresh() functions are not
// allowed to take locks themselves.
// We cannot assume that any pod or container has a valid lock until
// after this function has returned.
// The runtime alive lock should suffice to provide mutual exclusion
// until this has run.
for _, ctr := range ctrs {
ctr.lock.Lock()
if err := ctr.refresh(); err != nil {
logrus.Errorf("Error refreshing container %s: %v", ctr.ID(), err)
}
ctr.lock.Unlock()
}
for _, pod := range pods {
pod.lock.Lock()
if err := pod.refresh(); err != nil {
logrus.Errorf("Error refreshing pod %s: %v", pod.ID(), err)
}
pod.lock.Unlock()
}
// Create a file indicating the runtime is alive and ready

View File

@ -9,7 +9,6 @@ import (
"time"
"github.com/containers/libpod/pkg/rootless"
"github.com/containers/storage"
"github.com/containers/storage/pkg/stringid"
spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
@ -61,15 +60,6 @@ func (r *Runtime) newContainer(ctx context.Context, rSpec *spec.Spec, options ..
ctr.state.BindMounts = make(map[string]string)
// Path our lock file will reside at
lockPath := filepath.Join(r.lockDir, ctr.config.ID)
// Grab a lockfile at the given path
lock, err := storage.GetLockfile(lockPath)
if err != nil {
return nil, errors.Wrapf(err, "error creating lockfile for new container")
}
ctr.lock = lock
ctr.config.StopTimeout = CtrRemoveTimeout
// Set namespace based on current runtime namespace
@ -85,6 +75,19 @@ func (r *Runtime) newContainer(ctx context.Context, rSpec *spec.Spec, options ..
}
}
// Allocate a lock for the container
lock, err := r.lockManager.AllocateLock()
if err != nil {
return nil, errors.Wrapf(err, "error allocating lock for new container")
}
ctr.lock = lock
ctr.config.LockID = ctr.lock.ID()
logrus.Debugf("Allocated lock %d for container %s", ctr.lock.ID(), ctr.ID())
ctr.valid = true
ctr.state.State = ContainerStateConfigured
ctr.runtime = r
ctr.valid = true
ctr.state.State = ContainerStateConfigured
@ -379,6 +382,15 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool)
}
}
// Deallocate the container's lock
if err := c.lock.Free(); err != nil {
if cleanupErr == nil {
cleanupErr = err
} else {
logrus.Errorf("free container lock: %v", err)
}
}
return cleanupErr
}

View File

@ -23,7 +23,7 @@ func (r *Runtime) NewPod(ctx context.Context, options ...PodCreateOption) (*Pod,
return nil, ErrRuntimeStopped
}
pod, err := newPod(r.lockDir, r)
pod, err := newPod(r)
if err != nil {
return nil, errors.Wrapf(err, "error creating pod")
}
@ -48,6 +48,14 @@ func (r *Runtime) NewPod(ctx context.Context, options ...PodCreateOption) (*Pod,
pod.config.Name = name
}
// Allocate a lock for the pod
lock, err := r.lockManager.AllocateLock()
if err != nil {
return nil, errors.Wrapf(err, "error allocating lock for new pod")
}
pod.lock = lock
pod.config.LockID = pod.lock.ID()
pod.valid = true
// Check CGroup parent sanity, and set it if it was not set
@ -239,6 +247,11 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool)
return err
}
}
// Free the container's lock
if err := ctr.lock.Free(); err != nil {
return err
}
}
// Remove containers from the state

View File

@ -8,7 +8,6 @@ import (
"path/filepath"
"strings"
"github.com/containers/storage"
"github.com/containers/storage/pkg/stringid"
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/pkg/errors"
@ -68,14 +67,12 @@ func (r *Runtime) newVolume(ctx context.Context, options ...VolumeCreateOption)
}
volume.config.MountPoint = fullVolPath
// Path our lock file will reside at
lockPath := filepath.Join(r.lockDir, volume.config.Name)
// Grab a lockfile at the given path
lock, err := storage.GetLockfile(lockPath)
lock, err := r.lockManager.AllocateLock()
if err != nil {
return nil, errors.Wrapf(err, "error creating lockfile for new volume")
return nil, errors.Wrapf(err, "error allocating lock for new volume")
}
volume.lock = lock
volume.config.LockID = volume.lock.ID()
volume.valid = true

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
package libpod
import "github.com/containers/storage"
import "github.com/containers/libpod/libpod/lock"
// Volume is the type used to create named volumes
// TODO: all volumes should be created using this and the Volume API
@ -9,13 +9,17 @@ type Volume struct {
valid bool
runtime *Runtime
lock storage.Locker
lock lock.Locker
}
// VolumeConfig holds the volume's config information
//easyjson:json
type VolumeConfig struct {
Name string `json:"name"`
// Name of the volume
Name string `json:"name"`
// ID of this volume's lock
LockID uint32 `json:"lockID"`
Labels map[string]string `json:"labels"`
MountPoint string `json:"mountPoint"`
Driver string `json:"driver"`