From b11588a6a0d7c50685914066951eadec4314b932 Mon Sep 17 00:00:00 2001 From: Mikkel Krautz Date: Sun, 10 Apr 2011 19:42:47 +0200 Subject: [PATCH] Add blobstore package. --- Makefile | 3 + pkg/blobstore/Makefile | 8 + pkg/blobstore/blobreader.go | 53 ++++++ pkg/blobstore/blobstore.go | 318 ++++++++++++++++++++++++++++++++ pkg/blobstore/blobstore_test.go | 207 +++++++++++++++++++++ 5 files changed, 589 insertions(+) create mode 100644 pkg/blobstore/Makefile create mode 100644 pkg/blobstore/blobreader.go create mode 100644 pkg/blobstore/blobstore.go create mode 100644 pkg/blobstore/blobstore_test.go diff --git a/Makefile b/Makefile index b384813..56ad839 100644 --- a/Makefile +++ b/Makefile @@ -10,18 +10,21 @@ PACKAGES = \ pkg/packetdatastream \ pkg/cryptstate \ pkg/mumbleproto \ + pkg/blobstore \ pkg/sqlite GCFLAGS = \ -Ipkg/cryptstate/_obj \ -Ipkg/packetdatastream/_obj \ -Ipkg/mumbleproto/_obj \ + -Ipkg/blobstore/_obj \ -Ipkg/sqlite/_obj LDFLAGS = \ -Lpkg/cryptstate/_obj \ -Lpkg/packetdatastream/_obj \ -Lpkg/mumbleproto/_obj \ + -Ipkg/blobstore/_obj \ -Lpkg/sqlite/_obj GOFILES = \ diff --git a/pkg/blobstore/Makefile b/pkg/blobstore/Makefile new file mode 100644 index 0000000..62ad8a0 --- /dev/null +++ b/pkg/blobstore/Makefile @@ -0,0 +1,8 @@ +include $(GOROOT)/src/Make.inc + +TARG = blobstore +GOFILES = \ + blobstore.go \ + blobreader.go \ + +include $(GOROOT)/src/Make.pkg diff --git a/pkg/blobstore/blobreader.go b/pkg/blobstore/blobreader.go new file mode 100644 index 0000000..088d848 --- /dev/null +++ b/pkg/blobstore/blobreader.go @@ -0,0 +1,53 @@ +// Copyright (c) 2011 The Grumble Authors +// The use of this source code is goverened by a BSD-style +// license that can be found in the LICENSE-file. + +package blobstore + +import ( + "bytes" + "crypto/sha1" + "encoding/hex" + "hash" + "io" + "os" +) + +// blobReader is based on the principles of the checksumReader from the archive/zip +// package of the Go standard library. + +// ErrHashMismatch is returned if a blobReader has read a file whose computed hash +// did not match its key. +var ErrHashMismatch = os.NewError("hash mismatch") + +// blobReader reads a blob from disk, hashing all incoming data. On EOF, it checks +// whether the read data matches the key. +type blobReader struct { + rc io.ReadCloser + sum []byte + hash hash.Hash +} + +func newBlobReader(rc io.ReadCloser, key string) (br *blobReader, err os.Error) { + sum, err := hex.DecodeString(key) + if err != nil { + return + } + return &blobReader{rc, sum, sha1.New()}, nil +} + +func (r *blobReader) Read(b []byte) (n int, err os.Error) { + n, err = r.rc.Read(b) + r.hash.Write(b[:n]) + if err != os.EOF { + return + } + if !bytes.Equal(r.sum, r.hash.Sum()) { + err = ErrHashMismatch + } + return +} + +func (r *blobReader) Close() os.Error { + return r.rc.Close() +} diff --git a/pkg/blobstore/blobstore.go b/pkg/blobstore/blobstore.go new file mode 100644 index 0000000..558db3b --- /dev/null +++ b/pkg/blobstore/blobstore.go @@ -0,0 +1,318 @@ +// Copyright (c) 2011 The Grumble Authors +// The use of this source code is goverened by a BSD-style +// license that can be found in the LICENSE-file. + +// This package implements a simple disk-persisted content-addressed +// blobstore. +package blobstore + +import ( + "crypto/sha1" + "encoding/hex" + "io/ioutil" + "path/filepath" + "os" + "strconv" + "syscall" +) + +type BlobStore struct { + dir string + lockfn string + makeall bool +} + +var ( + ErrLocked = os.NewError("lockfile acquired by another process") + ErrLockAcquirement = os.NewError("unable to acquire lockfile") + ErrBadFile = os.NewError("a bad file exists in the blobstore directory. unable to create container directores.") + ErrNoSuchKey = os.NewError("no such key") + ErrInvalidKey = os.NewError("invalid key") +) + +// Open an existing, or create a new BlobStore residing at path. +// Path must point to a directory, and must already exist. +// +// The makeall argument determines whether the BlobStore should +// create all possible blob-container directories a priori. +// This can take up a bit of disk space since the metadata for +// those directories can take up a lot of space. However, tt saves +// some I/O operations when writing blobs. (Since the BlobStore +// knows that all directories will exist, it does not need to check +// whether they do, and create them if they do not.). +func NewBlobStore(path string, makeall bool) (bs *BlobStore, err os.Error) { + // Does the directory exist? + dir, err := os.Open(path) + if err != nil { + return + } + dir.Close() + + // Try to acquire an exclusive lock on the blobstore. + lockfn := filepath.Join(path, "lock") + err = acquireLockfile(lockfn) + if err != nil { + return nil, err + } + // Make sure to remove the lockfile if we return with an error. + // It would be impossible for users to remove it (they wouldn't + // know the filename.) + defer func() { + if err != nil { + os.Remove(lockfn) + } + }() + + if makeall { + for i := 0; i < 256; i++ { + outer := filepath.Join(path, hex.EncodeToString([]byte{byte(i)})) + err = os.Mkdir(outer, 0700) + if e, ok := err.(*os.PathError); ok { + switch e.Error { + case os.EEXIST: + // The file alread exists. Stat it to check whether it is indeed + // a directory. + fi, err := os.Stat(outer) + if err != nil { + return nil, err + } + if !fi.IsDirectory() { + return nil, ErrBadFile + } + case os.ENOTDIR: + return nil, ErrBadFile + } + } else if err != nil { + return nil, err + } + for j := 0; j < 256; j++ { + inner := filepath.Join(outer, hex.EncodeToString([]byte{byte(j)})) + err = os.Mkdir(inner, 0700) + if e, ok := err.(*os.PathError); ok { + switch e.Error { + case os.EEXIST: + // The file alread exists. Stat it to check whether it is indeed + // a directory. + fi, err := os.Stat(inner) + if err != nil { + return nil, err + } + if !fi.IsDirectory() { + return nil, ErrBadFile + } + case os.ENOTDIR: + return nil, ErrBadFile + } + } else if err != nil { + return nil, err + } + } + } + } + + bs = &BlobStore{ + dir: path, + lockfn: lockfn, + makeall: makeall, + } + return bs, nil +} + +// Close an open BlobStore. This removes the lockfile allowing +// other processes to open the BlobStore. +func (bs *BlobStore) Close() (err os.Error) { + return os.Remove(bs.lockfn) +} + +// Acquire an exclusive lock for the BlobStore in directory dir. +func acquireLockfile(path string) os.Error { + dir, fn := filepath.Split(path) + lockfn := filepath.Join(dir, fn) + + lockfile, err := os.OpenFile(lockfn, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0600) + if e, ok := err.(*os.PathError); ok && e.Error == os.EEXIST { + content, err := ioutil.ReadFile(lockfn) + if err != nil { + return err + } + + pid, err := strconv.Atoi(string(content)) + if err == nil { + errno := syscall.Kill(pid, 0) + if errno == 0 { + return ErrLocked + } + } + + lockfile, err = ioutil.TempFile(dir, "lock") + if err != nil { + return err + } + + _, err = lockfile.WriteString(strconv.Itoa(syscall.Getpid())) + if err != nil { + lockfile.Close() + return ErrLockAcquirement + } + + curfn := lockfile.Name() + + err = lockfile.Close() + if err != nil { + return err + } + + err = os.Rename(curfn, lockfn) + if err != nil { + os.Remove(curfn) + return ErrLockAcquirement + } + } else if err != nil { + return err + } else { + _, err = lockfile.WriteString(strconv.Itoa(syscall.Getpid())) + if err != nil { + return err + } + lockfile.Close() + } + + return nil +} + +// Checks that a given key is a valid key for the BlobStore. +// If it is, it returns the three components that make up the on-disk path +// the given key can be found or should be stored at. +func getKeyComponents(key string) (dir1, dir2, fn string, err os.Error) { + // SHA1 digests are 40 bytes long when hex-encoded + if len(key) != 40 { + err = ErrInvalidKey + return + } + // Check whether the string is valid hex-encoding. + _, err = hex.DecodeString(key) + if err != nil { + err = ErrInvalidKey + return + } + + return key[0:2], key[2:4], key[4:], nil +} + +// Lookup the path hat a key would have on disk. +// Returns an error if the key is not a valid BlobStore key. +func (bs *BlobStore) pathForKey(key string) (fn string, err os.Error) { + dir1, dir2, rest, err := getKeyComponents(key) + if err != nil { + return + } + + fn = filepath.Join(bs.dir, dir1, dir2, rest) + return +} + +// Lookup a blob by its key and return a buffer containing the contents +// of the blob. +func (bs *BlobStore) Get(key string) (buf []byte, err os.Error) { + fn, err := bs.pathForKey(key) + if err != nil { + return + } + + file, err := os.Open(fn) + if e, ok := err.(*os.PathError); ok && e.Error == os.ENOENT { + err = ErrNoSuchKey + return + } else if err != nil { + return + } + + br, err := newBlobReader(file, key) + if err != nil { + file.Close() + return + } + defer br.Close() + + buf, err = ioutil.ReadAll(br) + if err != nil { + return + } + + return +} + +// Store a blob. If the blob was successfully stored, the returned key +// can be used to retrieve the buf from the BlobStore. +func (bs *BlobStore) Put(buf []byte) (key string, err os.Error) { + // Calculate the key for the blob. We can't really delay it more than this, + // since we need to know the key for the blob to check whether it's already on + // disk. + h := sha1.New() + h.Write(buf) + key = hex.EncodeToString(h.Sum()) + + // Get the components that make up the on-disk + // path for the blob. + dir1, dir2, rest, err := getKeyComponents(key) + if err != nil { + return + } + + blobpath := filepath.Join(bs.dir, dir1, dir2, rest) + blobdir := filepath.Join(bs.dir, dir1, dir2) + + // Check if the blob already exists. + file, err := os.Open(blobpath) + if err == nil { + // File exists. Job's done. + file.Close() + return + } else { + if e, ok := err.(*os.PathError); ok && e.Error == os.ENOENT { + // No such file exists on disk. Ready to rock! + } else { + return + } + } + + if !bs.makeall { + // Make sure the leading directories exist... + err = os.MkdirAll(blobdir, 0700) + if err != nil { + return + } + } + + // Create a temporary file to write to. Once we're done, we + // can atomically rename the file to the correct key. + file, err = ioutil.TempFile(blobdir, rest) + if err != nil { + return + } + + tmpfn := file.Name() + + _, err = file.Write(buf) + if err != nil { + return + } + + err = file.Sync() + if err != nil { + return "", err + } + + err = file.Close() + if err != nil { + return "", err + } + + err = os.Rename(tmpfn, blobpath) + if err != nil { + os.Remove(tmpfn) + return + } + + return key, nil +} diff --git a/pkg/blobstore/blobstore_test.go b/pkg/blobstore/blobstore_test.go new file mode 100644 index 0000000..384ca70 --- /dev/null +++ b/pkg/blobstore/blobstore_test.go @@ -0,0 +1,207 @@ +// Copyright (c) 2011 The Grumble Authors +// The use of this source code is goverened by a BSD-style +// license that can be found in the LICENSE-file. + +package blobstore + +import ( + "bytes" + "crypto/sha1" + "encoding/hex" + "io/ioutil" + "os" + "path/filepath" + "testing" +) + +func TestMakeAllCreateAll(t *testing.T) { + dir, err := ioutil.TempDir("", "blobstore") + if err != nil { + t.Errorf(err.String()) + return + } + defer os.RemoveAll(dir) + + bs, err := NewBlobStore(dir, true) + if err != nil { + t.Errorf(err.String()) + return + } + defer bs.Close() + + // Check whether the blobstore created all the directories... + for i := 0; i < 256; i++ { + for j := 0; j < 256; j++ { + dirname := filepath.Join(dir, hex.EncodeToString([]byte{byte(i)}), hex.EncodeToString([]byte{byte(j)})) + fi, err := os.Stat(dirname) + if err != nil { + t.Errorf(err.String()) + } + if !fi.IsDirectory() { + t.Errorf("Not a directory") + } + } + } +} + +func TestAllInvalidFiles(t *testing.T) { + dir, err := ioutil.TempDir("", "blobstore") + if err != nil { + t.Errorf(err.String()) + return + } + defer os.RemoveAll(dir) + + err = ioutil.WriteFile(filepath.Join(dir, "00"), []byte{0x0f, 0x00}, 0600) + if err != nil { + t.Errorf(err.String()) + } + + _, err = NewBlobStore(dir, true) + if err == ErrBadFile { + // Success + } else if err != nil { + t.Errorf(err.String()) + } else { + t.Errorf("NewBlobStore returned without error") + } +} + +func TestAllInvalidFilesLevel2(t *testing.T) { + dir, err := ioutil.TempDir("", "blobstore") + if err != nil { + t.Errorf(err.String()) + return + } + defer os.RemoveAll(dir) + + err = os.Mkdir(filepath.Join(dir, "00"), 0700) + if err != nil { + t.Errorf(err.String()) + } + + err = ioutil.WriteFile(filepath.Join(dir, "00", "00"), []byte{0x0f, 0x00}, 0600) + if err != nil { + t.Errorf(err.String()) + } + + _, err = NewBlobStore(dir, true) + if err == ErrBadFile { + // Success + } else if err != nil { + t.Errorf(err.String()) + } else { + t.Errorf("NewBlobStore returned without error") + } +} + +func TestStoreRetrieve(t *testing.T) { + dir, err := ioutil.TempDir("", "blobstore") + if err != nil { + t.Errorf(err.String()) + return + } + defer os.RemoveAll(dir) + + bs, err := NewBlobStore(dir, false) + if err != nil { + t.Errorf(err.String()) + return + } + defer bs.Close() + + data := []byte{0xde, 0xad, 0xca, 0xfe, 0xba, 0xbe, 0xbe, 0xef} + + key, err := bs.Put(data) + if err != nil { + t.Errorf(err.String()) + return + } + + recv, err := bs.Get(key) + if err != nil { + t.Errorf(err.String()) + } + + if !bytes.Equal(recv, data) { + t.Errorf("stored data and retrieved data does not match: %v vs. %v", recv, data) + } +} + +func TestReadNonExistantKey(t *testing.T) { + dir, err := ioutil.TempDir("", "blobstore") + if err != nil { + t.Errorf(err.String()) + return + } + defer os.RemoveAll(dir) + + bs, err := NewBlobStore(dir, false) + if err != nil { + t.Errorf(err.String()) + return + } + defer bs.Close() + + h := sha1.New() + h.Write([]byte{0x42}) + key := hex.EncodeToString(h.Sum()) + buf, err := bs.Get(key) + if err != ErrNoSuchKey { + t.Errorf("Expected no such key %v, found it anyway. (buf=%v, err=%v)", key, buf, err) + return + } +} + +func TestReadInvalidKeyLength(t *testing.T) { + dir, err := ioutil.TempDir("", "blobstore") + if err != nil { + t.Errorf(err.String()) + } + defer os.RemoveAll(dir) + + bs, err := NewBlobStore(dir, false) + if err != nil { + t.Errorf(err.String()) + return + } + defer bs.Close() + + key := "" + for i := 0; i < 5; i++ { + key += "0" + } + + _, err = bs.Get(key) + if err != ErrInvalidKey { + t.Errorf("Expected invalid key for %v, got %v", key, err) + return + } +} + +func TestReadInvalidKeyNonHex(t *testing.T) { + dir, err := ioutil.TempDir("", "blobstore") + if err != nil { + t.Errorf(err.String()) + return + } + defer os.RemoveAll(dir) + + bs, err := NewBlobStore(dir, false) + if err != nil { + t.Errorf(err.String()) + return + } + defer bs.Close() + + key := "" + for i := 0; i < 40; i++ { + key += "i" + } + + _, err = bs.Get(key) + if err != ErrInvalidKey { + t.Errorf("Expected invalid key for %v, got %v", key, err) + return + } +}