From 876f7eab812449fbae8d3f24594f67acd3c1bc9a Mon Sep 17 00:00:00 2001 From: Blake Mizerany Date: Mon, 1 Apr 2024 21:43:30 -0700 Subject: [PATCH] build: move Manifest from internal/blobstore to build It was getting confusing to have the arbirary handling of manifests in the blobstore. It also prevented us from using model.Ref in the blobstore because of cyclic dependencies. This is much easier to grok now. --- build/build.go | 57 +++++++++++++++++++++-- build/internal/blobstore/blob.go | 62 +++----------------------- build/internal/blobstore/store_test.go | 33 -------------- registry/server.go | 4 +- 4 files changed, 63 insertions(+), 93 deletions(-) diff --git a/build/build.go b/build/build.go index 374ba827..f668bb73 100644 --- a/build/build.go +++ b/build/build.go @@ -6,6 +6,7 @@ import ( "fmt" "io/fs" "os" + "path/filepath" "bllamo.com/build/blob" "bllamo.com/build/internal/blobstore" @@ -20,6 +21,7 @@ var ( ErrMissingFileType = errors.New("missing 'general.file_type' key") ErrNoSuchBlob = errors.New("no such blob") ErrNotFound = errors.New("not found") + ErrUnknownRef = errors.New("unknown ref") ) type mediaType string @@ -96,7 +98,7 @@ func (s *Server) Build(ref string, f model.File) error { if err != nil { return err } - return s.st.Set(br.WithBuild(info.FileType.String()), data) + return s.Set(br.WithBuild(info.FileType.String()), data) } func (s *Server) LayerFile(digest string) (string, error) { @@ -135,6 +137,55 @@ func (s *Server) WeightsFile(ref string) (string, error) { return "", fmt.Errorf("missing weights layer for %q", ref) } +// resolve returns the data for the given ref, if any. +// +// TODO: This should ideally return an ID, but the current on +// disk layout is that the actual manifest is stored in the "ref" instead of +// a pointer to a content-addressed blob. I (bmizerany) think we should +// change the on-disk layout to store the manifest in a content-addressed +// blob, and then have the ref point to that blob. This would simplify the +// code, allow us to have integrity checks on the manifest, and clean up +// this interface. +func (s *Server) resolve(ref blob.Ref) (data []byte, path string, err error) { + path, err = s.refFileName(ref) + if err != nil { + return nil, "", err + } + data, err = os.ReadFile(path) + if errors.Is(err, fs.ErrNotExist) { + return nil, "", fmt.Errorf("%w: %q", ErrUnknownRef, ref) + } + if err != nil { + // do not wrap the error here, as it is likely an I/O error + // and we want to preserve the absraction since we may not + // be on disk later. + return nil, "", fmt.Errorf("manifest read error: %v", err) + } + return data, path, nil +} + +// Set sets the data for the given ref. +func (s *Server) Set(ref blob.Ref, data []byte) error { + path, err := s.refFileName(ref) + if err != nil { + return err + } + if err := os.MkdirAll(filepath.Dir(path), 0777); err != nil { + return err + } + if err := os.WriteFile(path, data, 0666); err != nil { + return err + } + return nil +} + +func (s *Server) refFileName(ref blob.Ref) (string, error) { + if !ref.Complete() { + return "", fmt.Errorf("ref not fully qualified: %q", ref) + } + return filepath.Join(s.st.Dir(), "manifests", filepath.Join(ref.Parts()...)), nil +} + type manifestJSON struct { // Layers is the list of layers in the manifest. Layers []layerJSON `json:"layers"` @@ -161,8 +212,8 @@ func (s *Server) getManifest(ref blob.Ref) (manifestJSON, error) { } func (s *Server) getManifestData(ref blob.Ref) (data []byte, path string, err error) { - data, path, err = s.st.Resolve(ref) - if errors.Is(err, blobstore.ErrUnknownRef) { + data, path, err = s.resolve(ref) + if errors.Is(err, ErrUnknownRef) { return nil, "", fmt.Errorf("%w: %q", ErrNotFound, ref) } return data, path, err diff --git a/build/internal/blobstore/blob.go b/build/internal/blobstore/blob.go index 18664abe..24dbad6f 100644 --- a/build/internal/blobstore/blob.go +++ b/build/internal/blobstore/blob.go @@ -13,13 +13,11 @@ import ( "strings" "time" - "bllamo.com/build/blob" "bllamo.com/types/structs" ) var ( - ErrInvalidID = errors.New("invalid ID") - ErrUnknownRef = errors.New("unknown ref") + ErrInvalidID = errors.New("invalid ID") ) const HashSize = 32 @@ -100,13 +98,9 @@ func Open(dir string) (*Store, error) { if !info.IsDir() { return nil, &fs.PathError{Op: "open", Path: dir, Err: fmt.Errorf("not a directory")} } - - for _, sub := range []string{"blobs", "manifests"} { - if err := os.MkdirAll(filepath.Join(dir, sub), 0777); err != nil { - return nil, err - } + if err := os.MkdirAll(filepath.Join(dir, "blobs"), 0777); err != nil { + return nil, err } - c := &Store{ dir: dir, now: time.Now, @@ -114,6 +108,10 @@ func Open(dir string) (*Store, error) { return c, nil } +func (s *Store) Dir() string { + return s.dir +} + // fileName returns the name of the blob file corresponding to the given id. func (s *Store) fileName(id ID) string { return filepath.Join(s.dir, "blobs", fmt.Sprintf("sha256-%x", id.a[:])) @@ -185,52 +183,6 @@ func (s *Store) OutputFilename(id ID) string { return file } -// Resolve returns the data for the given ref, if any. -// -// TODO: This should ideally return an ID, but the current on -// disk layout is that the actual manifest is stored in the "ref" instead of -// a pointer to a content-addressed blob. I (bmizerany) think we should -// change the on-disk layout to store the manifest in a content-addressed -// blob, and then have the ref point to that blob. This would simplify the -// code, allow us to have integrity checks on the manifest, and clean up -// this interface. -func (s *Store) Resolve(ref blob.Ref) (data []byte, path string, err error) { - path, err = s.refFileName(ref) - if err != nil { - return nil, "", err - } - data, err = os.ReadFile(path) - if errors.Is(err, fs.ErrNotExist) { - return nil, "", fmt.Errorf("%w: %q", ErrUnknownRef, ref) - } - if err != nil { - return nil, "", &entryNotFoundError{Err: err} - } - return data, path, nil -} - -// Set sets the data for the given ref. -func (s *Store) Set(ref blob.Ref, data []byte) error { - path, err := s.refFileName(ref) - if err != nil { - return err - } - if err := os.MkdirAll(filepath.Dir(path), 0777); err != nil { - return err - } - if err := os.WriteFile(path, data, 0666); err != nil { - return err - } - return nil -} - -func (s *Store) refFileName(ref blob.Ref) (string, error) { - if !ref.Complete() { - return "", fmt.Errorf("ref not fully qualified: %q", ref) - } - return filepath.Join(s.dir, "manifests", filepath.Join(ref.Parts()...)), nil -} - // Get looks up the blob ID in the store, // returning the corresponding output ID and file size, if any. // Note that finding an output ID does not guarantee that the diff --git a/build/internal/blobstore/store_test.go b/build/internal/blobstore/store_test.go index 6f698f9f..0e64fc1a 100644 --- a/build/internal/blobstore/store_test.go +++ b/build/internal/blobstore/store_test.go @@ -31,7 +31,6 @@ func TestStoreBasicBlob(t *testing.T) { checkDir(t, dir, []string{ "blobs/", - "manifests/", }) id, size, err := PutBytes(st, []byte("hello")) @@ -49,7 +48,6 @@ func TestStoreBasicBlob(t *testing.T) { checkDir(t, dir, []string{ "blobs/", "blobs/" + blobNameHello, - "manifests/", }) got, err := st.Get(id) @@ -74,37 +72,6 @@ func TestStoreBasicBlob(t *testing.T) { t.Logf("RESOLVING: %q", ref.Parts()) - data, _, err := st.Resolve(ref) - if !errors.Is(err, ErrUnknownRef) { - t.Fatalf("unexpected error: %v", err) - } - if data != nil { - t.Errorf("unexpected data: %q", data) - } - - if err := st.Set(ref, []byte("{}")); err != nil { - t.Fatal(err) - } - - data, _, err = st.Resolve(ref) - if err != nil { - t.Fatal(err) - } - - if g := string(data); g != "{}" { - t.Errorf("g = %q; want %q", g, "{}") - } - - checkDir(t, dir, []string{ - "blobs/", - "blobs/sha256-2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824", - "manifests/", - "manifests/registry.ollama.ai/", - "manifests/registry.ollama.ai/library/", - "manifests/registry.ollama.ai/library/test/", - "manifests/registry.ollama.ai/library/test/latest/", - "manifests/registry.ollama.ai/library/test/latest/KQED", - }) } // checkDir checks that the directory at dir contains the files in want. The diff --git a/registry/server.go b/registry/server.go index f22d0b1f..1dbc6392 100644 --- a/registry/server.go +++ b/registry/server.go @@ -61,7 +61,7 @@ func (s *Server) handlePush(w http.ResponseWriter, r *http.Request) error { } ref := blob.ParseRef(pr.Ref) - if !ref.FullyQualified() { + if !ref.Complete() { return oweb.Mistake("invalid", "name", "must be fully qualified") } @@ -106,7 +106,7 @@ func (s *Server) handlePush(w http.ResponseWriter, r *http.Request) error { if len(requirements) == 0 { // Commit the manifest body := bytes.NewReader(pr.Manifest) - path := path.Join("manifests", ref.Path()) + path := path.Join("manifests", path.Join(ref.Parts()...)) _, err := mc.PutObject(r.Context(), "test", path, body, int64(len(pr.Manifest)), minio.PutObjectOptions{}) if err != nil { return err