diff --git a/build/build.go b/build/build.go index 374ba827..f668bb73 100644 --- a/build/build.go +++ b/build/build.go @@ -6,6 +6,7 @@ import ( "fmt" "io/fs" "os" + "path/filepath" "bllamo.com/build/blob" "bllamo.com/build/internal/blobstore" @@ -20,6 +21,7 @@ var ( ErrMissingFileType = errors.New("missing 'general.file_type' key") ErrNoSuchBlob = errors.New("no such blob") ErrNotFound = errors.New("not found") + ErrUnknownRef = errors.New("unknown ref") ) type mediaType string @@ -96,7 +98,7 @@ func (s *Server) Build(ref string, f model.File) error { if err != nil { return err } - return s.st.Set(br.WithBuild(info.FileType.String()), data) + return s.Set(br.WithBuild(info.FileType.String()), data) } func (s *Server) LayerFile(digest string) (string, error) { @@ -135,6 +137,55 @@ func (s *Server) WeightsFile(ref string) (string, error) { return "", fmt.Errorf("missing weights layer for %q", ref) } +// resolve returns the data for the given ref, if any. +// +// TODO: This should ideally return an ID, but the current on +// disk layout is that the actual manifest is stored in the "ref" instead of +// a pointer to a content-addressed blob. I (bmizerany) think we should +// change the on-disk layout to store the manifest in a content-addressed +// blob, and then have the ref point to that blob. This would simplify the +// code, allow us to have integrity checks on the manifest, and clean up +// this interface. +func (s *Server) resolve(ref blob.Ref) (data []byte, path string, err error) { + path, err = s.refFileName(ref) + if err != nil { + return nil, "", err + } + data, err = os.ReadFile(path) + if errors.Is(err, fs.ErrNotExist) { + return nil, "", fmt.Errorf("%w: %q", ErrUnknownRef, ref) + } + if err != nil { + // do not wrap the error here, as it is likely an I/O error + // and we want to preserve the absraction since we may not + // be on disk later. + return nil, "", fmt.Errorf("manifest read error: %v", err) + } + return data, path, nil +} + +// Set sets the data for the given ref. +func (s *Server) Set(ref blob.Ref, data []byte) error { + path, err := s.refFileName(ref) + if err != nil { + return err + } + if err := os.MkdirAll(filepath.Dir(path), 0777); err != nil { + return err + } + if err := os.WriteFile(path, data, 0666); err != nil { + return err + } + return nil +} + +func (s *Server) refFileName(ref blob.Ref) (string, error) { + if !ref.Complete() { + return "", fmt.Errorf("ref not fully qualified: %q", ref) + } + return filepath.Join(s.st.Dir(), "manifests", filepath.Join(ref.Parts()...)), nil +} + type manifestJSON struct { // Layers is the list of layers in the manifest. Layers []layerJSON `json:"layers"` @@ -161,8 +212,8 @@ func (s *Server) getManifest(ref blob.Ref) (manifestJSON, error) { } func (s *Server) getManifestData(ref blob.Ref) (data []byte, path string, err error) { - data, path, err = s.st.Resolve(ref) - if errors.Is(err, blobstore.ErrUnknownRef) { + data, path, err = s.resolve(ref) + if errors.Is(err, ErrUnknownRef) { return nil, "", fmt.Errorf("%w: %q", ErrNotFound, ref) } return data, path, err diff --git a/build/internal/blobstore/blob.go b/build/internal/blobstore/blob.go index 18664abe..24dbad6f 100644 --- a/build/internal/blobstore/blob.go +++ b/build/internal/blobstore/blob.go @@ -13,13 +13,11 @@ import ( "strings" "time" - "bllamo.com/build/blob" "bllamo.com/types/structs" ) var ( - ErrInvalidID = errors.New("invalid ID") - ErrUnknownRef = errors.New("unknown ref") + ErrInvalidID = errors.New("invalid ID") ) const HashSize = 32 @@ -100,13 +98,9 @@ func Open(dir string) (*Store, error) { if !info.IsDir() { return nil, &fs.PathError{Op: "open", Path: dir, Err: fmt.Errorf("not a directory")} } - - for _, sub := range []string{"blobs", "manifests"} { - if err := os.MkdirAll(filepath.Join(dir, sub), 0777); err != nil { - return nil, err - } + if err := os.MkdirAll(filepath.Join(dir, "blobs"), 0777); err != nil { + return nil, err } - c := &Store{ dir: dir, now: time.Now, @@ -114,6 +108,10 @@ func Open(dir string) (*Store, error) { return c, nil } +func (s *Store) Dir() string { + return s.dir +} + // fileName returns the name of the blob file corresponding to the given id. func (s *Store) fileName(id ID) string { return filepath.Join(s.dir, "blobs", fmt.Sprintf("sha256-%x", id.a[:])) @@ -185,52 +183,6 @@ func (s *Store) OutputFilename(id ID) string { return file } -// Resolve returns the data for the given ref, if any. -// -// TODO: This should ideally return an ID, but the current on -// disk layout is that the actual manifest is stored in the "ref" instead of -// a pointer to a content-addressed blob. I (bmizerany) think we should -// change the on-disk layout to store the manifest in a content-addressed -// blob, and then have the ref point to that blob. This would simplify the -// code, allow us to have integrity checks on the manifest, and clean up -// this interface. -func (s *Store) Resolve(ref blob.Ref) (data []byte, path string, err error) { - path, err = s.refFileName(ref) - if err != nil { - return nil, "", err - } - data, err = os.ReadFile(path) - if errors.Is(err, fs.ErrNotExist) { - return nil, "", fmt.Errorf("%w: %q", ErrUnknownRef, ref) - } - if err != nil { - return nil, "", &entryNotFoundError{Err: err} - } - return data, path, nil -} - -// Set sets the data for the given ref. -func (s *Store) Set(ref blob.Ref, data []byte) error { - path, err := s.refFileName(ref) - if err != nil { - return err - } - if err := os.MkdirAll(filepath.Dir(path), 0777); err != nil { - return err - } - if err := os.WriteFile(path, data, 0666); err != nil { - return err - } - return nil -} - -func (s *Store) refFileName(ref blob.Ref) (string, error) { - if !ref.Complete() { - return "", fmt.Errorf("ref not fully qualified: %q", ref) - } - return filepath.Join(s.dir, "manifests", filepath.Join(ref.Parts()...)), nil -} - // Get looks up the blob ID in the store, // returning the corresponding output ID and file size, if any. // Note that finding an output ID does not guarantee that the diff --git a/build/internal/blobstore/store_test.go b/build/internal/blobstore/store_test.go index 6f698f9f..0e64fc1a 100644 --- a/build/internal/blobstore/store_test.go +++ b/build/internal/blobstore/store_test.go @@ -31,7 +31,6 @@ func TestStoreBasicBlob(t *testing.T) { checkDir(t, dir, []string{ "blobs/", - "manifests/", }) id, size, err := PutBytes(st, []byte("hello")) @@ -49,7 +48,6 @@ func TestStoreBasicBlob(t *testing.T) { checkDir(t, dir, []string{ "blobs/", "blobs/" + blobNameHello, - "manifests/", }) got, err := st.Get(id) @@ -74,37 +72,6 @@ func TestStoreBasicBlob(t *testing.T) { t.Logf("RESOLVING: %q", ref.Parts()) - data, _, err := st.Resolve(ref) - if !errors.Is(err, ErrUnknownRef) { - t.Fatalf("unexpected error: %v", err) - } - if data != nil { - t.Errorf("unexpected data: %q", data) - } - - if err := st.Set(ref, []byte("{}")); err != nil { - t.Fatal(err) - } - - data, _, err = st.Resolve(ref) - if err != nil { - t.Fatal(err) - } - - if g := string(data); g != "{}" { - t.Errorf("g = %q; want %q", g, "{}") - } - - checkDir(t, dir, []string{ - "blobs/", - "blobs/sha256-2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824", - "manifests/", - "manifests/registry.ollama.ai/", - "manifests/registry.ollama.ai/library/", - "manifests/registry.ollama.ai/library/test/", - "manifests/registry.ollama.ai/library/test/latest/", - "manifests/registry.ollama.ai/library/test/latest/KQED", - }) } // checkDir checks that the directory at dir contains the files in want. The diff --git a/registry/server.go b/registry/server.go index f22d0b1f..1dbc6392 100644 --- a/registry/server.go +++ b/registry/server.go @@ -61,7 +61,7 @@ func (s *Server) handlePush(w http.ResponseWriter, r *http.Request) error { } ref := blob.ParseRef(pr.Ref) - if !ref.FullyQualified() { + if !ref.Complete() { return oweb.Mistake("invalid", "name", "must be fully qualified") } @@ -106,7 +106,7 @@ func (s *Server) handlePush(w http.ResponseWriter, r *http.Request) error { if len(requirements) == 0 { // Commit the manifest body := bytes.NewReader(pr.Manifest) - path := path.Join("manifests", ref.Path()) + path := path.Join("manifests", path.Join(ref.Parts()...)) _, err := mc.PutObject(r.Context(), "test", path, body, int64(len(pr.Manifest)), minio.PutObjectOptions{}) if err != nil { return err