From 19a388bfb8d65569e21d55e7d7bc6b40a1c1da5d Mon Sep 17 00:00:00 2001 From: Yurzs Date: Sun, 1 Sep 2024 23:35:58 +0700 Subject: [PATCH 1/2] api: expose tokenize and detokenize endpoints --- api/client.go | 18 +++++++++++++++++ api/types.go | 38 +++++++++++++++++++++++++++++++++++ server/routes.go | 52 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 108 insertions(+) diff --git a/api/client.go b/api/client.go index 2528fb21..e1e8ff55 100644 --- a/api/client.go +++ b/api/client.go @@ -360,6 +360,24 @@ func (c *Client) Embeddings(ctx context.Context, req *EmbeddingRequest) (*Embedd return &resp, nil } +// Tokenize tokenizes a string. +func (c *Client) Tokenize(ctx context.Context, req *TokenizeRequest) (*TokenizeResponse, error) { + var resp TokenizeResponse + if err := c.do(ctx, http.MethodPost, "/api/tokenize", req, &resp); err != nil { + return nil, err + } + return &resp, nil +} + +// Detokenize detokenizes a string. +func (c *Client) Detokenize(ctx context.Context, req *DetokenizeRequest) (*DetokenizeResponse, error) { + var resp DetokenizeResponse + if err := c.do(ctx, http.MethodPost, "/api/detokenize", req, &resp); err != nil { + return nil, err + } + return &resp, nil +} + // CreateBlob creates a blob from a file on the server. digest is the // expected SHA256 digest of the file, and r represents the file. func (c *Client) CreateBlob(ctx context.Context, digest string, r io.Reader) error { diff --git a/api/types.go b/api/types.go index df7bab21..13cb236f 100644 --- a/api/types.go +++ b/api/types.go @@ -293,6 +293,44 @@ type EmbeddingResponse struct { Embedding []float64 `json:"embedding"` } +// TokenizeRequest is the request passed to [Client.Tokenize]. +type TokenizeRequest struct { + Model string `json:"model"` + Prompt string `json:"prompt"` + + // KeepAlive controls how long the model will stay loaded in memory following + // this request. + KeepAlive *Duration `json:"keep_alive,omitempty"` + + // Options lists model-specific options. 
+ Options map[string]interface{} `json:"options"` +} + +// TokenizeResponse is the response from [Client.Tokenize]. +type TokenizeResponse struct { + Model string `json:"model"` + Tokens []int `json:"tokens"` +} + +// DetokenizeRequest is the request passed to [Client.Detokenize]. +type DetokenizeRequest struct { + Model string `json:"model"` + Tokens []int `json:"tokens"` + + // KeepAlive controls how long the model will stay loaded in memory following + // this request. + KeepAlive *Duration `json:"keep_alive,omitempty"` + + // Options lists model-specific options. + Options map[string]interface{} `json:"options"` +} + +// DetokenizeResponse is the response from [Client.Detokenize]. +type DetokenizeResponse struct { + Model string `json:"model"` + Text string `json:"text"` +} + // CreateRequest is the request passed to [Client.Create]. type CreateRequest struct { Model string `json:"model"` diff --git a/server/routes.go b/server/routes.go index 5e9f51e1..c9f2bf69 100644 --- a/server/routes.go +++ b/server/routes.go @@ -463,6 +463,56 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) { c.JSON(http.StatusOK, resp) } +func (s *Server) TokenizeHandler(c *gin.Context) { + var req api.TokenizeRequest + if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) { + c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"}) + return + } else if err != nil { + c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()}) + return + } + + r, _, _, err := s.scheduleRunner(c.Request.Context(), req.Model, []Capability{}, req.Options, req.KeepAlive) + if err != nil { + handleScheduleError(c, req.Model, err) + return + } + + tokens, err := r.Tokenize(c.Request.Context(), req.Prompt) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) + return + } + + c.JSON(http.StatusOK, api.TokenizeResponse{Model: req.Model, Tokens: tokens}) +} + +func (s *Server) DetokenizeHandler(c *gin.Context) { + var req 
api.DetokenizeRequest + if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) { + c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"}) + return + } else if err != nil { + c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()}) + return + } + + r, _, _, err := s.scheduleRunner(c.Request.Context(), req.Model, []Capability{}, req.Options, req.KeepAlive) + if err != nil { + handleScheduleError(c, req.Model, err) + return + } + + text, err := r.Detokenize(c.Request.Context(), req.Tokens) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) + return + } + + c.JSON(http.StatusOK, api.DetokenizeResponse{Model: req.Model, Text: text}) +} + func (s *Server) PullHandler(c *gin.Context) { var req api.PullRequest err := c.ShouldBindJSON(&req) @@ -1086,6 +1136,8 @@ func (s *Server) GenerateRoutes() http.Handler { r.POST("/api/chat", s.ChatHandler) r.POST("/api/embed", s.EmbedHandler) r.POST("/api/embeddings", s.EmbeddingsHandler) + r.POST("/api/tokenize", s.TokenizeHandler) + r.POST("/api/detokenize", s.DetokenizeHandler) r.POST("/api/create", s.CreateHandler) r.POST("/api/push", s.PushHandler) r.POST("/api/copy", s.CopyHandler) From cf2fb5e0ea1a88380467b91dc64f8027edf3de1d Mon Sep 17 00:00:00 2001 From: Yurzs Date: Mon, 2 Sep 2024 00:06:36 +0700 Subject: [PATCH 2/2] docs: add tokenize and detokenize api --- docs/api.md | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/docs/api.md b/docs/api.md index aed2b69f..39baa337 100644 --- a/docs/api.md +++ b/docs/api.md @@ -13,6 +13,8 @@ - [Push a Model](#push-a-model) - [Generate Embeddings](#generate-embeddings) - [List Running Models](#list-running-models) +- [Tokenize Text](#tokenize-text) +- [Detokenize Text](#detokenize-text) ## Conventions @@ -1250,6 +1252,83 @@ A single JSON object will be returned. 
}
 ```
 
+## Tokenize Text
+
+Tokenize text using a model
+
+```shell
+POST /api/tokenize
+```
+
+##### Parameters
+
+- `model`: name of model to generate tokens from
+- `prompt`: text to generate tokens for
+
+##### Advanced parameters:
+
+- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
+- `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)
+
+### Examples
+
+#### Request
+
+```shell
+curl -X POST http://localhost:11434/api/tokenize -d '{
+  "model": "llama3.1:8b",
+  "prompt": "Why the sky is blue?"
+}'
+```
+
+#### Response
+
+```json
+{
+  "model": "llama3.1:8b",
+  "tokens": [10445,279,13180,374,6437,30]
+}
+```
+
+## Detokenize Text
+
+Detokenize text using a model
+
+```shell
+POST /api/detokenize
+```
+
+##### Parameters
+
+- `model`: name of model to generate text from
+- `tokens`: list of tokens to generate text from
+
+##### Advanced parameters:
+
+- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
+- `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)
+
+### Examples
+
+#### Request
+
+```shell
+curl -X POST http://localhost:11434/api/detokenize -d '{
+  "model": "llama3.1:8b",
+  "tokens": [10445,279,13180,374,6437,30]
+}'
+```
+
+#### Response
+
+```json
+{
+  "model": "llama3.1:8b",
+  "text": "Why the sky is blue?"
+}
+```
+
+
 ## Generate Embedding
 
 > Note: this endpoint has been superseded by `/api/embed`