From bfff252fa936f3a23380aae3e75debc8b1d70c4c Mon Sep 17 00:00:00 2001 From: JD Davis Date: Fri, 14 Jun 2024 13:35:15 -0500 Subject: [PATCH] chore: converted from quoted strings to multiline --- specs/openapi-3.1.yaml | 298 ++++++++++++++++++++++++++++++++--------- 1 file changed, 234 insertions(+), 64 deletions(-) diff --git a/specs/openapi-3.1.yaml b/specs/openapi-3.1.yaml index b601c51e..dba03d79 100644 --- a/specs/openapi-3.1.yaml +++ b/specs/openapi-3.1.yaml @@ -35,8 +35,16 @@ paths: operationId: generateResponse tags: - generate - description: Generate a response for a given prompt with a provided model. This is a streaming endpoint, so there will be a series of responses. The final response object will include statistics and additional data from the request. - summary: Generate a response for a given prompt with a provided model. This is a streaming endpoint, so there will be a series of responses. The final response object will include statistics and additional data from the request. + description: | + Generate a response for a given prompt with a provided model. This is + a streaming endpoint, so there will be a series of responses. The + final response object will include statistics and additional data from + the request. + summary: | + Generate a response for a given prompt with a provided model. This is + a streaming endpoint, so there will be a series of responses. The final + response object will include statistics and additional data from the + request. requestBody: required: true description: Request to generate a response @@ -58,8 +66,16 @@ paths: tags: - chat - generate - description: 'Generate the next message in a chat with a provided model. This is a streaming endpoint, so there will be a series of responses. Streaming can be disabled using "stream": false. The final response object will include statistics and additional data from the request.' - summary: 'Generate the next message in a chat with a provided model. This is a streaming endpoint, so there will be a series of responses. Streaming can be disabled using "stream": false. The final response object will include statistics and additional data from the request.' + description: | + Generate the next message in a chat with a provided model. This is a + streaming endpoint, so there will be a series of responses. Streaming + can be disabled using "stream": false. The final response object will + include statistics and additional data from the request. + summary: | + Generate the next message in a chat with a provided model. This is a + streaming endpoint, so there will be a series of responses. Streaming + can be disabled using "stream": false. The final response object will + include statistics and additional data from the request. requestBody: required: true description: Request to generate a response in a chat @@ -80,8 +96,20 @@ paths: operationId: createModel tags: - models - description: Create a model from a Modelfile. It is recommended to set modelfile to the content of the Modelfile rather than just set path. This is a requirement for remote create. Remote model creation must also create any file blobs, fields such as FROM and ADAPTER, explicitly with the server using Create a Blob and the value to the path indicated in the response. - summary: 'Create a model from a Modelfile. It is recommended to set modelfile to the content of the Modelfile rather than just set path. This is a requirement for remote create. 
Remote model creation must also create any file blobs, fields such as FROM and ADAPTER, explicitly with the server using Create a Blob and the value to the path indicated in the response.' + description: | + Create a model from a Modelfile. It is recommended to set modelfile + to the content of the Modelfile rather than just set path. This is a + requirement for remote create. Remote model creation must also create + any file blobs, fields such as FROM and ADAPTER, explicitly with the + server using Create a Blob and the value to the path indicated in the + response. + summary: | + Create a model from a Modelfile. It is recommended to set modelfile to + the content of the Modelfile rather than just set path. This is a + requirement for remote create. Remote model creation must also create + any file blobs, fields such as FROM and ADAPTER, explicitly with the + server using Create a Blob and the value to the path indicated in the + response. requestBody: required: true description: Request to create a model @@ -102,8 +130,12 @@ paths: operationId: getBlob tags: - blobs - description: Ensures that the file blob used for a FROM or ADAPTER field exists on the server. This is checking your Ollama server and not Ollama.ai. - summary: 'Ensures that the file blob used for a FROM or ADAPTER field exists on the server. This is checking your Ollama server and not Ollama.ai.' + description: | + Ensures that the file blob used for a FROM or ADAPTER field exists on + the server. This is checking your Ollama server and not Ollama.ai. + summary: | + Ensures that the file blob used for a FROM or ADAPTER field exists on + the server. This is checking your Ollama server and not Ollama.ai. parameters: - name: digest in: path @@ -164,8 +196,12 @@ paths: operationId: showModel tags: - models - description: Show information about a model including details, modelfile, template, parameters, license, and system prompt. - summary: 'Show information about a model including details, modelfile, template, parameters, license, and system prompt.' + description: | + Show information about a model including details, modelfile, template, + parameters, license, and system prompt. + summary: | + Show information about a model including details, modelfile, template, + parameters, license, and system prompt. requestBody: required: true description: Request to show a model @@ -186,8 +222,10 @@ paths: operationId: copyModel tags: - models - description: Copy a model. Creates a model with another name from an existing model. - summary: 'Copy a model. Creates a model with another name from an existing model.' + description: | + Copy a model. Creates a model with another name from an existing model. + summary: | + Copy a model. Creates a model with another name from an existing model. requestBody: required: true description: Request to copy a model @@ -226,8 +264,14 @@ paths: operationId: pullModel tags: - models - description: Download a model from the ollama library. Cancelled pulls are resumed from where they left off, and multiple calls will share the same download progress. - summary: 'Download a model from the ollama library. Cancelled pulls are resumed from where they left off, and multiple calls will share the same download progress.' + description: | + Download a model from the ollama library. Cancelled pulls are resumed + from where they left off, and multiple calls will share the same + download progress. + summary: | + Download a model from the ollama library. 
Cancelled pulls are resumed + from where they left off, and multiple calls will share the same + download progress. requestBody: required: true description: Request to pull a model @@ -248,8 +292,12 @@ paths: operationId: pushModel tags: - models - description: Upload a model to a model library. Requires registering for ollama.ai and adding a public key first. - summary: 'Upload a model to a model library. Requires registering for ollama.ai and adding a public key first.' + description: | + Upload a model to a model library. Requires registering for ollama.ai + and adding a public key first. + summary: | + Upload a model to a model library. Requires registering for ollama.ai + and adding a public key first. requestBody: required: true description: Request to push a model @@ -340,30 +388,44 @@ components: items: type: string format: byte - description: A list of base64-encoded images (for multimodal models such as llava) + description: | + A list of base64-encoded images (for multimodal models such as + llava) format: type: string - description: The format to return a response in. Currently the only accepted value is json + description: | + The format to return a response in. Currently the only accepted + value is json options: $ref: '#/components/schemas/Options' system: type: string - description: System message to (overrides what is defined in the Modelfile) + description: | + System message to (overrides what is defined in the Modelfile) template: type: string - description: The prompt template to use (overrides what is defined in the Modelfile) + description: | + The prompt template to use (overrides what is defined in the + Modelfile) context: type: array items: type: integer - description: The context parameter returned from a previous request to /generate, this can be used to keep a short conversational memory + description: | + The context parameter returned from a previous request to /generate, + this can be used to keep a short conversational memory example: [] stream: type: boolean - description: If false the response will be returned as a single response object, rather than a stream of objects + description: | + If false the response will be returned as a single response object, + rather than a stream of objects raw: type: boolean - description: If true no formatting will be applied to the prompt. You may choose to use the raw parameter if you are specifying a full templated prompt in your request to the API + description: | + If true no formatting will be applied to the prompt. You may choose + to use the raw parameter if you are specifying a full templated + prompt in your request to the API keep_alive: $ref: '#/components/schemas/Duration' required: @@ -383,7 +445,9 @@ components: description: Timestamp of the response response: type: string - description: The textual response itself. When done, empty if the response was streamed, if not streamed, this will contain the full response + description: | + The textual response itself.
When done, empty if the response was + streamed, if not streamed, this will contain the full response done: type: boolean description: Specifies if the response is complete @@ -391,7 +455,8 @@ components: type: array items: type: integer - description: When done, encoding of the conversation used in this response + description: | + When done, encoding of the conversation used in this response total_duration: type: number description: When done, time spent generating the response @@ -403,13 +468,15 @@ components: description: When done, number of tokens in the prompt prompt_eval_duration: type: number - description: When done, time spent in nanoseconds evaluating the prompt + description: | + When done, time spent in nanoseconds evaluating the prompt eval_count: type: integer description: When done, number of tokens in the response eval_duration: type: number - description: When done, time in nanoseconds spent generating the response + description: | + When done, time in nanoseconds spent generating the response ChatRequest: type: object @@ -486,10 +553,12 @@ components: modelfile: type: string description: The modelfile content - example: 'FROM llama3\nSYSTEM You are mario from Super Mario Bros.' + example: FROM llama3\nSYSTEM You are mario from Super Mario Bros. stream: type: boolean - description: If false the response will be returned as a single response object, rather than a stream of objects + description: | + If false the response will be returned as a single response object, + rather than a stream of objects quantize: type: string description: Specifies the quantization level of the model @@ -588,10 +657,14 @@ components: example: llama3 insecure: type: boolean - description: allow insecure connections to the library. Only use this if you are pulling from your own library during development. + description: | + allow insecure connections to the library. Only use this if you are + pulling from your own library during development. stream: type: boolean - description: If false the response will be returned as a single response object, rather than a stream of objects + description: | + If false the response will be returned as a single response object, + rather than a stream of objects required: - model @@ -601,13 +674,18 @@ components: properties: model: type: string - description: The name of the model to push in the form of /: + description: | + The name of the model to push in the form of /: insecure: type: boolean - description: Whether to allow insecure connections to the library. Only use this if you are pushing to your library during development + description: | + Whether to allow insecure connections to the library. Only use this + if you are pushing to your library during development stream: type: boolean - description: If false the response will be returned as a single response object, rather than a stream of objects + description: | + If false the response will be returned as a single response object, + rather than a stream of objects required: - model @@ -725,132 +803,224 @@ components: Options: type: object - description: Advanced model and runner options for generation and chat requests + description: | + Advanced model and runner options for generation and chat requests properties: num_keep: type: integer - description: 'Specifies the number of tokens from the beginning of the context ot retain when the context limit is reached. (Default: 4)' + description: | + Specifies the number of tokens from the beginning of + the context ot retain when the context limit is reached. 
+ (Default: 4) example: 4 seed: type: integer - description: 'Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt. (Default: 0)' + description: | + Sets the random number seed to use for generation. Setting this to + a specific number will make the model generate the same text for + the same prompt. + (Default: 0) example: -1 num_predict: type: integer - description: 'Maximum number of tokens to predict when generating text. (Default: 128, -1 = infinite generation, -2 = fill context)' + description: | + Maximum number of tokens to predict when generating text. + (Default: 128, -1 = infinite generation, -2 = fill context) example: -1 top_k: type: integer - description: 'Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)' + description: | + Reduces the probability of generating nonsense. A higher value + (e.g. 100) will give more diverse answers, while a lower value + (e.g. 10) will be more conservative. + (Default: 40) example: 40 top_p: type: number format: float - description: 'Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)' + description: | + Works together with top-k. A higher value (e.g., 0.95) will lead to + more diverse text, while a lower value (e.g., 0.5) will generate + more focused and conservative text. + (Default: 0.9) example: 0.9 tfs_z: type: number format: float - description: 'Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. (default: 1)' + description: | + Tail free sampling is used to reduce the impact of less probable + tokens from the output. A higher value (e.g., 2.0) will reduce the + impact more, while a value of 1.0 disables this setting. + (default: 1) example: 1.0 typical_p: type: number format: float - description: 'Controls the selection of typical words based on their probability distribution. A higher value (e.g., 0.95) focuses on more typical words, reducing the chance of unusual words being selected. (Default: 1.0)' + description: | + Controls the selection of typical words based on their probability + distribution. A higher value (e.g., 0.95) focuses on more typical + words, reducing the chance of unusual words being selected. + (Default: 1.0) example: 1.0 repeat_last_n: type: integer - description: 'Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)' + description: | + Sets how far back for the model to look back to prevent repetition. + (Default: 64, 0 = disabled, -1 = num_ctx) example: 64 temperature: type: number format: float - description: 'The temperature of the model. Increasing the temperature will make the model answer more creatively. (Default: 0.8)' + description: | + The temperature of the model. Increasing the temperature will make + the model answer more creatively. + (Default: 0.8) example: 0.8 repeat_penalty: type: number format: float - description: 'Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)' + description: | + Sets how strongly to penalize repetitions. 
A higher value + (e.g., 1.5) will penalize repetitions more strongly, while a lower + value (e.g., 0.9) will be more lenient. + (Default: 1.1) example: 1.1 presence_penalty: type: number format: float - description: 'Applies a penalty to tokens that have already appeared in the generated text, encouraging the model to introduce new tokens. A higher value increases this penalty, promoting more varied and less repetitive output. (Default: 0.8)' + description: | + Applies a penalty to tokens that have already appeared in the + generated text, encouraging the model to introduce new tokens. A + higher value increases this penalty, promoting more varied and less + repetitive output. + (Default: 0.8) example: 0.8 frequency_penalty: type: number format: float - description: 'Penalizes tokens based on their frequency in the generated text so far. A higher value reduces the likelihood of frequent tokens being generated again, promoting more diverse outputs. (Default: 0.8)' + description: | + Penalizes tokens based on their frequency in the generated text so + far. A higher value reduces the likelihood of frequent tokens being + generated again, promoting more diverse outputs. + (Default: 0.8) example: 0.8 mirostat: type: number format: float - description: 'Enable Mirostat sampling for controlling perplexity. (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)' + description: | + Enable Mirostat sampling for controlling perplexity. + (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0) example: 0 mirostat_tau: type: number format: float - description: 'Controls the balance between coherence and diversity of the output. A lower value will result in more focused and coherent text. (Default: 5.0)' + description: | + Controls the balance between coherence and diversity of the output. + A lower value will result in more focused and coherent text. + (Default: 5.0) example: 5.8 mirostat_eta: type: number format: float - description: 'Influences how quickly the algorithm responds to feedback from the generated text. A lower learning rate will result in slower adjustments, while a higher learning rate will make the algorithm more responsive. (Default: 0.1)' + description: | + Influences how quickly the algorithm responds to feedback from the + generated text. A lower learning rate will result in slower + adjustments, while a higher learning rate will make the algorithm + more responsive. + (Default: 0.1) example: 0.1 penalize_newline: type: boolean - description: 'Determines whether the model should penalize the generation of newlines, which can help control the structure and formatting of the output. (Default: true)' + description: | + Determines whether the model should penalize the generation of + newlines, which can help control the structure and formatting of + the output. + (Default: true) example: true stop: type: array items: type: string - description: 'Sets the stop sequences to use. When this pattern is encountered the LLM will stop generating text and return. Multiple stop patterns may be set by specifying multiple separate stop parameters in a modelfile.' + description: | + Sets the stop sequences to use. When this pattern is encountered + the LLM will stop generating text and return. Multiple stop patterns + may be set by specifying multiple separate stop parameters in a + modelfile. example: ['AI assistant.'] numa: type: boolean - description: 'Indicates whether to use Non-Uniform Memory Access (NUMA) for optimizing memory usage and performance on multi-processor systems. 
(Default: false)' + description: | + Indicates whether to use Non-Uniform Memory Access (NUMA) for + optimizing memory usage and performance on multi-processor systems. + (Default: false) example: false num_ctx: type: integer - description: 'Sets the size of the context window used to generate the next token. (Default: 2048)' + description: | + Sets the size of the context window used to generate the next token. + (Default: 2048) example: 2048 num_batch: type: integer - description: 'Specifies the number of batches for processing. (Default: 512)' + description: | + Specifies the number of batches for processing. + (Default: 512) example: 512 num_gpu: type: integer - description: 'Specifies the number of GPUs to use. A value of -1 uses all available GPUs. (Default: -1)' + description: | + Specifies the number of GPUs to use. A value of -1 uses all + available GPUs. + (Default: -1) example: -1 main_gpu: type: integer - description: 'Specifies the primary GPU to use for processing. (Default: 0)' + description: | + Specifies the primary GPU to use for processing. + (Default: 0) low_vram: type: boolean - description: 'Indicates whether to optimize the model for low VRAM usage. (Default: false)' + description: | + Indicates whether to optimize the model for low VRAM usage. + (Default: false) example: false f16_kv: type: boolean - description: 'Indicates whether to use 16-bit floating point precision for key-value pairs, reducing memory usage. (Default: false)' + description: | + Indicates whether to use 16-bit floating point precision for + key-value pairs, reducing memory usage. + (Default: false) example: true logits_all: type: boolean - description: 'Specifies whether to output logits for all tokens. (Default: false)' + description: | + Specifies whether to output logits for all tokens. + (Default: false) example: false vocab_only: type: boolean - description: 'Indicates whether to only load the vocabulary without the full model. (Default: false)' + description: | + Indicates whether to only load the vocabulary without the full model. + (Default: false) example: false use_mmap: type: boolean - description: 'Determines whether to use memory-mapped files for loading the model, improving performance on large models. (Default: true)' + description: | + Determines whether to use memory-mapped files for loading the model, + improving performance on large models. + (Default: true) example: true use_mlock: type: boolean - description: 'Determines whether to use memory locking to prevent swapping the model out of RAM. (Default: false)' + description: | + Determines whether to use memory locking to prevent swapping the + model out of RAM. + (Default: false) example: false num_thread: type: integer - description: 'Specifies the number of threads to use for processing. A value of 0 uses all available threads. (Default: 0)' + description: | + Specifies the number of threads to use for processing. A value of + 0 uses all available threads. + (Default: 0) example: 0
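For reference, the conversion this patch applies is from single-line (often quoted) scalars to YAML literal block scalars. A minimal sketch using one of the spec's own descriptions, shown here only as an illustration and not as part of the patch:

    # before: a quoted single-line scalar; the parsed value contains no line breaks
    description: 'Upload a model to a model library. Requires registering for ollama.ai and adding a public key first.'

    # after: a literal block scalar; the wrapped lines below are kept as newlines in the parsed value
    description: |
      Upload a model to a model library. Requires registering for ollama.ai
      and adding a public key first.

A folded block scalar (">") would instead join the wrapped lines with spaces when the YAML is parsed, which is closer to the original single-line value.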