From 73cba3f0d7abd19846da0e45342cdcae76eeaabb Mon Sep 17 00:00:00 2001 From: JD Davis Date: Thu, 13 Jun 2024 23:09:04 -0500 Subject: [PATCH 1/4] chore: add openapi 3.1 spec for public api --- specs/openapi-3.1.yaml | 857 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 857 insertions(+) create mode 100644 specs/openapi-3.1.yaml diff --git a/specs/openapi-3.1.yaml b/specs/openapi-3.1.yaml new file mode 100644 index 00000000..93c2753c --- /dev/null +++ b/specs/openapi-3.1.yaml @@ -0,0 +1,857 @@ +openapi: 3.1.0 +info: + title: Ollama API + description: API for interacting with the Ollama service. + version: 0.1.44 +servers: + - url: http://{host}:{port} + description: Ollama API server + variables: + host: + default: 127.0.0.1 + port: + default: '11434' + +tags: + - name: generate + description: Generate responses + - name: chat + description: Generate chat responses + - name: models + description: Manage models + - name: blobs + description: Manage blobs + - name: embeddings + description: Generate embeddings + - name: server + description: Server information + +paths: + /api/generate: + post: + operationId: generateResponse + tags: + - generate + description: Generate a response for a given prompt with a provided model. This is a streaming endpoint, so there will be a series of responses. The final response object will include statistics and additional data from the request. + summary: Generate a response for a given prompt with a provided model. This is a streaming endpoint, so there will be a series of responses. The final response object will include statistics and additional data from the request. + requestBody: + required: true + description: Request to generate a response + content: + application/json: + schema: + $ref: '#/components/schemas/GenerateRequest' + responses: + '200': + description: A response was successfully generated for the prompt + content: + application/json: + schema: + $ref: '#/components/schemas/GenerateResponse' + + /api/chat: + post: + operationId: generateChat + tags: + - chat + - generate + description: 'Generate the next message in a chat with a provided model. This is a streaming endpoint, so there will be a series of responses. Streaming can be disabled using "stream": false. The final response object will include statistics and additional data from the request.' + summary: 'Generate the next message in a chat with a provided model. This is a streaming endpoint, so there will be a series of responses. Streaming can be disabled using "stream": false. The final response object will include statistics and additional data from the request.' + requestBody: + required: true + description: Request to generate a response in a chat + content: + application/json: + schema: + $ref: '#/components/schemas/ChatRequest' + responses: + '200': + description: The next message was successfully generated for the chat + content: + application/json: + schema: + $ref: '#/components/schemas/ChatResponse' + + /api/create: + post: + operationId: createModel + tags: + - models + description: Create a model from a Modelfile. It is recommended to set modelfile to the content of the Modelfile rather than just set path. This is a requirement for remote create. Remote model creation must also create any file blobs, fields such as FROM and ADAPTER, explicitly with the server using Create a Blob and the value to the path indicated in the response. + summary: 'Create a model from a Modelfile. 
It is recommended to set modelfile to the content of the Modelfile rather than just set path. This is a requirement for remote create. Remote model creation must also create any file blobs, fields such as FROM and ADAPTER, explicitly with the server using Create a Blob and the value to the path indicated in the response.' + requestBody: + required: true + description: Request to create a model + content: + application/json: + schema: + $ref: '#/components/schemas/CreateRequest' + responses: + '200': + description: The model was successfully created + content: + application/x-ndjson: + schema: + $ref: '#/components/schemas/ProgressResponse' + + /api/blobs/{digest}: + get: + operationId: getBlob + tags: + - blobs + description: Ensures that the file blob used for a FROM or ADAPTER field exists on the server. This is checking your Ollama server and not Ollama.ai. + summary: 'Ensures that the file blob used for a FROM or ADAPTER field exists on the server. This is checking your Ollama server and not Ollama.ai.' + parameters: + - name: digest + in: path + required: true + description: The SHA256 digest of the blob + schema: + type: string + responses: + '200': + description: The blob exists on the server + '404': + description: The blob does not exist on the server + post: + operationId: createBlob + tags: + - blobs + description: Create a blob from a file on the server + summary: Create a blob from a file on the server + parameters: + - name: digest + in: path + required: true + description: The SHA256 digest of the blob + schema: + type: string + requestBody: + required: true + description: The file to create the blob from + content: + application/octet-stream: + schema: + type: string + format: binary + responses: + '201': + description: Blob was successfully created + '400': + description: The digest used is not expected + + /api/tags: + get: + operationId: getModels + tags: + - models + description: List models that are available locally + summary: List models that are available locally + responses: + '200': + description: The models were successfully fetched + content: + application/json: + schema: + $ref: '#/components/schemas/ListResponse' + + + /api/show: + post: + operationId: showModel + tags: + - models + description: Show information about a model including details, modelfile, template, parameters, license, and system prompt. + summary: 'Show information about a model including details, modelfile, template, parameters, license, and system prompt.' + requestBody: + required: true + description: Request to show a model + content: + application/json: + schema: + $ref: '#/components/schemas/ShowRequest' + responses: + '200': + description: The model's information was successfully fetched + content: + application/json: + schema: + $ref: '#/components/schemas/ShowResponse' + + /api/copy: + post: + operationId: copyModel + tags: + - models + description: Copy a model. Creates a model with another name from an existing model. + summary: 'Copy a model. Creates a model with another name from an existing model.' 
+ requestBody: + required: true + description: Request to copy a model + content: + application/json: + schema: + $ref: '#/components/schemas/CopyRequest' + responses: + '200': + description: Model was successfully copied + '404': + description: Source model does not exist + + /api/delete: + delete: + operationId: deleteModel + tags: + - models + description: Delete a model and its data + summary: 'Delete a model and its data' + requestBody: + required: true + description: Request to delete a model + content: + application/json: + schema: + $ref: '#/components/schemas/DeleteRequest' + responses: + '200': + description: Model was successfully deleted + '404': + description: Model does not exist + + /api/pull: + post: + operationId: pullModel + tags: + - models + description: Download a model from the ollama library. Cancelled pulls are resumed from where they left off, and multiple calls will share the same download progress. + summary: 'Download a model from the ollama library. Cancelled pulls are resumed from where they left off, and multiple calls will share the same download progress.' + requestBody: + required: true + description: Request to pull a model + content: + application/json: + schema: + $ref: '#/components/schemas/PullRequest' + responses: + '200': + description: Model was successfully pulled to the server + content: + application/x-ndjson: + schema: + $ref: '#/components/schemas/ProgressResponse' + + /api/push: + post: + operationId: pushModel + tags: + - models + description: Upload a model to a model library. Requires registering for ollama.ai and adding a public key first. + summary: 'Upload a model to a model library. Requires registering for ollama.ai and adding a public key first.' + requestBody: + required: true + description: Request to push a model + content: + application/json: + schema: + $ref: '#/components/schemas/PushRequest' + responses: + '200': + description: Model was successfully pushed to the server + content: + application/x-ndjson: + schema: + $ref: '#/components/schemas/ProgressResponse' + + + /api/embeddings: + post: + operationId: generateEmbeddings + tags: + - embeddings + - generate + description: Generate embeddings from a model + summary: Generate embeddings from a model + requestBody: + required: true + description: Request to generate embeddings + content: + application/json: + schema: + $ref: '#/components/schemas/EmbeddingRequest' + responses: + '200': + description: The embeddings were successfully generated + content: + application/json: + schema: + $ref: '#/components/schemas/EmbeddingResponse' + + + /api/ps: + get: + operationId: getRunningModels + tags: + - models + description: List running models + summary: List running models + responses: + '200': + description: The list of running models was successfully fetched + content: + application/json: + schema: + $ref: '#/components/schemas/ProcessResponse' + + /api/version: + get: + operationId: getOllamaVersion + tags: + - server + description: Return the Ollama server version + summary: Return the Ollama server version + responses: + '200': + description: The Ollama server version was successfully fetched + content: + application/json: + schema: + type: object + properties: + version: + type: string + +components: + schemas: + GenerateRequest: + type: object + description: Request to generate a response + properties: + model: + type: string + description: The model name + prompt: + type: string + description: The prompt to generate a response for + images: + type: array + items: + type: 
string + format: byte + description: A list of base64-encoded images (for multimodal models such as llava) + example: null + format: + type: string + description: The format to return a response in. Currently the only accepted value is json + example: null + options: + $ref: '#/components/schemas/Options' + system: + type: string + description: System message to (overrides what is defined in the Modelfile) + example: null + template: + type: string + description: The prompt template to use (overrides what is defined in the Modelfile) + example: null + context: + type: array + items: + type: integer + description: The context parameter returned from a previous request to /generate, this can be used to keep a short conversational memory + example: [] + stream: + type: boolean + description: If false the response will be returned as a single response object, rather than a stream of objects + raw: + type: boolean + description: If true no formatting will be applied to the prompt. You may choose to use the raw parameter if you are specifying a full templated prompt in your request to the API + keep_alive: + $ref: '#/components/schemas/Duration' + required: + - model + - prompt + + GenerateResponse: + type: object + description: Response from a generate request + properties: + model: + type: string + description: The model name that generated the response + created_at: + type: string + format: date-time + description: Timestamp of the response + response: + type: string + description: The textual response itself. When done, empty if the response was streamed, if not streamed, this will contain the full response + done: + type: boolean + description: Specifies if the response is complete + context: + type: array + items: + type: integer + description: When done, encoding of the conversation used in this response + total_duration: + type: number + description: When done, time spent generating the response + load_duration: + type: number + description: When done, time spent in nanoseconds loading the model + prompt_eval_count: + type: integer + description: When done, number of tokens in the prompt + prompt_eval_duration: + type: number + description: When done, time spent in nanoseconds evaluating the prompt + eval_count: + type: integer + description: When done, number of tokens in the response + eval_duration: + type: number + description: When done, time in nanoseconds spent generating the response + + ChatRequest: + type: object + description: Request to generate a response in a chat + properties: + model: + type: string + description: The model name + messages: + type: array + items: + $ref: '#/components/schemas/Message' + description: Messages of the chat - can be used to keep a chat memory + stream: + type: boolean + description: Enable streaming of returned response + format: + type: string + description: Format to return the response in (e.g. 
"json") + keep_alive: + $ref: '#/components/schemas/Duration' + options: + $ref: '#/components/schemas/Options' + + ChatResponse: + type: object + description: Response from a chat request + properties: + model: + type: string + description: The model name + created_at: + type: string + format: date-time + description: Timestamp of the response + message: + $ref: '#/components/schemas/Message' + done_reason: + type: string + description: Reason the model stopped generating text + done: + type: boolean + description: Specifies if the response is complete + total_duration: + type: number + description: Total duration of the request + load_duration: + type: string + description: Load duration of the request + prompt_eval_count: + type: integer + description: Count of prompt evaluations + prompt_eval_duration: + type: string + description: Duration of prompt evaluations + eval_count: + type: integer + description: Count of evaluations + eval_duration: + type: string + description: Duration of evaluations + + CreateRequest: + type: object + description: Request to create a model + properties: + model: + type: string + description: The name of the model to create + example: mario + path: + type: string + description: The path to the model file + modelfile: + type: string + description: The modelfile content + example: 'FROM llama3\nSYSTEM You are mario from Super Mario Bros.' + stream: + type: boolean + description: If false the response will be returned as a single response object, rather than a stream of objects + quantize: + type: string + description: Specifies the quantization level of the model + required: + - model + + ListResponse: + type: object + description: Response from a list request + properties: + models: + type: array + items: + $ref: '#/components/schemas/ListModelResponse' + + ListModelResponse: + type: object + description: Response from a list request + properties: + name: + type: string + model: + type: string + modified_at: + type: string + format: date-time + size: + type: integer + digest: + type: string + details: + $ref: '#/components/schemas/ModelDetails' + + ShowRequest: + type: object + description: Request to show a model + properties: + model: + type: string + description: The name of the model to show + required: + - model + + ShowResponse: + type: object + description: Response from a show request + properties: + license: + type: string + description: The model license + modelfile: + type: string + description: The modelfile content + parameters: + type: string + description: The model parameters + template: + type: string + description: The model template + system: + type: string + description: The model system message/prompt + details: + $ref: '#/components/schemas/ModelDetails' + messages: + type: array + items: + $ref: '#/components/schemas/Message' + + CopyRequest: + type: object + description: Request to copy a model + properties: + source: + type: string + destination: + type: string + + DeleteRequest: + type: object + description: Request to delete a model + properties: + model: + type: string + description: The name of the model to delete + required: + - model + + PullRequest: + type: object + description: Request to pull a model + properties: + model: + type: string + description: The name of the model to pull + example: llama3 + insecure: + type: boolean + description: allow insecure connections to the library. Only use this if you are pulling from your own library during development. 
+ stream: + type: boolean + description: If false the response will be returned as a single response object, rather than a stream of objects + required: + - model + + PushRequest: + type: object + description: Request to push a model + properties: + model: + type: string + description: The name of the model to push in the form of /: + insecure: + type: boolean + description: Whether to allow insecure connections to the library. Only use this if you are pushing to your library during development + stream: + type: boolean + description: If false the response will be returned as a single response object, rather than a stream of objects + required: + - model + + ProgressResponse: + type: object + description: The response returned from various streaming endpoints + properties: + status: + type: string + description: The status of the request + digest: + type: string + description: The SHA256 digest of the blob + total: + type: integer + description: The total size of the task + completed: + type: integer + description: The completed size of the task + + EmbeddingRequest: + type: object + description: Request to generate embeddings + properties: + model: + type: string + description: The name of model to generate embeddings from + prompt: + type: string + description: The text to generate embeddings for + keep_alive: + $ref: '#/components/schemas/Duration' + options: + $ref: '#/components/schemas/Options' + required: + - model + - prompt + + EmbeddingResponse: + type: object + description: Response from an embedding request + properties: + embedding: + type: array + items: + type: number + description: The generated embeddings + + ProcessResponse: + type: object + description: Response with a list of running models + properties: + models: + type: array + items: + $ref: '#/components/schemas/ProcessModelResponse' + + ProcessModelResponse: + type: object + description: Running model description + properties: + name: + type: string + model: + type: string + size: + type: integer + digest: + type: string + details: + $ref: '#/components/schemas/ModelDetails' + expires_at: + type: string + format: date-time + size_vram: + type: integer + + Message: + type: object + description: A message in a chat + properties: + role: + type: string + content: + type: string + images: + type: array + items: + type: string + format: byte + + ModelDetails: + type: object + description: Details about a model + properties: + parent_model: + type: string + format: + type: string + family: + type: string + families: + type: array + items: + type: string + parameter_size: + type: string + quantization_level: + type: string + + Duration: + type: string + description: A string representing the duration + example: "5m" + + Options: + type: object + description: Advanced model and runner options for generation and chat requests + properties: + num_keep: + type: integer + description: 'Specifies the number of tokens from the beginning of the context ot retain when the context limit is reached. (Default: 4)' + example: 4 + seed: + type: integer + description: 'Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt. (Default: 0)' + example: -1 + num_predict: + type: integer + description: 'Maximum number of tokens to predict when generating text. (Default: 128, -1 = infinite generation, -2 = fill context)' + example: -1 + top_k: + type: integer + description: 'Reduces the probability of generating nonsense. A higher value (e.g. 
100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)' + example: 40 + top_p: + type: number + format: float + description: 'Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)' + example: 0.9 + tfs_z: + type: number + format: float + description: 'Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. (default: 1)' + example: 1.0 + typical_p: + type: number + format: float + description: 'Controls the selection of typical words based on their probability distribution. A higher value (e.g., 0.95) focuses on more typical words, reducing the chance of unusual words being selected. (Default: 1.0)' + example: 1.0 + repeat_last_n: + type: integer + description: 'Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)' + example: 64 + temperature: + type: number + format: float + description: 'The temperature of the model. Increasing the temperature will make the model answer more creatively. (Default: 0.8)' + example: 0.8 + repeat_penalty: + type: number + format: float + description: 'Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)' + example: 1.1 + presence_penalty: + type: number + format: float + description: 'Applies a penalty to tokens that have already appeared in the generated text, encouraging the model to introduce new tokens. A higher value increases this penalty, promoting more varied and less repetitive output. (Default: 0.8)' + example: 0.8 + frequency_penalty: + type: number + format: float + description: 'Penalizes tokens based on their frequency in the generated text so far. A higher value reduces the likelihood of frequent tokens being generated again, promoting more diverse outputs. (Default: 0.8)' + example: 0.8 + mirostat: + type: number + format: float + description: 'Enable Mirostat sampling for controlling perplexity. (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)' + example: 0 + mirostat_tau: + type: number + format: float + description: 'Controls the balance between coherence and diversity of the output. A lower value will result in more focused and coherent text. (Default: 5.0)' + example: 5.8 + mirostat_eta: + type: number + format: float + description: 'Influences how quickly the algorithm responds to feedback from the generated text. A lower learning rate will result in slower adjustments, while a higher learning rate will make the algorithm more responsive. (Default: 0.1)' + example: 0.1 + penalize_newline: + type: boolean + description: 'Determines whether the model should penalize the generation of newlines, which can help control the structure and formatting of the output. (Default: true)' + example: true + stop: + type: array + items: + type: string + description: 'Sets the stop sequences to use. When this pattern is encountered the LLM will stop generating text and return. Multiple stop patterns may be set by specifying multiple separate stop parameters in a modelfile.' 
+ example: ['AI assistant.'] + numa: + type: boolean + description: 'Indicates whether to use Non-Uniform Memory Access (NUMA) for optimizing memory usage and performance on multi-processor systems. (Default: false)' + example: false + num_ctx: + type: integer + description: 'Sets the size of the context window used to generate the next token. (Default: 2048)' + example: 2048 + num_batch: + type: integer + description: 'Specifies the number of batches for processing. (Default: 512)' + example: 512 + num_gpu: + type: integer + description: 'Specifies the number of GPUs to use. A value of -1 uses all available GPUs. (Default: -1)' + example: -1 + main_gpu: + type: integer + description: 'Specifies the primary GPU to use for processing. (Default: 0)' + low_vram: + type: boolean + description: 'Indicates whether to optimize the model for low VRAM usage. (Default: false)' + example: false + f16_kv: + type: boolean + description: 'Indicates whether to use 16-bit floating point precision for key-value pairs, reducing memory usage. (Default: false)' + example: true + logits_all: + type: boolean + description: 'Specifies whether to output logits for all tokens. (Default: false)' + example: false + vocab_only: + type: boolean + description: 'Indicates whether to only load the vocabulary without the full model. (Default: false)' + example: false + use_mmap: + type: boolean + description: 'Determines whether to use memory-mapped files for loading the model, improving performance on large models. (Default: true)' + example: true + use_mlock: + type: boolean + description: 'Determines whether to use memory locking to prevent swapping the model out of RAM. (Default: false)' + example: false + num_thread: + type: integer + description: 'Specifies the number of threads to use for processing. A value of 0 uses all available threads. (Default: 0)' + example: 0 From ef7c6cb43aa8cc8c38a4e51d4d7e78b66e08a5c1 Mon Sep 17 00:00:00 2001 From: JD Davis Date: Thu, 13 Jun 2024 23:25:47 -0500 Subject: [PATCH 2/4] chore: added spectral to lint OpenAPI spec --- .github/workflows/test.yaml | 10 ++++++++++ .spectral.yaml | 1 + specs/openapi-3.1.yaml | 9 ++++----- 3 files changed, 15 insertions(+), 5 deletions(-) create mode 100644 .spectral.yaml diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index dbb6c2fd..9745f266 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -231,6 +231,16 @@ jobs: OLLAMA_SKIP_CPU_GENERATE: '1' # TODO - do we need any artifacts? + validate-openapi: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - uses: stoplightio/spectral-action@latest + with: + file_glob: 'specs/*.yaml' + lint: strategy: matrix: diff --git a/.spectral.yaml b/.spectral.yaml new file mode 100644 index 00000000..1cac3b3d --- /dev/null +++ b/.spectral.yaml @@ -0,0 +1 @@ +extends: ["spectral:oas", "spectral:asyncapi"] diff --git a/specs/openapi-3.1.yaml b/specs/openapi-3.1.yaml index 93c2753c..b601c51e 100644 --- a/specs/openapi-3.1.yaml +++ b/specs/openapi-3.1.yaml @@ -3,6 +3,9 @@ info: title: Ollama API description: API for interacting with the Ollama service. 
version: 0.1.44 + contact: + name: Ollama + url: https://github.com/ollama/ollama servers: - url: http://{host}:{port} description: Ollama API server @@ -337,22 +340,18 @@ components: items: type: string format: byte - description: A list of base64-encoded images (for multimodal models such as llava) - example: null + description: A list of base64-encoded images (for multimodal models such as llava) format: type: string description: The format to return a response in. Currently the only accepted value is json - example: null options: $ref: '#/components/schemas/Options' system: type: string description: System message to (overrides what is defined in the Modelfile) - example: null template: type: string description: The prompt template to use (overrides what is defined in the Modelfile) - example: null context: type: array items: From bfff252fa936f3a23380aae3e75debc8b1d70c4c Mon Sep 17 00:00:00 2001 From: JD Davis Date: Fri, 14 Jun 2024 13:35:15 -0500 Subject: [PATCH 3/4] chore: converted from quoted strings to multiline --- specs/openapi-3.1.yaml | 298 ++++++++++++++++++++++++++++++++--------- 1 file changed, 234 insertions(+), 64 deletions(-) diff --git a/specs/openapi-3.1.yaml b/specs/openapi-3.1.yaml index b601c51e..dba03d79 100644 --- a/specs/openapi-3.1.yaml +++ b/specs/openapi-3.1.yaml @@ -35,8 +35,16 @@ paths: operationId: generateResponse tags: - generate - description: Generate a response for a given prompt with a provided model. This is a streaming endpoint, so there will be a series of responses. The final response object will include statistics and additional data from the request. - summary: Generate a response for a given prompt with a provided model. This is a streaming endpoint, so there will be a series of responses. The final response object will include statistics and additional data from the request. + description: | + Generate a response for a given prompt with a provided model. This is + a streaming endpoint, so there will be a series of responses. The + final response object will include statistics and additional data from + the request. + summary: | + Generate a response for a given prompt with a provided model. This is + a streaming endpoint, so there will be a series of responses. The final + response object will include statistics and additional data from the + request. requestBody: required: true description: Request to generate a response @@ -58,8 +66,16 @@ paths: tags: - chat - generate - description: 'Generate the next message in a chat with a provided model. This is a streaming endpoint, so there will be a series of responses. Streaming can be disabled using "stream": false. The final response object will include statistics and additional data from the request.' - summary: 'Generate the next message in a chat with a provided model. This is a streaming endpoint, so there will be a series of responses. Streaming can be disabled using "stream": false. The final response object will include statistics and additional data from the request.' + description: | + Generate the next message in a chat with a provided model. This is a + streaming endpoint, so there will be a series of responses. Streaming + can be disabled using "stream": false. The final response object will + include statistics and additional data from the request. + summary: | + Generate the next message in a chat with a provided model. This is a + streaming endpoint, so there will be a series of responses. Streaming + can be disabled using "stream": false. 
The final response object will + include statistics and additional data from the request. requestBody: required: true description: Request to generate a response in a chat @@ -80,8 +96,20 @@ paths: operationId: createModel tags: - models - description: Create a model from a Modelfile. It is recommended to set modelfile to the content of the Modelfile rather than just set path. This is a requirement for remote create. Remote model creation must also create any file blobs, fields such as FROM and ADAPTER, explicitly with the server using Create a Blob and the value to the path indicated in the response. - summary: 'Create a model from a Modelfile. It is recommended to set modelfile to the content of the Modelfile rather than just set path. This is a requirement for remote create. Remote model creation must also create any file blobs, fields such as FROM and ADAPTER, explicitly with the server using Create a Blob and the value to the path indicated in the response.' + description: | + Create a model from a Modelfile. It is recommended to set modelfile + to the content of the Modelfile rather than just set path. This is a + requirement for remote create. Remote model creation must also create + any file blobs, fields such as FROM and ADAPTER, explicitly with the + server using Create a Blob and the value to the path indicated in the + response. + summary: | + Create a model from a Modelfile. It is recommended to set modelfile to + the content of the Modelfile rather than just set path. This is a + requirement for remote create. Remote model creation must also create + any file blobs, fields such as FROM and ADAPTER, explicitly with the + server using Create a Blob and the value to the path indicated in the + response. requestBody: required: true description: Request to create a model @@ -102,8 +130,12 @@ paths: operationId: getBlob tags: - blobs - description: Ensures that the file blob used for a FROM or ADAPTER field exists on the server. This is checking your Ollama server and not Ollama.ai. - summary: 'Ensures that the file blob used for a FROM or ADAPTER field exists on the server. This is checking your Ollama server and not Ollama.ai.' + description: | + Ensures that the file blob used for a FROM or ADAPTER field exists on + the server. This is checking your Ollama server and not Ollama.ai. + summary: | + Ensures that the file blob used for a FROM or ADAPTER field exists on + the server. This is checking your Ollama server and not Ollama.ai. parameters: - name: digest in: path @@ -164,8 +196,12 @@ paths: operationId: showModel tags: - models - description: Show information about a model including details, modelfile, template, parameters, license, and system prompt. - summary: 'Show information about a model including details, modelfile, template, parameters, license, and system prompt.' + description: | + Show information about a model including details, modelfile, template, + parameters, license, and system prompt. + summary: | + Show information about a model including details, modelfile, template, + parameters, license, and system prompt. requestBody: required: true description: Request to show a model @@ -186,8 +222,10 @@ paths: operationId: copyModel tags: - models - description: Copy a model. Creates a model with another name from an existing model. - summary: 'Copy a model. Creates a model with another name from an existing model.' + description: | + Copy a model. Creates a model with another name from an existing model. + summary: | + Copy a model. 
Creates a model with another name from an existing model. requestBody: required: true description: Request to copy a model @@ -226,8 +264,14 @@ paths: operationId: pullModel tags: - models - description: Download a model from the ollama library. Cancelled pulls are resumed from where they left off, and multiple calls will share the same download progress. - summary: 'Download a model from the ollama library. Cancelled pulls are resumed from where they left off, and multiple calls will share the same download progress.' + description: | + Download a model from the ollama library. Cancelled pulls are resumed + from where they left off, and multiple calls will share the same + download progress. + summary: | + Download a model from the ollama library. Cancelled pulls are resumed + from where they left off, and multiple calls will share the same + download progress. requestBody: required: true description: Request to pull a model @@ -248,8 +292,12 @@ paths: operationId: pushModel tags: - models - description: Upload a model to a model library. Requires registering for ollama.ai and adding a public key first. - summary: 'Upload a model to a model library. Requires registering for ollama.ai and adding a public key first.' + description: | + Upload a model to a model library. Requires registering for ollama.ai + and adding a public key first. + summary: | + Upload a model to a model library. Requires registering for ollama.ai + and adding a public key first. requestBody: required: true description: Request to push a model @@ -340,30 +388,44 @@ components: items: type: string format: byte - description: A list of base64-encoded images (for multimodal models such as llava) + description: | + A list of base64-encoded images (for multimodal models such as + llava) format: type: string - description: The format to return a response in. Currently the only accepted value is json + description: | + The format to return a response in. Currently the only accepted + value is json options: $ref: '#/components/schemas/Options' system: type: string - description: System message to (overrides what is defined in the Modelfile) + description: | + System message (overrides what is defined in the Modelfile) template: type: string - description: The prompt template to use (overrides what is defined in the Modelfile) + description: | + The prompt template to use (overrides what is defined in the + Modelfile) context: type: array items: type: integer - description: The context parameter returned from a previous request to /generate, this can be used to keep a short conversational memory + description: | + The context parameter returned from a previous request to generate, + this can be used to keep a short conversational memory example: [] stream: type: boolean - description: If false the response will be returned as a single response object, rather than a stream of objects + description: | + If false the response will be returned as a single response object, + rather than a stream of objects raw: type: boolean - description: If true no formatting will be applied to the prompt. You may choose to use the raw parameter if you are specifying a full templated prompt in your request to the API + description: | + If true no formatting will be applied to the prompt.
You may choose + to use the raw parameter if you are specifying a full templated + prompt in your request to the API keep_alive: $ref: '#/components/schemas/Duration' required: @@ -383,7 +445,9 @@ components: description: Timestamp of the response response: type: string - description: The textual response itself. When done, empty if the response was streamed, if not streamed, this will contain the full response + description: | + The textual response itself. When done, empty if the response was + streamed; if not streamed, this will contain the full response done: type: boolean description: Specifies if the response is complete @@ -391,7 +455,8 @@ components: type: array items: type: integer - description: When done, encoding of the conversation used in this response + description: | + When done, encoding of the conversation used in this response total_duration: type: number description: When done, time spent generating the response @@ -403,13 +468,15 @@ components: description: When done, number of tokens in the prompt prompt_eval_duration: type: number - description: When done, time spent in nanoseconds evaluating the prompt + description: | + When done, time spent in nanoseconds evaluating the prompt eval_count: type: integer description: When done, number of tokens in the response eval_duration: type: number - description: When done, time in nanoseconds spent generating the response + description: | + When done, time in nanoseconds spent generating the response ChatRequest: type: object @@ -486,10 +553,12 @@ components: modelfile: type: string description: The modelfile content - example: 'FROM llama3\nSYSTEM You are mario from Super Mario Bros.' + example: FROM llama3\nSYSTEM You are mario from Super Mario Bros. stream: type: boolean - description: If false the response will be returned as a single response object, rather than a stream of objects + description: | + If false the response will be returned as a single response object, + rather than a stream of objects quantize: type: string description: Specifies the quantization level of the model @@ -588,10 +657,14 @@ components: example: llama3 insecure: type: boolean - description: allow insecure connections to the library. Only use this if you are pulling from your own library during development. + description: | + Allow insecure connections to the library. Only use this if you are + pulling from your own library during development. stream: type: boolean - description: If false the response will be returned as a single response object, rather than a stream of objects + description: | + If false the response will be returned as a single response object, + rather than a stream of objects required: - model @@ -601,13 +674,18 @@ components: properties: model: type: string - description: The name of the model to push in the form of /: + description: | + The name of the model to push in the form of <namespace>/<model>:<tag> insecure: type: boolean - description: Whether to allow insecure connections to the library. Only use this if you are pushing to your library during development + description: | + Whether to allow insecure connections to the library. Only use this + if you are pushing to your library during development stream: type: boolean - description: If false the response will be returned as a single response object, rather than a stream of objects + description: | + If false the response will be returned as a single response object, + rather than a stream of objects required: - model @@ -725,132 +803,224 @@ components: Options: type: object - description: Advanced model and runner options for generation and chat requests + description: | + Advanced model and runner options for generation and chat requests properties: num_keep: type: integer - description: 'Specifies the number of tokens from the beginning of the context ot retain when the context limit is reached. (Default: 4)' + description: | + Specifies the number of tokens from the beginning of + the context to retain when the context limit is reached. + (Default: 4) example: 4 seed: type: integer - description: 'Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt. (Default: 0)' + description: | + Sets the random number seed to use for generation. Setting this to + a specific number will make the model generate the same text for + the same prompt. + (Default: 0) example: -1 num_predict: type: integer - description: 'Maximum number of tokens to predict when generating text. (Default: 128, -1 = infinite generation, -2 = fill context)' + description: | + Maximum number of tokens to predict when generating text. + (Default: 128, -1 = infinite generation, -2 = fill context) example: -1 top_k: type: integer - description: 'Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)' + description: | + Reduces the probability of generating nonsense. A higher value + (e.g. 100) will give more diverse answers, while a lower value + (e.g. 10) will be more conservative. + (Default: 40) example: 40 top_p: type: number format: float - description: 'Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)' + description: | + Works together with top-k. A higher value (e.g., 0.95) will lead to + more diverse text, while a lower value (e.g., 0.5) will generate + more focused and conservative text. + (Default: 0.9) example: 0.9 tfs_z: type: number format: float - description: 'Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. (default: 1)' + description: | + Tail free sampling is used to reduce the impact of less probable + tokens from the output. A higher value (e.g., 2.0) will reduce the + impact more, while a value of 1.0 disables this setting. + (default: 1) example: 1.0 typical_p: type: number format: float - description: 'Controls the selection of typical words based on their probability distribution. A higher value (e.g., 0.95) focuses on more typical words, reducing the chance of unusual words being selected. (Default: 1.0)' + description: | + Controls the selection of typical words based on their probability + distribution. A higher value (e.g., 0.95) focuses on more typical + words, reducing the chance of unusual words being selected.
+ (Default: 1.0) example: 1.0 repeat_last_n: type: integer - description: 'Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)' + description: | + Sets how far back for the model to look back to prevent repetition. + (Default: 64, 0 = disabled, -1 = num_ctx) example: 64 temperature: type: number format: float - description: 'The temperature of the model. Increasing the temperature will make the model answer more creatively. (Default: 0.8)' + description: | + The temperature of the model. Increasing the temperature will make + the model answer more creatively. + (Default: 0.8) example: 0.8 repeat_penalty: type: number format: float - description: 'Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)' + description: | + Sets how strongly to penalize repetitions. A higher value + (e.g., 1.5) will penalize repetitions more strongly, while a lower + value (e.g., 0.9) will be more lenient. + (Default: 1.1) example: 1.1 presence_penalty: type: number format: float - description: 'Applies a penalty to tokens that have already appeared in the generated text, encouraging the model to introduce new tokens. A higher value increases this penalty, promoting more varied and less repetitive output. (Default: 0.8)' + description: | + Applies a penalty to tokens that have already appeared in the + generated text, encouraging the model to introduce new tokens. A + higher value increases this penalty, promoting more varied and less + repetitive output. + (Default: 0.8) example: 0.8 frequency_penalty: type: number format: float - description: 'Penalizes tokens based on their frequency in the generated text so far. A higher value reduces the likelihood of frequent tokens being generated again, promoting more diverse outputs. (Default: 0.8)' + description: | + Penalizes tokens based on their frequency in the generated text so + far. A higher value reduces the likelihood of frequent tokens being + generated again, promoting more diverse outputs. + (Default: 0.8) example: 0.8 mirostat: type: number format: float - description: 'Enable Mirostat sampling for controlling perplexity. (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)' + description: | + Enable Mirostat sampling for controlling perplexity. + (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0) example: 0 mirostat_tau: type: number format: float - description: 'Controls the balance between coherence and diversity of the output. A lower value will result in more focused and coherent text. (Default: 5.0)' + description: | + Controls the balance between coherence and diversity of the output. + A lower value will result in more focused and coherent text. + (Default: 5.0) example: 5.8 mirostat_eta: type: number format: float - description: 'Influences how quickly the algorithm responds to feedback from the generated text. A lower learning rate will result in slower adjustments, while a higher learning rate will make the algorithm more responsive. (Default: 0.1)' + description: | + Influences how quickly the algorithm responds to feedback from the + generated text. A lower learning rate will result in slower + adjustments, while a higher learning rate will make the algorithm + more responsive. 
+ (Default: 0.1) example: 0.1 penalize_newline: type: boolean - description: 'Determines whether the model should penalize the generation of newlines, which can help control the structure and formatting of the output. (Default: true)' + description: | + Determines whether the model should penalize the generation of + newlines, which can help control the structure and formatting of + the output. + (Default: true) example: true stop: type: array items: type: string - description: 'Sets the stop sequences to use. When this pattern is encountered the LLM will stop generating text and return. Multiple stop patterns may be set by specifying multiple separate stop parameters in a modelfile.' + description: | + Sets the stop sequences to use. When this pattern is encountered + the LLM will stop generating text and return. Multiple stop patterns + may be set by specifying multiple separate stop parameters in a + modelfile. example: ['AI assistant.'] numa: type: boolean - description: 'Indicates whether to use Non-Uniform Memory Access (NUMA) for optimizing memory usage and performance on multi-processor systems. (Default: false)' + description: | + Indicates whether to use Non-Uniform Memory Access (NUMA) for + optimizing memory usage and performance on multi-processor systems. + (Default: false) example: false num_ctx: type: integer - description: 'Sets the size of the context window used to generate the next token. (Default: 2048)' + description: | + Sets the size of the context window used to generate the next token. + (Default: 2048) example: 2048 num_batch: type: integer - description: 'Specifies the number of batches for processing. (Default: 512)' + description: | + Specifies the number of batches for processing. + (Default: 512) example: 512 num_gpu: type: integer - description: 'Specifies the number of GPUs to use. A value of -1 uses all available GPUs. (Default: -1)' + description: | + Specifies the number of GPUs to use. A value of -1 uses all + available GPUs. + (Default: -1) example: -1 main_gpu: type: integer - description: 'Specifies the primary GPU to use for processing. (Default: 0)' + description: | + Specifies the primary GPU to use for processing. + (Default: 0) low_vram: type: boolean - description: 'Indicates whether to optimize the model for low VRAM usage. (Default: false)' + description: | + Indicates whether to optimize the model for low VRAM usage. + (Default: false) example: false f16_kv: type: boolean - description: 'Indicates whether to use 16-bit floating point precision for key-value pairs, reducing memory usage. (Default: false)' + description: | + Indicates whether to use 16-bit floating point precision for + key-value pairs, reducing memory usage. + (Default: false) example: true logits_all: type: boolean - description: 'Specifies whether to output logits for all tokens. (Default: false)' + description: | + Specifies whether to output logits for all tokens. + (Default: false) example: false vocab_only: type: boolean - description: 'Indicates whether to only load the vocabulary without the full model. (Default: false)' + description: | + Indicates whether to only load the vocabulary without the full model. + (Default: false) example: false use_mmap: type: boolean - description: 'Determines whether to use memory-mapped files for loading the model, improving performance on large models. (Default: true)' + description: | + Determines whether to use memory-mapped files for loading the model, + improving performance on large models. 
+ (Default: true) example: true use_mlock: type: boolean - description: 'Determines whether to use memory locking to prevent swapping the model out of RAM. (Default: false)' + description: | + Determines whether to use memory locking to prevent swapping the + model out of RAM. + (Default: false) example: false num_thread: type: integer - description: 'Specifies the number of threads to use for processing. A value of 0 uses all available threads. (Default: 0)' + description: | + Specifies the number of threads to use for processing. A value of + 0 uses all available threads. + (Default: 0) example: 0 From eb67f20438b286ca78a69b5e8ad0c0a04436d55f Mon Sep 17 00:00:00 2001 From: Barna Szocs Date: Tue, 20 Aug 2024 10:24:04 +0300 Subject: [PATCH 4/4] Fix type of duration fields The duration fields return by ollama API are large, nanosecond based integers --- specs/openapi-3.1.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/specs/openapi-3.1.yaml b/specs/openapi-3.1.yaml index dba03d79..b26fcfe6 100644 --- a/specs/openapi-3.1.yaml +++ b/specs/openapi-3.1.yaml @@ -524,21 +524,21 @@ components: type: number description: Total duration of the request load_duration: - type: string + type: number description: Load duration of the request prompt_eval_count: type: integer description: Count of prompt evaluations prompt_eval_duration: - type: string + type: number description: Duration of prompt evaluations eval_count: type: integer description: Count of evaluations eval_duration: - type: string + type: number description: Duration of evaluations - + CreateRequest: type: object description: Request to create a model
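For reference, a minimal client sketch of the /api/generate flow documented by this spec (not part of the patch series above). It assumes an Ollama server reachable at the spec's default host and port and a locally available model; the model name "llama3" is only an illustrative placeholder. While streaming, each line of the response body is one GenerateResponse object, and the final object has "done": true together with the nanosecond duration statistics.

import json
import urllib.request


def generate(prompt, model="llama3", base_url="http://127.0.0.1:11434"):
    # Build a GenerateRequest body as described by the schema above.
    body = json.dumps({"model": model, "prompt": prompt, "stream": True}).encode()
    req = urllib.request.Request(
        f"{base_url}/api/generate",
        data=body,
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(req) as resp:
        for line in resp:  # one JSON object per line while streaming
            chunk = json.loads(line)
            print(chunk.get("response", ""), end="", flush=True)
            if chunk.get("done"):
                print()
                return chunk  # final object: eval_count, eval_duration (nanoseconds), etc.


if __name__ == "__main__":
    stats = generate("Why is the sky blue?")
    print("eval_count:", stats.get("eval_count"))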