# Chat Completions

## Create a chat completion

> This endpoint creates a chat completion using the specified model.

```json
{"openapi":"3.0.0","info":{"title":"Cortecs Chat Completions API","version":"1.0.0"},"servers":[{"url":"https://api.cortecs.ai/v1"}],"security":[{"bearerAuth":[]}],"components":{"securitySchemes":{"bearerAuth":{"type":"http","scheme":"bearer","bearerFormat":"JWT"}},"schemas":{"CompletionRequest":{"type":"object","description":"A request object for generating chat completions and controlling router behavior. This object contains suggested parameters to generate a response from the specified model. Many of the parameters are optional, and it is recommended to set them only if needed; however, you may include other parameters as required. Note that not all providers support the same set of parameters. Adding unsupported or unnecessary parameters can cause requests to fail or limit the providers able to process them.\n","properties":{"preference":{"type":"string","description":"The provider preference for handling the request.","enum":["speed","cost","balanced"]},"allowed_providers":{"type":"array","items":{"type":"string"},"description":"The providers that are allowed to be used for the completion.","nullable":true},"eu_native":{"type":"boolean","description":"Whether to consider only providers based and regulated within the EU. Even when false, all our endpoints are GDPR compliant.","nullable":true},"allow_quantization":{"type":"boolean","description":"Whether to allow quantized endpoints.","nullable":true},"allow_zero_data_retention":{"type":"boolean","description":"Whether to use only ZDR providers.","nullable":true},"enable_model_fallback":{"type":"boolean","description":"Whether to allow model fallback when a model is currently unavailable.","nullable":true},"model":{"type":"string","description":"The model to use for the completion."},"messages":{"type":"array","items":{"type":"object","properties":{"role":{"type":"string","description":"The role of the message sender."},"content":{"type":"string","description":"The content of the message.","nullable":true}}}},"temperature":{"type":"number","description":"Controls randomness in the output. Higher values make the output more random.","default":0.7,"nullable":true},"max_tokens":{"type":"integer","description":"The maximum number of tokens to generate in the completion. It can also be referred to as `max_completion_tokens`. The limit depends on the model’s context size — it can’t exceed the context size minus your prompt length.","nullable":true},"top_p":{"type":"number","description":"Controls diversity via nucleus sampling - only the most likely tokens whose cumulative probability mass adds up to top_p are considered for sampling. For example, 0.1 means only tokens comprising the top 10% probability mass are considered. An alternative to temperature sampling - we recommend altering either top_p or temperature, but not both.","nullable":true},"frequency_penalty":{"type":"number","description":"Reduces the probability of generating a token based on its frequency in the text so far. The more times a token has appeared in the text so far, the lower the probability of it appearing in the completion.","default":0,"nullable":true},"presence_penalty":{"type":"number","description":"Reduces the probability of generating a token based on whether it has already appeared in the text so far. If a token has already appeared in the text so far, the probability of it appearing in the completion is reduced.","default":0,"nullable":true},"response_format":{"type":"object","description":"Specifies the format of the response.","nullable":true},"stop":{"type":"array","description":"Sequences where the API will stop generating further tokens.","items":{"type":"string"},"nullable":true},"stream":{"type":"boolean","description":"Whether to stream the response. The last chunk will contain the usage information.","default":false,"nullable":true},"logprobs":{"oneOf":[{"type":"integer"},{"type":"boolean"}],"description":"Whether to return log probabilities of the output tokens.","nullable":true},"seed":{"type":"integer","description":"Random seed for reproducible results.","nullable":true},"tools":{"type":"array","description":"List of tools available to the model.","items":{"type":"object"},"nullable":true},"tool_choice":{"type":"string","description":"Controls which tool the model should use. Only set if tools is not empty.","nullable":true},"n":{"type":"integer","description":"Number of completions to generate.","nullable":true},"prediction":{"type":"object","description":"Specify expected results, optimizing response times by leveraging known or predictable content. This approach is especially effective for updating text documents or code files with minimal changes, reducing latency while maintaining high-quality results.","nullable":true},"parallel_tool_calls":{"type":"boolean","description":"Whether to allow parallel tool calls."},"safe_prompt":{"type":"boolean","description":"Whether to inject a safety prompt before all conversations."}}},"CompletionResponse":{"type":"object","properties":{"object":{"type":"string","description":"The type of object returned"},"id":{"type":"string","description":"The unique identifier for the completion"},"created":{"type":"integer","description":"The timestamp when the completion was created"},"provider":{"type":"string","description":"The provider that generated the completion"},"model":{"type":"string","description":"The model that generated the completion"},"choices":{"type":"array","items":{"type":"object","properties":{"index":{"type":"integer","description":"The index of the choice"},"message":{"type":"object","properties":{"role":{"type":"string","description":"The role of the message sender"},"content":{"type":"string","description":"The content of the message","nullable":true},"tool_calls":{"type":"array","items":{"type":"object","properties":{"id":{"type":"string","description":"The ID of the tool call"},"type":{"type":"string","description":"The type of the tool call"},"function":{"type":"object","description":"The function details for the tool call"}}},"nullable":true},"reasoning_content":{"type":"string","description":"Additional reasoning content","nullable":true}}},"finish_reason":{"type":"string","description":"The reason why the completion finished (stop, length, tool_calls, etc.)"},"logprobs":{"type":"object","description":"Log probabilities of the output tokens","nullable":true}}}},"usage":{"type":"object","description":"Token usage information. In streaming responses, this will be included only in the last chunk.","properties":{"prompt_tokens":{"type":"integer","description":"Number of tokens in the prompt"},"completion_tokens":{"type":"integer","description":"Number of tokens in the completion"},"total_tokens":{"type":"integer","description":"Total number of tokens used"}}},"prompt_logprobs":{"type":"object","description":"Log probabilities of the prompt tokens","nullable":true}}}}},"paths":{"/chat/completions":{"post":{"summary":"Create a chat completion","description":"This endpoint creates a chat completion using the specified model.","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CompletionRequest"}}}},"responses":{"200":{"description":"A chat completion.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CompletionResponse"}}}},"500":{"description":"Internal server error."}}}}}}
```


---

# Agent Instructions: Querying This Documentation

If you need additional information that is not directly available in this page, you can query the documentation dynamically by asking a question.

Perform an HTTP GET request on the current page URL with the `ask` query parameter:

```
GET https://docs.cortecs.ai/api-overview/chat-completions.md?ask=<question>
```

The question should be specific, self-contained, and written in natural language.
The response will contain a direct answer to the question and relevant excerpts and sources from the documentation.

Use this mechanism when the answer is not explicitly present in the current page, you need clarification or additional context, or you want to retrieve related documentation sections.
