# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from typing import List, Union, Optional
from typing_extensions import Literal

from ..._models import BaseModel
from ..shared.metadata import Metadata
from .conversation_item import ConversationItem
from .realtime_audio_formats import RealtimeAudioFormats
from .realtime_response_usage import RealtimeResponseUsage
from .realtime_response_status import RealtimeResponseStatus

__all__ = ["RealtimeResponse", "Audio", "AudioOutput"]


class AudioOutput(BaseModel):
    # Output-side audio settings: the audio format and the voice.
    # Both fields are optional and default to `None`.
    format: Optional[RealtimeAudioFormats] = None
    """Audio format used for the model's output."""

    voice: Optional[
        Union[
            str,
            Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse", "marin", "cedar"],
        ]
    ] = None
    """Voice used by the model when it responds.

    Once the model has responded with audio at least once in a session, the voice
    can no longer be changed. The voices currently available are `alloy`, `ash`,
    `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, and `cedar`;
    `marin` and `cedar` are recommended for best quality.
    """


class Audio(BaseModel):
    # Audio settings attached to a `RealtimeResponse` (its `audio` field).
    # Only the output side is modelled here; `output` is optional and
    # defaults to `None`.
    output: Optional[AudioOutput] = None


class RealtimeResponse(BaseModel):
    # Response resource of the Realtime API (object type `realtime.response`).
    # Every field is optional and defaults to `None`.
    id: Optional[str] = None
    """Unique identifier of the response, e.g. `resp_1234`."""

    audio: Optional[Audio] = None
    """Audio output configuration."""

    conversation_id: Optional[str] = None
    """
    ID of the conversation the response was added to, as determined by the
    `conversation` field of the `response.create` event. With `auto`, the response
    is added to the default conversation and `conversation_id` is an id such as
    `conv_1234`. With `none`, the response is not added to any conversation and
    `conversation_id` is `null`. Responses triggered automatically by VAD are added
    to the default conversation.
    """

    max_output_tokens: Optional[Union[int, Literal["inf"]]] = None
    """
    Maximum number of output tokens for a single assistant response, tool calls
    included, that was used in this response.
    """

    metadata: Optional[Metadata] = None
    """A set of 16 key-value pairs that can be attached to an object.

    Useful for storing additional information about the object in a structured
    format, and for querying objects through the API or the dashboard.

    Keys are strings of at most 64 characters; values are strings of at most 512
    characters.
    """

    object: Optional[Literal["realtime.response"]] = None
    """Object type; must be `realtime.response`."""

    output: Optional[List[ConversationItem]] = None
    """Output items generated by the response."""

    output_modalities: Optional[List[Literal["text", "audio"]]] = None
    """
    Modalities the model used to respond. The only values currently possible are
    `["audio"]` and `["text"]`. Audio output always includes a text transcript;
    setting the output mode to `text` disables audio output from the model.
    """

    status: Optional[Literal["completed", "cancelled", "failed", "incomplete", "in_progress"]] = None
    """
    Status of the response: `completed`, `cancelled`, `failed`, `incomplete`, or
    `in_progress`.
    """

    status_details: Optional[RealtimeResponseStatus] = None
    """Further details about the status."""

    usage: Optional[RealtimeResponseUsage] = None
    """Usage statistics for the response; these correspond to billing.

    A Realtime API session maintains a conversation context and appends new Items
    to the Conversation, so output from earlier turns (text and audio tokens)
    becomes input for later turns.
    """
