o
    $ip                     @  s  U d dl mZ d dlZd dlmZmZmZmZmZm	Z	 d dl
mZmZmZ d dlZddlmZ ddlmZmZmZmZmZmZmZmZmZ ddlmZmZmZmZm Z  dd	l!m"Z" dd
l#m$Z$m%Z% ddl&m'Z'm(Z( ddl)m*Z*m+Z+ ddl,m-Z- ddl.m/Z/ ddl0m1Z1 ddl2m3Z3 ddl4m5Z5 ddl6m7Z7 ddl8m9Z9 ddl:m;Z; ddl<m=Z= ddl>m?Z? ddgZ@eAdZBdeCd< G dd de$ZDG dd de%ZEG dd dZFG d d! d!ZGG d"d# d#ZHG d$d% d%ZId,d*d+ZJdS )-    )annotationsN)TYPE_CHECKINGListUnionMappingOptionalcast)Literaloverloadassert_never   )_legacy_response)	BodyOmitQueryHeadersNotGiven	FileTypesSequenceNotStromit	not_given)extract_filesrequired_argsmaybe_transformdeepcopy_minimalasync_maybe_transform)cached_property)SyncAPIResourceAsyncAPIResource)to_streamed_response_wrapper"async_to_streamed_response_wrapper)StreamAsyncStream)transcription_create_params)make_request_options)
AudioModel)Transcription)AudioResponseFormat)TranscriptionInclude)TranscriptionVerbose)TranscriptionDiarized)TranscriptionStreamEvent)TranscriptionCreateResponseTranscriptionsAsyncTranscriptionszopenai.audio.transcriptionszlogging.Loggerlogc                   @  sX  e Zd ZedCddZedDddZeeeeeeeeeddded	dEd&d'Z	eeeeeeeddded(
dFd+d'Z	eeeeeeeddded(
dGd.d'Z	eeeeeeeddded/
dHd5d'Z	eeeeeeeeeeddded6dId:d'Z	eeeeeeeeeeddded6dJd=d'Z	e
d
dgg d>eeeeeeeeeeddded?dKdBd'Z	dS )Lr-   returnTranscriptionsWithRawResponsec                 C     t | S a  
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        )r1   self r6   e/var/www/html/flask_server/venv/lib/python3.10/site-packages/openai/resources/audio/transcriptions.pywith_raw_response-      z Transcriptions.with_raw_response#TranscriptionsWithStreamingResponsec                 C  r2   z
        An alternative to `.with_raw_response` that doesn't eagerly read the response body.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        )r:   r4   r6   r6   r7   with_streaming_response7      z&Transcriptions.with_streaming_responseN)chunking_strategyincludelanguagepromptresponse_formatstreamtemperaturetimestamp_granularitiesextra_headersextra_query
extra_bodytimeoutfiler   modelUnion[str, AudioModel]r>   =Optional[transcription_create_params.ChunkingStrategy] | Omitr?   !List[TranscriptionInclude] | Omitr@   
str | OmitrA   rB   Union[Literal['json'], Omit]rC   Optional[Literal[False]] | OmitrD   float | OmitrE   'List[Literal['word', 'segment']] | OmitrF   Headers | NonerG   Query | NonerH   Body | NonerI   'float | httpx.Timeout | None | NotGivenr&   c                C     dS )a  
        Transcribes audio into the input language.

        Args:
          file:
              The audio file object (not file name) to transcribe, in one of these formats:
              flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.

          model: ID of the model to use. The options are `gpt-4o-transcribe`,
              `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
              Whisper V2 model).

          chunking_strategy: Controls how the audio is cut into chunks. When set to `"auto"`, the server
              first normalizes loudness and then uses voice activity detection (VAD) to choose
              boundaries. `server_vad` object can be provided to tweak VAD detection
              parameters manually. If unset, the audio is transcribed as a single block.

          include: Additional information to include in the transcription response. `logprobs` will
              return the log probabilities of the tokens in the response to understand the
              model's confidence in the transcription. `logprobs` only works with
              response_format set to `json` and only with the models `gpt-4o-transcribe` and
              `gpt-4o-mini-transcribe`.

          language: The language of the input audio. Supplying the input language in
              [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
              format will improve accuracy and latency.

          prompt: An optional text to guide the model's style or continue a previous audio
              segment. The
              [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
              should match the audio language.

          response_format: The format of the output, in one of these options: `json`, `text`, `srt`,
              `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`,
              the only supported format is `json`.

          stream: If set to true, the model response data will be streamed to the client as it is
              generated using
              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
              See the
              [Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions)
              for more information.

              Note: Streaming is not supported for the `whisper-1` model and will be ignored.

          temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
              output more random, while lower values like 0.2 will make it more focused and
              deterministic. If set to 0, the model will use
              [log probability](https://en.wikipedia.org/wiki/Log_probability) to
              automatically increase the temperature until certain thresholds are hit.

          timestamp_granularities: The timestamp granularities to populate for this transcription.
              `response_format` must be set `verbose_json` to use timestamp granularities.
              Either or both of these options are supported: `word`, or `segment`. Note: There
              is no additional latency for segment timestamps, but generating word timestamps
              incurs additional latency.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request
        Nr6   )r5   rJ   rK   r>   r?   r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   r6   r6   r7   create@   s    zTranscriptions.create
r>   r?   r@   rA   rD   rE   rF   rG   rH   rI   Literal['verbose_json']r)   c                C     d S Nr6   r5   rJ   rK   r>   r?   rB   r@   rA   rD   rE   rF   rG   rH   rI   r6   r6   r7   rY         Literal['text', 'srt', 'vtt']strc                C  r\   r]   r6   )r5   rJ   rK   r>   rB   r?   r@   rA   rD   rE   rF   rG   rH   rI   r6   r6   r7   rY      r_   )
r>   known_speaker_namesknown_speaker_referencesr@   rD   rE   rF   rG   rH   rI   Literal['diarized_json']rb   SequenceNotStr[str] | Omitrc   r*   c                C  r\   r]   r6   )r5   rJ   rK   r>   rB   rb   rc   r@   rD   rE   rF   rG   rH   rI   r6   r6   r7   rY      r_   r>   r?   rb   rc   r@   rA   rB   rD   rE   rF   rG   rH   rI   Literal[True] Union[AudioResponseFormat, Omit] Stream[TranscriptionStreamEvent]c                C  rX   aS  
        Transcribes audio into the input language.

        Args:
          file:
              The audio file object (not file name) to transcribe, in one of these formats:
              flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.

          model: ID of the model to use. The options are `gpt-4o-transcribe`,
              `gpt-4o-mini-transcribe`, `whisper-1` (which is powered by our open source
              Whisper V2 model), and `gpt-4o-transcribe-diarize`.

          stream: If set to true, the model response data will be streamed to the client as it is
              generated using
              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
              See the
              [Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions)
              for more information.

              Note: Streaming is not supported for the `whisper-1` model and will be ignored.

          chunking_strategy: Controls how the audio is cut into chunks. When set to `"auto"`, the server
              first normalizes loudness and then uses voice activity detection (VAD) to choose
              boundaries. `server_vad` object can be provided to tweak VAD detection
              parameters manually. If unset, the audio is transcribed as a single block.
              Required when using `gpt-4o-transcribe-diarize` for inputs longer than 30
              seconds.

          include: Additional information to include in the transcription response. `logprobs` will
              return the log probabilities of the tokens in the response to understand the
              model's confidence in the transcription. `logprobs` only works with
              response_format set to `json` and only with the models `gpt-4o-transcribe` and
              `gpt-4o-mini-transcribe`. This field is not supported when using
              `gpt-4o-transcribe-diarize`.

          known_speaker_names: Optional list of speaker names that correspond to the audio samples provided in
              `known_speaker_references[]`. Each entry should be a short identifier (for
              example `customer` or `agent`). Up to 4 speakers are supported.

          known_speaker_references: Optional list of audio samples (as
              [data URLs](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs))
              that contain known speaker references matching `known_speaker_names[]`. Each
              sample must be between 2 and 10 seconds, and can use any of the same input audio
              formats supported by `file`.

          language: The language of the input audio. Supplying the input language in
              [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
              format will improve accuracy and latency.

          prompt: An optional text to guide the model's style or continue a previous audio
              segment. The
              [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
              should match the audio language. This field is not supported when using
              `gpt-4o-transcribe-diarize`.

          response_format: The format of the output, in one of these options: `json`, `text`, `srt`,
              `verbose_json`, `vtt`, or `diarized_json`. For `gpt-4o-transcribe` and
              `gpt-4o-mini-transcribe`, the only supported format is `json`. For
              `gpt-4o-transcribe-diarize`, the supported formats are `json`, `text`, and
              `diarized_json`, with `diarized_json` required to receive speaker annotations.

          temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
              output more random, while lower values like 0.2 will make it more focused and
              deterministic. If set to 0, the model will use
              [log probability](https://en.wikipedia.org/wiki/Log_probability) to
              automatically increase the temperature until certain thresholds are hit.

          timestamp_granularities: The timestamp granularities to populate for this transcription.
              `response_format` must be set `verbose_json` to use timestamp granularities.
              Either or both of these options are supported: `word`, or `segment`. Note: There
              is no additional latency for segment timestamps, but generating word timestamps
              incurs additional latency. This option is not available for
              `gpt-4o-transcribe-diarize`.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        Nr6   r5   rJ   rK   rC   r>   r?   rb   rc   r@   rA   rB   rD   rE   rF   rG   rH   rI   r6   r6   r7   rY         ibool>TranscriptionCreateResponse | Stream[TranscriptionStreamEvent]c                C  rX   rj   r6   rk   r6   r6   r7   rY   =  rl   rJ   rK   rC   r>   r?   rb   rc   r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   /Optional[Literal[False]] | Literal[True] | Omitestr | Transcription | TranscriptionDiarized | TranscriptionVerbose | Stream[TranscriptionStreamEvent]c                C  s   t |||||||||	|
||d}tttttf |dggd}ddi|p&i }| jdt||
r2tj	ntj
|t||||dt|	|
pCdtt d	S 
N)rJ   rK   r>   r?   rb   rc   r@   rA   rB   rC   rD   rE   rJ   )pathszContent-Typezmultipart/form-dataz/audio/transcriptions)rF   rG   rH   rI   F)bodyfilesoptionscast_torC   
stream_cls)r   r   r   r   ra   object_postr   r#   "TranscriptionCreateParamsStreaming%TranscriptionCreateParamsNonStreamingr$   _get_response_format_typer!   r+   r5   rJ   rK   r>   r?   rb   rc   r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   ru   rv   r6   r6   r7   rY     sB   )r0   r1   )r0   r:   )rJ   r   rK   rL   r>   rM   r?   rN   r@   rO   rA   rO   rB   rP   rC   rQ   rD   rR   rE   rS   rF   rT   rG   rU   rH   rV   rI   rW   r0   r&   rJ   r   rK   rL   r>   rM   r?   rN   rB   r[   r@   rO   rA   rO   rD   rR   rE   rS   rF   rT   rG   rU   rH   rV   rI   rW   r0   r)   )rJ   r   rK   rL   r>   rM   rB   r`   r?   rN   r@   rO   rA   rO   rD   rR   rE   rS   rF   rT   rG   rU   rH   rV   rI   rW   r0   ra   )rJ   r   rK   rL   r>   rM   rB   rd   rb   re   rc   re   r@   rO   rD   rR   rE   rS   rF   rT   rG   rU   rH   rV   rI   rW   r0   r*   )"rJ   r   rK   rL   rC   rg   r>   rM   r?   rN   rb   re   rc   re   r@   rO   rA   rO   rB   rh   rD   rR   rE   rS   rF   rT   rG   rU   rH   rV   rI   rW   r0   ri   )"rJ   r   rK   rL   rC   rm   r>   rM   r?   rN   rb   re   rc   re   r@   rO   rA   rO   rB   rh   rD   rR   rE   rS   rF   rT   rG   rU   rH   rV   rI   rW   r0   rn   )"rJ   r   rK   rL   r>   rM   r?   rN   rb   re   rc   re   r@   rO   rA   rO   rB   rh   rC   rq   rD   rR   rE   rS   rF   rT   rG   rU   rH   rV   rI   rW   r0   rr   __name__
__module____qualname__r   r8   r<   r
   r   r   rY   r   r6   r6   r6   r7   r-   ,   s    	Rjjc                   @  s6  e Zd Zed>ddZed?ddZeeeeeeeeeeeddded	d@d)d*Z	eeeeeeeddded+
dAd.d*Z	eeeeeeeddded+
dBd1d*Z	eeeeeeeeeeddded2dCd6d*Z	eeeeeeeeeeddded2dDd9d*Z	e
d
dgg d:eeeeeeeeeeddded	dEd=d*Z	dS )Fr.   r0   "AsyncTranscriptionsWithRawResponsec                 C  r2   r3   )r   r4   r6   r6   r7   r8     r9   z%AsyncTranscriptions.with_raw_response(AsyncTranscriptionsWithStreamingResponsec                 C  r2   r;   )r   r4   r6   r6   r7   r<     r=   z+AsyncTranscriptions.with_streaming_responseNrp   rJ   r   rK   rL   r>   rM   r?   rN   rb   re   rc   r@   rO   rA   rB   rP   rC   rQ   rD   rR   rE   rS   rF   rT   rG   rU   rH   rV   rI   rW   r,   c                     dS )a  
        Transcribes audio into the input language.

        Args:
          file:
              The audio file object (not file name) to transcribe, in one of these formats:
              flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.

          model: ID of the model to use. The options are `gpt-4o-transcribe`,
              `gpt-4o-mini-transcribe`, `whisper-1` (which is powered by our open source
              Whisper V2 model), and `gpt-4o-transcribe-diarize`.

          chunking_strategy: Controls how the audio is cut into chunks. When set to `"auto"`, the server
              first normalizes loudness and then uses voice activity detection (VAD) to choose
              boundaries. `server_vad` object can be provided to tweak VAD detection
              parameters manually. If unset, the audio is transcribed as a single block.
              Required when using `gpt-4o-transcribe-diarize` for inputs longer than 30
              seconds.

          include: Additional information to include in the transcription response. `logprobs` will
              return the log probabilities of the tokens in the response to understand the
              model's confidence in the transcription. `logprobs` only works with
              response_format set to `json` and only with the models `gpt-4o-transcribe` and
              `gpt-4o-mini-transcribe`. This field is not supported when using
              `gpt-4o-transcribe-diarize`.

          known_speaker_names: Optional list of speaker names that correspond to the audio samples provided in
              `known_speaker_references[]`. Each entry should be a short identifier (for
              example `customer` or `agent`). Up to 4 speakers are supported.

          known_speaker_references: Optional list of audio samples (as
              [data URLs](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs))
              that contain known speaker references matching `known_speaker_names[]`. Each
              sample must be between 2 and 10 seconds, and can use any of the same input audio
              formats supported by `file`.

          language: The language of the input audio. Supplying the input language in
              [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
              format will improve accuracy and latency.

          prompt: An optional text to guide the model's style or continue a previous audio
              segment. The
              [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
              should match the audio language. This field is not supported when using
              `gpt-4o-transcribe-diarize`.

          response_format: The format of the output, in one of these options: `json`, `text`, `srt`,
              `verbose_json`, `vtt`, or `diarized_json`. For `gpt-4o-transcribe` and
              `gpt-4o-mini-transcribe`, the only supported format is `json`. For
              `gpt-4o-transcribe-diarize`, the supported formats are `json`, `text`, and
              `diarized_json`, with `diarized_json` required to receive speaker annotations.

          stream: If set to true, the model response data will be streamed to the client as it is
              generated using
              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
              See the
              [Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions)
              for more information.

              Note: Streaming is not supported for the `whisper-1` model and will be ignored.

          temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
              output more random, while lower values like 0.2 will make it more focused and
              deterministic. If set to 0, the model will use
              [log probability](https://en.wikipedia.org/wiki/Log_probability) to
              automatically increase the temperature until certain thresholds are hit.

          timestamp_granularities: The timestamp granularities to populate for this transcription.
              `response_format` must be set `verbose_json` to use timestamp granularities.
              Either or both of these options are supported: `word`, or `segment`. Note: There
              is no additional latency for segment timestamps, but generating word timestamps
              incurs additional latency. This option is not available for
              `gpt-4o-transcribe-diarize`.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request
        Nr6   )r5   rJ   rK   r>   r?   rb   rc   r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   r6   r6   r7   rY     s    zAsyncTranscriptions.createrZ   r[   r)   c                     d S r]   r6   r^   r6   r6   r7   rY   `     r`   ra   c                  r   r]   r6   r^   r6   r6   r7   rY   u  r   rf   rg   rh   %AsyncStream[TranscriptionStreamEvent]c                  r   rj   r6   rk   r6   r6   r7   rY        irm   CTranscriptionCreateResponse | AsyncStream[TranscriptionStreamEvent]c                  r   rj   r6   rk   r6   r6   r7   rY     r   ro   rq   jTranscription | TranscriptionVerbose | TranscriptionDiarized | str | AsyncStream[TranscriptionStreamEvent]c                  s   t |||||||||	|
||d}tttttf |dggd}ddi|p'i }| jdt||
r3tj	ntj
I d H |t||||dt|	|
pGdtt d	I d H S rs   )r   r   r   r   ra   rz   r{   r   r#   r|   r}   r$   r~   r"   r+   r   r6   r6   r7   rY   `  sD   )r0   r   )r0   r   )"rJ   r   rK   rL   r>   rM   r?   rN   rb   re   rc   re   r@   rO   rA   rO   rB   rP   rC   rQ   rD   rR   rE   rS   rF   rT   rG   rU   rH   rV   rI   rW   r0   r,   r   )rJ   r   rK   rL   r>   rM   r?   rN   rB   r`   r@   rO   rA   rO   rD   rR   rE   rS   rF   rT   rG   rU   rH   rV   rI   rW   r0   ra   )"rJ   r   rK   rL   rC   rg   r>   rM   r?   rN   rb   re   rc   re   r@   rO   rA   rO   rB   rh   rD   rR   rE   rS   rF   rT   rG   rU   rH   rV   rI   rW   r0   r   )"rJ   r   rK   rL   rC   rm   r>   rM   r?   rN   rb   re   rc   re   r@   rO   rA   rO   rB   rh   rD   rR   rE   rS   rF   rT   rG   rU   rH   rV   rI   rW   r0   r   )"rJ   r   rK   rL   r>   rM   r?   rN   rb   re   rc   re   r@   rO   rA   rO   rB   rh   rC   rq   rD   rR   rE   rS   rF   rT   rG   rU   rH   rV   rI   rW   r0   r   r   r6   r6   r6   r7   r.     s    	ejjc                   @     e Zd ZdddZdS )	r1   transcriptionsr-   r0   Nonec                 C     || _ t|j| _d S r]   )_transcriptionsr   to_raw_response_wrapperrY   r5   r   r6   r6   r7   __init__     
z&TranscriptionsWithRawResponse.__init__Nr   r-   r0   r   r   r   r   r   r6   r6   r6   r7   r1         r1   c                   @  r   )	r   r   r.   r0   r   c                 C  r   r]   )r   r   async_to_raw_response_wrapperrY   r   r6   r6   r7   r     r   z+AsyncTranscriptionsWithRawResponse.__init__Nr   r.   r0   r   r   r6   r6   r6   r7   r     r   r   c                   @  r   )	r:   r   r-   r0   r   c                 C     || _ t|j| _d S r]   )r   r   rY   r   r6   r6   r7   r        
z,TranscriptionsWithStreamingResponse.__init__Nr   r   r6   r6   r6   r7   r:     r   r:   c                   @  r   )	r   r   r.   r0   r   c                 C  r   r]   )r   r    rY   r   r6   r6   r7   r     r   z1AsyncTranscriptionsWithStreamingResponse.__init__Nr   r   r6   r6   r6   r7   r     r   r   rB   AudioResponseFormat | Omitr0   Htype[Transcription | TranscriptionVerbose | TranscriptionDiarized | str]c                 C  sv   t | ts	| d u rtS | dkrtS | dkrtS | dkrtS | dks)| dks)| dkr+tS tr3t|  d S t	d|  tS )Njsonverbose_jsondiarized_jsonsrttextvttz$Unexpected audio response format: %s)

isinstancer   r&   r)   r*   ra   r   r   r/   warn)rB   r6   r6   r7   r~     s   r~   )rB   r   r0   r   )K
__future__r   loggingtypingr   r   r   r   r   r   typing_extensionsr	   r
   r   httpx r   _typesr   r   r   r   r   r   r   r   r   _utilsr   r   r   r   r   _compatr   	_resourcer   r   	_responser   r    
_streamingr!   r"   types.audior#   _base_clientr$   types.audio_modelr%   types.audio.transcriptionr&   types.audio_response_formatr'   !types.audio.transcription_includer(   !types.audio.transcription_verboser)   "types.audio.transcription_diarizedr*   &types.audio.transcription_stream_eventr+   )types.audio.transcription_create_responser,   __all__	getLoggerr/   __annotations__r-   r.   r1   r   r:   r   r~   r6   r6   r6   r7   <module>   sJ    ,   =   ;				