o
    ?h>                     @   s  d dl Z d dlZd dlZzddlmZ W n ey!   d dlZY nw zddlmZ W n ey7   d dlZY nw ejZej	Z	dZ
dZdZdZzejZW n eyZ   eeB eB ZY nw ejZejedf Zeje Zeje Zeje Zeje Zejej  Z!	 					d^d
ej"dededej#de$de%fddZ&							d_d
ej"dededej#de$de%fddZ'				d`d
ej"dededej#def
ddZ(	dad
ej"dedej#defddZ)		dbd
ej"de
de
dedej#f
ddZ*	 					dcd
ej"ded ed!ed"e$d#edej#fd$d%Z+	&dddddd	ddd'd
ej"d(edededej#de$fd)d*Z,dadefd+d,Z-d-ed.edefd/d0Z.d
ej"d1edefd2d3Z/de%fd4d5Z0de%fd6d7Z1d8ede2fd9d:Z3d8ede2fd;d<Z4d=ej5d-ede2fd>d?Z6d@dA Z7	 dBdC Z8dDdE Z9defdFdGZ:defdHdIZ;dJedefdKdLZ<dMe2dNedOe2dej=fdPdQZ>dMe2dNedej=fdRdSZ?dadTedUe%dej=fdVdWZ@dadMe2dNedXe%dej=fdYdZZAdMe2dNed[edej=fd\d]ZBdS )e    N   )pymupdf)mupdf
point_like	rect_likematrix_like	quad_likeFpageclipflagstextpagesortreturnc                 C   st   t |  |du rt j}|}|du r| j||d}nt|d| kr%td| }|du r.~|r8|jdd d |S )a_  Return the text blocks on a page.

    Notes:
        Lines in a block are concatenated with line breaks.
    Args:
        flags: (int) control the amount of data parsed into the textpage.
    Returns:
        A list of the blocks. Each item contains the containing rectangle
        coordinates, text lines, running block number and block type.
    Nr
   r   parentnot a textpage of this pagec                 S      | d | d fS N   r    )br   r   M/var/www/html/flask_server/venv/lib/python3.10/site-packages/pymupdf/utils.py<lambda>R       z!get_text_blocks.<locals>.<lambda>key)r   CheckParentTEXTFLAGS_BLOCKSget_textpagegetattr
ValueErrorextractBLOCKSr   )r	   r
   r   r   r   tpblocksr   r   r   get_text_blocks4   s   
r$   r   c           
         s   fdd}t |  |du rt j}|}|du r | j |d}nt|d| kr+td||}	|durF durFt    fdd|	D }	|du rK~|	rS|rS||	}	|	S )	a  Return the text words as a list with the bbox for each word.

    Args:
        page: pymupdf.Page
        clip: (rect-like) area on page to consider
        flags: (int) control the amount of data parsed into the textpage.
        textpage: (pymupdf.TextPage) either passed-in or None.
        sort: (bool) sort the words in reading sequence.
        delimiters: (str,list) characters to use as word delimiters.
        tolerance: (float) consider words to be part of the same line if
            top or bottom coordinate are not larger than this. Relevant
            only if sort=True.

    Returns:
        Word tuples (x0, y0, x1, y1, "word", bno, lno, wno).
    c                    s   | j dd d g }| d g}t| d dd }| dd D ];}t|dd }t|j|j  ks?t|j|j  krI|| ||O }q |j dd d || |g}|}q |j d	d d || |S )
z1Sort words line-wise, forgiving small deviations.c                 S   r   r   r   wr   r   r   r   r   r   z4get_text_words.<locals>.sort_words.<locals>.<lambda>r   r   N   r   c                 S      | d S Nr   r   r%   r   r   r   r          c                 S   r(   r)   r   r%   r   r   r   r      r*   )r   r   Rectabsy0y1appendextend)wordsnwordslinelrectr&   wrect)	tolerancer   r   
sort_wordsp   s"   




z"get_text_words.<locals>.sort_wordsNr   r   r   c              
      s>   g | ]}t  |d d @ dt t|d d  kr|qS )Nr'   g      ?)r,   r   r+   .0r&   )r
   r   r   
<listcomp>   s    8z"get_text_words.<locals>.<listcomp>)r   r   TEXTFLAGS_WORDSr   r   r    extractWORDSr+   )
r	   r
   r   r   r   
delimitersr6   r7   r"   r1   r   )r
   r6   r   get_text_wordsV   s(   



r>   c              	   C   s  dd }dd t | |||d|dD }|sdS t }|D ]\}}	||O }qg }
|d g}|d d }|d	d
 D ]=\}}	|d \}}t|j|j |ksWt|j|j |krc|||	f ||O }q9|||}|
||f ||	fg}|}q9|||}|
||f |
jdd d |
d d	 }	|
d d j}|
d	d
 D ]"\}}tt	t
|j| |j d}d|d	  }|	|| 7 }	|j}q|	S )a  Extract plain text avoiding unacceptable line breaks.

    Text contained in clip will be sorted in reading sequence. Some effort
    is also spent to simulate layout vertically and horizontally.

    Args:
        page: pymupdf.Page
        clip: (rect-like) only consider text inside
        flags: (int) text extraction flags
        textpage: pymupdf.TextPage
        tolerance: (float) consider words to be on the same line if their top
            or bottom coordinates do not differ more than this.

    Notes:
        If a TextPage is provided, all text is checked for being inside clip
        with at least 50% of its bbox.
        This allows to use some "global" TextPage in conjunction with sub-
        selecting words in parts of the defined TextPage rectangle.

    Returns:
        A text string in reading sequence. Left indentation of each line,
        inter-line and inter-word distances strive to reflect the layout.
    c                 S   s   |j dd d d}| j}t }|D ]2\}}||O }ttt|j| |j t| || jks5|j|kr7dnd}|d| | 7 }|j	}q|S )a  Create the string of one text line.

        We are trying to simulate some horizontal layout here, too.

        Args:
            clip: (pymupdf.Rect) the area from which all text is being read.
            line: (list) word tuples (rect, text) contained in the line
        Returns:
            Text in this line. Generated from words in 'line'. Distance from
            predecessor is translated to multiple spaces, thus simulating
            text indentations and large horizontal distances.
        c                 S   
   | d j S r)   )x0r%   r   r   r   r         
 z4get_sorted_text.<locals>.line_text.<locals>.<lambda>r    r   r    )
r   r@   r   
EMPTY_RECTmaxintroundwidthlenx1)r
   r3   ltextrJ   r4   rtdistr   r   r   	line_text   s   z"get_sorted_text.<locals>.line_textc                 S   s&   g | ]}t |d d |d fqS )Nr'   )r   r+   r8   r   r   r   r:      s    z#get_sorted_text.<locals>.<listcomp>T)r
   r   r   r   r6   rB   r   r   Nc                 S   r?   r)   )r.   )lr   r   r   r     rA   z!get_sorted_text.<locals>.<lambda>r      
)r>   r   rD   r,   r-   r.   r/   r   minrF   rG   height)r	   r
   r   r   r6   rO   r1   totalboxwrtextlinesr3   r4   w0r_rK   r.   distancebreaksr   r   r   get_sorted_text   sN   

(



r^   rectc                 C   sD   |}|d u r|   }nt|d| krtd||}|d u r ~|S )Nr   r   )r   r   r    extractTextbox)r	   r_   r   r"   rcr   r   r   get_textbox  s   

rb   p1p2c                 C   sX   t |  |}|d u r| j|t jd}nt|d| krtd|||}|d u r*~|S )Nr   r   r   )r   r   r   TEXT_DEHYPHENATEr   r    extractSelection)r	   rc   rd   r
   r   r"   ra   r   r   r   get_text_selection*  s   
rg   engH   languagedpifulltessdatac              
      sp  t |  t    fdd}|r|| |||S | j|d}| jdt jdd D ]}|d dkr2q)t |d }	|	jd	ksC|	jd	krDq)zTt 	|d
 }
|
j
|
j d	kr[t 	t j|
}
|
jrdt 	|
d}
t d|
j| d}|d}d}
|j}t d|j d|j }||d  }|j|d|d |  W q) ttjfy   	 d}t d || ||| Y   S w |S )as  Create a Textpage from combined results of normal and OCR text parsing.

    Args:
        flags: (int) control content becoming part of the result.
        language: (str) specify expected language(s). Default is "eng" (English).
        dpi: (int) resolution in dpi, default 72.
        full: (bool) whether to OCR the full page image, or only its images (default)
    c                    s   |d }t ||}| j|d}t d|jd| d}|d}| jj|jj }	t |	|	| j }
|j	||
d}|
  d }t| |_|S )Nri   )matrixpdfF)compressrj   rm   r   r   rn   )r   Matrix
get_pixmapDocumentpdfocr_tobytes	load_pager_   rH   derotation_matrixr   closeweakrefproxyr   )r	   rk   rj   r   zoommatpixocr_pdfocr_pageunzoomctmtpagerm   r   r   full_ocrP  s&   
z"get_textpage_ocr.<locals>.full_ocr)r   dictr#   typer   bboxr   imager   ro   )rj   rm   N	transformrq   zFalling back to full page OCR)r   r   get_tessdatar   get_textTEXT_PRESERVE_IMAGESr+   rH   rU   PixmapnalphacsRGBrt   ru   rv   r_   rr   extend_textpagerx   RuntimeErrorr   FzErrorBaseg_exceptions_verboseexception_infomessage)r	   r   rj   rk   rl   rm   r   r   blockr   r}   imgdocimgpageimgrectshrinkr|   r   r   r   get_textpage_ocr=  sH   



	r   rX   )r
   r   r   r   r=   r6   optionc                C   s  t jt jt jt jt jt jt jt jt jt jd
}|	 }||v s!J ||vr'd}|du r/|| }|dkr=t
| |||||dS |dkrJt| ||||dS |dkrY|rYt| ||||dS t |  d}	|d	v rg| j}|durst |}d}	n
t| t ju r}| j}	|}
|
du r| j||d
}
nt|
d| krtd|dkr|
j|	|d}nE|dkr|
j|	|d}n9|dkr|
j|	|d}n-|dkr|
j|	|d}n!|dkr|
 }n|dkr|
 }n|dkr|
 }n|
j|d}|du r~
|S )a  Extract text from a page or an annotation.

    This is a unifying wrapper for various methods of the pymupdf.TextPage class.

    Args:
        option: (str) text, words, blocks, html, dict, json, rawdict, xhtml or xml.
        clip: (rect-like) restrict output to this area.
        flags: bit switches to e.g. exclude images or decompose ligatures.
        textpage: reuse this pymupdf.TextPage and make no new one. If specified,
            'flags' and 'clip' are ignored.

    Returns:
        the output of methods get_text_words / get_text_blocks or pymupdf.TextPage
        methods extractText, extractHTML, extractDICT, extractJSON, extractRAWDICT,
        extractXHTML or etractXML respectively.
        Default and misspelling choice is "text".
    )
rX   htmljsonrawjsonxmlxhtmlr   rawdictr1   r#   rX   Nr1   )r
   r   r   r   r=   r#   )r
   r   r   r   )r
   r   r   r6   )r   r   r   r   r   r   r   )cbr   r   r   r   r   r   r   )r   )r   TEXTFLAGS_TEXTTEXTFLAGS_HTMLTEXTFLAGS_DICTTEXTFLAGS_RAWDICTTEXTFLAGS_XMLTEXTFLAGS_XHTMLr;   r   lowerr>   r$   r^   r   cropboxr+   r   Pager   r   r    extractJSONextractRAWJSONextractDICTextractRAWDICTextractHTML
extractXMLextractXHTMLextractText)r	   r   r
   r   r   r   r=   r6   formatsr   r"   rM   r   r   r   r     s   





r   c                 C   s  t | tjr| |}nt | tjr| j}n
J dt| d|jdd}zt| dr1| j	|d< W n t
yC   tdkrAt  Y nw tdd}|jtj@ rU|jj|_|jtj@ r`|jj|_|jtjkrm|j|d< |S |jtjkr|j|d	< ||d
< |jtj@ r|jj|d< |S d|d< |S |jtjkr|jdd|d< |j|d	< |jdk r|j|d
< |S ||d
< |jtj@ r|jj|d< |S d|d< |S |jtjkr|jdd|d< |S |jtjkr|j  |  @ rJ |!|j d
|v rt|d
 |d
< |S |j|d	< |S )Nr   zUnexpected type(ln)=.)kindxrefr_   from   urir	   tor{   g        \/file)"
isinstancer   OutlinedestinationLinkdestr   r   hasattrr_   	Exceptionr   r   Pointr   LINK_FLAG_L_VALIDltxLINK_FLAG_T_VALIDyLINK_URIr   	LINK_GOTOr	   LINK_FLAG_R_IS_ZOOMrb
LINK_GOTOR	file_specreplaceLINK_LAUNCH
LINK_NAMEDnamedkeysupdate)lndocumentr   nlpntr   r   r   getLinkDict  sj   




"




r   r   ddictc                 C   s  |sdS dd }dd }dd }dd }dd }t |ttfv r)|| d|d}|S |d	tj}|tjkr7dS |d	 tjkr[|d
d}	|dtdd}
|
\}}|| |||	}|S |d	 tjkrm|t	|d }|S |d	 tj
krt	|d }|||}|S |d	 tjkr|d dk rt	|d }|t	|d ||}|S |d	 tjkr|d dkrt	|d }||d |d j|d j|d
 ||}|S dS )zrCalculate the PDF action string.

    Notes:
        Supports Link annotations and outline items (bookmarks).
    rB   c                 S   s   d|  dt |||f dS )Nz/A<</S/GoTo/D[z	 0 R/XYZ z]>>	_format_g)ar   cdr   r   r   r   A      zgetDestStr.<locals>.<lambda>c              	   S   s(   d|  dt |||f d| d| d	S )Nz/A<</S/GoToR/D[z /XYZ z]/F<</F/UF/Type/Filespec>>>>r   )r   r   r   r   efr   r   r   r   B  s   ( c                 S   s   d|  d| d| dS )Nz/A<</S/GoToR/Dz/F<</Fr   r   r   )r   r   r   r   r   r   r   C      c                 S   s   d|  d| dS )Nz/A<</S/Launch/F<</Fr   r   r   )r   r   r   r   r   r   D  s    c                 S   s   d|  dS )Nz/A<</S/URI/URIz>>r   )r   r   r   r   r   E  s    r   r   r{   r   r   r   r	   )r   rF   floatgetr   	LINK_NONEr   r   r   get_pdf_strr   r   r   r   )r   r   str_goto
str_gotor1
str_gotor2
str_launchstr_urir   d_kindd_zoomr   d_leftd_topfspecr   r   r   
getDestStr9  sT   

r   lnkc                 C   s  | j }| }|d }tt|| }d}|d tjkrh|d dkrXtjd }|d }| j|}	|dt	dd}
| j| }|j }| }|
| }||	|j
|j|dd|}ntjd	 }|t|d |}n|d tjkr|d dkrtjd
 }|dt	dd}
t|
tj	urt	dd}
||d |
j
|
j|dd|d |d |}n^tjd }|t|d |d |}nK|d tjkrtjd }||d |d |}n4|d tjkrtjd }||d |}n |d tjkrtjd }|d}|d u r|d }|||}|s|S tdd |  D }|dd}|r-|d |f| v r-|}nd}tj d }	 || }|| vrCn|d7 }q7|dd| }|S )Nr   rB   r   r	   r   goto1r   r{   goto2gotor1r   gotor2launchr   r   name	nameddestc                 S   s*   g | ]}|d  t jkr|d |d fqS )r   r   r   )r   PDF_ANNOT_LINKr9   r   r   r   r   r:     s   * zgetLinkText.<locals>.<listcomp>idr   z-L%iTr   z/Linkz/Link/NM(%s))transformation_matrixr   tupler   r   
annot_skelr   	page_xrefr   r   r   r   r   r   r   r   r   r   r   annot_xrefsitemsTOOLSset_annot_stemvaluesr   )r	   r   r   ictmrL   r_   annottxtpnor   r   	dest_pagedest_ctm	dest_ictmipntlname
link_namesold_namer   istemr   r   r   getLinkTexts  s   












r  c                   C   s   dd t  D S )zP
    Returns a list of upper-case colour names.
    :rtype: list of strings
    c                 S   s   g | ]\}}}}|qS r   r   )r9   r   rL   gr   r   r   r   r:     r   z getColorList.<locals>.<listcomp>r   colors_wx_listr   r   r   r   getColorList  s   r  c                   C   s   t  S )z
    Returns list of (name, red, gree, blue) tuples, where:
        name: upper-case color name.
        read, green, blue: integers in range 0..255.
    :rtype: list of tuples
    r  r   r   r   r   getColorInfoList  s   r   r   c                 C   s   t  |  dS )zRetrieve RGB color in PDF format by name.

    Returns:
        a triple of floats in range 0 to 1. In case of name-not-found, "white" is returned.
    )r   r   r   )r   colors_pdf_dictr   r   )r   r   r   r   getColor  s   r"  c                 C   s(  zt  t |   }W n ty   trt  Y dS w |d d }|d d }|d d }t|||}t	|d d}t
|||}|| }|dkrNd}	n(||kr]d|| | d	  }	n||krld|| | d  }	n
d|| | d
  }	tt	|	}
|dkrd}n|| }tt	|d }|
||fS )zRetrieve the hue, saturation, value triple of a color name.

    Returns:
        a triple (degree, percent, percent). If not found (-1, -1, -1) is returned.
    )rP   rP   rP   r   g     o@r   r   d   r   g      N@   r'   )r   r  indexupperr   r   r   r   rE   rG   rT   rF   )r   r   rL   r  r   cmaxVcmindeltahueHsatSr   r   r   getColorHSV  s4   
r/  docc           
      C   s   |  |\}}}}d}d}|dkr|||||fS |rZz#tj|d}|j}|j}|j}	|| dk r<|	j|k r8|	j}d| }W n tyR   t  |d9 }|d9 }Y nw |||||fS |dkrzt|}|j}|j}W n ty   t  |d9 }|d9 }Y n	w |d9 }|d9 }|||||fS )Ng?gɿrB   )
fontbufferr   g333333?zn/a)	extract_fontr   Fontascender	descenderr   r-   r   r   )
r0  r   fontnameextstypebufferascdscfontr   r   r   r   _get_font_properties  sF   


r=  c                 C   sB   d}d}| j j}	 |sn|d7 }||j7 }|j}q	d| d| S )Nr   r   z
num_spans=z num_chars=)
m_internalheadrI   next)rX   	num_spans	num_charsspanr   r   r   _show_fz_text;  s   
rD  c                 C   s   | \}}|dd  ddd }|ddd}d}t|D ]A\}} |r&d}q| d	kr5||d  |d
< d}q| drM| dd dddd}||d< q| dr^t| dd }||d< q|S )a"  Make a Python dict from a PDF page label rule.

    Args:
        item -- a tuple (pno, rule) with the start page number and the rule
                string like <</S/D...>>.
    Returns:
        A dict like
        {'startpage': int, 'prefix': str, 'style': str, 'firstpagenum': int}.
    r   r   r   NrB   )	startpageprefixfirstpagenumFr.  styleTP()rG  StrH  )split	enumerate
startswithr   rF   )itemr  ruler   skipr  r   r   r   r   	rule_dict^  s*   

rT  c                    sj    fdd|D d }t |}|dd}|dd}|dv r!dnd} |d	  |d
  | }t|||S )zReturn the label for this page number.

    Args:
        pgNo: page number, 0-based.
        labels: result of doc._get_page_labels().
    Returns:
        The label (str) of the page number. Errors return an empty string.
    c                    s   g | ]
}|d   kr|qS )r   r   r  pgNor   r   r:     r   z!get_label_pno.<locals>.<listcomp>rP   rG  rB   rI  )r   Ar   rF  rH  )rT  r   construct_label)rV  labelsrQ  rR  rG  rI  r*  
pagenumberr   rU  r   get_label_pno  s   r[  c                 C   sx   d}| dkrt |}n+| dkrt| }n | dkr!t| }n| dkr,t| }n
| dkr6t| }|| }|S )z9Construct a label based on style, prefix and page number.rB   DrL   Rr   rW  )strintegerToRomanr   r&  integerToLetter)rI  rG  r  n_strresultr   r   r   rX    s   
rX  c           	      C   s   ddl }|j}d| }}td||kr(|ttd|8 }|d7 }td||ksd}tt|D ]}t|ttd|\}}||| 7 }|}q0|S )z-Returns letter sequence string for integer i.r   Nr      rB   )stringascii_uppercasepowrF   mathreversedrangedivmod)	r  rd  lsr   r   str_tjr   r  r   r   r   r`    s   
r`  numc                    s(   d  fdd}d dd || D S )z$Return roman numeral for an integer.))i  M)i  CM)i  r\  )i  CD)r#  C)Z   XC)2   L)(   XL)
   X)	   IX)rR   r(  )r'   IV)r   Ic                 3   sF     D ]\}}t | |\}}|| V  | || 8 } | dkr  d S qd S r)   )rj  )rn  rL   ltrr   r[   romanr   r   	roman_num  s   
z!integerToRoman.<locals>.roman_numrB   c                 S   s   g | ]}|qS r   r   )r9   r   r   r   r   r:     r   z"integerToRoman.<locals>.<listcomp>)join)rn  r  r   r  r   r_    s   r_  line_dirrC  r   c                 C   s  | du r|d } | \}}t |}t j rd}n|d |d  }||d  }|| }|| }|dkrT|dkrT|jd|f }	|j|df }
|j|df }|jd|f }nf|dkry|dkry|j|df }	|jd|f }
|jd|f }|j|df }nA|dkr|dkr|jd|f }	|j|df }
|j|df }|jd|f }n|j|df }	|jd|f }
|jd|f }|j|df }t |	|
||S )a  Compute the quad located inside the bbox.

    The bbox may be any of the resp. tuples occurring inside the given span.

    Args:
        line_dir: (tuple) 'line["dir"]' of the owning line or None.
        span: (dict) the span. May be from get_texttrace() method.
        bbox: (tuple) the bbox of the span or any of its characters.
    Returns:
        The quad which is wrapped by the bbox.
    Ndirr   r4  r5  sizer   )	r   r+   r  set_small_glyph_heightsbltrbrtlQuad)r  rC  r   cossinr   rU   hshculurlllrr   r   r   recover_bbox_quad  s<   

r  c                 C   sD   t | tust| dkrtdt |turtdt| ||d S )zRecover the quadrilateral of a text span.

    Args:
        line_dir: (tuple) 'line["dir"]' of the owning line.
        span: the span.
    Returns:
        The quadrilateral enveloping the span's text.
    r   bad line dir argumentbad span argumentr   )r   r  rI   r    r   r  )r  rC  r   r   r   recover_quad  s
   	r  r3   spansc                    s   |du r| d }t |dkrtd| d }|\}}t||d }t |dkr/t||d }n|}|j}|j}t||}	||	 }
tj  t	 fdd	|D }t
d| |
jd}|j}||	 9 }|S )
a  Calculate the line quad for 'dict' / 'rawdict' text extractions.

    The lower quad points are those of the first, resp. last span quad.
    The upper points are determined by the maximum span quad height.
    From this, compute a rect with bottom-left in (0, 0), convert this to a
    quad and rotate and shift back to cover the text of the spans.

    Args:
        spans: (list, optional) sub-list of spans to consider.
    Returns:
        pymupdf.Quad covering selected spans.
    Nr  r   zbad span listr  r   rP   c                    s,   g | ]}|d   rdn|d |d   qS )r  r   r4  r5  r   )r9   ssmallr   r   r:   B  s   , z%recover_line_quad.<locals>.<listcomp>)rI   r    r  r  r  r   planish_liner  r  rE   r+   r   quad)r3   r  r  r  r  q0q1line_llline_lrmat0x_lrh	line_rect	line_quadr   r  r   recover_line_quad  s,   

r  charsc                 C   s   | du r|d } |du rt | |S d| vrtdt| ||d }t|dkr2t| ||d }n|}|j}|j}t||}|| }tj	
 }	|d |	rPdn|d	 |d
   }
td|
 |jd}|j}|| 9 }|S )a^  Calculate the span quad for 'dict' / 'rawdict' text extractions.

    Notes:
        There are two execution paths:
        1. For the full span quad, the result of 'recover_quad' is returned.
        2. For the quad of a sub-list of characters, the char quads are
           computed and joined. This is only supported for the "rawdict"
           extraction option.

    Args:
        line_dir: (tuple) 'line["dir"]' of the owning line.
        span: (dict) the span.
        chars: (list, optional) sub-list of characters to consider.
    Returns:
        pymupdf.Quad covering selected characters.
    Nr  r  z)need 'rawdict' option to sub-select charsr   r   rP   r  r4  r5  )r  r   r    recover_char_quadrI   r  r  r   r  r  r  r+   r   r  )r  rC  r  r  r  span_llspan_lrr  r  r  r  	span_rect	span_quadr   r   r   recover_span_quadK  s(   

 
r  charc                 C   s   | du r|d } t | tust| dkrtdt |tur"tdt |tu r0t|d }nt |tu r>t|d }ntdt| ||S )aD  Recover the quadrilateral of a text character.

    This requires the "rawdict" option of text extraction.

    Args:
        line_dir: (tuple) 'line["dir"]' of the span's line.
        span: (dict) the span dict.
        char: (dict) the character dict.
    Returns:
        The quadrilateral enveloping the character.
    Nr  r   r  r  r   r   )r   r  rI   r    r   r   r+   r  )r  rC  r  r   r   r   r   r  x  s   r  )NNNF)NNNFNr   )NNNr   )N)NN)r   rh   ri   FN)rX   )Crg  typingry   rB   r   r   r   format_gr   r   r   r   r   r   
ByteStringAttributeErrorbytes	bytearray
memoryviewAnyAnyTypeUnionrF   OptIntOptionalr   OptFloatr^  OptStrr   OptDictOptBytesSequenceOptSeqr   TextPageboollistr$   r>   r^   rb   rg   r   r   r   r   r  r  r   r  r"  r/  rt   r=  rD  rT  r[  rX  r`  r_  r  r  r  r  r  r  r   r   r   r   <module>   sN  





$
Q
x


V
m;:a
	'%""2- -