File: //lib64/python3.6/html/__pycache__/parser.cpython-36.opt-1.pyc
3
\AE @ s d Z ddlZddlZddlZddlmZ dgZejdZejdZ ejdZ
ejdZejd Zejd
Z
ejdZejdZejd
ZejdejZejd
ZejdZG dd dejZdS )zA parser for HTML and XHTML. N)unescape
HTMLParserz[&<]z
&[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z <[a-zA-Z]>z--\s*>z+([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*z]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*aF
<[a-zA-Z][^\t\n\r\f />\x00]* # tag name
(?:[\s/]* # optional whitespace before attribute name
(?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name
(?:\s*=+\s* # value indicator
(?:'[^']*' # LITA-enclosed value
|"[^"]*" # LIT-enclosed value
|(?!['"])[^>\s]* # bare value
)
(?:\s*,)* # possibly followed by a comma
)?(?:\s|/(?!>))*
)*
)?
\s* # trailing whitespace
z#</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>c @ s e Zd ZdZd:ZddddZdd Zd
d Zdd
ZdZ dd Z
dd Zdd Zdd Z
dd Zd;ddZdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Zd*d+ Zd,d- Zd.d/ Zd0d1 Zd2d3 Zd4d5 Zd6d7 Zd8d9 ZdS )<r aE Find tags and other markup and call handler functions.
Usage:
p = HTMLParser()
p.feed(data)
...
p.close()
Start tags are handled by calling self.handle_starttag() or
self.handle_startendtag(); end tags by self.handle_endtag(). The
data between tags is passed from the parser to the derived class
by calling self.handle_data() with the data as argument (the data
may be split up in arbitrary chunks). If convert_charrefs is
True the character references are converted automatically to the
corresponding Unicode character (and self.handle_data() is no
longer split in chunks), otherwise they are passed by calling
self.handle_entityref() or self.handle_charref() with the string
containing respectively the named or numeric reference as the
argument.
scriptstyleT)convert_charrefsc C s || _ | j dS )zInitialize and reset this instance.
If convert_charrefs is True (the default), all character references
are automatically converted to the corresponding Unicode characters.
N)r reset)selfr r
#/usr/lib64/python3.6/html/parser.py__init__W s zHTMLParser.__init__c C s( d| _ d| _t| _d| _tjj| dS )z1Reset this instance. Loses all unprocessed data. z???N)rawdatalasttaginteresting_normalinteresting
cdata_elem_markupbase
ParserBaser )r r
r
r r ` s
zHTMLParser.resetc C s | j | | _ | jd dS )zFeed data to the parser.
Call this as often as you want, with as little or as much text
as you want (may include '\n').
r N)r goahead)r datar
r
r feedh s zHTMLParser.feedc C s | j d dS )zHandle any buffered data. N)r )r r
r
r closeq s zHTMLParser.closeNc C s | j S )z)Return full source of start tag: '<...>'.)_HTMLParser__starttag_text)r r
r
r get_starttag_textw s zHTMLParser.get_starttag_textc C s$ |j | _tjd| j tj| _d S )Nz</\s*%s\s*>)lowerr recompileIr )r elemr
r
r set_cdata_mode{ s
zHTMLParser.set_cdata_modec C s t | _d | _d S )N)r r r )r r
r
r clear_cdata_mode s zHTMLParser.clear_cdata_modec C sL | j }d}t|}x||k r| jr|| j r||jd|}|dk r|jdt||d }|dkrvtjdj || rvP |}n(| j
j ||}|r|j }n| jrP |}||k r| jr| j r| jt
||| n| j||| | j||}||krP |j}|d|rLtj||r&| j|} n|d|r>| j|} nl|d|rV| j|} nT|d|rn| j|} n<|d |r| j|} n$|d
|k r| jd |d
} nP | dk r>|sP |jd|d
} | dk r|jd|d
} | dk r|d
} n| d
7 } | jr,| j r,| jt
||| n| j||| | j|| }q|d|rtj||}|r|j d
d }
| j|
|j } |d| d
s| d
} | j|| }qn:d||d kr| j|||d
| j||d
}P q|d|rtj||}|rN|jd
}
| j|
|j } |d| d
s@| d
} | j|| }qtj||}|r|r|j ||d kr|j } | |kr|} | j||d
}P n,|d
|k r| jd | j||d
}nP qqW |r:||k r:| j r:| jr| j r| jt
||| n| j||| | j||}||d | _ d S )Nr <&"