From 2afab9ed57666d7f3d46706c46a203e7553095b4 Mon Sep 17 00:00:00 2001 From: InformationSecurity <1241112575@qq.com> Date: Sat, 16 Jul 2016 07:23:25 +0800 Subject: [PATCH] Add files via upload --- BruteXSS/mechanize/__init__.py | 211 ++ BruteXSS/mechanize/__init__.pyc | Bin 0 -> 5028 bytes BruteXSS/mechanize/_auth.py | 68 + BruteXSS/mechanize/_auth.pyc | Bin 0 -> 2713 bytes BruteXSS/mechanize/_beautifulsoup.py | 1077 +++++++ BruteXSS/mechanize/_beautifulsoup.pyc | Bin 0 -> 41319 bytes BruteXSS/mechanize/_clientcookie.py | 1725 +++++++++++ BruteXSS/mechanize/_clientcookie.pyc | Bin 0 -> 58063 bytes BruteXSS/mechanize/_debug.py | 28 + BruteXSS/mechanize/_debug.pyc | Bin 0 -> 1448 bytes BruteXSS/mechanize/_firefox3cookiejar.py | 248 ++ BruteXSS/mechanize/_firefox3cookiejar.pyc | Bin 0 -> 8683 bytes BruteXSS/mechanize/_form.py | 3280 +++++++++++++++++++++ BruteXSS/mechanize/_form.pyc | Bin 0 -> 111356 bytes BruteXSS/mechanize/_gzip.py | 105 + BruteXSS/mechanize/_gzip.pyc | Bin 0 -> 4357 bytes BruteXSS/mechanize/_headersutil.py | 241 ++ BruteXSS/mechanize/_headersutil.pyc | Bin 0 -> 8161 bytes BruteXSS/mechanize/_html.py | 629 ++++ BruteXSS/mechanize/_html.pyc | Bin 0 -> 21523 bytes BruteXSS/mechanize/_http.py | 447 +++ BruteXSS/mechanize/_http.pyc | Bin 0 -> 15836 bytes BruteXSS/mechanize/_lwpcookiejar.py | 185 ++ BruteXSS/mechanize/_lwpcookiejar.pyc | Bin 0 -> 6215 bytes BruteXSS/mechanize/_markupbase.py | 393 +++ BruteXSS/mechanize/_mechanize.py | 669 +++++ BruteXSS/mechanize/_mechanize.pyc | Bin 0 -> 24074 bytes BruteXSS/mechanize/_mozillacookiejar.py | 161 + BruteXSS/mechanize/_mozillacookiejar.pyc | Bin 0 -> 5050 bytes BruteXSS/mechanize/_msiecookiejar.py | 388 +++ BruteXSS/mechanize/_msiecookiejar.pyc | Bin 0 -> 12112 bytes BruteXSS/mechanize/_opener.py | 442 +++ BruteXSS/mechanize/_opener.pyc | Bin 0 -> 12362 bytes BruteXSS/mechanize/_pullparser.py | 391 +++ BruteXSS/mechanize/_pullparser.pyc | Bin 0 -> 17006 bytes BruteXSS/mechanize/_request.py | 40 + 
BruteXSS/mechanize/_request.pyc | Bin 0 -> 1519 bytes BruteXSS/mechanize/_response.py | 525 ++++ BruteXSS/mechanize/_response.pyc | Bin 0 -> 16438 bytes BruteXSS/mechanize/_rfc3986.py | 245 ++ BruteXSS/mechanize/_rfc3986.pyc | Bin 0 -> 4470 bytes BruteXSS/mechanize/_sgmllib_copy.py | 559 ++++ BruteXSS/mechanize/_sgmllib_copy.pyc | Bin 0 -> 15533 bytes BruteXSS/mechanize/_sockettimeout.py | 6 + BruteXSS/mechanize/_sockettimeout.pyc | Bin 0 -> 260 bytes BruteXSS/mechanize/_testcase.py | 162 + BruteXSS/mechanize/_urllib2.py | 50 + BruteXSS/mechanize/_urllib2.pyc | Bin 0 -> 1645 bytes BruteXSS/mechanize/_urllib2_fork.py | 1414 +++++++++ BruteXSS/mechanize/_urllib2_fork.pyc | Bin 0 -> 44422 bytes BruteXSS/mechanize/_useragent.py | 367 +++ BruteXSS/mechanize/_useragent.pyc | Bin 0 -> 13708 bytes BruteXSS/mechanize/_util.py | 305 ++ BruteXSS/mechanize/_util.pyc | Bin 0 -> 9990 bytes BruteXSS/mechanize/_version.py | 2 + BruteXSS/mechanize/_version.pyc | Bin 0 -> 216 bytes 56 files changed, 14363 insertions(+) create mode 100644 BruteXSS/mechanize/__init__.py create mode 100644 BruteXSS/mechanize/__init__.pyc create mode 100644 BruteXSS/mechanize/_auth.py create mode 100644 BruteXSS/mechanize/_auth.pyc create mode 100644 BruteXSS/mechanize/_beautifulsoup.py create mode 100644 BruteXSS/mechanize/_beautifulsoup.pyc create mode 100644 BruteXSS/mechanize/_clientcookie.py create mode 100644 BruteXSS/mechanize/_clientcookie.pyc create mode 100644 BruteXSS/mechanize/_debug.py create mode 100644 BruteXSS/mechanize/_debug.pyc create mode 100644 BruteXSS/mechanize/_firefox3cookiejar.py create mode 100644 BruteXSS/mechanize/_firefox3cookiejar.pyc create mode 100644 BruteXSS/mechanize/_form.py create mode 100644 BruteXSS/mechanize/_form.pyc create mode 100644 BruteXSS/mechanize/_gzip.py create mode 100644 BruteXSS/mechanize/_gzip.pyc create mode 100644 BruteXSS/mechanize/_headersutil.py create mode 100644 BruteXSS/mechanize/_headersutil.pyc create mode 100644 
BruteXSS/mechanize/_html.py create mode 100644 BruteXSS/mechanize/_html.pyc create mode 100644 BruteXSS/mechanize/_http.py create mode 100644 BruteXSS/mechanize/_http.pyc create mode 100644 BruteXSS/mechanize/_lwpcookiejar.py create mode 100644 BruteXSS/mechanize/_lwpcookiejar.pyc create mode 100644 BruteXSS/mechanize/_markupbase.py create mode 100644 BruteXSS/mechanize/_mechanize.py create mode 100644 BruteXSS/mechanize/_mechanize.pyc create mode 100644 BruteXSS/mechanize/_mozillacookiejar.py create mode 100644 BruteXSS/mechanize/_mozillacookiejar.pyc create mode 100644 BruteXSS/mechanize/_msiecookiejar.py create mode 100644 BruteXSS/mechanize/_msiecookiejar.pyc create mode 100644 BruteXSS/mechanize/_opener.py create mode 100644 BruteXSS/mechanize/_opener.pyc create mode 100644 BruteXSS/mechanize/_pullparser.py create mode 100644 BruteXSS/mechanize/_pullparser.pyc create mode 100644 BruteXSS/mechanize/_request.py create mode 100644 BruteXSS/mechanize/_request.pyc create mode 100644 BruteXSS/mechanize/_response.py create mode 100644 BruteXSS/mechanize/_response.pyc create mode 100644 BruteXSS/mechanize/_rfc3986.py create mode 100644 BruteXSS/mechanize/_rfc3986.pyc create mode 100644 BruteXSS/mechanize/_sgmllib_copy.py create mode 100644 BruteXSS/mechanize/_sgmllib_copy.pyc create mode 100644 BruteXSS/mechanize/_sockettimeout.py create mode 100644 BruteXSS/mechanize/_sockettimeout.pyc create mode 100644 BruteXSS/mechanize/_testcase.py create mode 100644 BruteXSS/mechanize/_urllib2.py create mode 100644 BruteXSS/mechanize/_urllib2.pyc create mode 100644 BruteXSS/mechanize/_urllib2_fork.py create mode 100644 BruteXSS/mechanize/_urllib2_fork.pyc create mode 100644 BruteXSS/mechanize/_useragent.py create mode 100644 BruteXSS/mechanize/_useragent.pyc create mode 100644 BruteXSS/mechanize/_util.py create mode 100644 BruteXSS/mechanize/_util.pyc create mode 100644 BruteXSS/mechanize/_version.py create mode 100644 BruteXSS/mechanize/_version.pyc diff --git 
a/BruteXSS/mechanize/__init__.py b/BruteXSS/mechanize/__init__.py new file mode 100644 index 0000000..c4429be --- /dev/null +++ b/BruteXSS/mechanize/__init__.py @@ -0,0 +1,211 @@ +__all__ = [ + 'AbstractBasicAuthHandler', + 'AbstractDigestAuthHandler', + 'BaseHandler', + 'Browser', + 'BrowserStateError', + 'CacheFTPHandler', + 'ContentTooShortError', + 'Cookie', + 'CookieJar', + 'CookiePolicy', + 'DefaultCookiePolicy', + 'DefaultFactory', + 'FTPHandler', + 'Factory', + 'FileCookieJar', + 'FileHandler', + 'FormNotFoundError', + 'FormsFactory', + 'HTTPBasicAuthHandler', + 'HTTPCookieProcessor', + 'HTTPDefaultErrorHandler', + 'HTTPDigestAuthHandler', + 'HTTPEquivProcessor', + 'HTTPError', + 'HTTPErrorProcessor', + 'HTTPHandler', + 'HTTPPasswordMgr', + 'HTTPPasswordMgrWithDefaultRealm', + 'HTTPProxyPasswordMgr', + 'HTTPRedirectDebugProcessor', + 'HTTPRedirectHandler', + 'HTTPRefererProcessor', + 'HTTPRefreshProcessor', + 'HTTPResponseDebugProcessor', + 'HTTPRobotRulesProcessor', + 'HTTPSClientCertMgr', + 'HeadParser', + 'History', + 'LWPCookieJar', + 'Link', + 'LinkNotFoundError', + 'LinksFactory', + 'LoadError', + 'MSIECookieJar', + 'MozillaCookieJar', + 'OpenerDirector', + 'OpenerFactory', + 'ParseError', + 'ProxyBasicAuthHandler', + 'ProxyDigestAuthHandler', + 'ProxyHandler', + 'Request', + 'RobotExclusionError', + 'RobustFactory', + 'RobustFormsFactory', + 'RobustLinksFactory', + 'RobustTitleFactory', + 'SeekableResponseOpener', + 'TitleFactory', + 'URLError', + 'USE_BARE_EXCEPT', + 'UnknownHandler', + 'UserAgent', + 'UserAgentBase', + 'XHTMLCompatibleHeadParser', + '__version__', + 'build_opener', + 'install_opener', + 'lwp_cookie_str', + 'make_response', + 'request_host', + 'response_seek_wrapper', # XXX deprecate in public interface? 
+ 'seek_wrapped_response', # XXX should probably use this internally in place of response_seek_wrapper() + 'str2time', + 'urlopen', + 'urlretrieve', + 'urljoin', + + # ClientForm API + 'AmbiguityError', + 'ControlNotFoundError', + 'FormParser', + 'ItemCountError', + 'ItemNotFoundError', + 'LocateError', + 'Missing', + 'ParseFile', + 'ParseFileEx', + 'ParseResponse', + 'ParseResponseEx', + 'ParseString', + 'XHTMLCompatibleFormParser', + # deprecated + 'CheckboxControl', + 'Control', + 'FileControl', + 'HTMLForm', + 'HiddenControl', + 'IgnoreControl', + 'ImageControl', + 'IsindexControl', + 'Item', + 'Label', + 'ListControl', + 'PasswordControl', + 'RadioControl', + 'ScalarControl', + 'SelectControl', + 'SubmitButtonControl', + 'SubmitControl', + 'TextControl', + 'TextareaControl', + ] + +import logging +import sys + +from _version import __version__ + +# high-level stateful browser-style interface +from _mechanize import \ + Browser, History, \ + BrowserStateError, LinkNotFoundError, FormNotFoundError + +# configurable URL-opener interface +from _useragent import UserAgentBase, UserAgent +from _html import \ + Link, \ + Factory, DefaultFactory, RobustFactory, \ + FormsFactory, LinksFactory, TitleFactory, \ + RobustFormsFactory, RobustLinksFactory, RobustTitleFactory + +# urllib2 work-alike interface. This is a superset of the urllib2 interface. 
+from _urllib2 import * +import _urllib2 +if hasattr(_urllib2, "HTTPSHandler"): + __all__.append("HTTPSHandler") +del _urllib2 + +# misc +from _http import HeadParser +from _http import XHTMLCompatibleHeadParser +from _opener import ContentTooShortError, OpenerFactory, urlretrieve +from _response import \ + response_seek_wrapper, seek_wrapped_response, make_response +from _rfc3986 import urljoin +from _util import http2time as str2time + +# cookies +from _clientcookie import Cookie, CookiePolicy, DefaultCookiePolicy, \ + CookieJar, FileCookieJar, LoadError, request_host_lc as request_host, \ + effective_request_host +from _lwpcookiejar import LWPCookieJar, lwp_cookie_str +# 2.4 raises SyntaxError due to generator / try/finally use +if sys.version_info[:2] > (2,4): + try: + import sqlite3 + except ImportError: + pass + else: + from _firefox3cookiejar import Firefox3CookieJar +from _mozillacookiejar import MozillaCookieJar +from _msiecookiejar import MSIECookieJar + +# forms +from _form import ( + AmbiguityError, + ControlNotFoundError, + FormParser, + ItemCountError, + ItemNotFoundError, + LocateError, + Missing, + ParseError, + ParseFile, + ParseFileEx, + ParseResponse, + ParseResponseEx, + ParseString, + XHTMLCompatibleFormParser, + # deprecated + CheckboxControl, + Control, + FileControl, + HTMLForm, + HiddenControl, + IgnoreControl, + ImageControl, + IsindexControl, + Item, + Label, + ListControl, + PasswordControl, + RadioControl, + ScalarControl, + SelectControl, + SubmitButtonControl, + SubmitControl, + TextControl, + TextareaControl, + ) + +# If you hate the idea of turning bugs into warnings, do: +# import mechanize; mechanize.USE_BARE_EXCEPT = False +USE_BARE_EXCEPT = True + +logger = logging.getLogger("mechanize") +if logger.level is logging.NOTSET: + logger.setLevel(logging.CRITICAL) +del logger diff --git a/BruteXSS/mechanize/__init__.pyc b/BruteXSS/mechanize/__init__.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..c294ffff6717fcd1fe69b9828dc91b284be0b210 GIT binary patch literal 5028 zcmdT{=~f%d5&nemhQVNC<0am&7rf!^dbwP0SO|kH7LSC%ZS3(#)kp)=GceP#!AVY* zo4i5(Lan8xV#LGu_b$4|wUsVtJe-8A~-~TpSL+g(+KVO*tiS7l+ z{L6uQKzUFvXb)&FXdh@l=m4k>bP&`J8UP&v4T6S1he1a`!=R&}W1tbxanK3SNzf_K zZ$PI(XFz8`=RoH{7eE(5KY%WQE`zRsu7XBE*Fe`nH$XQ*w?Jc{+n_t3yP$iZ`=B2| z4?qt=k3f$>KY_+UKZBlto`Rl%o`YV1CO|Ji1<)j@2$}-D0!@QvK(9e>Kqb&DXbvpmk7y zx`Hr~RG<(+4#I>q9rr+(*qU*KAP?0G(Tg0A2LEVS=5$XZNgY@L@ z5Ilr>1n~&!F~no!WDnL1p?;6^^cSr6A}-{&4Y$**jYEuMDLjl``7_r05Z}({a=Fby zewjD?0l^cfrw~tVP+0Uw1ka$JLp)FK`xAl}P!kXn>HNlr7NWqf zZR=qpn3SttGth;I547ZmI`S#wUKDTZJ|pV7oK4P_26*fTGF8DBv4bqhOl7C0qFBvE zdMav$l-kMWur$dOu}9OD%EGR2vYoupGBS>8GD(cT$6NB24_0k<=do;USNM;#R*IWV z|I0#K8OMq*NsVi7;`nzqv1hy~yM+_O^93(SzD6<4)?Va z?e4>;T8AA;ek^NxQdXPw&iz>=vgOQPdAd>Z!*$Dd z9_*$DOWV}o#T;hbYcpV|rcz#2MGM==tG>qYssVE~ITWu{) z^Uk!fN!dlhk!)pmL9=`IP?iq%-I1G3Q~0*XY!r)IwV;{!QJCJurWkoM(U~b6*OsKa zEIQ&HA?b)Kz7Axjgv~5Vx$ad1>Buy%2wp57JhNpdFMi(i#eq8s9j5!Xj zYfY*){eaxaCd%IEhl%!rpj#At-EeERMY*Qs+G~}!E?vWE%U-J}P9yF%A`^B_WVY%i zCLr$D*xT4J#N?GO0d!@w;2a)mu+Zf97`Sh@=JSdE%HNa@r0`S^`@`4 z(@=8JachpFU?;C_7d5$Ul?aD-MoU#NO|jLyxmB`g=Q$2rDXO*K1LoJPpCo=*w>{qK z$kG9anR`G_v=?lBUptsXxb-|VYZzMVWu8Nv3Wt#16)rt%B!~pHc-Y}lI;t*gK(hq9y2I; zRT)?>N~YsvYM9YkZ_Q+^+VRMbG6COSu6cnM?~usAG~Z0ZUANq)1mv8G!Yr_;G^z>=}MY~7%yxr52oRTF>z z3s<;pqqeBiU|iwTlLmZ_jnDB){j;ALrqi!RTld?qKx*R6Y?E_?M;8n@vkY?#^9&0N zZy6khGQ(|#3d179JBB5OWrp_*=JzHz9~f2`J~Dh_a2Y-`cnnnr{w3BS2Fb9>P-kc` z_zY_d>kNSb8&M@|4KMV+N$Y=?*LDX7*mb#Iy0!{~s9xt9<`gE|iLJh@<66SmhuUo` z{Vw&=eNEHrO(ll9Lqc0}ZbPf!zh`1Z_kIlfYe$W+Nny9`!b*AIQ| zx?>yLR@oC3k@@a@#!V(UWg1;R_dohK|6RL!zPr-p%Yl4$_?at&4^|HxVpDzFn1(3oclL + +This code is free software; you can redistribute it and/or modify it under +the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt +included with the distribution). 
+ +""" + +from _urllib2_fork import HTTPPasswordMgr + + +# TODO: stop deriving from HTTPPasswordMgr +class HTTPProxyPasswordMgr(HTTPPasswordMgr): + # has default realm and host/port + def add_password(self, realm, uri, user, passwd): + # uri could be a single URI or a sequence + if uri is None or isinstance(uri, basestring): + uris = [uri] + else: + uris = uri + passwd_by_domain = self.passwd.setdefault(realm, {}) + for uri in uris: + for default_port in True, False: + reduced_uri = self.reduce_uri(uri, default_port) + passwd_by_domain[reduced_uri] = (user, passwd) + + def find_user_password(self, realm, authuri): + attempts = [(realm, authuri), (None, authuri)] + # bleh, want default realm to take precedence over default + # URI/authority, hence this outer loop + for default_uri in False, True: + for realm, authuri in attempts: + authinfo_by_domain = self.passwd.get(realm, {}) + for default_port in True, False: + reduced_authuri = self.reduce_uri(authuri, default_port) + for uri, authinfo in authinfo_by_domain.iteritems(): + if uri is None and not default_uri: + continue + if self.is_suburi(uri, reduced_authuri): + return authinfo + user, password = None, None + + if user is not None: + break + return user, password + + def reduce_uri(self, uri, default_port=True): + if uri is None: + return None + return HTTPPasswordMgr.reduce_uri(self, uri, default_port) + + def is_suburi(self, base, test): + if base is None: + # default to the proxy's host/port + hostport, path = test + base = (hostport, "/") + return HTTPPasswordMgr.is_suburi(self, base, test) + + +class HTTPSClientCertMgr(HTTPPasswordMgr): + # implementation inheritance: this is not a proper subclass + def add_key_cert(self, uri, key_file, cert_file): + self.add_password(None, uri, key_file, cert_file) + def find_key_cert(self, authuri): + return HTTPPasswordMgr.find_user_password(self, None, authuri) diff --git a/BruteXSS/mechanize/_auth.pyc b/BruteXSS/mechanize/_auth.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..ef8116e51c1c056c5847570297e1da2357c27474 GIT binary patch literal 2713 zcmb7GOK%%h6h1S4#Bp5_^(CNyMk-a^O;WNbQd{+vsHIgKqH(0CQL5==W*kpy&p30h zRR=i|QrazI!G^!Uk|kSy0lx)_EqvdZ*g-r5v2xFx`#6t#zVF_f%CBde@%P_7kL37Q z!S_Czc?1!QY(Y8_yV7@L;L4UOPF#`1l`clB@>b+X)wNr?IoE_qtK>#=7m}8trz+Dz%!%aFfZ(4v)Nq9hX+Nv+q2&CjT<+;r+F{) zo-TUNlEk~Sx7WWv%y;tfVw4Z=Hk%v0)Ob-ICmz1t0!vKZwQs^Ax$Pa~BQFXwuSnw5 z*dpB-SuC>*x|A2*Adl1T0S87|oD@wY=Gmke7%%U7>^$tOdRVu${>)om{LJg8QIeU& zcx{6)T+mJXiMR6Q`im!Pj~DIOHq$KXkK!cu-lVptOJyJ*t&FG47KHOPK_X4nart0LnalORJmdT#|^o&zb%{lPh=B` zbhJWi${owB*YYf}O(>~JGh@RnD!V%&00FewE`o^+S;|-!n#9IQHyrh?I@l;i2~rYy z6!tMiSD?ixN`g_5wpkBdW|Dqamla9aA6T9T%9G7SJn020I5qoDae@ zvn>oBM-Iqndj zikzbrql$akX}NXhqI1?c@0@cYX3EHOXo~7@P=F5#jtZlsrLqgGDl-0sAOm5A=Vv*r z%J?VZvHJ2y*~Sv!4OspTb4|wI&<@sW`ZbZ~I*>wfBp2pn{DfTCtMhhPeR*45Hso-g zFqbqdejxTOeD>c-hV0-2>;Ng09mPAD%u5Gzn;fS1Lr{88G}B-sG$(&{mOZ8=c4lG1IRjf)Bvkm&O*s2R)~jB`E&&e_;d{40+kME zu%|pJiFQ<(9bB50Rfi&4t9?4mjx6nDs@)}u5 z_Ebo5@Ec?TLqKm(?w^uZ=3qi(sZ3XigRb}&A=-?kRM0zsmyaqIcaS32%Z=4D&xD4d z?VVW%nR1ux?N27;m=0k|&U*01;JuGCzs<5$)D`;xO zqp$yLlq3R5M^*fraq4^>=Fgu15 zC*p^VnQPYPp;%;%+jW}wU#X83$DnA=6lKnrpq(Z$UIBWFr>> Null("send", "a", "message")("and one more", + ... "and what you get still") is Null + True + """ + + def __new__(cls): return Null + def __call__(self, *args, **kwargs): return Null +## def __getstate__(self, *args): return Null + def __getattr__(self, attr): return Null + def __getitem__(self, item): return Null + def __setattr__(self, attr, value): pass + def __setitem__(self, item, value): pass + def __len__(self): return 0 + # FIXME: is this a python bug? otherwise ``for x in Null: pass`` + # never terminates... 
+ def __iter__(self): return iter([]) + def __contains__(self, item): return False + def __repr__(self): return "Null" +Null = object.__new__(NullType) + +class PageElement: + """Contains the navigational information for some part of the page + (either a tag or a piece of text)""" + + def setup(self, parent=Null, previous=Null): + """Sets up the initial relations between this element and + other elements.""" + self.parent = parent + self.previous = previous + self.next = Null + self.previousSibling = Null + self.nextSibling = Null + if self.parent and self.parent.contents: + self.previousSibling = self.parent.contents[-1] + self.previousSibling.nextSibling = self + + def findNext(self, name=None, attrs={}, text=None): + """Returns the first item that matches the given criteria and + appears after this Tag in the document.""" + return self._first(self.fetchNext, name, attrs, text) + firstNext = findNext + + def fetchNext(self, name=None, attrs={}, text=None, limit=None): + """Returns all items that match the given criteria and appear + before after Tag in the document.""" + return self._fetch(name, attrs, text, limit, self.nextGenerator) + + def findNextSibling(self, name=None, attrs={}, text=None): + """Returns the closest sibling to this Tag that matches the + given criteria and appears after this Tag in the document.""" + return self._first(self.fetchNextSiblings, name, attrs, text) + firstNextSibling = findNextSibling + + def fetchNextSiblings(self, name=None, attrs={}, text=None, limit=None): + """Returns the siblings of this Tag that match the given + criteria and appear after this Tag in the document.""" + return self._fetch(name, attrs, text, limit, self.nextSiblingGenerator) + + def findPrevious(self, name=None, attrs={}, text=None): + """Returns the first item that matches the given criteria and + appears before this Tag in the document.""" + return self._first(self.fetchPrevious, name, attrs, text) + + def fetchPrevious(self, name=None, attrs={}, 
text=None, limit=None): + """Returns all items that match the given criteria and appear + before this Tag in the document.""" + return self._fetch(name, attrs, text, limit, self.previousGenerator) + firstPrevious = findPrevious + + def findPreviousSibling(self, name=None, attrs={}, text=None): + """Returns the closest sibling to this Tag that matches the + given criteria and appears before this Tag in the document.""" + return self._first(self.fetchPreviousSiblings, name, attrs, text) + firstPreviousSibling = findPreviousSibling + + def fetchPreviousSiblings(self, name=None, attrs={}, text=None, + limit=None): + """Returns the siblings of this Tag that match the given + criteria and appear before this Tag in the document.""" + return self._fetch(name, attrs, text, limit, + self.previousSiblingGenerator) + + def findParent(self, name=None, attrs={}): + """Returns the closest parent of this Tag that matches the given + criteria.""" + r = Null + l = self.fetchParents(name, attrs, 1) + if l: + r = l[0] + return r + firstParent = findParent + + def fetchParents(self, name=None, attrs={}, limit=None): + """Returns the parents of this Tag that match the given + criteria.""" + return self._fetch(name, attrs, None, limit, self.parentGenerator) + + #These methods do the real heavy lifting. + + def _first(self, method, name, attrs, text): + r = Null + l = method(name, attrs, text, 1) + if l: + r = l[0] + return r + + def _fetch(self, name, attrs, text, limit, generator): + "Iterates over a generator looking for things that match." 
+ if not hasattr(attrs, 'items'): + attrs = {'class' : attrs} + + results = [] + g = generator() + while True: + try: + i = g.next() + except StopIteration: + break + found = None + if isinstance(i, Tag): + if not text: + if not name or self._matches(i, name): + match = True + for attr, matchAgainst in attrs.items(): + check = i.get(attr) + if not self._matches(check, matchAgainst): + match = False + break + if match: + found = i + elif text: + if self._matches(i, text): + found = i + if found: + results.append(found) + if limit and len(results) >= limit: + break + return results + + #Generators that can be used to navigate starting from both + #NavigableTexts and Tags. + def nextGenerator(self): + i = self + while i: + i = i.next + yield i + + def nextSiblingGenerator(self): + i = self + while i: + i = i.nextSibling + yield i + + def previousGenerator(self): + i = self + while i: + i = i.previous + yield i + + def previousSiblingGenerator(self): + i = self + while i: + i = i.previousSibling + yield i + + def parentGenerator(self): + i = self + while i: + i = i.parent + yield i + + def _matches(self, chunk, howToMatch): + #print 'looking for %s in %s' % (howToMatch, chunk) + # + # If given a list of items, return true if the list contains a + # text element that matches. + if isList(chunk) and not isinstance(chunk, Tag): + for tag in chunk: + if isinstance(tag, NavigableText) and self._matches(tag, howToMatch): + return True + return False + if callable(howToMatch): + return howToMatch(chunk) + if isinstance(chunk, Tag): + #Custom match methods take the tag as an argument, but all other + #ways of matching match the tag name as a string + chunk = chunk.name + #Now we know that chunk is a string + if not isinstance(chunk, basestring): + chunk = str(chunk) + if hasattr(howToMatch, 'match'): + # It's a regexp object. 
+ return howToMatch.search(chunk) + if isList(howToMatch): + return chunk in howToMatch + if hasattr(howToMatch, 'items'): + return howToMatch.has_key(chunk) + #It's just a string + return str(howToMatch) == chunk + +class NavigableText(PageElement): + + def __getattr__(self, attr): + "For backwards compatibility, text.string gives you text" + if attr == 'string': + return self + else: + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr) + +class NavigableString(str, NavigableText): + pass + +class NavigableUnicodeString(unicode, NavigableText): + pass + +class Tag(PageElement): + + """Represents a found HTML tag with its attributes and contents.""" + + def __init__(self, name, attrs=None, parent=Null, previous=Null): + "Basic constructor." + self.name = name + if attrs == None: + attrs = [] + self.attrs = attrs + self.contents = [] + self.setup(parent, previous) + self.hidden = False + + def get(self, key, default=None): + """Returns the value of the 'key' attribute for the tag, or + the value given for 'default' if it doesn't have that + attribute.""" + return self._getAttrMap().get(key, default) + + def __getitem__(self, key): + """tag[key] returns the value of the 'key' attribute for the tag, + and throws an exception if it's not there.""" + return self._getAttrMap()[key] + + def __iter__(self): + "Iterating over a tag iterates over its contents." + return iter(self.contents) + + def __len__(self): + "The length of a tag is the length of its list of contents." + return len(self.contents) + + def __contains__(self, x): + return x in self.contents + + def __nonzero__(self): + "A tag is non-None even if it has no contents." 
+ return True + + def __setitem__(self, key, value): + """Setting tag[key] sets the value of the 'key' attribute for the + tag.""" + self._getAttrMap() + self.attrMap[key] = value + found = False + for i in range(0, len(self.attrs)): + if self.attrs[i][0] == key: + self.attrs[i] = (key, value) + found = True + if not found: + self.attrs.append((key, value)) + self._getAttrMap()[key] = value + + def __delitem__(self, key): + "Deleting tag[key] deletes all 'key' attributes for the tag." + for item in self.attrs: + if item[0] == key: + self.attrs.remove(item) + #We don't break because bad HTML can define the same + #attribute multiple times. + self._getAttrMap() + if self.attrMap.has_key(key): + del self.attrMap[key] + + def __call__(self, *args, **kwargs): + """Calling a tag like a function is the same as calling its + fetch() method. Eg. tag('a') returns a list of all the A tags + found within this tag.""" + return apply(self.fetch, args, kwargs) + + def __getattr__(self, tag): + if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3: + return self.first(tag[:-3]) + elif tag.find('__') != 0: + return self.first(tag) + + def __eq__(self, other): + """Returns true iff this tag has the same name, the same attributes, + and the same contents (recursively) as the given tag. + + NOTE: right now this will return false if two tags have the + same attributes in a different order. 
Should this be fixed?""" + if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other): + return False + for i in range(0, len(self.contents)): + if self.contents[i] != other.contents[i]: + return False + return True + + def __ne__(self, other): + """Returns true iff this tag is not identical to the other tag, + as defined in __eq__.""" + return not self == other + + def __repr__(self): + """Renders this tag as a string.""" + return str(self) + + def __unicode__(self): + return self.__str__(1) + + def __str__(self, needUnicode=None, showStructureIndent=None): + """Returns a string or Unicode representation of this tag and + its contents. + + NOTE: since Python's HTML parser consumes whitespace, this + method is not certain to reproduce the whitespace present in + the original string.""" + + attrs = [] + if self.attrs: + for key, val in self.attrs: + attrs.append('%s="%s"' % (key, val)) + close = '' + closeTag = '' + if self.isSelfClosing(): + close = ' /' + else: + closeTag = '' % self.name + indentIncrement = None + if showStructureIndent != None: + indentIncrement = showStructureIndent + if not self.hidden: + indentIncrement += 1 + contents = self.renderContents(indentIncrement, needUnicode=needUnicode) + if showStructureIndent: + space = '\n%s' % (' ' * showStructureIndent) + if self.hidden: + s = contents + else: + s = [] + attributeString = '' + if attrs: + attributeString = ' ' + ' '.join(attrs) + if showStructureIndent: + s.append(space) + s.append('<%s%s%s>' % (self.name, attributeString, close)) + s.append(contents) + if closeTag and showStructureIndent != None: + s.append(space) + s.append(closeTag) + s = ''.join(s) + isUnicode = type(s) == types.UnicodeType + if needUnicode and not isUnicode: + s = unicode(s) + elif isUnicode and needUnicode==False: + s = str(s) + return s + + def prettify(self, needUnicode=None): + return 
self.__str__(needUnicode, showStructureIndent=True) + + def renderContents(self, showStructureIndent=None, needUnicode=None): + """Renders the contents of this tag as a (possibly Unicode) + string.""" + s=[] + for c in self: + text = None + if isinstance(c, NavigableUnicodeString) or type(c) == types.UnicodeType: + text = unicode(c) + elif isinstance(c, Tag): + s.append(c.__str__(needUnicode, showStructureIndent)) + elif needUnicode: + text = unicode(c) + else: + text = str(c) + if text: + if showStructureIndent != None: + if text[-1] == '\n': + text = text[:-1] + s.append(text) + return ''.join(s) + + #Soup methods + + def firstText(self, text, recursive=True): + """Convenience method to retrieve the first piece of text matching the + given criteria. 'text' can be a string, a regular expression object, + a callable that takes a string and returns whether or not the + string 'matches', etc.""" + return self.first(recursive=recursive, text=text) + + def fetchText(self, text, recursive=True, limit=None): + """Convenience method to retrieve all pieces of text matching the + given criteria. 'text' can be a string, a regular expression object, + a callable that takes a string and returns whether or not the + string 'matches', etc.""" + return self.fetch(recursive=recursive, text=text, limit=limit) + + def first(self, name=None, attrs={}, recursive=True, text=None): + """Return only the first child of this + Tag matching the given criteria.""" + r = Null + l = self.fetch(name, attrs, recursive, text, 1) + if l: + r = l[0] + return r + findChild = first + + def fetch(self, name=None, attrs={}, recursive=True, text=None, + limit=None): + """Extracts a list of Tag objects that match the given + criteria. You can specify the name of the Tag and any + attributes you want the Tag to have. 
+ + The value of a key-value pair in the 'attrs' map can be a + string, a list of strings, a regular expression object, or a + callable that takes a string and returns whether or not the + string matches for some custom definition of 'matches'. The + same is true of the tag name.""" + generator = self.recursiveChildGenerator + if not recursive: + generator = self.childGenerator + return self._fetch(name, attrs, text, limit, generator) + fetchChildren = fetch + + #Utility methods + + def isSelfClosing(self): + """Returns true iff this is a self-closing tag as defined in the HTML + standard. + + TODO: This is specific to BeautifulSoup and its subclasses, but it's + used by __str__""" + return self.name in BeautifulSoup.SELF_CLOSING_TAGS + + def append(self, tag): + """Appends the given tag to the contents of this tag.""" + self.contents.append(tag) + + #Private methods + + def _getAttrMap(self): + """Initializes a map representation of this tag's attributes, + if not already initialized.""" + if not getattr(self, 'attrMap'): + self.attrMap = {} + for (key, value) in self.attrs: + self.attrMap[key] = value + return self.attrMap + + #Generator methods + def childGenerator(self): + for i in range(0, len(self.contents)): + yield self.contents[i] + raise StopIteration + + def recursiveChildGenerator(self): + stack = [(self, 0)] + while stack: + tag, start = stack.pop() + if isinstance(tag, Tag): + for i in range(start, len(tag.contents)): + a = tag.contents[i] + yield a + if isinstance(a, Tag) and tag.contents: + if i < len(tag.contents) - 1: + stack.append((tag, i+1)) + stack.append((a, 0)) + break + raise StopIteration + + +def isList(l): + """Convenience method that works with all 2.x versions of Python + to determine whether or not something is listlike.""" + return hasattr(l, '__iter__') \ + or (type(l) in (types.ListType, types.TupleType)) + +def buildTagMap(default, *args): + """Turns a list of maps, lists, or scalars into a single map. 
+ Used to build the SELF_CLOSING_TAGS and NESTABLE_TAGS maps out + of lists and partial maps.""" + built = {} + for portion in args: + if hasattr(portion, 'items'): + #It's a map. Merge it. + for k,v in portion.items(): + built[k] = v + elif isList(portion): + #It's a list. Map each item to the default. + for k in portion: + built[k] = default + else: + #It's a scalar. Map it to the default. + built[portion] = default + return built + +class BeautifulStoneSoup(Tag, SGMLParser): + + """This class contains the basic parser and fetch code. It defines + a parser that knows nothing about tag behavior except for the + following: + + You can't close a tag without closing all the tags it encloses. + That is, "" actually means + "". + + [Another possible explanation is "", but since + this class defines no SELF_CLOSING_TAGS, it will never use that + explanation.] + + This class is useful for parsing XML or made-up markup languages, + or when BeautifulSoup makes an assumption counter to what you were + expecting.""" + + SELF_CLOSING_TAGS = {} + NESTABLE_TAGS = {} + RESET_NESTING_TAGS = {} + QUOTE_TAGS = {} + + #As a public service we will by default silently replace MS smart quotes + #and similar characters with their HTML or ASCII equivalents. 
+ MS_CHARS = { '\x80' : '€', + '\x81' : ' ', + '\x82' : '‚', + '\x83' : 'ƒ', + '\x84' : '„', + '\x85' : '…', + '\x86' : '†', + '\x87' : '‡', + '\x88' : '⁁', + '\x89' : '%', + '\x8A' : 'Š', + '\x8B' : '<', + '\x8C' : 'Œ', + '\x8D' : '?', + '\x8E' : 'Z', + '\x8F' : '?', + '\x90' : '?', + '\x91' : '‘', + '\x92' : '’', + '\x93' : '“', + '\x94' : '”', + '\x95' : '•', + '\x96' : '–', + '\x97' : '—', + '\x98' : '˜', + '\x99' : '™', + '\x9a' : 'š', + '\x9b' : '>', + '\x9c' : 'œ', + '\x9d' : '?', + '\x9e' : 'z', + '\x9f' : 'Ÿ',} + + PARSER_MASSAGE = [(re.compile('(<[^<>]*)/>'), + lambda(x):x.group(1) + ' />'), + (re.compile(']*)>'), + lambda(x):''), + (re.compile("([\x80-\x9f])"), + lambda(x): BeautifulStoneSoup.MS_CHARS.get(x.group(1))) + ] + + ROOT_TAG_NAME = '[document]' + + def __init__(self, text=None, avoidParserProblems=True, + initialTextIsEverything=True): + """Initialize this as the 'root tag' and feed in any text to + the parser. + + NOTE about avoidParserProblems: sgmllib will process most bad + HTML, and BeautifulSoup has tricks for dealing with some HTML + that kills sgmllib, but Beautiful Soup can nonetheless choke + or lose data if your data uses self-closing tags or + declarations incorrectly. By default, Beautiful Soup sanitizes + its input to avoid the vast majority of these problems. The + problems are relatively rare, even in bad HTML, so feel free + to pass in False to avoidParserProblems if they don't apply to + you, and you'll get better performance. The only reason I have + this turned on by default is so I don't get so many tech + support questions. + + The two most common instances of invalid HTML that will choke + sgmllib are fixed by the default parser massage techniques: + +
(No space between name of closing tag and tag close) + (Extraneous whitespace in declaration) + + You can pass in a custom list of (RE object, replace method) + tuples to get Beautiful Soup to scrub your input the way you + want.""" + Tag.__init__(self, self.ROOT_TAG_NAME) + if avoidParserProblems \ + and not isList(avoidParserProblems): + avoidParserProblems = self.PARSER_MASSAGE + self.avoidParserProblems = avoidParserProblems + SGMLParser.__init__(self) + self.quoteStack = [] + self.hidden = 1 + self.reset() + if hasattr(text, 'read'): + #It's a file-type object. + text = text.read() + if text: + self.feed(text) + if initialTextIsEverything: + self.done() + + def __getattr__(self, methodName): + """This method routes method call requests to either the SGMLParser + superclass or the Tag superclass, depending on the method name.""" + if methodName.find('start_') == 0 or methodName.find('end_') == 0 \ + or methodName.find('do_') == 0: + return SGMLParser.__getattr__(self, methodName) + elif methodName.find('__') != 0: + return Tag.__getattr__(self, methodName) + else: + raise AttributeError + + def feed(self, text): + if self.avoidParserProblems: + for fix, m in self.avoidParserProblems: + text = fix.sub(m, text) + SGMLParser.feed(self, text) + + def done(self): + """Called when you're done parsing, so that the unclosed tags can be + correctly processed.""" + self.endData() #NEW + while self.currentTag.name != self.ROOT_TAG_NAME: + self.popTag() + + def reset(self): + SGMLParser.reset(self) + self.currentData = [] + self.currentTag = None + self.tagStack = [] + self.pushTag(self) + + def popTag(self): + tag = self.tagStack.pop() + # Tags with just one string-owning child get the child as a + # 'string' property, so that soup.tag.string is shorthand for + # soup.tag.contents[0] + if len(self.currentTag.contents) == 1 and \ + isinstance(self.currentTag.contents[0], NavigableText): + self.currentTag.string = self.currentTag.contents[0] + + #print "Pop", tag.name + if 
self.tagStack: + self.currentTag = self.tagStack[-1] + return self.currentTag + + def pushTag(self, tag): + #print "Push", tag.name + if self.currentTag: + self.currentTag.append(tag) + self.tagStack.append(tag) + self.currentTag = self.tagStack[-1] + + def endData(self): + currentData = ''.join(self.currentData) + if currentData: + if not currentData.strip(): + if '\n' in currentData: + currentData = '\n' + else: + currentData = ' ' + c = NavigableString + if type(currentData) == types.UnicodeType: + c = NavigableUnicodeString + o = c(currentData) + o.setup(self.currentTag, self.previous) + if self.previous: + self.previous.next = o + self.previous = o + self.currentTag.contents.append(o) + self.currentData = [] + + def _popToTag(self, name, inclusivePop=True): + """Pops the tag stack up to and including the most recent + instance of the given tag. If inclusivePop is false, pops the tag + stack up to but *not* including the most recent instqance of + the given tag.""" + if name == self.ROOT_TAG_NAME: + return + + numPops = 0 + mostRecentTag = None + for i in range(len(self.tagStack)-1, 0, -1): + if name == self.tagStack[i].name: + numPops = len(self.tagStack)-i + break + if not inclusivePop: + numPops = numPops - 1 + + for i in range(0, numPops): + mostRecentTag = self.popTag() + return mostRecentTag + + def _smartPop(self, name): + + """We need to pop up to the previous tag of this type, unless + one of this tag's nesting reset triggers comes between this + tag and the previous tag of this type, OR unless this tag is a + generic nesting trigger and another generic nesting trigger + comes between this tag and the previous tag of this type. + + Examples: +

FooBar

should pop to 'p', not 'b'. +

FooBar

should pop to 'table', not 'p'. +

Foo

Bar

should pop to 'tr', not 'p'. +

FooBar

should pop to 'p', not 'b'. + +

    • *
    • * should pop to 'ul', not the first 'li'. +
  • ** should pop to 'table', not the first 'tr' + tag should + implicitly close the previous tag within the same
    ** should pop to 'tr', not the first 'td' + """ + + nestingResetTriggers = self.NESTABLE_TAGS.get(name) + isNestable = nestingResetTriggers != None + isResetNesting = self.RESET_NESTING_TAGS.has_key(name) + popTo = None + inclusive = True + for i in range(len(self.tagStack)-1, 0, -1): + p = self.tagStack[i] + if (not p or p.name == name) and not isNestable: + #Non-nestable tags get popped to the top or to their + #last occurance. + popTo = name + break + if (nestingResetTriggers != None + and p.name in nestingResetTriggers) \ + or (nestingResetTriggers == None and isResetNesting + and self.RESET_NESTING_TAGS.has_key(p.name)): + + #If we encounter one of the nesting reset triggers + #peculiar to this tag, or we encounter another tag + #that causes nesting to reset, pop up to but not + #including that tag. + + popTo = p.name + inclusive = False + break + p = p.parent + if popTo: + self._popToTag(popTo, inclusive) + + def unknown_starttag(self, name, attrs, selfClosing=0): + #print "Start tag %s" % name + if self.quoteStack: + #This is not a real tag. + #print "<%s> is not real!" % name + attrs = ''.join(map(lambda(x, y): ' %s="%s"' % (x, y), attrs)) + self.handle_data('<%s%s>' % (name, attrs)) + return + self.endData() + if not name in self.SELF_CLOSING_TAGS and not selfClosing: + self._smartPop(name) + tag = Tag(name, attrs, self.currentTag, self.previous) + if self.previous: + self.previous.next = tag + self.previous = tag + self.pushTag(tag) + if selfClosing or name in self.SELF_CLOSING_TAGS: + self.popTag() + if name in self.QUOTE_TAGS: + #print "Beginning quote (%s)" % name + self.quoteStack.append(name) + self.literal = 1 + + def unknown_endtag(self, name): + if self.quoteStack and self.quoteStack[-1] != name: + #This is not a real end tag. + #print " is not real!" 
% name + self.handle_data('' % name) + return + self.endData() + self._popToTag(name) + if self.quoteStack and self.quoteStack[-1] == name: + self.quoteStack.pop() + self.literal = (len(self.quoteStack) > 0) + + def handle_data(self, data): + self.currentData.append(data) + + def handle_pi(self, text): + "Propagate processing instructions right through." + self.handle_data("" % text) + + def handle_comment(self, text): + "Propagate comments right through." + self.handle_data("" % text) + + def handle_charref(self, ref): + "Propagate char refs right through." + self.handle_data('&#%s;' % ref) + + def handle_entityref(self, ref): + "Propagate entity refs right through." + self.handle_data('&%s;' % ref) + + def handle_decl(self, data): + "Propagate DOCTYPEs and the like right through." + self.handle_data('' % data) + + def parse_declaration(self, i): + """Treat a bogus SGML declaration as raw data. Treat a CDATA + declaration as regular data.""" + j = None + if self.rawdata[i:i+9] == '', i) + if k == -1: + k = len(self.rawdata) + self.handle_data(self.rawdata[i+9:k]) + j = k+3 + else: + try: + j = SGMLParser.parse_declaration(self, i) + except SGMLParseError: + toHandle = self.rawdata[i:] + self.handle_data(toHandle) + j = i + len(toHandle) + return j + +class BeautifulSoup(BeautifulStoneSoup): + + """This parser knows the following facts about HTML: + + * Some tags have no closing tag and should be interpreted as being + closed as soon as they are encountered. + + * The text inside some tags (ie. 'script') may contain tags which + are not really part of the document and which should be parsed + as text, not tags. If you want to parse the text as tags, you can + always fetch it and parse it explicitly. + + * Tag nesting rules: + + Most tags can't be nested at all. For instance, the occurance of + a

    tag should implicitly close the previous

    tag. + +

    Para1

    Para2 + should be transformed into: +

    Para1

    Para2 + + Some tags can be nested arbitrarily. For instance, the occurance + of a

    tag should _not_ implicitly close the previous +
    tag. + + Alice said:
    Bob said:
    Blah + should NOT be transformed into: + Alice said:
    Bob said:
    Blah + + Some tags can be nested, but the nesting is reset by the + interposition of other tags. For instance, a
    , + but not close a tag in another table. + +
    BlahBlah + should be transformed into: +
    BlahBlah + but, + Blah
    Blah + should NOT be transformed into + Blah
    Blah + + Differing assumptions about tag nesting rules are a major source + of problems with the BeautifulSoup class. If BeautifulSoup is not + treating as nestable a tag your page author treats as nestable, + try ICantBelieveItsBeautifulSoup before writing your own + subclass.""" + + SELF_CLOSING_TAGS = buildTagMap(None, ['br' , 'hr', 'input', 'img', 'meta', + 'spacer', 'link', 'frame', 'base']) + + QUOTE_TAGS = {'script': None} + + #According to the HTML standard, each of these inline tags can + #contain another tag of the same type. Furthermore, it's common + #to actually use these tags this way. + NESTABLE_INLINE_TAGS = ['span', 'font', 'q', 'object', 'bdo', 'sub', 'sup', + 'center'] + + #According to the HTML standard, these block tags can contain + #another tag of the same type. Furthermore, it's common + #to actually use these tags this way. + NESTABLE_BLOCK_TAGS = ['blockquote', 'div', 'fieldset', 'ins', 'del'] + + #Lists can contain other lists, but there are restrictions. + NESTABLE_LIST_TAGS = { 'ol' : [], + 'ul' : [], + 'li' : ['ul', 'ol'], + 'dl' : [], + 'dd' : ['dl'], + 'dt' : ['dl'] } + + #Tables can contain other tables, but there are restrictions. + NESTABLE_TABLE_TAGS = {'table' : [], + 'tr' : ['table', 'tbody', 'tfoot', 'thead'], + 'td' : ['tr'], + 'th' : ['tr'], + } + + NON_NESTABLE_BLOCK_TAGS = ['address', 'form', 'p', 'pre'] + + #If one of these tags is encountered, all tags up to the next tag of + #this type are popped. + RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript', + NON_NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, + NESTABLE_TABLE_TAGS) + + NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS) + +class ICantBelieveItsBeautifulSoup(BeautifulSoup): + + """The BeautifulSoup class is oriented towards skipping over + common HTML errors like unclosed tags. However, sometimes it makes + errors of its own. 
For instance, consider this fragment: + + FooBar + + This is perfectly valid (if bizarre) HTML. However, the + BeautifulSoup class will implicitly close the first b tag when it + encounters the second 'b'. It will think the author wrote + "FooBar", and didn't close the first 'b' tag, because + there's no real-world reason to bold something that's already + bold. When it encounters '' it will close two more 'b' + tags, for a grand total of three tags closed instead of two. This + can throw off the rest of your document structure. The same is + true of a number of other tags, listed below. + + It's much more common for someone to forget to close (eg.) a 'b' + tag than to actually use nested 'b' tags, and the BeautifulSoup + class handles the common case. This class handles the + not-co-common case: where you can't believe someone wrote what + they did, but it's valid HTML and BeautifulSoup screwed up by + assuming it wouldn't be. + + If this doesn't do what you need, try subclassing this class or + BeautifulSoup, and providing your own list of NESTABLE_TAGS.""" + + I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \ + ['em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong', + 'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b', + 'big'] + + I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ['noscript'] + + NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS, + I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS, + I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS) + +class BeautifulSOAP(BeautifulStoneSoup): + """This class will push a tag with only a single string child into + the tag's parent as an attribute. The attribute's name is the tag + name, and the value is the string child. An example should give + the flavor of the change: + + baz + => + baz + + You can then access fooTag['bar'] instead of fooTag.barTag.string. + + This is, of course, useful for scraping structures that tend to + use subelements instead of attributes, such as SOAP messages. 
Note + that it modifies its input, so don't print the modified version + out. + + I'm not sure how many people really want to use this class; let me + know if you do. Mainly I like the name.""" + + def popTag(self): + if len(self.tagStack) > 1: + tag = self.tagStack[-1] + parent = self.tagStack[-2] + parent._getAttrMap() + if (isinstance(tag, Tag) and len(tag.contents) == 1 and + isinstance(tag.contents[0], NavigableText) and + not parent.attrMap.has_key(tag.name)): + parent[tag.name] = tag.contents[0] + BeautifulStoneSoup.popTag(self) + +#Enterprise class names! It has come to our attention that some people +#think the names of the Beautiful Soup parser classes are too silly +#and "unprofessional" for use in enterprise screen-scraping. We feel +#your pain! For such-minded folk, the Beautiful Soup Consortium And +#All-Night Kosher Bakery recommends renaming this file to +#"RobustParser.py" (or, in cases of extreme enterprisitude, +#"RobustParserBeanInterface.class") and using the following +#enterprise-friendly class aliases: +class RobustXMLParser(BeautifulStoneSoup): + pass +class RobustHTMLParser(BeautifulSoup): + pass +class RobustWackAssHTMLParser(ICantBelieveItsBeautifulSoup): + pass +class SimplifyingSOAPParser(BeautifulSOAP): + pass + +### + + +#By default, act as an HTML pretty-printer. 
+if __name__ == '__main__': + import sys + soup = BeautifulStoneSoup(sys.stdin.read()) + print soup.prettify() diff --git a/BruteXSS/mechanize/_beautifulsoup.pyc b/BruteXSS/mechanize/_beautifulsoup.pyc new file mode 100644 index 0000000000000000000000000000000000000000..337200ed45a3457341b3ff24b7064e8b502d968c GIT binary patch literal 41319 zcmdU&e{dbwb>H^^AV`3qNRc8%O0u-1L;<8pP?8nemT5{PNJ)$-kY2!6M45hhc)I`| z`S2dF`#=(k)Yzud#A$vwiIX^O(zH>UnY2mTw8>W^&LnP=Cev}8CTW{YXWC9P z)5(k{`Kzt_`JS`8?}0yL>QPRoB=)|$ckkXG=bn4cx#ygFckruQM&lp*{7dy9|2N3* zb6nY`fgp&3-~wSKhzEjZC0H5=E(`>fcrdsy7{o)tg`ptc5M0<0#KXab;UL}^T-a#2 zHwDd2!O}=@VI)`@4K9rG4rPo4&CS7u&6b1sSa4yih;IrmYzgAgAh{=)C+Fti%^?j}HAH-z$a(0%fyMuU_ZZ7Nz;`=S~KoIW^68=W4r<;vyjWn#a;&8UzYSc&f%`PV4Og&AK z)?t2X%Sk$(h0mvrq!sTQz4FxLlao)5E_OQ0&m1{&_3G8hdb+x_v^H69FCEF+^PQ`; zG&xe%p(>7yj+XaUXSiHTvm^^^>0G0e*3w3EEo`)|)S8Vr{Fax_9uC`S_~I;)X5(@a zW~*~qr`D<`qm5Rl9oE7QO$*axIZd*p)v0wF?bc*?rV}ow?JJF#a+i|MVmqdM`gDGc zINb^xaihJIjdm7mop7l}xs6Vk)UvgZGP9(X))y`RQaf(UlL!n{&*bRniSTODY#yF( zr%Q>o?}*wK#_js*67`2y8_j08)<~K$ZR^b%*E+S)tdp+RJF96j8J=i`WWBS3M(gdA z{w=p#aig^$_;lY?xnWPaXXeAT_G;Q~Q6mf6t>)Tjvt6$>dAFxghmV=WPvKCv49HyU9>!QjV*}IjlEpyt7#2N>^Kl+w+GRwZluoN<-9o zJ5COTbB6FX$5b6H$!DAwdSYPlPjpRylrjwPxzPUR-P3KlS;auaj8c1rjs7z~l zCv7ZefVc-NH1o!mpF45hfy7J{Onojkl2o(NsDlVWiP1ttpaRMosM;M6d|rehX(pP3 zj9VK@P@I&=E$ z^VO-d=Vs1Kzfhe$@xqKI{t+sgK0Pyg;^f)WmN+@u_!|EvDsYd^D8WL~O2B{SWOk6- zvq`&EOXDzV)ED{9n6?v5PfMT6l7;!&YO^!hP8SYj8^~_VBYTM0W2XSZGvQNDJn{6A zC!aX-Rq;C2ry2t|XM2^a>l=6sR47mlk#9=JtkvtuGE=11S}X8Z zLk$OoEwwV{UY0cHHADG#`dV#ixhc}7yW_`?hbnhsA5=TucPQLfQ;^SseFrA?sUT_z zm)dEv?~uJTIXS6!Lo)O5swOVj7%n6oW+YT;-+>UWqhc-lY`SW*I;?91ifc-686xNa z8pFP6$3dl8la$j&zTV6xhzAyA`-z=;<`Yw}@z%m8mJ-mi)p#xWM0KtZdot0Z<+aX6 za#pLY3V(U_?ZU-4}m2yQY#9IUz>kBqQ%W9P&6CtctPjTz;>9-|~PO@}&El~;8 z>glp2f`2hmB~0Qjw(!^o$-ohfaof?r!M|3MyJ$*Q0OffKbD+G#ridg(+X(Ki6%LRW z%2o(DimMBd3e!h}Hr*h|Yg8_SN?#4|dDBc<)#{5B)SWjfR#jQ$;uXx@1zVBaG=ph- zYV*1OW^AH@YE=bQs~;hL=lE%8@O-VAB?cMC4-HCR;1FUlV_p8ps@1b)TZ+NZ?=1$z 
zCoIo`cLfq!5LK%$6}3-nA!eKo8dqCNs#Rm()hY{|)uwKJ99hOxtC8Z7CViyWqOA(< zRj^&b4h1_E+^=9aK^IgOyK}M|iDVw-%61V9j|>ltR7Q09|3UqB)RObaS*KmRt8;7r zG=EQWWzPZA1ZGo4dO*)Pa#PlZ@c1d@l6qFyPJGp?qW1FR*?=Ypw z1@s-;Ed~9CX-h6VB-ukFngPXNDo0+Vha_y(t~3^GQO&BV(bBTl?rcqywU=z+W{W`O zUS^bxjZY-3w2Pwfd>Q7K-HX06U-#YLXyO3Jqw0TgKnlt z)B2dPOzK=k@xfDMlzep|Y03I1p{8xUEY=;9CLK>tX#N=MN2X^zV)Krr#+62UHM4nc z?6?ISI+|O>qZt;$h$)8edRW|VB9ADFmNwRMr%~@AIeM6&JNxc@;u+P0d95zbQbH#F z6WDC?nYXjDqf*!0)TMbg#g*+LaHuO}toUPm27~KEz*K}SV?=@|7bOML`9{hzL(Iv9 zQFJXG)-a|@2p1YxfNeb$)o#?*!L_!$j1mSZb)IJq{#g(y*9;WOmPwcKC1h2tqRt4> z`GhK_f!~B0Z?JNBL-(xPl43GT5-RouJq?M}BdX?GiDyFNV6ZMw=~9ynWDgO^K~@0k zbwf$OcYQ+vwU}H5)DZRzUim850A}cUGq^$rQ;Cgp3Gg!Anr;h`tNeUMSXa z6KcKV*W-$fD-ehba!r;ceE}@mPZ9H61)%!Dt#4CXdT^`WY@;`19pUpvizS<=>bWiB zdyuV!(QTo4XUG;kCRm6WmS~nw)2xeb(R$GJg<&EL1%~A{Tqdc2U!RD!LJ_?n%t2=g zlv}{Ai+3gbdZ%lSe|I!Xqk~l4xsRZSw0(WFiw^NdKLCsl^P;+2K&&5(6U2MLb(hpE zpPwb5Z%NJ^!!$7s!_Mb?OHW!dm8q2U}VUoSy=GSZzLu? zK|P{9cY{grfOVMkZqu%JfLapt)}hunhJb0A(2HVuEuZcIWS^Lp-T=#Yhlt%71P$@D z0KM`dx=YEUbDY zRiV&O@WycPI?F3i2<5g=$W~IXOMV^X{pp_Z)&Sag+bYPVZ-Lsaus29X(fgJA0|fmH zwsFdz*6l!W^o;?{hXrO2z1$F7C$cfP z_DBUROwU!;wgszlOjqlWaa zghbf`6@WRGea_HnqaJgw$6pO7{*BS#+G#c8(k4}V>f-x@PYeWaY!2QSQ`So(!Rwo4 z!hd5+@jj6;>%dzwZSem2mA3-^UVfeS{Doek9V>qktb8?Cd7#pIOpQos1jy{QvsR;a zQamSl0Lw^_KB=L-v{`W~tcYNot#oJxy#Z1~!Q~A~IlpqYvhrlW-^)CiF`*>W_?=;$ zh6#uHfJG^+q2JF9LuRyORci_Z2uW+9cVRPW%L*+IGmDw&Y}&O|!cOeETCLe)O{Rh9 z5fW|HGSg`X^#+qMr3wqJde=IO4BG991x%D#$NRmQ z)Gzln2Si4GhAR^Yf`P4-Z3DxmJR7WRgWPRkR&3?kW7pQoc78W;>34f&jBrb3cV%KC;}OdFcU<3yHkw+;Sf>CG^$kbK1z-LKwK1gJcLR>! 
zL^*%_4QiuwWbZ~BKSViyc2gTgN5rIcP3h?%lR$h18qb9JxM)0abeIs`G48LuPxq%T zzE_f(9uhi5e$*&Dk{XhSWltX!Kbcv1GjH3OYdI;A!h@fv1&%TK>ksc;fzEJQtj z1UZM@{R0t4eLo&**Z7|Rp$S|7Qe4-8JM0h<*oFo(E|z|R3JtXJ*q&o zJUT$&3Qg%yrD-%RS~RJ2DQry}q4azWVk(d5NxiEjtHIXt6s!E`X(iuRyjmm|uWOwV zeNb68msw0CjU+spv}IZxqt21YUnKexX#z)@AvShh2wLnN5dDWJ;yhQTW)2Sw4{ROV zJ~TYAb3-0(*vXyZBbBk?t(%B%9N9SX=m=&b{~vvn7wcU1PYg+jmPt>L6y~8fi44fz zp~rQRSY1P0HiM-#0*et7-rz*p|2mzmE{DokvS%rD~llu#z*BvuClW0j!+ zU%_<5Wy<2Z#*);cV44Sz)5}T%* zBe7$;ITD|4H%DT`GzV%KC9!N`!Q>XZB<;b7>5s5Z@(7P^jF=cGd4zK|ah~31k1%CE zB=tg`h;uhKQ(Uzl2rgjL?0v*5czfg0ZZ6vw@X~glZyd9CTbJ?0Hiz#Ep`5+d9Xzhh z+55siQ3=uMOpAu7@G!Gd*A&3%8DJJH?)?WjHufH{=Fq<93S1 zL2D^rdmC#R*X=$9k18nFL5qzTr`cQDfQ(`FZEe(S#XSQ9Yqfg6uvkm1K!n^R`w`YE zrqmLAN*JkGbdV*-b)&t4m&qTwt?{!w?^Vf|FOTc1l5iZpKi(aXeEEq(Z4Dn#oRDR` z)jt_2{YAG7$<=op|6h43j)x67H{*DkWUX zyCz!`GH92{CV3rQI`s}nvWneX5QAW-WMd+rQ(Ok#5_G_APY$vb9s+?^X~f0S<*y3@ z)AhYQ?7CeC!8?m-`>Jpq&uh6AvJ`iau|=TOmNPBBwv+DAGEdb#>l!LrqT7f;F)fAe&Xdil17n;W9y!G}VvfUPxSOb zP%kdSJVD-SvVRx@&!bQl3Xom(6g^fJ)P4&5%?E669!qn7b1N z%v$Z%Vd-2$oZEABEd;GD&{R^n!SDeVlpi!Lza^oo+e4N;41zeyQ5%f`ZEf;b zczz6-lj2s6>hERgDSt43C@rB;S+Y~MX$Sp9Bf>m8hV+md4y2X9f2JetUP)B&DRH#C z6KY%%6JDQ$p(AKbXnI8=7DgpSU`0rcMG0CdJ0p_F(Al`ybISfaK@ZmwZ84Y1=&WLW zYfr>YKco>81qcR5S=S9$9;w`qbBtnpDn@t*^^D8LFeY;CVm^$gE=B>`Wk#}RKu`fN zPW(AH%Yn>25C?(va8TY#z?un5)5oZ z2G{|NF>h(|38I=A=eROaosy(Gf?n6!OBu7DAef@BLc^L(a&vb_N?UE!oh3O&W;g(8 z?}pR~$_%+I5zq{x69@9mVw2(Ng-I1PFcJVUnL*Yz;;EZ=DQWn%hy_BV&^IjTQ%%Kxm@k8qbkE^YN6aqdiLJ6 z(>O^d!4i#Es3A)u`uofHpWz(r$|BA^Hlg}Y$a+crp(Up4ROVV3@6RQ zvu_q+6t!ASR;tx+6&f0;8*ZW+e5CRKDndg=O$}X|ESXji;0%M#Bpl00+yxSEKfu6O z!ZzEuTI{jJfz=%0*gMG(;XKKLrzCTXo9}Im1ygU>q|Yq1z-iGcv3_P--q{xJB*TfgEibiY6Q@rbP+=~ut>ydn@~PhUO4aI>vR%F$7Ms-!H_Z4G9>RPP+`M(^RB#Qm zt+n*j#YfF_jHw=_0P+I_8sdZ$u|bmGE&yFw*@N}kRGApMA^KRk^|FZ_&~b=7=`GYc zE?f=}05(<75EYiG_{;pI> zD3OTYw+qWPP4Pm~Kb6%Pt9R>XtbeGQr~-2Y)NWISH~5oW3gxW?_wzt+N>@_BIxZ4C zss|>j+CBOy6%3dtV>p0Wy0*mUV}OkcN*y^%O$Bd;g48Vd@?(0F?yw0?3DG}RD^`t49gQbWIqewjCQJb+|`;1E`h){rddVi 
zjb%Oz7MsPPK^hR`Bi=dFV*9YRJRAG6{5E75|ER+hy=&;!c8d=`xz6j_w%3c$p z`eq`*CfVk9RUT$NG%Rf%RxbT2b({I)+$#+qJ_-82i)&luJ*=L#^ScGB*iH+#6DwI^ zDU}IOn#qJl&gd27Bqztwn>=+9Q4Mm|-Q{6|bU(c-_G+-IonQOe8_2N)+mamrIX6y@ zuYFm~JazGRvB(((a`b`d#;kXQq<95&o zf^sdVxNHW8XfN0i0?9Q|=}Zb-w>|dAjJ9Bw{hd14zTM%tl&)ZA%LSM0#WXl;1ug== z%GJMRlYGp>@!Sz>{7{H1=%kUOa%JJ;BZP=Df*z%V1}*x2rEz(1=c_=g#9$`}gMX!A z6rl(Nu=#DEG$K?2uh=)lX%OX6DR$tEpROdmgabSYRi=~> z98poeUWzg9IB}KpX{-|Eg-l9=(4&KxMZ7Ha&$SS1*I~b1u5q}P>E^IO@l_+FkQ1O)P?TMx+U$+n`8s8P?goY#uT8S|?$>t#2C%j?-{>ma?V6Df?BP6zeGKcc~Kt5H-%7YR(^) zL~X272)9l%qVG4{tiJ=k62{(Hm6tKLu~&jKifc zuG=XtGl4jyoz~j2Sn3<)$m9oh!knmYPz)Bjs8WxZ6IJ0KehGTn)-G zdEWl3vLmkE5irYDg+y5R27M8iz&Y}yl!V+?@pK+HY?4K7kLgqJy@wj|SFo64EBj1~ z;t6^0KS_~ieDgSm%Lu(hQ*OUfXU*NEXI()Hm8hhqT9YH8Vx56!+tDd8o-|90&s5R$ z{3sT;&xo7!h)N6G_I;j+nSK73z~|g#t1a#jX*KgSIs#*zEBhe9wL^j=VAR?L2-yM! zke<4@U0$f31Yh2RvpC8pRzaU(Atw`oiptL*QHidCeL=B(3QiC>ZWuCrM7Lb;RGNr> zhGHBYxup4XZ z1K3@+R;DK&RZ~hwKlhz|Mrj(-C{b`)!Bz!36-bldr*4Z4sf8hnU_P55IgN0`i81aNW!|MW_5ZNK8D zE^fAv_fcjrq{Y8U$WeJN5g5V2feME#++g8w&;zN5B-`RLOKXy3K(HI}v zUEJX;yPF^UWFO$id}5Erc0{q=J>rQWKYMi+@zMBE9X@vAczn0 z^WGpn#LwX%p5*695I@1slR^9xKkp0Tr}_D&AU?{^`-Auc{7~Nq`FSRYKg7?mApUT0 zl;f5X+tC;w7aGUU25$x;AbH|*mUv=);z>)K$`coGU&bBzl%2cuN|11@5|B6@yh%DP z$j>XAT##R|2rkGkS_Jr;^%0BUe07L}|t%bavJvxEg=VBEb3M7QxB*WsBfs{4Ev<sKH?4(+45*!$Kt< zYd4_HsyTDu%?&bIJ(#lyy4ILugE`qaKb_Vll$`@A!;LMzFJo{wyNr>^+8oo)7J73D z3*-hf+}$(_XILv?zTIrLuQJu`kVi`@m+1Jsn$;n!Sut{KQIW;Esqlh9me;|NA-YRD zlEukI$l3_v-+;F4J2u~LA3ru%OOGErqUb)(q3o>kz7C$O5NNqV2YaGS2bbc$hOSL#T%z;x&we&JS#X-Nh{{`lh<@Se6)BdRl z1`$~F(`ImcJeylt%^!ZobI-Th^Pc;D8KI<>c@VMRb$KP%2_3>t&`)~RC=QyBz!83f1z5lGe z|GgH!VDay>@cS+NoWIq~$`1U1r=;DKAI$S~JNZL-o^HzL9WrwqQ|gufuxCLhkS%)3 zk9f*bH|0k?g;P%B#PfV1Pf0-nPx&$LIGnfa_}})9FXSEnJN`}^m$|&-KW<(73G4D} z7XQ26yjND2^5*@d`H@ToY zHd3n+Bw<847Q1}0Z1q1@JFL|-S*-@T>@PuhOrNsltwDQ?8$HCp*}Fl7#VjWW*ua2A71R%8s^aQl zNmY!Byo1*TOFH&kA`8R%B>sNJR<^iQ*p6~3ppkCBV7ZH}sOHmw`SKrG>J}@5jEs72 zi1{^!zN>hF2SR#t*R=7ptqlmZg>jQL=W!U2;#bEW%NT|no3pymc&>BX9_^mo*h1%^ 
zwJYsLd>%5Mr03H%cCe-FnJ`;ef&|Vvizp7rUuxrgKUeFX4=5cX8e@2PS6;H z^w|fU)F_w4kdGGNsp2mE=<;j6J?l`IwGp2ZSTUb(C>z2eW*J^8gs_uewW_(rD4Ubw zA_v3AZEgQC@7-R+0faudM8;XzqL$Nd7!6^z2V`*Q+yHvub0DzDb0*ht%RdB5rhh# zC5>zk(jMqG;+6cI{$l)VVQxt&>^u|E>B0mq6>d`vb$fib6&#z%3=`I9Om3Klq-J~t zWzJ_k4=wdx}@mbxmlgQU!6Yj(rH@~n?>e4 z{Ni*}ed)x^%!wCHn^(~-P&g95x4rI2Kppn2N^ch{O&nd0Ssh<*|Gd(~Nku6w0@5Wn z8h8JRa>-VdE2m|pJ(HbAK<2|LP_Ta1-qzD=Vpb1RaPYv6ik95k7`R2QL)t>m26{OT z4R@V{O5P>SoNH-IF$NEp?AapGv?j2eb&2l5?!GYRq}+0DO*wETb90Bwm?~Fq!ht$s zgX=N@BB@rj!ATN@c&8t0#HGA5_3}cQBrIOg8)#?qH1bPX9A*ZoqPb z5>(Cv?7q&ak5uX}sJoKx0+4u@b_+@JTPK4*O@eptI!~@WYBvQoB0CE8j#wfXOAhnO zDvfTRNNgmL|r(-ei(@Wa9u6fB`(Z$d1A@b#d!6(AIwgU zBBp2qefUNP>t8BmBY;kD11F=$xs85-z;F&*0*NketLJ^8$}jQaF9~)sIRt~F=>Bhp zoXk(9$l4=!;#*}P<=p375=E5?GGRfnjTpTcp?T{_@K06RW`ewD>Z$vo{hFSrD}c<1 zr)wQov&FmbsA~Ne>Zo8E0msv)?Ns>t#B$ zYY)RBVf&T!sh$NPU1B%IFx6J`2*S0Uo9%}LS{3;U%a&F~Py8b=f=cw83Vu|<3kn1- zH{qRB?4t^*1YKg%zh(E=Dd&p<*F8kA4-LsCw1-2hIjWlE_1jRNBO&kELwQ6L!xmz; zte|I3L_A4(!5!_JWc32|?^-GCi#Hkz@7?o?soU>HpT0;rzo$M4nA#7&qcBm59F_?1 zZ@HmujFZ}FEtY+i1=Fu9q~%}*hHOThTZ}U42Nf4xBoW7cKN5%trc11hRsyKPw2(XK zy!Kjl7j<5~`Mk*|lp5Wm9zT1n3c2U7xxZ)R*Wy)N_h}g~06O2Vh+q6za0wSwW4{7; zJ{w_~f~gg0$CYAu;1wKp_Kpj0aq+G*fN&TD0u3O=5>o^mgkkOX{s#jx-0+3Lm_E3wV-sRaq|uv)hCo%eWIa;|;$+ zubmAv6*CHF1n+U(iXQbi94n3O9TJVZT`_LaT3r(COqka)EHc2rKl3tsKrHNBhS?#U1P~Ly{iYC6YpxKrfZ(xU=HK1WYUFA`}2r0oM>aKocRM z@fRl-UEg-|ti1Nm#q#N&vT=MR6T9NL3PuT6zff#2f`n|u|AC)PY^?ka{fMAA$NCXr z;8@BJju#t(^WbLLhV&PUyk=<5{;+t&sm?e*WAD9g}v(-iwUHJ^+0p)cuuObw>D zqzx;Ju_G%R^|AqZ)mC&Zx2xPHPg%yztuzpP-PZN!Bi4UX%rpbXmXAN*ZXcUFezKMl z57``}57Y_ffPQ=#Uvc?Gjn9o2h9zFhU+T!e@-{Epv*P9D+rE6Plisd?^zGihqxP1d z3y(D$$B(TxkLx!)sPN#8JzZ_)^=YWx*?+v*Sl4jcdd&N*W>XH~P381gp2OTq z7!CDJQ3cuV`Y|o{BPUp4)eQSy=X`tlYog6o%nlv5DxpEtc}wLzcxvl_Kpn}tbE8Cv zAyS7cCSaS2UW`|lNe?3>xlW%oQP5m*VBl{W!GwoH`b5Bu9>VA)YS^hCyRc%AA$m~Af!ERgBYBX*p}{|8j^Y+a?i1;@M^zUW zH|tiFL*cXEv9icYrHH40WJPe4F@lud^9F6PXB&(%ll7jWF_2|ytZr2m>WRrDEtKe<7$LboVd5_kfh~DA@HW_$+h9!G4)eW+KFE3>kthfzpNKRDCMd4;g8*hP 
zdYbZNfT3-+j&@XfA1NU2sbr;58Q9kvtOS<~OEhA$Es8PuP=;sA!p;S(Za6_>E*);`>tPntxzGxl!~y}=pVwZWidKP`?7Bx<9Y5_DTdl9M(W zc73pYs``MIrwl#88CSB+w(cP-1u18E7xMGgLm4iz3X}Ka7n`leg-TsKVp7>rnKcc-mKQG@OmCJ;uUQN3t2MQnQ5K}cHrs{-MSQGMM8J(=Y_qwu!801erjyZ25Y;dg>vPw z*a<}sU}yLcpMcnCpMdyXeFDM@=lLVKXhhMu?4EGo-bvX0ykPoadc7bz=5JDarSkIW2p>p;scrWJ3fbb1L$xz4ofP z^<2Do+!&c1(v%;$W&*m@9#m-Z2kP})=V2sV)XlaYnW?whQ*Fcu_8!0X5#vEzi6UOKa(N)FBG7r+V)&+aeM&TLBjNds5b_YDz@1F``8;Gx_aZ@fFD-cZ!7qc zfm|7_(=%-sd^pCJM|rCLLZR`$Qz&y-8lt7Zo#E?Y9&& zS?xb4_D2c~bbq4Qe^l_F75u4!KU1KNbs=K0ci@Nq)4csO!;*c9$nZ9{0kD4H|Ilb< z<6Zn8WrKhp%*yIyP^FF?wCw~sZTlXs9b-F3o*27-WX!f2Xv+crqtDZbI+y*^96!mG z9R?48U+Ds^@3J10OfIda%~AJtc2{RSofquHdJZaPb*Y>uVuyF zLvmW~!zAOxi~DXE;3>Vp#$q{3@3WkbmN7i1<(X{G)9yP>PQDK7eZ|8WPwIqQ zJ|&9^olnn>=@BhR%qcogr_o#F9{gT`?Z2ZsHfwV3SK*Gmwhk}cby1PpR$O!PmGegK zX>kzm8@78Q8RiFT;oW+pV@0lEVaF@3{z-~HtAm~kVl$T@S>>c9w-u6_wJ|cwV@bX+ zask27*WFLC5xQ%|L7aW5RGwAHWgJDtOGPUt8u-AB;}g+X?r@sk5`GZ5zq7Xzw~f-7 zMWsvG1F4g9Fx@XdE?YRT8Pl?N3jGQFFgO){S86IPakY^Ef!nk>JcYh*Sl&N)bGb5G zP1Xx!J7iw8^;&UGO|6N~2D)f%=8-$FxTU`-XJ=fG8}){~ph^&+b1U_Gd{(*Z{PL0% z;x;6djw=x@Rz?AxVw=s$@OcdVg^Jt{XlvK8My^*|*QEARIYrMKOGP8xQD(i8Fwfl8 zzL9VqKdvY9-=|6@(nSb%c(*dSF3~pa12L~~sp-mhk1QWA-_Hx^VjSJ;?OZz7pun^t zSFyM0VO|WHQhGSoY}YTFx5~P%a)@fD8oq6Z^I~o+q1*oxjF=9>YsAm2tLJ2U?pCSI zT91#FDXlu1R)(to(JK)ogCo;z$bU z@+df_!uoLl;8G-ZajGcd?J=u(r#$O2KVL9cP8OS5WuDP=Hd>;=E#5O%%%WB3=elFm zquvxm^oBvkRK)~$f~s8&sTFaAGHxn|rWLKUW_e|%PQEE`UgvEFKZn>&rL3#gGM!=* zI^Q~G@!Iv|M+h(XX%wTHig*~M)mCC0BcZ0NY2BSQ7|zlO(i}#G4=2}cIXBlKBYNx8 z!4^itn$nRl=`As2vU3V*h6nhOXxqX<64qAP@PPx8^3oo+UMQzSu9L2XXQpstJBf2T zu9s&z+4}nOZz^8Ju~Su9!6=EW0mXLsNfX27Qu|pT+Kd2}+AYdObP4PI%f*sJS5_Mx%LApb{@d^->E(%T_*o#*!NUP6;`7m4)BxNjrfBF=V2Sp-8i zuCZl}ZRhuDXCNE;kU`lTm~1fd5WjjPL~b#uB2jLXc&J*l4Iqo;t@X3b9R``>I8qCD zWx4c^(PWn!%gdU9I`-aXpHx1u26OsN(zJ~U(sTgpT*M~B7u#30`$=wO%&>-Rl!!jI zAIXZzUxg&ex0|VbBYQ#6lQbwNMjFOMU67)ya0< zT<5vqFFt|i`ds5R>30rT>wDS=9kfQ>3M%H)d`scM?QC;K`OJ;3;RLdjaZT>UKc23{ zW`xupJS&erwJw{;l@rRVDL%H=hJC$V+vi^5aU+)O=e$rvJ2&yojjM8ads46Ai*N7q 
zI89_O*13wc=3%x_V-h7Q|8cCMb8T*IP;9H8T>QwLA01&OD92>@vUZb6S}i2@EF2e< zb&czRJA2#C0Cy8d0LRkmjWC%QsuI@11>`~@REGl_8E#aRNxsO)*9j6{p;Y$pYFke4 z`Js^#3GG7>JkpDD+eaFv5aS1>Snt~0RX!MDYY968I4`0&xTY53OECvF@p2(NNNkty zHt0T=u#+*DB+dvPOREU(OKrN8T(JJgs+fc>@Uytd;fir3~4F*ZUxYjt+otu48f&>`*u z1}ixiA+*ib8Tow6elO3tKRgJzYzbFbl%m!M=AwJ}@x!IpbJfm49-paBotU1jo;-c_ z%;}HA`CdHzN_4u*^LqK}fv=yvhef_cZd13IQmcPHc^|F+${_4S%I4t*h6i`!YKuD? zTyRha)b?(2KX>AModo-*z8m8cAHRX|y=hzA;Zu@SvVTKpl-vj0Xw5RdvrI89I81tN zw^--b!u+rS`TKz-cHES$Kb;UkG4N~cKrtLzKbF}V0=egQz(AaXr zv%MUC__+Nl7h9Cl5AP$_KHnF8V}8r{O1@czdgyIkUfIwr`Fy=P&imsRO=XZ%U4Jl1 zI?pG)NBK0hG0;YACJ>q%bRT#sb+;%p>uV++lq6VyLo?CA#0c%PwutO8k=a`2rm7x# zUjm*QGLc+{X9kR6Q-W;=V$nHWFh(mETC&uR8_3U+C-O~U=Bw^Iot8lYY6rS`wzxPj z#G1m^ih}-(FWLGsgKdPIsq34gmXo%|PK*=WDs(>TK$rS_i0|gm3LjOe*Fq!<(bX8< z^%8sWdGn0RL~4iat61l3FeL<+F1X|7OS3j3`d!8^?XBD>fn>Kwj=E-{K;5}Vh78<} z-AoNJ!Dzr51-k;Ts2uDVzU{=l<4CkTBx21doVh8TGWaMpP8qylCt8oGPJ3G!dG1e+ zt-K|t2^KMzaL4HJfY)jx|7^GXn;ZW}0cnAs!TJKR=rOF2}}4e0+fa0{lxI?;3`uuRIBr=?EmDu2@&b;%u}uUnxa}Y zKOMde;3p<=?8I6>|EfAA8s(Xy zzf>ws4)6$w0M zs56l&Tl4R~XC0)z=;wLURX6OFj=q<>zNe4@>lj=FjoNXQ!Y&C-C4D75|Br*T{P@QP scJ1Ceu;Z6@jqDlRGq~$(!3gJL?b!3R;LNU9ca8IZ!2WMk-kp{I3pj~wU;qFB literal 0 HcmV?d00001 diff --git a/BruteXSS/mechanize/_clientcookie.py b/BruteXSS/mechanize/_clientcookie.py new file mode 100644 index 0000000..2ed4c87 --- /dev/null +++ b/BruteXSS/mechanize/_clientcookie.py @@ -0,0 +1,1725 @@ +"""HTTP cookie handling for web clients. + +This module originally developed from my port of Gisle Aas' Perl module +HTTP::Cookies, from the libwww-perl library. + +Docstrings, comments and debug strings in this code refer to the +attributes of the HTTP cookie system as cookie-attributes, to distinguish +them clearly from Python attributes. + + CookieJar____ + / \ \ + FileCookieJar \ \ + / | \ \ \ + MozillaCookieJar | LWPCookieJar \ \ + | | \ + | ---MSIEBase | \ + | / | | \ + | / MSIEDBCookieJar BSDDBCookieJar + |/ + MSIECookieJar + +Comments to John J Lee . 
+ + +Copyright 2002-2006 John J Lee +Copyright 1997-1999 Gisle Aas (original libwww-perl code) +Copyright 2002-2003 Johnny Lee (original MSIE Perl code) + +This code is free software; you can redistribute it and/or modify it +under the terms of the BSD or ZPL 2.1 licenses (see the file +COPYING.txt included with the distribution). + +""" + +import sys, re, copy, time, urllib, types, logging +try: + import threading + _threading = threading; del threading +except ImportError: + import dummy_threading + _threading = dummy_threading; del dummy_threading + +MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar " + "instance initialised with one)") +DEFAULT_HTTP_PORT = "80" + +from _headersutil import split_header_words, parse_ns_headers +from _util import isstringlike +import _rfc3986 + +debug = logging.getLogger("mechanize.cookies").debug + + +def reraise_unmasked_exceptions(unmasked=()): + # There are a few catch-all except: statements in this module, for + # catching input that's bad in unexpected ways. + # This function re-raises some exceptions we don't want to trap. + import mechanize, warnings + if not mechanize.USE_BARE_EXCEPT: + raise + unmasked = unmasked + (KeyboardInterrupt, SystemExit, MemoryError) + etype = sys.exc_info()[0] + if issubclass(etype, unmasked): + raise + # swallowed an exception + import traceback, StringIO + f = StringIO.StringIO() + traceback.print_exc(None, f) + msg = f.getvalue() + warnings.warn("mechanize bug!\n%s" % msg, stacklevel=2) + + +IPV4_RE = re.compile(r"\.\d+$") +def is_HDN(text): + """Return True if text is a host domain name.""" + # XXX + # This may well be wrong. Which RFC is HDN defined in, if any (for + # the purposes of RFC 2965)? + # For the current implementation, what about IPv6? Remember to look + # at other uses of IPV4_RE also, if change this. + return not (IPV4_RE.search(text) or + text == "" or + text[0] == "." 
or text[-1] == ".") + +def domain_match(A, B): + """Return True if domain A domain-matches domain B, according to RFC 2965. + + A and B may be host domain names or IP addresses. + + RFC 2965, section 1: + + Host names can be specified either as an IP address or a HDN string. + Sometimes we compare one host name with another. (Such comparisons SHALL + be case-insensitive.) Host A's name domain-matches host B's if + + * their host name strings string-compare equal; or + + * A is a HDN string and has the form NB, where N is a non-empty + name string, B has the form .B', and B' is a HDN string. (So, + x.y.com domain-matches .Y.com but not Y.com.) + + Note that domain-match is not a commutative operation: a.b.c.com + domain-matches .c.com, but not the reverse. + + """ + # Note that, if A or B are IP addresses, the only relevant part of the + # definition of the domain-match algorithm is the direct string-compare. + A = A.lower() + B = B.lower() + if A == B: + return True + if not is_HDN(A): + return False + i = A.rfind(B) + has_form_nb = not (i == -1 or i == 0) + return ( + has_form_nb and + B.startswith(".") and + is_HDN(B[1:]) + ) + +def liberal_is_HDN(text): + """Return True if text is a sort-of-like a host domain name. + + For accepting/blocking domains. + + """ + return not IPV4_RE.search(text) + +def user_domain_match(A, B): + """For blocking/accepting domains. + + A and B may be host domain names or IP addresses. + + """ + A = A.lower() + B = B.lower() + if not (liberal_is_HDN(A) and liberal_is_HDN(B)): + if A == B: + # equal IP addresses + return True + return False + initial_dot = B.startswith(".") + if initial_dot and A.endswith(B): + return True + if not initial_dot and A == B: + return True + return False + +cut_port_re = re.compile(r":\d+$") +def request_host(request): + """Return request-host, as defined by RFC 2965. + + Variation from RFC: returned value is lowercased, for convenient + comparison. 
+ + """ + url = request.get_full_url() + host = _rfc3986.urlsplit(url)[1] + if host is None: + host = request.get_header("Host", "") + # remove port, if present + return cut_port_re.sub("", host, 1) + +def request_host_lc(request): + return request_host(request).lower() + +def eff_request_host(request): + """Return a tuple (request-host, effective request-host name).""" + erhn = req_host = request_host(request) + if req_host.find(".") == -1 and not IPV4_RE.search(req_host): + erhn = req_host + ".local" + return req_host, erhn + +def eff_request_host_lc(request): + req_host, erhn = eff_request_host(request) + return req_host.lower(), erhn.lower() + +def effective_request_host(request): + """Return the effective request-host, as defined by RFC 2965.""" + return eff_request_host(request)[1] + +def request_path(request): + """Return path component of request-URI, as defined by RFC 2965.""" + url = request.get_full_url() + path = escape_path(_rfc3986.urlsplit(url)[2]) + if not path.startswith("/"): + path = "/" + path + return path + +def request_port(request): + host = request.get_host() + i = host.find(':') + if i >= 0: + port = host[i+1:] + try: + int(port) + except ValueError: + debug("nonnumeric port: '%s'", port) + return None + else: + port = DEFAULT_HTTP_PORT + return port + +def request_is_unverifiable(request): + try: + return request.is_unverifiable() + except AttributeError: + if hasattr(request, "unverifiable"): + return request.unverifiable + else: + raise + +# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't +# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738). 
+HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()" +ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])") +def uppercase_escaped_char(match): + return "%%%s" % match.group(1).upper() +def escape_path(path): + """Escape any invalid characters in HTTP URL, and uppercase all escapes.""" + # There's no knowing what character encoding was used to create URLs + # containing %-escapes, but since we have to pick one to escape invalid + # path characters, we pick UTF-8, as recommended in the HTML 4.0 + # specification: + # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1 + # And here, kind of: draft-fielding-uri-rfc2396bis-03 + # (And in draft IRI specification: draft-duerst-iri-05) + # (And here, for new URI schemes: RFC 2718) + if isinstance(path, types.UnicodeType): + path = path.encode("utf-8") + path = urllib.quote(path, HTTP_PATH_SAFE) + path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path) + return path + +def reach(h): + """Return reach of host h, as defined by RFC 2965, section 1. + + The reach R of a host name H is defined as follows: + + * If + + - H is the host domain name of a host; and, + + - H has the form A.B; and + + - A has no embedded (that is, interior) dots; and + + - B has at least one embedded dot, or B is the string "local". + then the reach of H is .B. + + * Otherwise, the reach of H is H. + + >>> reach("www.acme.com") + '.acme.com' + >>> reach("acme.com") + 'acme.com' + >>> reach("acme.local") + '.local' + + """ + i = h.find(".") + if i >= 0: + #a = h[:i] # this line is only here to show what a is + b = h[i+1:] + i = b.find(".") + if is_HDN(h) and (i >= 0 or b == "local"): + return "."+b + return h + +def is_third_party(request): + """ + + RFC 2965, section 3.3.6: + + An unverifiable transaction is to a third-party host if its request- + host U does not domain-match the reach R of the request-host O in the + origin transaction. 
+ + """ + req_host = request_host_lc(request) + # the origin request's request-host was stuffed into request by + # _urllib2_support.AbstractHTTPHandler + return not domain_match(req_host, reach(request.origin_req_host)) + + +try: + all +except NameError: + # python 2.4 + def all(iterable): + for x in iterable: + if not x: + return False + return True + + +class Cookie: + """HTTP Cookie. + + This class represents both Netscape and RFC 2965 cookies. + + This is deliberately a very simple class. It just holds attributes. It's + possible to construct Cookie instances that don't comply with the cookie + standards. CookieJar.make_cookies is the factory function for Cookie + objects -- it deals with cookie parsing, supplying defaults, and + normalising to the representation used in this class. CookiePolicy is + responsible for checking them to see whether they should be accepted from + and returned to the server. + + version: integer; + name: string; + value: string (may be None); + port: string; None indicates no attribute was supplied (e.g. "Port", rather + than eg. "Port=80"); otherwise, a port string (eg. "80") or a port list + string (e.g. "80,8080") + port_specified: boolean; true if a value was supplied with the Port + cookie-attribute + domain: string; + domain_specified: boolean; true if Domain was explicitly set + domain_initial_dot: boolean; true if Domain as set in HTTP header by server + started with a dot (yes, this really is necessary!) 
+ path: string; + path_specified: boolean; true if Path was explicitly set + secure: boolean; true if should only be returned over secure connection + expires: integer; seconds since epoch (RFC 2965 cookies should calculate + this value from the Max-Age attribute) + discard: boolean, true if this is a session cookie; (if no expires value, + this should be true) + comment: string; + comment_url: string; + rfc2109: boolean; true if cookie arrived in a Set-Cookie: (not + Set-Cookie2:) header, but had a version cookie-attribute of 1 + rest: mapping of other cookie-attributes + + Note that the port may be present in the headers, but unspecified ("Port" + rather than"Port=80", for example); if this is the case, port is None. + + """ + + + _attrs = ("version", "name", "value", + "port", "port_specified", + "domain", "domain_specified", "domain_initial_dot", + "path", "path_specified", + "secure", "expires", "discard", "comment", "comment_url", + "rfc2109", "_rest") + + def __init__(self, version, name, value, + port, port_specified, + domain, domain_specified, domain_initial_dot, + path, path_specified, + secure, + expires, + discard, + comment, + comment_url, + rest, + rfc2109=False, + ): + + if version is not None: version = int(version) + if expires is not None: expires = int(expires) + if port is None and port_specified is True: + raise ValueError("if port is None, port_specified must be false") + + self.version = version + self.name = name + self.value = value + self.port = port + self.port_specified = port_specified + # normalise case, as per RFC 2965 section 3.3.3 + self.domain = domain.lower() + self.domain_specified = domain_specified + # Sigh. We need to know whether the domain given in the + # cookie-attribute had an initial dot, in order to follow RFC 2965 + # (as clarified in draft errata). Needed for the returned $Domain + # value. 
+ self.domain_initial_dot = domain_initial_dot + self.path = path + self.path_specified = path_specified + self.secure = secure + self.expires = expires + self.discard = discard + self.comment = comment + self.comment_url = comment_url + self.rfc2109 = rfc2109 + + self._rest = copy.copy(rest) + + def has_nonstandard_attr(self, name): + return self._rest.has_key(name) + def get_nonstandard_attr(self, name, default=None): + return self._rest.get(name, default) + def set_nonstandard_attr(self, name, value): + self._rest[name] = value + def nonstandard_attr_keys(self): + return self._rest.keys() + + def is_expired(self, now=None): + if now is None: now = time.time() + return (self.expires is not None) and (self.expires <= now) + + def __eq__(self, other): + return all(getattr(self, a) == getattr(other, a) for a in self._attrs) + + def __ne__(self, other): + return not (self == other) + + def __str__(self): + if self.port is None: p = "" + else: p = ":"+self.port + limit = self.domain + p + self.path + if self.value is not None: + namevalue = "%s=%s" % (self.name, self.value) + else: + namevalue = self.name + return "" % (namevalue, limit) + + def __repr__(self): + args = [] + for name in ["version", "name", "value", + "port", "port_specified", + "domain", "domain_specified", "domain_initial_dot", + "path", "path_specified", + "secure", "expires", "discard", "comment", "comment_url", + ]: + attr = getattr(self, name) + args.append("%s=%s" % (name, repr(attr))) + args.append("rest=%s" % repr(self._rest)) + args.append("rfc2109=%s" % repr(self.rfc2109)) + return "Cookie(%s)" % ", ".join(args) + + +class CookiePolicy: + """Defines which cookies get accepted from and returned to server. + + May also modify cookies. + + The subclass DefaultCookiePolicy defines the standard rules for Netscape + and RFC 2965 cookies -- override that if you want a customised policy. 
+ + As well as implementing set_ok and return_ok, implementations of this + interface must also supply the following attributes, indicating which + protocols should be used, and how. These can be read and set at any time, + though whether that makes complete sense from the protocol point of view is + doubtful. + + Public attributes: + + netscape: implement netscape protocol + rfc2965: implement RFC 2965 protocol + rfc2109_as_netscape: + WARNING: This argument will change or go away if is not accepted into + the Python standard library in this form! + If true, treat RFC 2109 cookies as though they were Netscape cookies. The + default is for this attribute to be None, which means treat 2109 cookies + as RFC 2965 cookies unless RFC 2965 handling is switched off (which it is, + by default), and as Netscape cookies otherwise. + hide_cookie2: don't add Cookie2 header to requests (the presence of + this header indicates to the server that we understand RFC 2965 + cookies) + + """ + def set_ok(self, cookie, request): + """Return true if (and only if) cookie should be accepted from server. + + Currently, pre-expired cookies never get this far -- the CookieJar + class deletes such cookies itself. + + cookie: mechanize.Cookie object + request: object implementing the interface defined by + CookieJar.extract_cookies.__doc__ + + """ + raise NotImplementedError() + + def return_ok(self, cookie, request): + """Return true if (and only if) cookie should be returned to server. + + cookie: mechanize.Cookie object + request: object implementing the interface defined by + CookieJar.add_cookie_header.__doc__ + + """ + raise NotImplementedError() + + def domain_return_ok(self, domain, request): + """Return false if cookies should not be returned, given cookie domain. + + This is here as an optimization, to remove the need for checking every + cookie with a particular domain (which may involve reading many files). 
+ The default implementations of domain_return_ok and path_return_ok + (return True) leave all the work to return_ok. + + If domain_return_ok returns true for the cookie domain, path_return_ok + is called for the cookie path. Otherwise, path_return_ok and return_ok + are never called for that cookie domain. If path_return_ok returns + true, return_ok is called with the Cookie object itself for a full + check. Otherwise, return_ok is never called for that cookie path. + + Note that domain_return_ok is called for every *cookie* domain, not + just for the *request* domain. For example, the function might be + called with both ".acme.com" and "www.acme.com" if the request domain + is "www.acme.com". The same goes for path_return_ok. + + For argument documentation, see the docstring for return_ok. + + """ + return True + + def path_return_ok(self, path, request): + """Return false if cookies should not be returned, given cookie path. + + See the docstring for domain_return_ok. + + """ + return True + + +class DefaultCookiePolicy(CookiePolicy): + """Implements the standard rules for accepting and returning cookies. + + Both RFC 2965 and Netscape cookies are covered. RFC 2965 handling is + switched off by default. + + The easiest way to provide your own policy is to override this class and + call its methods in your overriden implementations before adding your own + additional checks. + + import mechanize + class MyCookiePolicy(mechanize.DefaultCookiePolicy): + def set_ok(self, cookie, request): + if not mechanize.DefaultCookiePolicy.set_ok( + self, cookie, request): + return False + if i_dont_want_to_store_this_cookie(): + return False + return True + + In addition to the features required to implement the CookiePolicy + interface, this class allows you to block and allow domains from setting + and receiving cookies. 
There are also some strictness switches that allow
    you to tighten up the rather loose Netscape protocol rules a little bit (at
    the cost of blocking some benign cookies).

    A domain blacklist and whitelist is provided (both off by default).  Only
    domains not in the blacklist and present in the whitelist (if the whitelist
    is active) participate in cookie setting and returning.  Use the
    blocked_domains constructor argument, and blocked_domains and
    set_blocked_domains methods (and the corresponding argument and methods for
    allowed_domains).  If you set a whitelist, you can turn it off again by
    setting it to None.

    Domains in block or allow lists that do not start with a dot must
    string-compare equal.  For example, "acme.com" matches a blacklist entry of
    "acme.com", but "www.acme.com" does not.  Domains that do start with a dot
    are matched by more specific domains too.  For example, both "www.acme.com"
    and "www.munitions.acme.com" match ".acme.com" (but "acme.com" itself does
    not).  IP addresses are an exception, and must match exactly.  For example,
    if blocked_domains contains "192.168.1.2" and ".168.1.2" 192.168.1.2 is
    blocked, but 193.168.1.2 is not.

    Additional Public Attributes:

    General strictness switches

    strict_domain: don't allow sites to set two-component domains with
     country-code top-level domains like .co.uk, .gov.uk, .co.nz. etc.
     This is far from perfect and isn't guaranteed to work!

    RFC 2965 protocol strictness switches

    strict_rfc2965_unverifiable: follow RFC 2965 rules on unverifiable
     transactions (usually, an unverifiable transaction is one resulting from
     a redirect or an image hosted on another site); if this is false, cookies
     are NEVER blocked on the basis of verifiability

    Netscape protocol strictness switches

    strict_ns_unverifiable: apply RFC 2965 rules on unverifiable transactions
     even to Netscape cookies
    strict_ns_domain: flags indicating how strict to be with domain-matching
     rules for Netscape cookies:
     DomainStrictNoDots: when setting cookies, host prefix must not contain a
      dot (e.g. www.foo.bar.com can't set a cookie for .bar.com, because
      www.foo contains a dot)
     DomainStrictNonDomain: cookies that did not explicitly specify a Domain
      cookie-attribute can only be returned to a domain that string-compares
      equal to the domain that set the cookie (e.g. rockets.acme.com won't
      be returned cookies from acme.com that had no Domain cookie-attribute)
     DomainRFC2965Match: when setting cookies, require a full RFC 2965
      domain-match
     DomainLiberal and DomainStrict are the most useful combinations of the
      above flags, for convenience
    strict_ns_set_initial_dollar: ignore cookies in Set-Cookie: headers that
     have names starting with '$'
    strict_ns_set_path: don't allow setting cookies whose path doesn't
     path-match request URI

    """

    # bit flags for strict_ns_domain (may be OR'd together)
    DomainStrictNoDots = 1
    DomainStrictNonDomain = 2
    DomainRFC2965Match = 4

    DomainLiberal = 0
    DomainStrict = DomainStrictNoDots|DomainStrictNonDomain

    def __init__(self,
                 blocked_domains=None, allowed_domains=None,
                 netscape=True, rfc2965=False,
                 # WARNING: this argument will change or go away if is not
                 # accepted into the Python standard library in this form!
                 # default, ie. treat 2109 as netscape iff not rfc2965
                 rfc2109_as_netscape=None,
                 hide_cookie2=False,
                 strict_domain=False,
                 strict_rfc2965_unverifiable=True,
                 strict_ns_unverifiable=False,
                 strict_ns_domain=DomainLiberal,
                 strict_ns_set_initial_dollar=False,
                 strict_ns_set_path=False,
                 ):
        """
        Constructor arguments should be used as keyword arguments only.

        blocked_domains: sequence of domain names that we never accept cookies
         from, nor return cookies to
        allowed_domains: if not None, this is a sequence of the only domains
         for which we accept and return cookies

        For other arguments, see CookiePolicy.__doc__ and
        DefaultCookiePolicy.__doc__..

        """
        self.netscape = netscape
        self.rfc2965 = rfc2965
        self.rfc2109_as_netscape = rfc2109_as_netscape
        self.hide_cookie2 = hide_cookie2
        self.strict_domain = strict_domain
        self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
        self.strict_ns_unverifiable = strict_ns_unverifiable
        self.strict_ns_domain = strict_ns_domain
        self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
        self.strict_ns_set_path = strict_ns_set_path

        # blacklist is always a tuple; whitelist may be None (= disabled)
        if blocked_domains is not None:
            self._blocked_domains = tuple(blocked_domains)
        else:
            self._blocked_domains = ()

        if allowed_domains is not None:
            allowed_domains = tuple(allowed_domains)
        self._allowed_domains = allowed_domains

    def blocked_domains(self):
        """Return the sequence of blocked domains (as a tuple)."""
        return self._blocked_domains
    def set_blocked_domains(self, blocked_domains):
        """Set the sequence of blocked domains."""
        self._blocked_domains = tuple(blocked_domains)

    def is_blocked(self, domain):
        # True when domain matches any entry of the user block-list.
        for blocked_domain in self._blocked_domains:
            if user_domain_match(domain, blocked_domain):
                return True
        return False

    def allowed_domains(self):
        """Return None, or the sequence of allowed domains (as a tuple)."""
        return self._allowed_domains
    def set_allowed_domains(self, allowed_domains):
        """Set the sequence of allowed domains, or None."""
        if allowed_domains is not None:
            allowed_domains = tuple(allowed_domains)
        self._allowed_domains = allowed_domains

    def is_not_allowed(self, domain):
        # A None whitelist means the allow-list feature is switched off.
        if self._allowed_domains is None:
            return False
        for allowed_domain in self._allowed_domains:
            if user_domain_match(domain, allowed_domain):
                return False
        return True

    def set_ok(self, cookie, request):
        """
        If you override set_ok, be sure to call this method.  If it returns
        false, so should your subclass (assuming your subclass wants to be more
        strict about which cookies to accept).

        """
        debug(" - checking cookie %s", cookie)

        assert cookie.name is not None

        # dispatch to the per-aspect set_ok_* checks; all must pass
        for n in "version", "verifiability", "name", "path", "domain", "port":
            fn_name = "set_ok_"+n
            fn = getattr(self, fn_name)
            if not fn(cookie, request):
                return False

        return True

    def set_ok_version(self, cookie, request):
        if cookie.version is None:
            # Version is always set to 0 by parse_ns_headers if it's a Netscape
            # cookie, so this must be an invalid RFC 2965 cookie.
            debug("   Set-Cookie2 without version attribute (%s)", cookie)
            return False
        if cookie.version > 0 and not self.rfc2965:
            debug("   RFC 2965 cookies are switched off")
            return False
        elif cookie.version == 0 and not self.netscape:
            debug("   Netscape cookies are switched off")
            return False
        return True

    def set_ok_verifiability(self, cookie, request):
        if request_is_unverifiable(request) and is_third_party(request):
            if cookie.version > 0 and self.strict_rfc2965_unverifiable:
                debug("   third-party RFC 2965 cookie during "
                      "unverifiable transaction")
                return False
            elif cookie.version == 0 and self.strict_ns_unverifiable:
                debug("   third-party Netscape cookie during "
                      "unverifiable transaction")
                return False
        return True

    def set_ok_name(self, cookie, request):
        # Try and stop servers setting V0 cookies designed to hack other
        # servers that know both V0 and V1 protocols.
        if (cookie.version == 0 and self.strict_ns_set_initial_dollar and
            cookie.name.startswith("$")):
            debug("   illegal name (starts with '$'): '%s'", cookie.name)
            return False
        return True

    def set_ok_path(self, cookie, request):
        if cookie.path_specified:
            req_path = request_path(request)
            if ((cookie.version > 0 or
                 (cookie.version == 0 and self.strict_ns_set_path)) and
                not req_path.startswith(cookie.path)):
                debug("   path attribute %s is not a prefix of request "
                      "path %s", cookie.path, req_path)
                return False
        return True

    def set_ok_countrycode_domain(self, cookie, request):
        """Return False if explicit cookie domain is not acceptable.

        Called by set_ok_domain, for convenience of overriding by
        subclasses.

        """
        if cookie.domain_specified and self.strict_domain:
            domain = cookie.domain
            # since domain was specified, we know that:
            assert domain.startswith(".")
            if domain.count(".") == 2:
                # domain like .foo.bar
                i = domain.rfind(".")
                tld = domain[i+1:]
                sld = domain[1:i]
                if (sld.lower() in [
                    "co", "ac",
                    "com", "edu", "org", "net", "gov", "mil", "int",
                    "aero", "biz", "cat", "coop", "info", "jobs", "mobi",
                    "museum", "name", "pro", "travel",
                    ] and
                    len(tld) == 2):
                    # domain like .co.uk
                    return False
        return True

    def set_ok_domain(self, cookie, request):
        if self.is_blocked(cookie.domain):
            debug("   domain %s is in user block-list", cookie.domain)
            return False
        if self.is_not_allowed(cookie.domain):
            debug("   domain %s is not in user allow-list", cookie.domain)
            return False
        if not self.set_ok_countrycode_domain(cookie, request):
            debug("   country-code second level domain %s", cookie.domain)
            return False
        if cookie.domain_specified:
            req_host, erhn = eff_request_host_lc(request)
            domain = cookie.domain
            if domain.startswith("."):
                undotted_domain = domain[1:]
            else:
                undotted_domain = domain
            embedded_dots = (undotted_domain.find(".") >= 0)
            if not embedded_dots and domain != ".local":
                debug("   non-local domain %s contains no embedded dot",
                      domain)
                return False
            if cookie.version == 0:
                if (not erhn.endswith(domain) and
                    (not erhn.startswith(".") and
                     not ("."+erhn).endswith(domain))):
                    debug("   effective request-host %s (even with added "
                          "initial dot) does not end end with %s",
                          erhn, domain)
                    return False
            if (cookie.version > 0 or
                (self.strict_ns_domain & self.DomainRFC2965Match)):
                if not domain_match(erhn, domain):
                    debug("   effective request-host %s does not domain-match "
                          "%s", erhn, domain)
                    return False
            if (cookie.version > 0 or
                (self.strict_ns_domain & self.DomainStrictNoDots)):
                # reject when the host part left of the domain suffix itself
                # contains a dot (e.g. www.foo setting for .bar.com)
                host_prefix = req_host[:-len(domain)]
                if (host_prefix.find(".") >= 0 and
                    not IPV4_RE.search(req_host)):
                    debug("   host prefix %s for domain %s contains a dot",
                          host_prefix, domain)
                    return False
        return True

    def set_ok_port(self, cookie, request):
        if cookie.port_specified:
            req_port = request_port(request)
            if req_port is None:
                req_port = "80"
            else:
                req_port = str(req_port)
            # the request port must appear in the cookie's port list
            for p in cookie.port.split(","):
                try:
                    int(p)
                except ValueError:
                    debug("   bad port %s (not numeric)", p)
                    return False
                if p == req_port:
                    break
            else:
                debug("   request port (%s) not found in %s",
                      req_port, cookie.port)
                return False
        return True

    def return_ok(self, cookie, request):
        """
        If you override return_ok, be sure to call this method.  If it returns
        false, so should your subclass (assuming your subclass wants to be more
        strict about which cookies to return).

        """
        # Path has already been checked by path_return_ok, and domain blocking
        # done by domain_return_ok.
        debug(" - checking cookie %s", cookie)

        for n in ("version", "verifiability", "secure", "expires", "port",
                  "domain"):
            fn_name = "return_ok_"+n
            fn = getattr(self, fn_name)
            if not fn(cookie, request):
                return False
        return True

    def return_ok_version(self, cookie, request):
        if cookie.version > 0 and not self.rfc2965:
            debug("   RFC 2965 cookies are switched off")
            return False
        elif cookie.version == 0 and not self.netscape:
            debug("   Netscape cookies are switched off")
            return False
        return True

    def return_ok_verifiability(self, cookie, request):
        if request_is_unverifiable(request) and is_third_party(request):
            if cookie.version > 0 and self.strict_rfc2965_unverifiable:
                debug("   third-party RFC 2965 cookie during unverifiable "
                      "transaction")
                return False
            elif cookie.version == 0 and self.strict_ns_unverifiable:
                debug("   third-party Netscape cookie during unverifiable "
                      "transaction")
                return False
        return True

    def return_ok_secure(self, cookie, request):
        # secure cookies may only travel over https
        if cookie.secure and request.get_type() != "https":
            debug("   secure cookie with non-secure request")
            return False
        return True

    def return_ok_expires(self, cookie, request):
        # self._now is stamped by CookieJar before the return_ok_* checks run
        if cookie.is_expired(self._now):
            debug("   cookie expired")
            return False
        return True

    def return_ok_port(self, cookie, request):
        if cookie.port:
            req_port = request_port(request)
            if req_port is None:
                req_port = "80"
            for p in cookie.port.split(","):
                if p == req_port:
                    break
            else:
                debug("   request port %s does not match cookie port %s",
                      req_port, cookie.port)
                return False
        return True

    def return_ok_domain(self, cookie, request):
        req_host, erhn = eff_request_host_lc(request)
        domain = cookie.domain

        # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
        if (cookie.version == 0 and
            (self.strict_ns_domain & self.DomainStrictNonDomain) and
            not cookie.domain_specified and domain != erhn):
            debug("   cookie with unspecified domain does not string-compare "
                  "equal to request domain")
            return False

        if cookie.version > 0 and not domain_match(erhn, domain):
            debug("   effective request-host name %s does not domain-match "
                  "RFC 2965 cookie domain %s", erhn, domain)
            return False
        if cookie.version == 0 and not ("."+erhn).endswith(domain):
            debug("   request-host %s does not match Netscape cookie domain "
                  "%s", req_host, domain)
            return False
        return True

    def domain_return_ok(self, domain, request):
        # Liberal check of domain.  This is here as an optimization to avoid
        # having to load lots of MSIE cookie files unless necessary.

        # Munge req_host and erhn to always start with a dot, so as to err on
        # the side of letting cookies through.
        dotted_req_host, dotted_erhn = eff_request_host_lc(request)
        if not dotted_req_host.startswith("."):
            dotted_req_host = "."+dotted_req_host
        if not dotted_erhn.startswith("."):
            dotted_erhn = "."+dotted_erhn
        if not (dotted_req_host.endswith(domain) or
                dotted_erhn.endswith(domain)):
            #debug("   request domain %s does not match cookie domain %s",
            #      req_host, domain)
            return False

        if self.is_blocked(domain):
            debug("   domain %s is in user block-list", domain)
            return False
        if self.is_not_allowed(domain):
            debug("   domain %s is not in user allow-list", domain)
            return False

        return True

    def path_return_ok(self, path, request):
        debug("- checking cookie path=%s", path)
        req_path = request_path(request)
        if not req_path.startswith(path):
            debug("  %s does not path-match %s", req_path, path)
            return False
        return True


def vals_sorted_by_key(adict):
    # Return the dict's values ordered by sorted key (Python 2 idiom: keys()
    # returns a list, map() returns a list).
    keys = adict.keys()
    keys.sort()
    return map(adict.get, keys)

class MappingIterator:
    """Iterates over nested mapping, depth-first, in sorted order by key."""
    def __init__(self, mapping):
        self._s = [(vals_sorted_by_key(mapping), 0, None)]  # LIFO stack

    def __iter__(self): return self

    def next(self):
        # this is hairy because of lack of generators
        while 1:
            try:
                vals, i, prev_item = self._s.pop()
            except IndexError:
                # stack exhausted: iteration is complete
                raise StopIteration()
            if i < len(vals):
                item = vals[i]
                i = i + 1
                self._s.append((vals, i, prev_item))
                try:
                    item.items
                except AttributeError:
                    # non-mapping
                    break
                else:
                    # mapping
                    self._s.append((vals_sorted_by_key(item), 0, item))
                    continue
        return item


# Used as second parameter to dict.get method, to distinguish absent
# dict key from one with a None value.
class Absent: pass

class CookieJar:
    """Collection of HTTP cookies.

    You may not need to know about this class: try mechanize.urlopen().

    The major methods are extract_cookies and add_cookie_header; these are all
    you are likely to need.

    CookieJar supports the iterator protocol:

    for cookie in cookiejar:
        # do something with cookie

    Methods:

    add_cookie_header(request)
    extract_cookies(response, request)
    get_policy()
    set_policy(policy)
    cookies_for_request(request)
    make_cookies(response, request)
    set_cookie_if_ok(cookie, request)
    set_cookie(cookie)
    clear_session_cookies()
    clear_expired_cookies()
    clear(domain=None, path=None, name=None)

    Public attributes

    policy: CookiePolicy object

    """

    non_word_re = re.compile(r"\W")
    quote_re = re.compile(r"([\"\\])")
    strict_domain_re = re.compile(r"\.?[^.]*")
    domain_re = re.compile(r"[^.]*")
    dots_re = re.compile(r"^\.+")

    def __init__(self, policy=None):
        """
        See CookieJar.__doc__ for argument documentation.

        """
        if policy is None:
            policy = DefaultCookiePolicy()
        self._policy = policy

        # nested mapping: domain -> path -> name -> Cookie
        self._cookies = {}

        # for __getitem__ iteration in pre-2.2 Pythons
        self._prev_getitem_index = 0

    def get_policy(self):
        # Return the CookiePolicy in use.
        return self._policy

    def set_policy(self, policy):
        # Replace the CookiePolicy in use.
        self._policy = policy

    def _cookies_for_domain(self, domain, request):
        # Collect the cookies stored under one domain that the policy allows
        # to be returned for this request.
        cookies = []
        if not self._policy.domain_return_ok(domain, request):
            return []
        debug("Checking %s for cookies to return", domain)
        cookies_by_path = self._cookies[domain]
        for path in cookies_by_path.keys():
            if not self._policy.path_return_ok(path, request):
                continue
            cookies_by_name = cookies_by_path[path]
            for cookie in cookies_by_name.values():
                if not self._policy.return_ok(cookie, request):
                    debug("   not returning cookie")
                    continue
                debug("   it's a match")
                cookies.append(cookie)
        return cookies

    def cookies_for_request(self, request):
        """Return a list of cookies to be returned to server.

        The returned list of cookie instances is sorted in the order they
        should appear in the Cookie: header for return to the server.

        See add_cookie_header.__doc__ for the interface required of the
        request argument.

        New in version 0.1.10

        """
        # stamp the shared "now" so the policy's expiry checks are consistent
        self._policy._now = self._now = int(time.time())
        cookies = self._cookies_for_request(request)
        # add cookies in order of most specific (i.e. longest) path first
        def decreasing_size(a, b): return cmp(len(b.path), len(a.path))
        cookies.sort(decreasing_size)
        return cookies

    def _cookies_for_request(self, request):
        """Return a list of cookies to be returned to server."""
        # this method still exists (alongside cookies_for_request) because it
        # is part of an implied protected interface for subclasses of cookiejar
        # XXX document that implied interface, or provide another way of
        # implementing cookiejars than subclassing
        cookies = []
        for domain in self._cookies.keys():
            cookies.extend(self._cookies_for_domain(domain, request))
        return cookies

    def _cookie_attrs(self, cookies):
        """Return a list of cookie-attributes to be returned to server.

        The $Version attribute is also added when appropriate (currently only
        once per request).

        >>> jar = CookieJar()
        >>> ns_cookie = Cookie(0, "foo", '"bar"', None, False,
        ...                    "example.com", False, False,
        ...                    "/", False, False, None, True,
        ...                    None, None, {})
        >>> jar._cookie_attrs([ns_cookie])
        ['foo="bar"']
        >>> rfc2965_cookie = Cookie(1, "foo", "bar", None, False,
        ...                         ".example.com", True, False,
        ...                         "/", False, False, None, True,
        ...                         None, None, {})
        >>> jar._cookie_attrs([rfc2965_cookie])
        ['$Version=1', 'foo=bar', '$Domain="example.com"']

        """
        version_set = False

        attrs = []
        for cookie in cookies:
            # set version of Cookie header
            # XXX
            # What should it be if multiple matching Set-Cookie headers have
            #  different versions themselves?
            # Answer: there is no answer; was supposed to be settled by
            #  RFC 2965 errata, but that may never appear...
            version = cookie.version
            if not version_set:
                version_set = True
                if version > 0:
                    attrs.append("$Version=%s" % version)

            # quote cookie value if necessary
            # (not for Netscape protocol, which already has any quotes
            #  intact, due to the poorly-specified Netscape Cookie: syntax)
            if ((cookie.value is not None) and
                self.non_word_re.search(cookie.value) and version > 0):
                value = self.quote_re.sub(r"\\\1", cookie.value)
            else:
                value = cookie.value

            # add cookie-attributes to be returned in Cookie header
            if cookie.value is None:
                attrs.append(cookie.name)
            else:
                attrs.append("%s=%s" % (cookie.name, value))
            if version > 0:
                if cookie.path_specified:
                    attrs.append('$Path="%s"' % cookie.path)
                if cookie.domain.startswith("."):
                    domain = cookie.domain
                    if (not cookie.domain_initial_dot and
                        domain.startswith(".")):
                        # server sent the Domain without a leading dot, so
                        # echo it back the same way
                        domain = domain[1:]
                    attrs.append('$Domain="%s"' % domain)
                if cookie.port is not None:
                    p = "$Port"
                    if cookie.port_specified:
                        p = p + ('="%s"' % cookie.port)
                    attrs.append(p)

        return attrs

    def add_cookie_header(self, request):
        """Add correct Cookie: header to request (mechanize.Request object).

        The Cookie2 header is also added unless policy.hide_cookie2 is true.

        The request object (usually a mechanize.Request instance) must support
        the methods get_full_url, get_host, is_unverifiable, get_type,
        has_header, get_header, header_items and add_unredirected_header, as
        documented by urllib2.

        """
        debug("add_cookie_header")
        cookies = self.cookies_for_request(request)

        attrs = self._cookie_attrs(cookies)
        if attrs:
            if not request.has_header("Cookie"):
                request.add_unredirected_header("Cookie", "; ".join(attrs))

        # if necessary, advertise that we know RFC 2965
        if self._policy.rfc2965 and not self._policy.hide_cookie2:
            for cookie in cookies:
                if cookie.version != 1 and not request.has_header("Cookie2"):
                    request.add_unredirected_header("Cookie2", '$Version="1"')
                    break

        self.clear_expired_cookies()

    def _normalized_cookie_tuples(self, attrs_set):
        """Return list of tuples containing normalised cookie information.

        attrs_set is the list of lists of key,value pairs extracted from
        the Set-Cookie or Set-Cookie2 headers.

        Tuples are name, value, standard, rest, where name and value are the
        cookie name and value, standard is a dictionary containing the standard
        cookie-attributes (discard, secure, version, expires or max-age,
        domain, path and port) and rest is a dictionary containing the rest of
        the cookie-attributes.

        """
        cookie_tuples = []

        boolean_attrs = "discard", "secure"
        value_attrs = ("version",
                       "expires", "max-age",
                       "domain", "path", "port",
                       "comment", "commenturl")

        for cookie_attrs in attrs_set:
            name, value = cookie_attrs[0]

            # Build dictionary of standard cookie-attributes (standard) and
            # dictionary of other cookie-attributes (rest).

            # Note: expiry time is normalised to seconds since epoch.  V0
            # cookies should have the Expires cookie-attribute, and V1 cookies
            # should have Max-Age, but since V1 includes RFC 2109 cookies (and
            # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
            # accept either (but prefer Max-Age).
            max_age_set = False

            bad_cookie = False

            standard = {}
            rest = {}
            for k, v in cookie_attrs[1:]:
                lc = k.lower()
                # don't lose case distinction for unknown fields
                if lc in value_attrs or lc in boolean_attrs:
                    k = lc
                if k in boolean_attrs and v is None:
                    # boolean cookie-attribute is present, but has no value
                    # (like "discard", rather than "port=80")
                    v = True
                if standard.has_key(k):
                    # only first value is significant
                    continue
                if k == "domain":
                    if v is None:
                        debug("   missing value for domain attribute")
                        bad_cookie = True
                        break
                    # RFC 2965 section 3.3.3
                    v = v.lower()
                if k == "expires":
                    if max_age_set:
                        # Prefer max-age to expires (like Mozilla)
                        continue
                    if v is None:
                        debug("   missing or invalid value for expires "
                              "attribute: treating as session cookie")
                        continue
                if k == "max-age":
                    max_age_set = True
                    if v is None:
                        debug("   missing value for max-age attribute")
                        bad_cookie = True
                        break
                    try:
                        v = int(v)
                    except ValueError:
                        debug("   missing or invalid (non-numeric) value for "
                              "max-age attribute")
                        bad_cookie = True
                        break
                    # convert RFC 2965 Max-Age to seconds since epoch
                    # XXX Strictly you're supposed to follow RFC 2616
                    #   age-calculation rules.  Remember that zero Max-Age is
                    #   a request to discard (old and new) cookie, though.
+ k = "expires" + v = self._now + v + if (k in value_attrs) or (k in boolean_attrs): + if (v is None and + k not in ["port", "comment", "commenturl"]): + debug(" missing value for %s attribute" % k) + bad_cookie = True + break + standard[k] = v + else: + rest[k] = v + + if bad_cookie: + continue + + cookie_tuples.append((name, value, standard, rest)) + + return cookie_tuples + + def _cookie_from_cookie_tuple(self, tup, request): + # standard is dict of standard cookie-attributes, rest is dict of the + # rest of them + name, value, standard, rest = tup + + domain = standard.get("domain", Absent) + path = standard.get("path", Absent) + port = standard.get("port", Absent) + expires = standard.get("expires", Absent) + + # set the easy defaults + version = standard.get("version", None) + if version is not None: + try: + version = int(version) + except ValueError: + return None # invalid version, ignore cookie + secure = standard.get("secure", False) + # (discard is also set if expires is Absent) + discard = standard.get("discard", False) + comment = standard.get("comment", None) + comment_url = standard.get("commenturl", None) + + # set default path + if path is not Absent and path != "": + path_specified = True + path = escape_path(path) + else: + path_specified = False + path = request_path(request) + i = path.rfind("/") + if i != -1: + if version == 0: + # Netscape spec parts company from reality here + path = path[:i] + else: + path = path[:i+1] + if len(path) == 0: path = "/" + + # set default domain + domain_specified = domain is not Absent + # but first we have to remember whether it starts with a dot + domain_initial_dot = False + if domain_specified: + domain_initial_dot = bool(domain.startswith(".")) + if domain is Absent: + req_host, erhn = eff_request_host_lc(request) + domain = erhn + elif not domain.startswith("."): + domain = "."+domain + + # set default port + port_specified = False + if port is not Absent: + if port is None: + # Port attr present, but 
has no value: default to request port. + # Cookie should then only be sent back on that port. + port = request_port(request) + else: + port_specified = True + port = re.sub(r"\s+", "", port) + else: + # No port attr present. Cookie can be sent back on any port. + port = None + + # set default expires and discard + if expires is Absent: + expires = None + discard = True + + return Cookie(version, + name, value, + port, port_specified, + domain, domain_specified, domain_initial_dot, + path, path_specified, + secure, + expires, + discard, + comment, + comment_url, + rest) + + def _cookies_from_attrs_set(self, attrs_set, request): + cookie_tuples = self._normalized_cookie_tuples(attrs_set) + + cookies = [] + for tup in cookie_tuples: + cookie = self._cookie_from_cookie_tuple(tup, request) + if cookie: cookies.append(cookie) + return cookies + + def _process_rfc2109_cookies(self, cookies): + if self._policy.rfc2109_as_netscape is None: + rfc2109_as_netscape = not self._policy.rfc2965 + else: + rfc2109_as_netscape = self._policy.rfc2109_as_netscape + for cookie in cookies: + if cookie.version == 1: + cookie.rfc2109 = True + if rfc2109_as_netscape: + # treat 2109 cookies as Netscape cookies rather than + # as RFC2965 cookies + cookie.version = 0 + + def _make_cookies(self, response, request): + # get cookie-attributes for RFC 2965 and Netscape protocols + headers = response.info() + rfc2965_hdrs = headers.getheaders("Set-Cookie2") + ns_hdrs = headers.getheaders("Set-Cookie") + + rfc2965 = self._policy.rfc2965 + netscape = self._policy.netscape + + if ((not rfc2965_hdrs and not ns_hdrs) or + (not ns_hdrs and not rfc2965) or + (not rfc2965_hdrs and not netscape) or + (not netscape and not rfc2965)): + return [] # no relevant cookie headers: quick exit + + try: + cookies = self._cookies_from_attrs_set( + split_header_words(rfc2965_hdrs), request) + except: + reraise_unmasked_exceptions() + cookies = [] + + if ns_hdrs and netscape: + try: + # RFC 2109 and Netscape cookies + 
ns_cookies = self._cookies_from_attrs_set( + parse_ns_headers(ns_hdrs), request) + except: + reraise_unmasked_exceptions() + ns_cookies = [] + self._process_rfc2109_cookies(ns_cookies) + + # Look for Netscape cookies (from Set-Cookie headers) that match + # corresponding RFC 2965 cookies (from Set-Cookie2 headers). + # For each match, keep the RFC 2965 cookie and ignore the Netscape + # cookie (RFC 2965 section 9.1). Actually, RFC 2109 cookies are + # bundled in with the Netscape cookies for this purpose, which is + # reasonable behaviour. + if rfc2965: + lookup = {} + for cookie in cookies: + lookup[(cookie.domain, cookie.path, cookie.name)] = None + + def no_matching_rfc2965(ns_cookie, lookup=lookup): + key = ns_cookie.domain, ns_cookie.path, ns_cookie.name + return not lookup.has_key(key) + ns_cookies = filter(no_matching_rfc2965, ns_cookies) + + if ns_cookies: + cookies.extend(ns_cookies) + + return cookies + + def make_cookies(self, response, request): + """Return sequence of Cookie objects extracted from response object. + + See extract_cookies.__doc__ for the interface required of the + response and request arguments. + + """ + self._policy._now = self._now = int(time.time()) + return [cookie for cookie in self._make_cookies(response, request) + if cookie.expires is None or not cookie.expires <= self._now] + + def set_cookie_if_ok(self, cookie, request): + """Set a cookie if policy says it's OK to do so. + + cookie: mechanize.Cookie instance + request: see extract_cookies.__doc__ for the required interface + + """ + self._policy._now = self._now = int(time.time()) + + if self._policy.set_ok(cookie, request): + self.set_cookie(cookie) + + def set_cookie(self, cookie): + """Set a cookie, without checking whether or not it should be set. 
+ + cookie: mechanize.Cookie instance + """ + c = self._cookies + if not c.has_key(cookie.domain): c[cookie.domain] = {} + c2 = c[cookie.domain] + if not c2.has_key(cookie.path): c2[cookie.path] = {} + c3 = c2[cookie.path] + c3[cookie.name] = cookie + + def extract_cookies(self, response, request): + """Extract cookies from response, where allowable given the request. + + Look for allowable Set-Cookie: and Set-Cookie2: headers in the response + object passed as argument. Any of these headers that are found are + used to update the state of the object (subject to the policy.set_ok + method's approval). + + The response object (usually be the result of a call to + mechanize.urlopen, or similar) should support an info method, which + returns a mimetools.Message object (in fact, the 'mimetools.Message + object' may be any object that provides a getheaders method). + + The request object (usually a mechanize.Request instance) must support + the methods get_full_url, get_type, get_host, and is_unverifiable, as + documented by mechanize, and the port attribute (the port number). The + request is used to set default values for cookie-attributes as well as + for checking that the cookie is OK to be set. + + """ + debug("extract_cookies: %s", response.info()) + self._policy._now = self._now = int(time.time()) + + for cookie in self._make_cookies(response, request): + if cookie.expires is not None and cookie.expires <= self._now: + # Expiry date in past is request to delete cookie. This can't be + # in DefaultCookiePolicy, because can't delete cookies there. + try: + self.clear(cookie.domain, cookie.path, cookie.name) + except KeyError: + pass + debug("Expiring cookie, domain='%s', path='%s', name='%s'", + cookie.domain, cookie.path, cookie.name) + elif self._policy.set_ok(cookie, request): + debug(" setting cookie: %s", cookie) + self.set_cookie(cookie) + + def clear(self, domain=None, path=None, name=None): + """Clear some cookies. 
+ + Invoking this method without arguments will clear all cookies. If + given a single argument, only cookies belonging to that domain will be + removed. If given two arguments, cookies belonging to the specified + path within that domain are removed. If given three arguments, then + the cookie with the specified name, path and domain is removed. + + Raises KeyError if no matching cookie exists. + + """ + if name is not None: + if (domain is None) or (path is None): + raise ValueError( + "domain and path must be given to remove a cookie by name") + del self._cookies[domain][path][name] + elif path is not None: + if domain is None: + raise ValueError( + "domain must be given to remove cookies by path") + del self._cookies[domain][path] + elif domain is not None: + del self._cookies[domain] + else: + self._cookies = {} + + def clear_session_cookies(self): + """Discard all session cookies. + + Discards all cookies held by object which had either no Max-Age or + Expires cookie-attribute or an explicit Discard cookie-attribute, or + which otherwise have ended up with a true discard attribute. For + interactive browsers, the end of a session usually corresponds to + closing the browser window. + + Note that the save method won't save session cookies anyway, unless you + ask otherwise by passing a true ignore_discard argument. + + """ + for cookie in self: + if cookie.discard: + self.clear(cookie.domain, cookie.path, cookie.name) + + def clear_expired_cookies(self): + """Discard all expired cookies. + + You probably don't need to call this method: expired cookies are never + sent back to the server (provided you're using DefaultCookiePolicy), + this method is called by CookieJar itself every so often, and the save + method won't save expired cookies anyway (unless you ask otherwise by + passing a true ignore_expires argument). 
+ + """ + now = time.time() + for cookie in self: + if cookie.is_expired(now): + self.clear(cookie.domain, cookie.path, cookie.name) + + def __getitem__(self, i): + if i == 0: + self._getitem_iterator = self.__iter__() + elif self._prev_getitem_index != i-1: raise IndexError( + "CookieJar.__getitem__ only supports sequential iteration") + self._prev_getitem_index = i + try: + return self._getitem_iterator.next() + except StopIteration: + raise IndexError() + + def __iter__(self): + return MappingIterator(self._cookies) + + def __len__(self): + """Return number of contained cookies.""" + i = 0 + for cookie in self: i = i + 1 + return i + + def __repr__(self): + r = [] + for cookie in self: r.append(repr(cookie)) + return "<%s[%s]>" % (self.__class__, ", ".join(r)) + + def __str__(self): + r = [] + for cookie in self: r.append(str(cookie)) + return "<%s[%s]>" % (self.__class__, ", ".join(r)) + + +class LoadError(Exception): pass + +class FileCookieJar(CookieJar): + """CookieJar that can be loaded from and saved to a file. + + Additional methods + + save(filename=None, ignore_discard=False, ignore_expires=False) + load(filename=None, ignore_discard=False, ignore_expires=False) + revert(filename=None, ignore_discard=False, ignore_expires=False) + + Additional public attributes + + filename: filename for loading and saving cookies + + Additional public readable attributes + + delayload: request that cookies are lazily loaded from disk; this is only + a hint since this only affects performance, not behaviour (unless the + cookies on disk are changing); a CookieJar object may ignore it (in fact, + only MSIECookieJar lazily loads cookies at the moment) + + """ + + def __init__(self, filename=None, delayload=False, policy=None): + """ + See FileCookieJar.__doc__ for argument documentation. + + Cookies are NOT loaded from the named file until either the load or + revert method is called. 
+ + """ + CookieJar.__init__(self, policy) + if filename is not None and not isstringlike(filename): + raise ValueError("filename must be string-like") + self.filename = filename + self.delayload = bool(delayload) + + def save(self, filename=None, ignore_discard=False, ignore_expires=False): + """Save cookies to a file. + + filename: name of file in which to save cookies + ignore_discard: save even cookies set to be discarded + ignore_expires: save even cookies that have expired + + The file is overwritten if it already exists, thus wiping all its + cookies. Saved cookies can be restored later using the load or revert + methods. If filename is not specified, self.filename is used; if + self.filename is None, ValueError is raised. + + """ + raise NotImplementedError() + + def load(self, filename=None, ignore_discard=False, ignore_expires=False): + """Load cookies from a file. + + Old cookies are kept unless overwritten by newly loaded ones. + + Arguments are as for .save(). + + If filename is not specified, self.filename is used; if self.filename + is None, ValueError is raised. The named file must be in the format + understood by the class, or LoadError will be raised. This format will + be identical to that written by the save method, unless the load format + is not sufficiently well understood (as is the case for MSIECookieJar). + + """ + if filename is None: + if self.filename is not None: filename = self.filename + else: raise ValueError(MISSING_FILENAME_TEXT) + + f = open(filename) + try: + self._really_load(f, filename, ignore_discard, ignore_expires) + finally: + f.close() + + def revert(self, filename=None, + ignore_discard=False, ignore_expires=False): + """Clear all cookies and reload cookies from a saved file. + + Raises LoadError (or IOError) if reversion is not successful; the + object's state will not be altered if this happens. 
+ + """ + if filename is None: + if self.filename is not None: filename = self.filename + else: raise ValueError(MISSING_FILENAME_TEXT) + + old_state = copy.deepcopy(self._cookies) + self._cookies = {} + try: + self.load(filename, ignore_discard, ignore_expires) + except (LoadError, IOError): + self._cookies = old_state + raise diff --git a/BruteXSS/mechanize/_clientcookie.pyc b/BruteXSS/mechanize/_clientcookie.pyc new file mode 100644 index 0000000000000000000000000000000000000000..10186eaf3a8cf8eca59affd012bb56b480dadd8d GIT binary patch literal 58063 zcmeIb3y_>wcHj5)3^2e91`h(ni;u+zkRS#mFeE|lE(t!CgBJ-dImCVg5-SkgPEU8w zpn;k0L4Q32h6dNxlI4{mThS&fODpY4ySAj2Y}xC`N|Y!{BsmYQT(%>{a^+EqlsIvf zomeS5DLZze)aLg;=YEgw0l36)U8+}`{C{j7YkcL)uhygR zXFdPFz%Tvbp(tuZ(M8fq)EJ5uD$zwQ{CX(5IFwxvM;C{)>vhq^b=md$=;He9dL+6y zl3j0zE^dfc_GY&mql+7(Ch2JO=Emqk`=MxEb1b?#7Il9zs#GeW!cEb|O;MAST$`eH zCHicGddH&%HEy<|)<+9lqQ$Mz#jVlew&>!v$UYs37Ve4`w?`MZ`#t^G5nbF7H8(_8 z?v5H8qW3D%m7USWozWZ}-59-3FLy=Fq3Fu)s4=Ry7xzSsvFLpuL6bMx#XV7D+%E2o z8k_CnzU-4N_R0NGW2-&?Sk%~N7yF{dU3L*ijqP@^KWgl-ivv;PZo7COYV5R&2cyO= zyEqs%c1IT<$_n3Og&&F(U~!);o)fYD`QcsdsjHJ`V5V zRy*#gef3VG8F!m=&2HT5@TfUf>+$*Qa<7@HO{%Ee-*hGIH5cQWz7znC7TJ%eghngv zQTcK!y*$RZi@>8<>(XuO?75ZR;zM;YKZT@(NU#@kl{JCweNA2rNtIIgkT4?4) z`0HEbitYQGyf0p6SFd*7X)P?&ib8J2uUt4++-CV!)ve;ne+KXKcY1O3=+Rea&YnI| zOPld+GwO}^G~~H`PBJK`$~<+VXzYoZQ>EKmfb*u6Klnn$s_yPo6p26o#nV* zYcn&1&^{mHR!UrVPM+&4u7W8&p?K1 z_IZ6&JIYUcwRXM9MXT4UEws|Cu1>poxCgt9 zo_cIT|Mj+u&{0^gdbwGnd)4cmZX@k&MTMG`vYCD*9lYq?I~pS!i8trg!mR zu~~;Ww%%!i6QUGpU3oY1gM>yW`K7-~A`Tlx3sLkg7k*ud-W`f=!WoC6^icGLC~8O1 zO{y7=dh4Rz`e<%A+Wju~BheK&C`AlKd*9{d#^~m-GQKwyb*tgKP~>Rzu1f|~H5R?Q zKDxO+x-zDZ-yMl=u8Xd0QY&tb=-UeaUD<5!Hbgh!#ap5V}H7H{}To67M!4O7h z{s)=*D=&~|JcE_H>Ov;TK(#R2!Bfup4dv#8zsqsMB}3f+-r++_w@etnbXx1 zQ_1P->Cc`#eeQfuF#U9MWwul6HqN%ew(jy$&q|*$ntb|3tJm8^rdOMbo$ku%ZWp#P z%)LmqN^jn%S6l75j(x*$Ezi~$YK)rN(d*Xg%~=|0IcJQ4oPEt+Ex}BBRXzsz^cQTg zJoC-o+qH$|rae`|Csc>MT5d1a(yPrzB55uWlo&MX@2xB~lP!8?{TDjSSqm4_`3Y_! 
zOVS6qI(7WgN#QHtK+JP9w{krUad}cx&P9G*on!V+2J3ilCX6nIDo0{uzU?$4fk4r8my;ecxwH!Pd<%`N?i*CRaPslzI1B(WtFPiXv0=O zQx_amvJlAN@g5fd;EIWA%s$OezcvVQ3n01^YWob-MANVmrB7+r&gi>!`hrKA+9tdU zZkOzONS}@~gPFdaFIdTl?!(H>tqOxKtc&(>9{~IR(7XY_21rf?kRDyE_3D?QxZ(MU zBXO->hmL_$l2ej1C*xyJf8ygo0GToo=|sF(TZw0p>ITLJy55b?o{MXZMz@(l$irt@ zDM#Y8Sr^j9j~{o5<|UP&VMS1ak5Fd1)U3DWM4_8dc$fwxnm0wMs-qS&7C}gxG-f>0 zS#0)ND(8Ar0v!|>ak3rS=qx}SqXrY8)X6xWm|3o04q00%6Cs{?Y3h|%tTftDN76V7 zH-I6*2;Oc^9*$#c?bIPSiCBt*EQg>0;{-2Tb0ul*;aCi&)pc0ecbRPI{~yg7+q|}1 zTX=@v^?g0%v!Lj%jlku=w~(e6<7o!%I(=%!)1JBAX&-GaF7;OW#FSFqN8%GmR_8>_Xu$-~~A=}u1^y_U^o2b(Gr zSFULs7MFW9K{`h2?{d{?ACGI3vy*j|YV{95+}|95g?NSH?6T~{OY%c-%rMwuEzVf8+3 zkPHxlg2JXSz@weHqY_CNiNQ%7KzT;fu5L`DJ^$$JLZ^OJG{T#kO>`r}i5O0DABk0- z+^;KscgnZa85V5d!a~)l?>jt6)!AsoDDN;=dEdXt&Fv^JL*$gUb5xYqNo;yb4+BMh zK?HQhs4x`UgrJ-XcNeT+q#+~C+$t~RdA$2xsfhRTA`A@aB^o5CWw3fQ2j2kH)=E`{ z(uU2+KCY6(N@6AZl{`S=0!_Q&h%0a<4=OpRcM^hvlv{0d*2EYkY%QmVzGbZa0uNoF z7!l4QMhvY3bGKG@lJ4M_it8WG*H8 z-W#=U%gB*0&S7T9Au|$J$Zo=&$e0lmF^vYg2S&Z#X}{fUOUYrR%c;EhOh%eCF^%|$ zt0a55o1TyWW6K1@#_HVi!a{YqyI_M@?atMoc>1YNxa8Pfu!Tn=_#1greqV*yu-2D* zRjEs=v{T?kh@H^?Jyj68ZZA~4U@tJNWsZtsh4QK@Wylg_uaq&2Z~61rPku#yvj$p%kMszh|vsYF1tm(A|wwjm9L+Yqe< zlo~Ob2|C%Vv2uQ%XX!?gXlPsIU}f_yAgZczh+blP=1W!^bl~QrZ_p|sz_Wbk2$1(B zF*s_qv6lAl;Jd;0gAU)~ajH>@D%)?*O&w1~d}f@ms}*5LC34(){BBEBTnJHb6ZP|> z;kbl@y#Bu7`5U}(RQDMuNa}Y|&rg%kt$bwyl?_Pkm`-1F?k8qAB>YdObittQb~4M= zj8T>JIf@GaS*jsqi`XH1w@qD}A@%FY+1tZJlaNM4(W6GGlS$o+L7@++Ej6nuHhGl$ zIjsn6 z3W~8l>RuNA0$L&x`&hrNY!S3s#R1gHg=_6-9kf`sl!k06bc9u$-lAh^KpH>W9+48q z7kR!zdS@zd=PhC|r%s=ldi|C2Rq3~@=Uz+BJNXfXNu)eT9wKq@!*V_{_$lMpDbVF! 
zIEdESTPnl+HuHBQl5bszqF+7~pW+6n=A$ulq5UvWWw8xZj~i40xalQ;YWGRYiCBQB z(w3D<3`i(AZOnHW&fR4VTAN*HT3=1kga}%G(UW*Ol^FpXc;c=s0i?H2KF#$$0A_1t!%!+Jeels|j=%8GbB{cDWdG+MJ~VMSm0)pj;?2j7K3zLH zH+A&PTcyjxIV5w+ph_Wi4oRQ)PNR4ET%R_2dGO%D)ajT_T#dtgw}YZhS7=z9UF*8t z+Dc|ESRvYGWl5^4Iw5FO(Ux}qJKmJpxNsrwLBYlue`H4u_=91FCNvDI9qyVbZ2|p!uh%;S!5R_P5;v^m_7&uch<0 z#~8F?Z+|*z!tZi#?&wn!l9Pg#F;hDm#H0XBjrzUbZb|cgo(HDTYPNN2mq>0bjx7JR zWfs?lk3L1G&c9ThnL2aY(DwAq$*FUvPgPI8G?kziOP-+8L{hdR=ajDa-QnUad4W%U zpPy67vF$KW{WXNrP~^jLm79P~##@j9poWsApp5f+rCXzn#$4b@VU$ex*HBa}WA+j2 zD$!C_01WKBAuB9th7l!i5cE%gmVqI4n>ExtnCK*)#+Pr$f=a5XAfTT&ZGt{d^m$kq zn-=9I3H;fDfwjY2X94j)4LU8%N)N~J*|HWWK8o^4A4y)YhWLWX6-jyN&qxPzq@@4x z@A_0VQ7$)O z>ZH9DB6pY(n8v$}qo`VYD~{D@?pxT=Gm*~*MyO#Z|2pW0epdQE$Nr!LEHJ<(n%@?y z^Y}H9R-WdixtX)vqJS|jm^?)<6VbG!D&S}t=wC=a_9c*wp9wHpR;+>k8MTEe8w8L} zfxuQq-4_#M7w2`Br!fbMg!pnq>}jTf*F69vri1_v7hiFXPmWT$#h7#LTPZ z&y9+^im}(k`Jxsh_^rfgYf;N|D;)Lr*CN-N1aQ8obZAcT6tUt~RTo6B#L< zV?tuYjOUiyJ~O3x@=vU!&g>Ob3n|_cvdTA_D7L*uFDI6r8Z9nVKxXk3Q|LD5YRd~f z*anTTLNJss$`Bq*LOwqO#9c3j3b5gmG#it=<>xvW^^unxObinXLx+?p$u2itjZC47 zf}QOC*fO{^Zvn>Qa%Xt~tz0uGpQ%VLGS)B|EV`K@j*rzcl#z^K7-X3O%#0uoIN$6( z?vPbbmux`nQd)PBbmzKSMCe{nO+i2Bma{qzp=139CqoD9HXim;g z#s|*v^?@TXHVwLEO%Niq<7W2qxu+gGa2V5%-aA*UxwlhbwG)<8ncVoL=HM)W(cJiM zp37=`>ainFJ*K?&(zJlNP3?F*+v&jL+s{Bf-O>uqYxURP@Ej}Ebg#rQrto93r%uOn z$dsN{Kd8=AZq5~~nm6#EsJD7d1#B+m0!pgsTQoZqA!s+{72>kDt3aj0bv*Efe-B2IWib1Z_zJopUYh^ zZ}k`iB=59o2biAYbO!{cC;CiOh4zX4TBj(##cVoG*B!N$uY;x^>LvG;=28dAbt1F8 zX4Eg6Pbd-V%L|~dL&yN(cwi1_nn|zLZXBJOXWnH5&w+xwO&!{l4+&7rZ1!@L<6n)Y zKn;--9A}=1Cy)yyntGcZ;eticYAz9OO7V@D-7E?%xRoHvwuRm=pf4jwl+G9WZ~ zv9`1%TFGOZ(E$v_2dT?og-KN4wmBUrPav*NQduyVdlC35L+5fkpIPyQlNPVrsEZL6 zdn%gZmWbw!n)n$6MvfI>kW{7C)knNyhD}s0&`mQF1mbCm9j$sy{52d&&cc>W0_vwO zCPp|(FfS;-Ear=OOofM~*_}+~#q~`uE6%as8WhmSt^*d@bwG-q)P*Gkv3CJ4tnG+@ zX|FD?E&X!AY!qooOVB8~IvREF4H7hqPGmGKDv?qA)hF!LxYncZj@v6_AYNgJdP|N{ z2;F{-Zq1jlU+@x4ol-@oXHt0;r32Q3D5{!El`im~GEGDk*DRc>@MyNhY;{cTRLZa^ 
zy2&dp#-p32o~Uk)Zt|8VXuWvL#a1;{7u%wnyyfDq=%(q!s@tQRyyeM`=q3-jxI60Y zluPx^ZR(Dm>{4HX7A!pqfIt?*$$%P;D8*pLi;{_;R&%nJPe^ZMD#JvY5|dn$GrF2n zGOy&a5}{5ay`|eMS?x!OR0PR_5@ABJs07GINk_?&l50w&a7|JY!^t|XlL_K;G@;?N zM`?3m?)DYlaa~rXkj5%qi@+@*yNWj%K7!S^By~^nAlf`0v8dy>r?Pcu6rU^mJG8Yj zx<0?lpVqTgM2@gh^D$y{2hALq%K-K*9Rw3Yet9EYZLYXc?QNdgIIN`~ck{7IL&Ka2 zr&aNVFRD*!SX8MGlV-n@T&Fuo%IyUGS&Oc2sy;$D?fU$@nLf>37+q_VrSTqY@(+Gc zleOsaSqgLk?Xxx~_yW_UQpYznILRAoc)ivZBD}0>;SZHss7kY+0d+GEa-hDa8LQX9 zaB3sJj(bKu^#-e%YiV4tC-jZBkSYE1rPi%N%}=UzPD`PRMoV3L9B3&h%L3BKY3CEx z4@e?f`T~Ouwi*VTeDdfrDp26C87|tL>uc%M80E5PcSh3q5%t7;`D`-g6HQIfFO5k6 z=@8%IEMks`@G*yS}+JMA0 z8q7B;*Web#II18+eQ;MZ%$NTvdh#(__}Bc2Dl+N95Yd3~p*=#=Q9gclzS#y4-RFNa z8_*HgH6*k~I&mnM8ju#CPk=-~CN;=zt@;mLvlm-M4O9JcRxPAMrDea9T&H5?Ku@TV zYp9o=ag+RbY9$4YGw;5BtL^45kMT=+~A!h+6hF>;{*Wrge4@XKO! zmC4X}x31&};BQ?zK&GfkzpZ!1@VwT+eYlxsKnL3{Kr82QX8B(cV5xDTe! z9ZXX(yJuZ^jSouo+~vRL-3`c?t0qrkylzTYmz9YBx^hgAGdyxz%|dGtcWB{*L_Cx3 z)-tg+P(ZcHI<#8-3Bgn>DynRaMo}=`jqY+UY|iG0AS{KPb?V>af|dco8>YtxssIW| z;62_zo3ufDz1ZKy&Mx+Jv73v1Tn!XNTci1{Xwi>mNyCgnKE48_WDbqB z=v9zlvs)keiG%53EL$IhF54yxCv5hrbg-P&A)W6jm6fG!^<86buLL zk)%)4f%mU6jAZ*N!tiWxm0>ev^(w=QWP#yD_N_FmNMC7K!@^3#uER>huEXlW$Wv*h zVei5?Ss!erF+HJK5Fn%ORjX#~t5$up8@E1f8l8Hzn*0En-EmKZCwWJSO=ZlGQSx~u ze@4lBN`6quzp3Phm3*0`n6`HHp~+#=qdw9J7yMg$3oZb)%nc+P=;%ak8we%g~qsI%3h}SkX2BG7>3xd12l)_TvuYs!;THsXWV_ z3f4V0*kwXc++~lVliW-^$xn@LQx}?uc?Spi(g0Ed!9`M(kPe@ zKPoj8CP0N&Q#_X|u|zr7`LMTfc^)lUuI43&l-t@Xss7+7F0XrmGWX{vq}R5n2RU82Fx0mxV=2ozM+AK9W|*So2XgMg_7M3 z*O!@}!K!lXII3ZpX4t+Gj3~!~?oC|_Mol&+I!emo#N_JCdF2L+pnooG1ASHr;Kd+Z zZ_-J&3)=YQ0PsQr6vCc~d`T_JY7&!He@Hy=08~pzqro}l4Kf6GhfEneq3LG|<<{Kc z(D~BzbysviX>+X4Xd*N_x!mo7tqUt?eY(w~VZoHcu&qrNMv$%7bG5E4OahD3P|Q`{ zABu7lU8AXa%@F#Y8>i@)ic#2vX{&0z`)E+}J&|5s>~^fY%rF$k!}DT7i*BhI1?>YN zf?Id~b{tt(Xw!$rfj=boo#DW3S?8a~P%!DjFa$^T+YLtY6?$4gKKTiC{7F6I zcc*fzRz3~~_~{?yVBfeJ?2?t4daiFtFL4{7ZTe3Ua3B7hNy^WmWXw4d&trhdY$Cw} zqJ;a#CLnT{0Gp!AaMBJNG#6X%7;lG!iv%qqQy7M~o0`1PUM`g5LHl~ez08Wd7TkhgPC8uvGA 
zS^g+53itPXujgTT98tGCFLVeYHsw|922|_TlRn>S=VlkL7oh8XsCi)UGL%}5L zuWkmg#tOC&z^KZ@cgl}M^BRwWHMYM5>)Yz`YK)r`#=c^q_HYQiA}S-q|>k7iIX--<`g4Kd{LLiV`&(*u+;rwbVk5g5hHP zuy_37d?a#H8wh1C71@YA94LC0mG+)dF>bj6_Zgks$6?XpCCnE7KxNRF1IU5gH|&57 zR=@w4o5Awk7NK=Hz<_msej0P0`csy0$ZKH%&_94BELFR6X%B8IG??T=qvI;0b=|5k4MRWh=xJL7!}WEx#QzPXxK;U{v{F`OM;?;^_JO5Oy0kr!*P##f=fhN>d``Jz+b`Z%2(`CZ zN)Ql4>Yi$^gKiuU%3U-KQtre+gFmpmP|jZ( z`A}MxGT^Pf`8r7~wm=p0W;bmWMcDRb!)`FAb3L$26adoB070pztT3M{^=9ks@>DVA zDeTct`nZ%x4d#7U@3pnm54f+nh^%CFSaFIiq^^nEBR4NESr^=ij@`B$6h!%S%d|2< zLs!E&uGhmGa~Ac$M9uws1f#n40H8j~wq$!_X4$7ZpGoEtdaeGc&{HxiDh=Lt1_aa& z_=@Tdc!g<*v%Se3(n}Px_pqccT)M0@=WMRxad+l{atC~VifZIMrMBl!tUM~0Gxpof z!*b$8xNpJAwD1k{A{!ZNv!f5ALW^DpZWL#Q7#Nz3uzOqjx8S*2l3`qvGMF=?CYnKm z?=r$-DhP)<+5|a%8vV$HBjHPy7Xo7~vf(dUBoHDk3l*p-t`$HyVh^Qnw6z4|Ea2DX z6((lZK^|~Zp0D6h$-U{6xgQ$kv$mU4rQx>5H6UW6V*~^(4RoyJIViUmYpP3d{oy-k zuH?U5Zat7~Jve}d(*v#4O5=`-1?SzMK`QdPqgTHW3NkXaA!$^RwcjQT?Hp{QLz4OC zb=sbGVjVH-JhnpyCpNgHU@=Z2aEr}C6FKUcpOIGpTL(`-x%$yik*G5aIEX96?_KX0qM=$vhn&$8VWl;giMuRRB9E*Mou#7|L@Uo|kv-Tu zGr5e-U~<0mwqNj|{mx|E?A60MKhr@-p=acsjeOdhZbYP&N;f*c%!W5yPMy7IxxU}K zlFK$ZE%wg!(fGbz2Q_=&uFB&qVXgalac*E++Z%AEv=vb%kvI++ zGEbj=<8+eYrHT+WoJEo|OAYdtZ8B}atX3Q{s~OsN10pfz%AF-*jvV<5N%NNAD}y97 zmlZqBgG>%+{|z<)O{pux)VYP)ya;G+Ey4;E&|IaU_cD^{*?pf+CMMj%iWYbl8|Wkj zWdv@e!uZCq#h23H2nqZ0 zi@=z0WNI@9RG0WZph8aL(aZsYLl62^#{TN0%NXDxE}iE?be(;8c$$+B1Zaw$dqsK@HsgiWR7|u6 zCrP%=+rlxn(0O~{y}FbT`pHIdl3)4@B*Ckh3tXM?2f{PTy_)gE!PS{3_~*DkGa1~U znGEjFOa}L7CWHGklfnI&$zaAVxo6){R0wy&QGvK&;~r%0R&tFt7d>VBs%5jN6?v=* zU{Ilp!vVp}Uv|94bHTht>%-zz;;{mBCpCFFgb0OqN_$4*ffDpni1uakA zP?*g+A5xpE7z14a>l%k_r1Ewde>%b>%G88e4kmJ*X+UTO(R}gimRMWZX}QqX6k)xQ z5}!0Vm57c0xowrkg=YEE&Lr(XdL21LtR?U|W6Xh~U8R{V(3km{F-KD{Z}%-0{8_%q z5=evNpVW2cP16o6%G_sy5MgSw;DS>&$D6lG*~Z%w+J<@KUAOp{;mNH;lAc_dikVjC zwzkWmcooXKuP^^HzF&_=L{u(9x)WCRX;Ptcd5PS+YxO$h8xgl4L0S2d#qkM5Igd6CO z#da{|$h41=IUO4MS+~>zM?bZ3EyJW~A@my{Lf=q#&Hdg9fGX-k!RLA41fby#*gFj6 zR$--i{RXVO-?8$3uyS3Fm7=MMI^zzNLdXI?Z-tuuEnQ2W#jHS01pTbr-&A+Ud5m8) 
zF%T7E85C8A1q1zToavVjnCE`m8aCIiN{)mG6CS+11{^hO=C_2E=B>YB^U%(r&6P3m63iUR?ZLuO z!^|!hLPn6&r(>+SRv@a_KzPs2mUU70MVr0r2$A@ra zHbYcR-f7!T!?;4v!p7ZWwh|ILgz+dGK4W>uxA@HWgz#CeOv;c(#)z`b&4d~<4r)Ja zfmqJyLpev;PD^Z1ZOFc%XJ1zmla#2+NAgb+9SJBH`uR6CVt4ZxIx@T&O0t(R%hG(* zgcUYwSGkZ(Vr!x9ylr94t~FY|WMZ_Qw_zC81!ROZm3B;sWE8n9-jXQthypJuR~S-q zwQat5%*$y9zUVhFgNRq!!0&Xo}h>@n>=K^j}M`+xUg|!r;(3R z7%|`c!nnaSG;TI~M1p)cjC4H6K@m16cp*3E(M*ekaN35993e6Yx`o4~$bk;9pXV#5 zT@UE00A0-O_X=vpl8osXW&3>rOCnx0yobnQ+dy(dZ*@sOL+?c{98BF-f!kptP=?#- zvjJofD+LW}WvI@@a0t22?j8_+T&1w{2#o?TMmYh34+G8(j^G<%oWs-@5L~+*L2%Qj z6iT{-Xm)}tx149CnM8|}=~XQA!E|DjZ+)k;M%tnL3%W7@Pw8p^%IdQVVmU>z^0Q#>PK*3Ee3^z3Z zKo$(O9aHIJq87+4dWc_&Jp{-a3_jr@Ft>*YG29}-Kw5~EX(PZT5BBQ>lKd!>J<(N& z&`(GqfRc%k3#IX76e;Ix7T|A;;Zpd+)BxryN3+#SL6YfTgF+?K&16GoUigZ(R8mBo zL@q=PWYI+=8wG9nDpjDyJBU%DZowQ0rsP{r@{^2`~OnO?_|$KppxI!qyLJ;#8!_N@H2XbFcN0W zuW$v`6Ra?WB?t6EA-Amc3(a;(t`ZgxOe8Ppp)7xIp<&;p3yrnRD#JM^WWg{eyIjDQ ze+t;rk5N;!VLKX-okYmqYOzg;Lt%eMhqe!mIjIwo5O7ZNONaT;mqVIkHe0$66(X3P zi1JGcV?>!pd9GJ1m(#TVT+FY0fzS;?TKFz2i&B_r-w_RTA>tBii(#D;M{NX#aAERd+J*xMIL-lI2BGv|Hd&vt zthUna%i3p3_LxHWXfx@Hx}0EDD7wYZ7x5mLJbl#bYjXmLMM^9_g3bnWd^t!Z#xo1~ zPaE?v))}v~*!=3+gc&E@2g5>JhTt(2-aAZ!o!l&Hcqp*<)lDSqp8dGD<@V*~Upf?n z1%xjr{Y#5l7+5&Xz(?g)-7yQGQN-lm8o>nra zBr(nKDKq7j&0!YIu=H-GWEf3;Paof_v;@XL80nUSPL=SOoM_f>|ovmH%QO(R6*Az)s@px?nV(jHWTJ~rwJb$$g!1t z+t^4x0%6=>cbrefvd6FQBIh={4`fqpV1%EMO^u1>I{Q5SYQKbtbm9xyhb4SK)7s-= z7d{cWz0tD|EljXW+Na(+Bf_Wn-B31Ps9tFABhoYkj7Za;w@A|vB$HT-93zce<6McE2VF{`AM<7C zv}KciY&wWurg9;hmhaZ-u+V&v8kqb8+7nD%YgS_!;N~dSt$@Y!{<@yUB;Tu5z35AU z7(t0u6Lh!mCi?%?S0AVW`x8-L?H(MmLW9MUK8x;h8x2-IEo6TRjQ6mX4ju?fQv^{T zYee6~VD%YPE4GNc-0Ll+spc+d?}0gxAI&|<#6ZcH;c;Mi&XQoC?jxYcUv`dWf!Q(d zzE?;s!cZW!x8!YsW+N}6%DtwNHEk6S3nQN9v4cd*4PU~=1_`@%rMDmzs2QR59D4%82QWIQiO8!7Cd5i=A z_Q^yDPPyFgUc~{vL9B8OzcIF4K1%)v3L1p6`aA&TRwl#33$pNj%#gwJ+cD9UdYeU}smIG0<6ni9;s8G`7KgS# zP8C|7qC$hDpcMR>bb=tbh1L1$8k(x<-;MC)iJ{_bUa2Y#_m(>uO1#TVZ#H7#p)n=WXzS18NK(fxDM zAtJAVg 
zR4=Gu<;5B6Tqj!$YI)Q!-Y6eq;$RnbkcgZM40giNYE!@L848*v)krKYQaX(%KvJ%_S z{2%G+*OdGxN`6C$DXji`UHt-E%g*|^s$A$}`Jy{o|`=D)>7yxcm6yn{$XJt5= zH^qyb*5u=<51iGzl>X1TckPtkRMR33DJ0hbuPEzlfU!BI{*EB3f}>mb2?`eAKsNNs z^^x5BOgBaxXojj<{d=leMH*6y_vv?%U;1s58^0N?+!s*OG4c}D7W9*5cDc~L2W=yk zQ?!lGn6?qKfklJTId%|Gu+yP@6=BkFn1_31tud)Bi%klGXSN^cDq{intE^ykfR~PN zC}DehuGIVqRdepzZ&19=7e1svWaJZ(<#pNT1hVyi7kHK#%7u+)C2*zm|H-Y*nftfNwEPUez<1m)5RJP#l9s-Ztw4F$YRQlPz4QF?t`;!MzE1mNXp1fsH@ix@8ixztN}u9bogKK zaZ&*qMLX>3LsN$2Ur@r|RYNv%A<_ux8#1Hp)T|D_sq@(Wkvt&7u=3_YB#+9ZYmi5F#Q4DsRXD7zFVN zx27fYcolkUC`r(Y@Yc|cV}-)4tsE|5N+ak4fhs76h!q~Ff-tcjsDjNN9;kwd6=Tu+ zR87Q+O?E-VigCLjV#Q{=AYz3Fsvu&82dW@q#Ws6I#0n2oLBt9VR6)cF4^%4KLG3}*;C}lfympyjz{u!Y!-px``9zB=uT!N2-`KUqAUnbl;*{0ks*$= z8*rr*D1oyzI>eBj$kf8;@wH#9UBUR3ZPu3BaUgP7C92(DY0Z)U~|W2{u+R`&*uAAUiBClp}1A;&kkz#ooWMJ=A|8GR0=_1^-A^R(KsgjJ6XTN1OLw- zdppu9ogj{qop;t>U-`hN+f}Bry`XAq4y(h2-xN_XtVu<#@IJJF*jruppV(n7Sq!|; z=(@cNI;UH`oA8z1bM7~9(zah}&1_FY6vI{VI34s3isK;|-#HIo65{-MqeWaUT`+Pt z@#duimoB|^*lsROe(KH7O}_Q8h1&Hik($q4ntUXeJoN2J&6Q7*zz|_n9>SYt#(NfW zT8qO>{(wQj+t{9_btM;&pZsj&QxuTnlcE-#%m`h!SIG>++1@7*t`zdAHhlLNF^@qAy(; zw?Pe4IhK5Ao@T;GX{MH%GfZI{`Q>7ov~2Sr5#p6gM7Dg;>J?v+eR?$`?Y2FYnn1Zs z6q6iwOcx|>{R zwZvBZi3n@J+O7Vs)jOo^SEgNcu}|3IE5sMclC0OQ)>5`$G#SRje)~dG5}#{5Y~{@w zEu3dBT+Akj>I&CdB#8j|K|%vB%z;b}fNMdD9ekG&KO@vm^d1p*#Jp34^PQ~VB@gc4 zN=^y-f6T6SIx3U7Sa-g}O^<)sauX3~o$zDGcE=hK(rqTiBlUwv?>n+#Lop>O-&7dy zu+Qf;0n?ou759dX%m@SlGGedgz1rNaiuEe~?jy-Mum5`%c#2ZXsc3R93QIjWt+q({ zijS)z=49CGp%C=7t`>*Nchbn^@hp(8kjiUQ+mvBdgG6LkvEn{rtIzV#MW$f2ct`Y` zH#xD+G`$&VE)nBtMUQUhTL&H^GRWhPIbQ;|a;*u|5rg-9X`_FQfd``Fg?vy|YuN%!ev6OWZV>NBP(|kpEXhn$`_{mG( zuPd8x|Cp=Z-6VY&9H5f?@4P6fx1Q8zn}kZ+NY>Im&6R9ImSmde)JD?K3(H>gQ^iOf#ctaBUYPECYUejAHJ5DmWB!nq=e%Sr1pT zWQ$-SByiqR07*1y4M0lnqQG;4$VLi?Mn+JJ-|z4k#2% zUa@VyD8@6mzakL#JXD2Or+HKIi5xraxys~#l-~+9>*1Jj8MGDRIyAWrtkOAMUuv(OhCdJ(qb-!G}0n=SRuWXBU?dDBh3U3Um->h8Q z5%JHNsIRUP<i7Dn}_K3bKd8-^0wOP<9=+wDo{P1 zgUY_TBS1Oi4iKo^&TtIyvaW{S=S{dZxIzIR1!!Kx9fB^4q~Lhd2fqxfpL-mnF=&E7 
zx_r>B8PD}$$Dy|hzHDo#bQ2RTF9QpY=<}sZmmatEpdX?me)HftS%3~4Ob-^WZs@*mh;vr|m#Nx1r2YP!cPVQ0j*%Y4v9Ivv#LlRWuNm@8*1KE4XXR1WDGD zC-ed2J8bir(v^v_59n$^$yp`pRLNs^lSse17+j?kAHvk1+F_LXR#0ozWqoV~{*X%G zcc&7CmQ{Ti|E4(8vnpw)>@J%(=;R)q;j??aPVc#w1>)Aqj-h+BSgh9nao>BnHQdaq2rdYIPaUvPI1ELf zyLL0;-vv>9%7>pJ?ZZ!IY-N)EB*QetIT7^^^fo{0I~Q}I=$ZIgig22^nkO7vP^Q7> zb4fWra-yHohjTG}!9Q7!Xd=;E@xn{utxC(P4*|ln@MtmXvix*~tUZ~Sf7oMa1>IXt zStM>}x<9Sh6?{oY@DkFVJNbuW3hIwT=C9;|Rgh#IAeo)2YaiONw#4wV|4myDbHh(r z!vAtRi^hf#Ec>cf>@d&NEWRI$#&8PCu_9c(IWMhRMA%tJre)wPTn5nfFTO%K=CmPj z+~*EFeqh>NrC%gpvYp(?9!*V=mZB#;Er)N@-sDUA(nRrZ=;|*i(bP2uj$13(G;aoF z&E2YdjVst>FCYK3ru805j>ed-80JTqwEKqlGI96enX$>ItkxHjN^Q!%fI`Iua#Y?3 zpy<1$yErstV(7jh-}QssxA&6Cga}!ZS2tQZDvQs%W0r1;Zv3`MdgIZ)Q*Zsp6=fff z-sOPW@nCR$mz_4Y*@hi`=@;x8XLt!);NzOWwpA1Jl@MBLQQ! zX12q5yT2J-OY9-00N*+E$EC0S$b2|98!giJqtQ9daimLE4SIQeiUSl5OHW7;?A~1o zMdp=1Aadkt~{g+lWrvV2c zyAKqK#Wfy1OP~2z@woiv0>Qev`-ZOVHR!S?V3#oS+rS&>68A5_*RQGUNf76psD?UJX3Il5@9nQD2`}E53-aMssAmgBKGGa zrb}X5E=KCiPC>y%q(=lAk+hjSn1BL47Izs1&MYaCf6gy+Fcs7Z!-W(|N5UEAip$n+ za2&c6?jnN-hAx_OndqJ6B4$=7OMR;mC(sUi=QMgiVmqe3`(9C&04wY{obXPF%Ys6?cFRx9u=dLYbJ(ubz3v$Rh_2Tli5xO<5k-iaV|>N7M7dgUd#0GjTPh~x`b)V z3b3op36r|jS~gq-5!s~sZ0;r3^#NUuu3GvwT9_zWsINr~-^-V{aT^7E2Wh9TN53lG z4QnOdqC|4+GW;DOP|LP4?#)GNcV&EN55o7JVRp{(M7A6$pyh(c@tgK!j00G86lZvY z)512m{}}JquOH!h4{yfVhRjzuGi*`0L-)Jeo+KPCc}y<&8r?($xC8@&y^;F%-t`t1 z#&n{{&f44SPw+=E<)|2;KVe6#KQZ^;E366#TjWpRA)W`midz=tz<4n017q0E*CVk? 
zG*O$x4S99p+Sg%~Fg#pgzA7ITGR}NfOxR1oF9G4+#EtL-8O5K#(>xC<9T;(iA#rdLU_r*gFj%ct}5eCBud5&b+zsSvwp5!Jy}R9NM~SvQ>i$XvG?pR^ zMsKo)fVIQ;wdY5e`GX4c#M>!x?C$8@ozcym8n*U*ecz`4SA{|-ZMQ4Y)X80n*csq* zdWdU{4}~5Y(w1rO{AhIT({fEAGp=gr&+w#>E^SoK-3VOl3+F2?kQ-DKpU(~H3AcP@ z5*QLZU_yvNrs2bz)C0VTgjVva;2 zOYUB3zPvB%xrj!lDrj;&p~>cyPSrxx+~ss$k42-Bhg7$S&xh_}SD6>>TP65~1?|H# zx;m~zU0yTg3gk-SRUhfvw;sx+%A?`!L)%dn@OOASRK{t~MnXUOvprgm7?l#e8Xwv` zbXR42#EG+_LPnXwo?zTSc|WYYdxl4b?Py`k<3|h^&Y|X2F6Oh2pV6&^3(3nyyhW>J}&YD5UVT&SovHwuRw0s zMb|Dz*FLU)7pP*jN1(yT*2jO5=J<^IhOSJ%73(S`DH&i4c$Rssz|%4qzlshv&9s^Y zjCB-F-x2h*oxygd6O~Zz?5Mzo4WFbflLTe(yT$-YN#*Xt5GeoxAYo*XTL_llIyD41 zU>>U_XdfW*Ygi}bOq)Nk>$B-5eaHUO-kTA>)4K%`J$t8E8K1Xz{0HjJ1Se6PG0h4? zy&}Gl^NlPb?>5&^Ed6vvcn&X-d+7S8yC^FOY4JJ4)RbW1^U+&;4#n2zv>ZYV5ts3X z1^W34gaev=<6)8HQ*Rv*TV|wvHHbY20x80vP1hD}Wv$ptn$FXv|1GW7%(z(G*F{r>=lad?GvVAZS!rgqr9Cp6 znW9l$C-_rl!c{dT@(KW>L;|Y$C7TGE8wLPAVjm{%TErfIjpSC+y*1L5% zsu^Mu4E>jckZMf?8MNcXlfT1_t%1;>g^1xL71P&wn>;Gw!ViU&62T*ROwYs`2l{30 z=d4E@?2;YiDa8`rNN!ei9NUsMjeeQ4QmE55A7ncI%MHB2HJJg6u6 zSehJDTQq-@CsiXCYf@a7-Q&I}K%G!Y7E5$1>G;qV}nj1yX91p5Bq7#5!(`EGd9g#OTU-9ri)VDVfD94Lq#wwqux9!hDwf#PQ69084+d}Gtnq^ z`iOMQJoG2eih*P|d*z;IY*;VN1C9x(>^!$8mOZ<%EfN%MTNT zg&xhr%VuZUqk@rR91(GYFO-7kHnS6hG=?93u!FCt`K6e81YsjG7dKN`6x$z+mO`D- zYuJLKe$4-SVhyA(NSVw}>GW&+3oV z7Kw-%@3AxRLi;Xs0xYf33H-EVAjvIiA6T*agLN1XPg{A&_lP3kf{9Sw)7B@$A0!j= z-$Wd7FG{}P-v!#}QF_w<39541&k+x!8_vedh;pVCwpd#VDH!`n(9-3G8C{|VuR!SC zqgdx4S$2>wM7`yB9|&-9e=IXNTJSF~&?$9JGlKnZsYa|}JHjF(+xTTnanQ3n&2cP9 z;OwZFbqpUWYY!U4RbG~zf&m3vUTVnMBq*b~@=hYB$*MfTXvsvtLp?9d zEsF9KmH!Gl%t3o-2EV-6aKe*38ben(#7T8SbOA2al?tzO#wIRulVLl9g*i_-PMech z4rjA77&-we8sNE3Fmh_VTJ_6$S?;DIo%~{pMzf4hCtqdxpwpo;>nhHKa|CPDF^bLj z(5ifSS)Tt;teqKjLnm#AA`I*T1jl-+u0p;G%~}l}+cS`T|F6%$YgXKnsaGht%G`Tf z(`;^S_S4)PABatr>#5_3{29vc*=ASg!ksUfOEP%_vq*-3pi+JsxoN-4-pVCogH-j` zo1jLidD4^TgxjF5`kgMz!r@GM72!@LqYp?@$5FVO)8T0uRb(3QVvfM$bsp1mhYqGV zw%FzfzqHK}y0qfN8_c4RafO5Fw2S#Kiq=ckza@`;wojB|^$qFQevLulULyRbn=2k& 
z{3lhK$ouzH^pa|R04jed_Hhr-qq}!h_6_Z=>>nPk#4wR{h}wJcj>ZEt6R;)np5&MQ z90?SbrJ+dwOX3W=MrwXgRN1e)Pe=-f*&u=wO;+sQq%<<`CDUFCc&?3}Nvw87I3sf>Xz1J2InM8nxdjf3af^Kg&sV@Mij4`rV~P8K@td8<)m zS4WL`C<&{gbXvX89zX>*~r@_+_r8(0n3$& zf!wO1%UyYQ7gaI$+IeB6iD>aXbe~rl46jz9NKohuj<&Z^&v~;=SopU|}d%Rajki{v_leKuV51f*&M0&$nf#)z6ofejnXQ+ zeB#C#skVzIS-g4lPe)5A(m1`n_Yj=7leibA3lsK^;AS)T$s4>Iu&@R}4Y~HYX%$#=e zUYs>-FrQ?F94Ea_JK*X%NEO?mZ1{#FOmQT!p=r(hudvvMerASdX4sihNwyC2Lwn=d zZs$51A5s?>6t&dEm<&9bq+{M?66iRmyCmJz7dmF+*8B)1(GsG0UM~T38b3szk~WQW zmRv@Kbd+D5>Pc2%hL{K>oG^Zbi2S%aq&l7>2ShEs>gRkJ7z~5m(jL0+r`uI?D1uHpMIcR9Yl#tjRFpwQ)n zm1iCIpENCDw(NY5MF{pMXF8_Qh1W<&K#CGoGr&EL_)d}4`gvSSqF@fDqlFP?Sd%}qqTECi!;l9x(o(Mu_L!)~L zGQDiLbc@5-4i}ywr9)pNBdI;K3?w*ddMUPjv*^{a=`pc-aal<@gno`K6ot?ybj#be z8W9J;BFR`s-A@#Vhz!?{fO%&v3@WMzDxxUHrD$2KXuu8-ao|=++T*GItuW!hCR1 z8L}R+ZKze3I`aM5uB7a7&LtS2ZQ6Kjc^CxK4M^9Qf1B;&PH<$1lK-1m77`)(W3EcG z{~_J-yF+GY{u`x+eo8>UTjjvxwpVsU;(ViUy}KlV(gIv5oa>$Be~VBG1#q|l(##s5 zQL^9*&Yks*VZ|C+mRZzkUc1ieeEPR65Pd@pMk1R7Q^a|R&|cq0`*!k+f@G5D#7)u)mPOiLvWcLqb)_( zay9xXszOh$uI=}(*`acD2BM2t4-EzC{Hv&s06`;x$n9j--pma}yMh_fDtq=|`sTs( zt>+D#M`D+pWPVM4gv2a0)v5(Ks8-Dzx6ovozO1{}t|won{F%JZG9g5}|D^l;hwAvt znpt1ss_1I+X+2vj`TAkX`s`p&rzaj%WqBkKk63ktdJHM>30rC{G?Tkk@lHLJHc=7z z^25&AAz|a=0<-R9KvK|g?VT`7l{CM}Mis%)2$Ai@b3g36bSr0U^KMQa)|FY_AJvta zx}Me5b4os?M9bdfw33rb6p+N|a`KX{G%J&{y3(*F>Txl-?dn5sBTt4Y=ASaZ?C7GA zeIxi_g}+1NTZq^)Ji49WRGUUm5vZy;nh>UH^XSL9ALsAE?e~t}HM*5M1+`jl@Oh6?j+`O}FO1j_@w$sN!=Pa#1PVsATy*!+7TGAa9E%_X&RK8zkPAhJ|>+ z3%cpMDcHSC>=SHY_Cybd>sBKYbcGT5^WTOdJwTn}hhtR*jZ86I{w=kJJ~7$~~ccrkL?PPW^)Hb78Mpcp9V% zf9ku!)Pj#%27w+p3GSnmV$x8ln@h{Hta1u9Tj+4s*W(5qb3v9uOnopB$|0!Bz&spI zs|&^%wqe@oT#*-QXfzjUD=P1Jrri$XWs;)gF$S&nP75KoJa()bt}0#((k1_-*>L%; z7Gn`-wbIfKMtf~qkJ{WE8s=1c8fpKcHmM&m+skZ|6xlb2b7+&Yqpch?7U`nB_Yqo}2(kctV(q+%J_Yq8|<#k$^U z#LKH|$NOwyny)NMi$**7Aj4YUpCsL^M|V{ap=l2lA;N2s-JE#{uI5H;ZoAp{$s^=) zzLQ~TN!=o+iDX8}8iR;f{cnqMj!}KIaU1GRc@X8JP1!!$BB_k_dnSVyi^V@8!;IGD z8LG1O>!V{*Z)BZ0ZjhG5XRro2U01<3B5PS0`LOrXHH^s01l`FZC&{bbY&O6*tl~?G 
zzJB_5tFKM{=jsvLW6&t{apxI&Z={D6yxwhb@Ca5HJ3X|vAY!t@npXNasYI5g7*%*b z(+07!%sUYPgK9-^%0*mOq{a=f84(MS=L;C9x?yQsLOe*42yI?mUXL`Br|TV zdekwuq%z_w8zl-3g|F^i1xuyt&G--7TH9BOn zxh+$!eL`NWtV45RMETGyZt;VAhyDN1H0-|i5D{OL4ZEw!_gGzVsbn4W*t%B^nzIqF_f!>rOcF8id&f>m~^wv>jkUn5YMty%v7d`HeP_SNW5@T zz{$g(X_U~Dm&g9Y;zmE#9;$T|IyLv*!)l70=E*)>vDhxmu};4F$7VCvBEoDXhfvVgau!(GIq?PBGQ- z?`Vo??GtU`IK_yc{Ld=)4CND!3_juhZ7#&`-5vf8C!A&&^UNHSnRdZmH4?4tw&y7A zZGElDhmP_>`vJTp*xfLySRB#`{dwtxG)EB|Mxrm^$){b0QZq0eJuaymyUNOWSxCAV zomNj)A&5%=rMVVtG6hmjFwAFPGXy;>PGF>2M=R&_ceyS%UA7xrAi*5xgApBm)sq{7 zA-CovxLcDaBXv`p$%>b8E@-yPQ-0bvA=wHu$DibaMthmQnz+ztHkaz1r4e=fg2E2vNyo7CoQ4dp~EjrBGtXkZvn6r!8U}m`C;mem77gvfrK3*@WShXi? z5KotpHZ`X1^lB5lYR`uC>``SOB|(+>3Jt;K)y(XB^zs!o`(9mrOo^bA99D8f$uT9L zRARk-R#&!*<9S`3Q1Y@8SrL+pO5Ri=_v7RRB~>LgC9IFV71M~x)Adom>-6WHX;Rj-Pv#&mOs1LX)G@^pWjba>1C3Uq4VGt+bW_B zXh`jUZ800##RjqQq3y%tI}m0!jUFW4_T5}bow0!!-0QgCGP+5d$_XL3ZS;ZBZNr>x izjCY(2nm!--(uX4gJ4|SNUfE literal 0 HcmV?d00001 diff --git a/BruteXSS/mechanize/_debug.py b/BruteXSS/mechanize/_debug.py new file mode 100644 index 0000000..8243969 --- /dev/null +++ b/BruteXSS/mechanize/_debug.py @@ -0,0 +1,28 @@ +import logging + +from _response import response_seek_wrapper +from _urllib2_fork import BaseHandler + + +class HTTPResponseDebugProcessor(BaseHandler): + handler_order = 900 # before redirections, after everything else + + def http_response(self, request, response): + if not hasattr(response, "seek"): + response = response_seek_wrapper(response) + info = logging.getLogger("mechanize.http_responses").info + try: + info(response.read()) + finally: + response.seek(0) + info("*****************************************************") + return response + + https_response = http_response + +class HTTPRedirectDebugProcessor(BaseHandler): + def http_request(self, request): + if hasattr(request, "redirect_dict"): + info = logging.getLogger("mechanize.http_redirects").info + info("redirecting to %s", request.get_full_url()) + return request diff --git 
a/BruteXSS/mechanize/_debug.pyc b/BruteXSS/mechanize/_debug.pyc new file mode 100644 index 0000000000000000000000000000000000000000..95fb94db82341e9f2226dd328f9eb15a899ab00a GIT binary patch literal 1448 zcmbVLU279T6uq;Pr1nc=sZ|7}FT&zO9|RFpMA|;|K}(qw)DRf9yOVU&&2F5TAeQzi z{RzJKJN*HA?rahjgi<%Xw=;L{+>dk5$RFz~+4pbnQ<{Ae|KCDf4=Tf-pn|AduOli1 zjUwtsB=}5HA!*d0ZUg%UnFjR*Wlg#wni75LHYsb(txF4Qi-UIJTS$=q#CI4AEd4zw zYuvahozc#iGyTOTz@hXmY6K4*RGAoSr53xTuK0Z>jEBoeTGB~nTYMiU=q8|>u z3vrL23_3x9&=J>wGP{zDgoS{@e*!&R;P(eytfL=+E7%3L0%60G7JOb-(d^9xv0GC zS7BjIk|{>7+jk}h&%Xg2q`wB+hRoo&_Xt?%VcD<_UxR~S~;m#LUD+?#7bwL~jgUj2iaS#C}0uVdz2TwO5pDv%RwWa6eAo?&L$gSb~v zOhm9)5Qu1BFcF8~Wz9UvbOe>aFZh6nQ!o#&6%56`6UKEI0I{a7PG@=Q|IM`8!8tB) z?v8nF`)d1%3$QC5zhc%Z1|W>tGq~W5?oWzBPizr$^@`b~crZ8T0tcLNAyA8-g=2a# z6*k0{s59`G6%SQyLfw3p+~$3VW7JzBypYgacnJ%V)DMM|adSm}@=W(Dd!~3`fe(rU Y-8g+<@avkhZ#Y)ya4l+!w%ls}0v#43a{vGU literal 0 HcmV?d00001 diff --git a/BruteXSS/mechanize/_firefox3cookiejar.py b/BruteXSS/mechanize/_firefox3cookiejar.py new file mode 100644 index 0000000..a64d70f --- /dev/null +++ b/BruteXSS/mechanize/_firefox3cookiejar.py @@ -0,0 +1,248 @@ +"""Firefox 3 "cookies.sqlite" cookie persistence. + +Copyright 2008 John J Lee + +This code is free software; you can redistribute it and/or modify it +under the terms of the BSD or ZPL 2.1 licenses (see the file +COPYING.txt included with the distribution). + +""" + +import logging +import time + +from _clientcookie import CookieJar, Cookie, MappingIterator +from _util import isstringlike, experimental +debug = logging.getLogger("mechanize.cookies").debug + + +class Firefox3CookieJar(CookieJar): + + """Firefox 3 cookie jar. + + The cookies are stored in Firefox 3's "cookies.sqlite" format. 
+ + Constructor arguments: + + filename: filename of cookies.sqlite (typically found at the top level + of a firefox profile directory) + autoconnect: as a convenience, connect to the SQLite cookies database at + Firefox3CookieJar construction time (default True) + policy: an object satisfying the mechanize.CookiePolicy interface + + Note that this is NOT a FileCookieJar, and there are no .load(), + .save() or .restore() methods. The database is in sync with the + cookiejar object's state after each public method call. + + Following Firefox's own behaviour, session cookies are never saved to + the database. + + The file is created, and an sqlite database written to it, if it does + not already exist. The moz_cookies database table is created if it does + not already exist. + """ + + # XXX + # handle DatabaseError exceptions + # add a FileCookieJar (explicit .save() / .revert() / .load() methods) + + def __init__(self, filename, autoconnect=True, policy=None): + experimental("Firefox3CookieJar is experimental code") + CookieJar.__init__(self, policy) + if filename is not None and not isstringlike(filename): + raise ValueError("filename must be string-like") + self.filename = filename + self._conn = None + if autoconnect: + self.connect() + + def connect(self): + import sqlite3 # not available in Python 2.4 stdlib + self._conn = sqlite3.connect(self.filename) + self._conn.isolation_level = "DEFERRED" + self._create_table_if_necessary() + + def close(self): + self._conn.close() + + def _transaction(self, func): + try: + cur = self._conn.cursor() + try: + result = func(cur) + finally: + cur.close() + except: + self._conn.rollback() + raise + else: + self._conn.commit() + return result + + def _execute(self, query, params=()): + return self._transaction(lambda cur: cur.execute(query, params)) + + def _query(self, query, params=()): + # XXX should we bother with a transaction? 
+ cur = self._conn.cursor() + try: + cur.execute(query, params) + return cur.fetchall() + finally: + cur.close() + + def _create_table_if_necessary(self): + self._execute("""\ +CREATE TABLE IF NOT EXISTS moz_cookies (id INTEGER PRIMARY KEY, name TEXT, + value TEXT, host TEXT, path TEXT,expiry INTEGER, + lastAccessed INTEGER, isSecure INTEGER, isHttpOnly INTEGER)""") + + def _cookie_from_row(self, row): + (pk, name, value, domain, path, expires, + last_accessed, secure, http_only) = row + + version = 0 + domain = domain.encode("ascii", "ignore") + path = path.encode("ascii", "ignore") + name = name.encode("ascii", "ignore") + value = value.encode("ascii", "ignore") + secure = bool(secure) + + # last_accessed isn't a cookie attribute, so isn't added to rest + rest = {} + if http_only: + rest["HttpOnly"] = None + + if name == "": + name = value + value = None + + initial_dot = domain.startswith(".") + domain_specified = initial_dot + + discard = False + if expires == "": + expires = None + discard = True + + return Cookie(version, name, value, + None, False, + domain, domain_specified, initial_dot, + path, False, + secure, + expires, + discard, + None, + None, + rest) + + def clear(self, domain=None, path=None, name=None): + CookieJar.clear(self, domain, path, name) + where_parts = [] + sql_params = [] + if domain is not None: + where_parts.append("host = ?") + sql_params.append(domain) + if path is not None: + where_parts.append("path = ?") + sql_params.append(path) + if name is not None: + where_parts.append("name = ?") + sql_params.append(name) + where = " AND ".join(where_parts) + if where: + where = " WHERE " + where + def clear(cur): + cur.execute("DELETE FROM moz_cookies%s" % where, + tuple(sql_params)) + self._transaction(clear) + + def _row_from_cookie(self, cookie, cur): + expires = cookie.expires + if cookie.discard: + expires = "" + + domain = unicode(cookie.domain) + path = unicode(cookie.path) + name = unicode(cookie.name) + value = unicode(cookie.value) 
+ secure = bool(int(cookie.secure)) + + if value is None: + value = name + name = "" + + last_accessed = int(time.time()) + http_only = cookie.has_nonstandard_attr("HttpOnly") + + query = cur.execute("""SELECT MAX(id) + 1 from moz_cookies""") + pk = query.fetchone()[0] + if pk is None: + pk = 1 + + return (pk, name, value, domain, path, expires, + last_accessed, secure, http_only) + + def set_cookie(self, cookie): + if cookie.discard: + CookieJar.set_cookie(self, cookie) + return + + def set_cookie(cur): + # XXX + # is this RFC 2965-correct? + # could this do an UPDATE instead? + row = self._row_from_cookie(cookie, cur) + name, unused, domain, path = row[1:5] + cur.execute("""\ +DELETE FROM moz_cookies WHERE host = ? AND path = ? AND name = ?""", + (domain, path, name)) + cur.execute("""\ +INSERT INTO moz_cookies VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) +""", row) + self._transaction(set_cookie) + + def __iter__(self): + # session (non-persistent) cookies + for cookie in MappingIterator(self._cookies): + yield cookie + # persistent cookies + for row in self._query("""\ +SELECT * FROM moz_cookies ORDER BY name, path, host"""): + yield self._cookie_from_row(row) + + def _cookies_for_request(self, request): + session_cookies = CookieJar._cookies_for_request(self, request) + def get_cookies(cur): + query = cur.execute("SELECT host from moz_cookies") + domains = [row[0] for row in query.fetchall()] + cookies = [] + for domain in domains: + cookies += self._persistent_cookies_for_domain(domain, + request, cur) + return cookies + persistent_coookies = self._transaction(get_cookies) + return session_cookies + persistent_coookies + + def _persistent_cookies_for_domain(self, domain, request, cur): + cookies = [] + if not self._policy.domain_return_ok(domain, request): + return [] + debug("Checking %s for cookies to return", domain) + query = cur.execute("""\ +SELECT * from moz_cookies WHERE host = ? 
ORDER BY path""", + (domain,)) + cookies = [self._cookie_from_row(row) for row in query.fetchall()] + last_path = None + r = [] + for cookie in cookies: + if (cookie.path != last_path and + not self._policy.path_return_ok(cookie.path, request)): + last_path = cookie.path + continue + if not self._policy.return_ok(cookie, request): + debug(" not returning cookie") + continue + debug(" it's a match") + r.append(cookie) + return r diff --git a/BruteXSS/mechanize/_firefox3cookiejar.pyc b/BruteXSS/mechanize/_firefox3cookiejar.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c27d68f350b925dda97247bf4ef3ed5a7e6ea6cd GIT binary patch literal 8683 zcmbtZ&vP8db?(_+F0dc~La-@W7HzfU$XePk^n*_9qGU-Vu(T+XOL`U-Nfha+!OSc$ z!0gO&x|al0z#*n8qAOQl;*_i6zrg3@;=dr*R1PU0oXTI2V=9$=-|N}Ml9bB@8o)HV zd%9nDzy99$UiZ}hdtomAhhJTfRP|HG|BvvPLnN_Mn@Bws*Hq@IO=MiYrZ#JmudA%C z1`V~@P=ixy^OPDi)n-$9Y}ZiPj2g_U%~>^Qsm+%15>M^UsrZz7gv@ERd0NFyl{8cz zt!LEdN$PV&i_@M`SJE=+7yJ5xemyFRy)-ea=6;siGqD*@4o%^5A`R-POd-E*OElPzq^}#G%U7?{ne-#d@wh+v6C7Ii4%?g z{Sr+~(YFu7GI>uQ6eAslxh|6!vdeU9WYNs(Fpu9VNr?{!uVSpp9OJ;@$Va=?Hmk5f_fv}c}Umz%$2T}wSgvqS51TrAd z{k7ERb+x&m;yHP>sN&O-!P|MsK<0vEAaGGK%gXPympRqP(BLtwDirO8B~+~Ovw_{I z7O#N?ZJBXzUl|AheZ?wIYY6M-jS0#`cIBHW!_I?tC5&l8b@?<_MZ< z#0uQHZ4ZWN6lU3h?iawA4y|Cq7DJsS50Xr#%8nt#I1?F`1!e0PWsZDsUK)iXTSP^k zqwb;(4LYFoAjwl&%nA#9rEMVt^nQ8`#xYo`x;HkkqAOUp>j!gKp_Ni7Hjf*a7kV`-!nl2YMb@xt!UswFJneI}Ok{ge8YFh7 zh|Q`4z+~MVCD-O4k0ypGLfn1NtRXcxKd#+Ojs1HjQ1G7VD6c&=0U6&zsdQJG*) zvC{~2@2GRqLq9B23+CkJr*=iBeR5n}Bu0ei1qd&Li1>)b*`TWd56L;do5L7R<@x~M5}og zgkw7Y zv1F_V-DsS42D!>g+w3nwOp^6|j$}_GnX++l2F~2tc%vjS=aE~#cy|qcE8o65F}Axw zMIIL>cwQYI`1B|5;4w=`lsD%sc`tisycT|&UIeeK=u-H|mc54zeZEqeQb*7{Hcj-8 zt;3E`|L_K~jk|RfVB4?@hyx%ReX1=G?4=ET>f zrpUk~MIHz(+OOk9;9wsJCWEvefZ$+GVR;~~(5EZ(xwPk25GTS_8*Z-h=Do9cL@`tj zA5jc_C)bKU0Cl`vzV5o2$TGOoQC66w%{MZ;=bqSCAmI`o!_84%#DByLADPl!WDeeW zVpc~r_1ThGjBM9YUERXAz+Sk05OVug{*v-MuUZKV5tYACDqmE$tGb5N!A5(shHjf{ za+}0Wj7Fs?N}rRH9WKFtTVb>(SVb^4NUb1S)-LewkMhXYT~}!ZqJ~F#UcE@L6WB7$ 
zO&Ad^>kw_eibQ#JAiM4@)tD}LGqnXT;=t37c^MUMsb47dg{S%w0RJ6gRu4<2lDm`L zXGgA7jK_6)?5W3S%fQxcK2t$DRIP4!3xS(RI# zC>_n8hbRS^f$YxDAi>a_l>lkUTdYkGL42(7{V^YaE&&}!e*z!#dp7brmo_?jhoH&l1ZaaD*b-5=?G%Zg03i=K)=d zma^U{;HX@77~+5-Sva?}JQ&MzowCr_OA+}JQARZiA_LO{P!UW`)qHI2@J5aZwW>Ss zfb}XC;&SBK=e1a$q59A97$Qb_&AMFbZ zRvn=p#|TdyomPjZ$inbJvBVK*cG!HfrYX3aZc|r>jVIeQ6ylM?7Pk(%XM^ z3}YJ9qZe>?h&9NBAPVgv=WvnTu?!Yl?0$&jk3=P9!4N3XW<=S zF(?2=>4q?w!L!I>cZrc0fu&&<#D%q-n;S1M!z4=kX%b6MoVla0jGq^^BDe~UgML{I z0<0T*qRd4ks`gr~=`B~k^Ytd8sfF4+(iTXgiFYn9DQfGJ--ueFQ{XmIo#u~7uw^dJ za$E^~JZdO!Td;=omtq|lo#FCt1k8Y*Bc8|T3OKm}c=qX55(#)!D_0<@ovdGvWnn;& zSk=lE?*Cfd{|zwAL$RJAQUz(2*V@b#2&9-}<=YcXUKB@}UaFFMksm=XIbAOy@20va%A|#K>~a@aasQff6XV0!s2&EMT`tb6?y&UpROFHggQ|{Z%AlA2Kse z)f$l{lXR>65gUGhi1~YVdmV}LUIeCz@df-Ir!OM@f*p9vb&0!Z>McCN@c)Ah zKnm&xi82Eey&{xe7haSwfx!U?8RYDW0y&_mDhit56sn@5UXUyPW<{p(IVGS74hF_& zRrv*(6@G;7qbXy}QcbwHd?Zi-|8qWaUCF+45-{UN%1s7rjmC>?3`2OCegaLM=>MP@ zpav#@N(@8lc9f1qLN4Ypr}{16kQ>(Z-$3gB6%z9;Bt0N5}9-Us^cG9d#A$BpuotnNRF0sk#F;-*X74JP)#&l;MNz=&5nmeJKtXo8%V zKDcH9R|cWA$CPPlDuq~m|C!ksUDMq6$=UpBf7c!5p+nu)}u-$Twp5t$PL(9tm{ z*m1F&!#`2Xw<$~xw4`45d$ywDW$z5?mH^RKZP^{6>S#TF%tt6Z22;VQeS;no!75k; zHDmfXCN@uqP2Q7k9gzYC(`N>1hsG`3bepmT@Q??Rf=G9u1`ZLuyml9%!c?q78p8{S zcKE5OKJdqw*}XvR6svKRuD~K?Ru=Ln_^l$(G3g4cj!9Pv6ViPTlCE}p9e;yy;Eib* z>Q66S`&p-_+aIpzFaJ9~=LqI1gb0Kjp9xAE_sdG@_a;N9wRW~dp6uZ#~8rvHArav-suW0pL+L~)UEwDIZ0b|3h^PQyAY;p2L9E?ucc6DepT~zk;T|Mq(N6 z1e1?sW4N0DA1n|HGv=N$I-y){j*EunwUK;v<0}>@#wWF9g43_D&uxRJG1T_N2F<^t z=!_7Y{4EI!=Dim@wi0f;gyShJlBY2iNrj_ zGx+ce8Q>tkyoHLM+z@sR31V$0iT3z0;M-Jwq(6`g>f*priMoNSG@ z#c7G13C&5gByqK+E8z@+<6aDFCnf(Ql-dM^1FXbDf;#bjH~He&8rfA2z!WK`^wR&B z0`R!xggEV@-+$sU-=PFp+F7pw?xfqB_uBZ+ko*iBA+P+;;uAm<&kJ7nc)&J|$kBbk zXaHX~xSAlq$MI3d;*1pIA_@ZkyU083kUrf_J_U#`^?7IHoW(M8l5SZft(7**r7XYPRaF#rgVt3%NI!&$hna`bw+ST5w@(fOfK+ ztk~WLg8U9E7~uv{h7YOMA!_KI1)^&R?`3 a*@gF=-ajBz3}d1u;@SDyIUKu|>;DgnoJ^ho literal 0 HcmV?d00001 diff --git a/BruteXSS/mechanize/_form.py b/BruteXSS/mechanize/_form.py new file mode 100644 index 
0000000..d45bdfc --- /dev/null +++ b/BruteXSS/mechanize/_form.py @@ -0,0 +1,3280 @@ +"""HTML form handling for web clients. + +HTML form handling for web clients: useful for parsing HTML forms, filling them +in and returning the completed forms to the server. This code developed from a +port of Gisle Aas' Perl module HTML::Form, from the libwww-perl library, but +the interface is not the same. + +The most useful docstring is the one for HTMLForm. + +RFC 1866: HTML 2.0 +RFC 1867: Form-based File Upload in HTML +RFC 2388: Returning Values from Forms: multipart/form-data +HTML 3.2 Specification, W3C Recommendation 14 January 1997 (for ISINDEX) +HTML 4.01 Specification, W3C Recommendation 24 December 1999 + + +Copyright 2002-2007 John J. Lee +Copyright 2005 Gary Poster +Copyright 2005 Zope Corporation +Copyright 1998-2000 Gisle Aas. + +This code is free software; you can redistribute it and/or modify it +under the terms of the BSD or ZPL 2.1 licenses (see the file +COPYING.txt included with the distribution). + +""" + +# TODO: +# Clean up post the merge into mechanize +# * Remove code that was duplicated in ClientForm and mechanize +# * Remove weird import stuff +# * Remove pre-Python 2.4 compatibility cruft +# * Clean up tests +# * Later release: Remove the ClientForm 0.1 backwards-compatibility switch +# Remove parser testing hack +# Clean action URI +# Switch to unicode throughout +# See Wichert Akkerman's 2004-01-22 message to c.l.py. +# Apply recommendations from google code project CURLIES +# Apply recommendations from HTML 5 spec +# Add charset parameter to Content-type headers? How to find value?? +# Functional tests to add: +# Single and multiple file upload +# File upload with missing name (check standards) +# mailto: submission & enctype text/plain?? + +# Replace by_label etc. with moniker / selector concept. Allows, e.g., a +# choice between selection by value / id / label / element contents. Or +# choice between matching labels exactly or by substring. 
etc. + + +__all__ = ['AmbiguityError', 'CheckboxControl', 'Control', + 'ControlNotFoundError', 'FileControl', 'FormParser', 'HTMLForm', + 'HiddenControl', 'IgnoreControl', 'ImageControl', 'IsindexControl', + 'Item', 'ItemCountError', 'ItemNotFoundError', 'Label', + 'ListControl', 'LocateError', 'Missing', 'ParseError', 'ParseFile', + 'ParseFileEx', 'ParseResponse', 'ParseResponseEx','PasswordControl', + 'RadioControl', 'ScalarControl', 'SelectControl', + 'SubmitButtonControl', 'SubmitControl', 'TextControl', + 'TextareaControl', 'XHTMLCompatibleFormParser'] + +import HTMLParser +from cStringIO import StringIO +import inspect +import logging +import random +import re +import sys +import urllib +import urlparse +import warnings + +import _beautifulsoup +import _request + +# from Python itself, for backwards compatibility of raised exceptions +import sgmllib +# bundled copy of sgmllib +import _sgmllib_copy + + +VERSION = "0.2.11" + +CHUNK = 1024 # size of chunks fed to parser, in bytes + +DEFAULT_ENCODING = "latin-1" + +_logger = logging.getLogger("mechanize.forms") +OPTIMIZATION_HACK = True + +def debug(msg, *args, **kwds): + if OPTIMIZATION_HACK: + return + + caller_name = inspect.stack()[1][3] + extended_msg = '%%s %s' % msg + extended_args = (caller_name,)+args + _logger.debug(extended_msg, *extended_args, **kwds) + +def _show_debug_messages(): + global OPTIMIZATION_HACK + OPTIMIZATION_HACK = False + _logger.setLevel(logging.DEBUG) + handler = logging.StreamHandler(sys.stdout) + handler.setLevel(logging.DEBUG) + _logger.addHandler(handler) + + +def deprecation(message, stack_offset=0): + warnings.warn(message, DeprecationWarning, stacklevel=3+stack_offset) + + +class Missing: pass + +_compress_re = re.compile(r"\s+") +def compress_text(text): return _compress_re.sub(" ", text.strip()) + +def normalize_line_endings(text): + return re.sub(r"(?:(? + w = MimeWriter(f) + ...call w.addheader(key, value) 0 or more times... 
+ + followed by either: + + f = w.startbody(content_type) + ...call f.write(data) for body data... + + or: + + w.startmultipartbody(subtype) + for each part: + subwriter = w.nextpart() + ...use the subwriter's methods to create the subpart... + w.lastpart() + + The subwriter is another MimeWriter instance, and should be + treated in the same way as the toplevel MimeWriter. This way, + writing recursive body parts is easy. + + Warning: don't forget to call lastpart()! + + XXX There should be more state so calls made in the wrong order + are detected. + + Some special cases: + + - startbody() just returns the file passed to the constructor; + but don't use this knowledge, as it may be changed. + + - startmultipartbody() actually returns a file as well; + this can be used to write the initial 'if you can read this your + mailer is not MIME-aware' message. + + - If you call flushheaders(), the headers accumulated so far are + written out (and forgotten); this is useful if you don't need a + body part at all, e.g. for a subpart of type message/rfc822 + that's (mis)used to store some header-like information. + + - Passing a keyword argument 'prefix=' to addheader(), + start*body() affects where the header is inserted; 0 means + append at the end, 1 means insert at the start; default is + append for addheader(), but insert for start*body(), which use + it to determine where the Content-type header goes. + + """ + + def __init__(self, fp, http_hdrs=None): + self._http_hdrs = http_hdrs + self._fp = fp + self._headers = [] + self._boundary = [] + self._first_part = True + + def addheader(self, key, value, prefix=0, + add_to_http_hdrs=0): + """ + prefix is ignored if add_to_http_hdrs is true. 
+ """ + lines = value.split("\r\n") + while lines and not lines[-1]: del lines[-1] + while lines and not lines[0]: del lines[0] + if add_to_http_hdrs: + value = "".join(lines) + # 2.2 urllib2 doesn't normalize header case + self._http_hdrs.append((key.capitalize(), value)) + else: + for i in range(1, len(lines)): + lines[i] = " " + lines[i].strip() + value = "\r\n".join(lines) + "\r\n" + line = key.title() + ": " + value + if prefix: + self._headers.insert(0, line) + else: + self._headers.append(line) + + def flushheaders(self): + self._fp.writelines(self._headers) + self._headers = [] + + def startbody(self, ctype=None, plist=[], prefix=1, + add_to_http_hdrs=0, content_type=1): + """ + prefix is ignored if add_to_http_hdrs is true. + """ + if content_type and ctype: + for name, value in plist: + ctype = ctype + ';\r\n %s=%s' % (name, value) + self.addheader("Content-Type", ctype, prefix=prefix, + add_to_http_hdrs=add_to_http_hdrs) + self.flushheaders() + if not add_to_http_hdrs: self._fp.write("\r\n") + self._first_part = True + return self._fp + + def startmultipartbody(self, subtype, boundary=None, plist=[], prefix=1, + add_to_http_hdrs=0, content_type=1): + boundary = boundary or choose_boundary() + self._boundary.append(boundary) + return self.startbody("multipart/" + subtype, + [("boundary", boundary)] + plist, + prefix=prefix, + add_to_http_hdrs=add_to_http_hdrs, + content_type=content_type) + + def nextpart(self): + boundary = self._boundary[-1] + if self._first_part: + self._first_part = False + else: + self._fp.write("\r\n") + self._fp.write("--" + boundary + "\r\n") + return self.__class__(self._fp) + + def lastpart(self): + if self._first_part: + self.nextpart() + boundary = self._boundary.pop() + self._fp.write("\r\n--" + boundary + "--\r\n") + + +class LocateError(ValueError): pass +class AmbiguityError(LocateError): pass +class ControlNotFoundError(LocateError): pass +class ItemNotFoundError(LocateError): pass + +class ItemCountError(ValueError): 
pass + +# for backwards compatibility, ParseError derives from exceptions that were +# raised by versions of ClientForm <= 0.2.5 +# TODO: move to _html +class ParseError(sgmllib.SGMLParseError, + HTMLParser.HTMLParseError): + + def __init__(self, *args, **kwds): + Exception.__init__(self, *args, **kwds) + + def __str__(self): + return Exception.__str__(self) + + +class _AbstractFormParser: + """forms attribute contains HTMLForm instances on completion.""" + # thanks to Moshe Zadka for an example of sgmllib/htmllib usage + def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING): + if entitydefs is None: + entitydefs = get_entitydefs() + self._entitydefs = entitydefs + self._encoding = encoding + + self.base = None + self.forms = [] + self.labels = [] + self._current_label = None + self._current_form = None + self._select = None + self._optgroup = None + self._option = None + self._textarea = None + + # forms[0] will contain all controls that are outside of any form + # self._global_form is an alias for self.forms[0] + self._global_form = None + self.start_form([]) + self.end_form() + self._current_form = self._global_form = self.forms[0] + + def do_base(self, attrs): + debug("%s", attrs) + for key, value in attrs: + if key == "href": + self.base = self.unescape_attr_if_required(value) + + def end_body(self): + debug("") + if self._current_label is not None: + self.end_label() + if self._current_form is not self._global_form: + self.end_form() + + def start_form(self, attrs): + debug("%s", attrs) + if self._current_form is not self._global_form: + raise ParseError("nested FORMs") + name = None + action = None + enctype = "application/x-www-form-urlencoded" + method = "GET" + d = {} + for key, value in attrs: + if key == "name": + name = self.unescape_attr_if_required(value) + elif key == "action": + action = self.unescape_attr_if_required(value) + elif key == "method": + method = self.unescape_attr_if_required(value.upper()) + elif key == "enctype": + enctype 
= self.unescape_attr_if_required(value.lower()) + d[key] = self.unescape_attr_if_required(value) + controls = [] + self._current_form = (name, action, method, enctype), d, controls + + def end_form(self): + debug("") + if self._current_label is not None: + self.end_label() + if self._current_form is self._global_form: + raise ParseError("end of FORM before start") + self.forms.append(self._current_form) + self._current_form = self._global_form + + def start_select(self, attrs): + debug("%s", attrs) + if self._select is not None: + raise ParseError("nested SELECTs") + if self._textarea is not None: + raise ParseError("SELECT inside TEXTAREA") + d = {} + for key, val in attrs: + d[key] = self.unescape_attr_if_required(val) + + self._select = d + self._add_label(d) + + self._append_select_control({"__select": d}) + + def end_select(self): + debug("") + if self._select is None: + raise ParseError("end of SELECT before start") + + if self._option is not None: + self._end_option() + + self._select = None + + def start_optgroup(self, attrs): + debug("%s", attrs) + if self._select is None: + raise ParseError("OPTGROUP outside of SELECT") + d = {} + for key, val in attrs: + d[key] = self.unescape_attr_if_required(val) + + self._optgroup = d + + def end_optgroup(self): + debug("") + if self._optgroup is None: + raise ParseError("end of OPTGROUP before start") + self._optgroup = None + + def _start_option(self, attrs): + debug("%s", attrs) + if self._select is None: + raise ParseError("OPTION outside of SELECT") + if self._option is not None: + self._end_option() + + d = {} + for key, val in attrs: + d[key] = self.unescape_attr_if_required(val) + + self._option = {} + self._option.update(d) + if (self._optgroup and self._optgroup.has_key("disabled") and + not self._option.has_key("disabled")): + self._option["disabled"] = None + + def _end_option(self): + debug("") + if self._option is None: + raise ParseError("end of OPTION before start") + + contents = 
self._option.get("contents", "").strip() + self._option["contents"] = contents + if not self._option.has_key("value"): + self._option["value"] = contents + if not self._option.has_key("label"): + self._option["label"] = contents + # stuff dict of SELECT HTML attrs into a special private key + # (gets deleted again later) + self._option["__select"] = self._select + self._append_select_control(self._option) + self._option = None + + def _append_select_control(self, attrs): + debug("%s", attrs) + controls = self._current_form[2] + name = self._select.get("name") + controls.append(("select", name, attrs)) + + def start_textarea(self, attrs): + debug("%s", attrs) + if self._textarea is not None: + raise ParseError("nested TEXTAREAs") + if self._select is not None: + raise ParseError("TEXTAREA inside SELECT") + d = {} + for key, val in attrs: + d[key] = self.unescape_attr_if_required(val) + self._add_label(d) + + self._textarea = d + + def end_textarea(self): + debug("") + if self._textarea is None: + raise ParseError("end of TEXTAREA before start") + controls = self._current_form[2] + name = self._textarea.get("name") + controls.append(("textarea", name, self._textarea)) + self._textarea = None + + def start_label(self, attrs): + debug("%s", attrs) + if self._current_label: + self.end_label() + d = {} + for key, val in attrs: + d[key] = self.unescape_attr_if_required(val) + taken = bool(d.get("for")) # empty id is invalid + d["__text"] = "" + d["__taken"] = taken + if taken: + self.labels.append(d) + self._current_label = d + + def end_label(self): + debug("") + label = self._current_label + if label is None: + # something is ugly in the HTML, but we're ignoring it + return + self._current_label = None + # if it is staying around, it is True in all cases + del label["__taken"] + + def _add_label(self, d): + #debug("%s", d) + if self._current_label is not None: + if not self._current_label["__taken"]: + self._current_label["__taken"] = True + d["__label"] = 
self._current_label + + def handle_data(self, data): + debug("%s", data) + + if self._option is not None: + # self._option is a dictionary of the OPTION element's HTML + # attributes, but it has two special keys, one of which is the + # special "contents" key contains text between OPTION tags (the + # other is the "__select" key: see the end_option method) + map = self._option + key = "contents" + elif self._textarea is not None: + map = self._textarea + key = "value" + data = normalize_line_endings(data) + # not if within option or textarea + elif self._current_label is not None: + map = self._current_label + key = "__text" + else: + return + + if data and not map.has_key(key): + # according to + # http://www.w3.org/TR/html4/appendix/notes.html#h-B.3.1 line break + # immediately after start tags or immediately before end tags must + # be ignored, but real browsers only ignore a line break after a + # start tag, so we'll do that. + if data[0:2] == "\r\n": + data = data[2:] + elif data[0:1] in ["\n", "\r"]: + data = data[1:] + map[key] = data + else: + map[key] = map[key] + data + + def do_button(self, attrs): + debug("%s", attrs) + d = {} + d["type"] = "submit" # default + for key, val in attrs: + d[key] = self.unescape_attr_if_required(val) + controls = self._current_form[2] + + type = d["type"] + name = d.get("name") + # we don't want to lose information, so use a type string that + # doesn't clash with INPUT TYPE={SUBMIT,RESET,BUTTON} + # e.g. 
type for BUTTON/RESET is "resetbutton" + # (type for INPUT/RESET is "reset") + type = type+"button" + self._add_label(d) + controls.append((type, name, d)) + + def do_input(self, attrs): + debug("%s", attrs) + d = {} + d["type"] = "text" # default + for key, val in attrs: + d[key] = self.unescape_attr_if_required(val) + controls = self._current_form[2] + + type = d["type"] + name = d.get("name") + self._add_label(d) + controls.append((type, name, d)) + + def do_isindex(self, attrs): + debug("%s", attrs) + d = {} + for key, val in attrs: + d[key] = self.unescape_attr_if_required(val) + controls = self._current_form[2] + + self._add_label(d) + # isindex doesn't have type or name HTML attributes + controls.append(("isindex", None, d)) + + def handle_entityref(self, name): + #debug("%s", name) + self.handle_data(unescape( + '&%s;' % name, self._entitydefs, self._encoding)) + + def handle_charref(self, name): + #debug("%s", name) + self.handle_data(unescape_charref(name, self._encoding)) + + def unescape_attr(self, name): + #debug("%s", name) + return unescape(name, self._entitydefs, self._encoding) + + def unescape_attrs(self, attrs): + #debug("%s", attrs) + escaped_attrs = {} + for key, val in attrs.items(): + try: + val.items + except AttributeError: + escaped_attrs[key] = self.unescape_attr(val) + else: + # e.g. "__select" -- yuck! + escaped_attrs[key] = self.unescape_attrs(val) + return escaped_attrs + + def unknown_entityref(self, ref): self.handle_data("&%s;" % ref) + def unknown_charref(self, ref): self.handle_data("&#%s;" % ref) + + +class XHTMLCompatibleFormParser(_AbstractFormParser, HTMLParser.HTMLParser): + """Good for XHTML, bad for tolerance of incorrect HTML.""" + # thanks to Michael Howitz for this! 
+ def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING): + HTMLParser.HTMLParser.__init__(self) + _AbstractFormParser.__init__(self, entitydefs, encoding) + + def feed(self, data): + try: + HTMLParser.HTMLParser.feed(self, data) + except HTMLParser.HTMLParseError, exc: + raise ParseError(exc) + + def start_option(self, attrs): + _AbstractFormParser._start_option(self, attrs) + + def end_option(self): + _AbstractFormParser._end_option(self) + + def handle_starttag(self, tag, attrs): + try: + method = getattr(self, "start_" + tag) + except AttributeError: + try: + method = getattr(self, "do_" + tag) + except AttributeError: + pass # unknown tag + else: + method(attrs) + else: + method(attrs) + + def handle_endtag(self, tag): + try: + method = getattr(self, "end_" + tag) + except AttributeError: + pass # unknown tag + else: + method() + + def unescape(self, name): + # Use the entitydefs passed into constructor, not + # HTMLParser.HTMLParser's entitydefs. + return self.unescape_attr(name) + + def unescape_attr_if_required(self, name): + return name # HTMLParser.HTMLParser already did it + def unescape_attrs_if_required(self, attrs): + return attrs # ditto + + def close(self): + HTMLParser.HTMLParser.close(self) + self.end_body() + + +class _AbstractSgmllibParser(_AbstractFormParser): + + def do_option(self, attrs): + _AbstractFormParser._start_option(self, attrs) + + # we override this attr to decode hex charrefs + entity_or_charref = re.compile( + '&(?:([a-zA-Z][-.a-zA-Z0-9]*)|#(x?[0-9a-fA-F]+))(;?)') + def convert_entityref(self, name): + return unescape("&%s;" % name, self._entitydefs, self._encoding) + def convert_charref(self, name): + return unescape_charref("%s" % name, self._encoding) + def unescape_attr_if_required(self, name): + return name # sgmllib already did it + def unescape_attrs_if_required(self, attrs): + return attrs # ditto + + +class FormParser(_AbstractSgmllibParser, _sgmllib_copy.SGMLParser): + """Good for tolerance of incorrect HTML, 
bad for XHTML.""" + def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING): + _sgmllib_copy.SGMLParser.__init__(self) + _AbstractFormParser.__init__(self, entitydefs, encoding) + + def feed(self, data): + try: + _sgmllib_copy.SGMLParser.feed(self, data) + except _sgmllib_copy.SGMLParseError, exc: + raise ParseError(exc) + + def close(self): + _sgmllib_copy.SGMLParser.close(self) + self.end_body() + + +class _AbstractBSFormParser(_AbstractSgmllibParser): + + bs_base_class = None + + def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING): + _AbstractFormParser.__init__(self, entitydefs, encoding) + self.bs_base_class.__init__(self) + + def handle_data(self, data): + _AbstractFormParser.handle_data(self, data) + self.bs_base_class.handle_data(self, data) + + def feed(self, data): + try: + self.bs_base_class.feed(self, data) + except _sgmllib_copy.SGMLParseError, exc: + raise ParseError(exc) + + def close(self): + self.bs_base_class.close(self) + self.end_body() + + +class RobustFormParser(_AbstractBSFormParser, _beautifulsoup.BeautifulSoup): + + """Tries to be highly tolerant of incorrect HTML.""" + + bs_base_class = _beautifulsoup.BeautifulSoup + + +class NestingRobustFormParser(_AbstractBSFormParser, + _beautifulsoup.ICantBelieveItsBeautifulSoup): + + """Tries to be highly tolerant of incorrect HTML. + + Different from RobustFormParser in that it more often guesses nesting + above missing end tags (see BeautifulSoup docs). + """ + + bs_base_class = _beautifulsoup.ICantBelieveItsBeautifulSoup + + +#FormParser = XHTMLCompatibleFormParser # testing hack +#FormParser = RobustFormParser # testing hack + + +def ParseResponseEx(response, + select_default=False, + form_parser_class=FormParser, + request_class=_request.Request, + entitydefs=None, + encoding=DEFAULT_ENCODING, + + # private + _urljoin=urlparse.urljoin, + _urlparse=urlparse.urlparse, + _urlunparse=urlparse.urlunparse, + ): + """Identical to ParseResponse, except that: + + 1. 
The returned list contains an extra item. The first form in the list + contains all controls not contained in any FORM element. + + 2. The arguments ignore_errors and backwards_compat have been removed. + + 3. Backwards-compatibility mode (backwards_compat=True) is not available. + """ + return _ParseFileEx(response, response.geturl(), + select_default, + False, + form_parser_class, + request_class, + entitydefs, + False, + encoding, + _urljoin=_urljoin, + _urlparse=_urlparse, + _urlunparse=_urlunparse, + ) + +def ParseFileEx(file, base_uri, + select_default=False, + form_parser_class=FormParser, + request_class=_request.Request, + entitydefs=None, + encoding=DEFAULT_ENCODING, + + # private + _urljoin=urlparse.urljoin, + _urlparse=urlparse.urlparse, + _urlunparse=urlparse.urlunparse, + ): + """Identical to ParseFile, except that: + + 1. The returned list contains an extra item. The first form in the list + contains all controls not contained in any FORM element. + + 2. The arguments ignore_errors and backwards_compat have been removed. + + 3. Backwards-compatibility mode (backwards_compat=True) is not available. + """ + return _ParseFileEx(file, base_uri, + select_default, + False, + form_parser_class, + request_class, + entitydefs, + False, + encoding, + _urljoin=_urljoin, + _urlparse=_urlparse, + _urlunparse=_urlunparse, + ) + +def ParseString(text, base_uri, *args, **kwds): + fh = StringIO(text) + return ParseFileEx(fh, base_uri, *args, **kwds) + +def ParseResponse(response, *args, **kwds): + """Parse HTTP response and return a list of HTMLForm instances. + + The return value of mechanize.urlopen can be conveniently passed to this + function as the response parameter. + + mechanize.ParseError is raised on parse errors. 
+ + response: file-like object (supporting read() method) with a method + geturl(), returning the URI of the HTTP response + select_default: for multiple-selection SELECT controls and RADIO controls, + pick the first item as the default if none are selected in the HTML + form_parser_class: class to instantiate and use to pass + request_class: class to return from .click() method (default is + mechanize.Request) + entitydefs: mapping like {"&": "&", ...} containing HTML entity + definitions (a sensible default is used) + encoding: character encoding used for encoding numeric character references + when matching link text. mechanize does not attempt to find the encoding + in a META HTTP-EQUIV attribute in the document itself (mechanize, for + example, does do that and will pass the correct value to mechanize using + this parameter). + + backwards_compat: boolean that determines whether the returned HTMLForm + objects are backwards-compatible with old code. If backwards_compat is + true: + + - ClientForm 0.1 code will continue to work as before. + + - Label searches that do not specify a nr (number or count) will always + get the first match, even if other controls match. If + backwards_compat is False, label searches that have ambiguous results + will raise an AmbiguityError. + + - Item label matching is done by strict string comparison rather than + substring matching. + + - De-selecting individual list items is allowed even if the Item is + disabled. + + The backwards_compat argument will be removed in a future release. + + Pass a true value for select_default if you want the behaviour specified by + RFC 1866 (the HTML 2.0 standard), which is to select the first item in a + RADIO or multiple-selection SELECT control if none were selected in the + HTML. Most browsers (including Microsoft Internet Explorer (IE) and + Netscape Navigator) instead leave all items unselected in these cases. 
The + W3C HTML 4.0 standard leaves this behaviour undefined in the case of + multiple-selection SELECT controls, but insists that at least one RADIO + button should be checked at all times, in contradiction to browser + behaviour. + + There is a choice of parsers. mechanize.XHTMLCompatibleFormParser (uses + HTMLParser.HTMLParser) works best for XHTML, mechanize.FormParser (uses + bundled copy of sgmllib.SGMLParser) (the default) works better for ordinary + grubby HTML. Note that HTMLParser is only available in Python 2.2 and + later. You can pass your own class in here as a hack to work around bad + HTML, but at your own risk: there is no well-defined interface. + + """ + return _ParseFileEx(response, response.geturl(), *args, **kwds)[1:] + +def ParseFile(file, base_uri, *args, **kwds): + """Parse HTML and return a list of HTMLForm instances. + + mechanize.ParseError is raised on parse errors. + + file: file-like object (supporting read() method) containing HTML with zero + or more forms to be parsed + base_uri: the URI of the document (note that the base URI used to submit + the form will be that given in the BASE element if present, not that of + the document) + + For the other arguments and further details, see ParseResponse.__doc__. 
+ + """ + return _ParseFileEx(file, base_uri, *args, **kwds)[1:] + +def _ParseFileEx(file, base_uri, + select_default=False, + ignore_errors=False, + form_parser_class=FormParser, + request_class=_request.Request, + entitydefs=None, + backwards_compat=True, + encoding=DEFAULT_ENCODING, + _urljoin=urlparse.urljoin, + _urlparse=urlparse.urlparse, + _urlunparse=urlparse.urlunparse, + ): + if backwards_compat: + deprecation("operating in backwards-compatibility mode", 1) + fp = form_parser_class(entitydefs, encoding) + while 1: + data = file.read(CHUNK) + try: + fp.feed(data) + except ParseError, e: + e.base_uri = base_uri + raise + if len(data) != CHUNK: break + fp.close() + if fp.base is not None: + # HTML BASE element takes precedence over document URI + base_uri = fp.base + labels = [] # Label(label) for label in fp.labels] + id_to_labels = {} + for l in fp.labels: + label = Label(l) + labels.append(label) + for_id = l["for"] + coll = id_to_labels.get(for_id) + if coll is None: + id_to_labels[for_id] = [label] + else: + coll.append(label) + forms = [] + for (name, action, method, enctype), attrs, controls in fp.forms: + if action is None: + action = base_uri + else: + action = _urljoin(base_uri, action) + # would be nice to make HTMLForm class (form builder) pluggable + form = HTMLForm( + action, method, enctype, name, attrs, request_class, + forms, labels, id_to_labels, backwards_compat) + form._urlparse = _urlparse + form._urlunparse = _urlunparse + for ii in range(len(controls)): + type, name, attrs = controls[ii] + # index=ii*10 allows ImageControl to return multiple ordered pairs + form.new_control( + type, name, attrs, select_default=select_default, index=ii*10) + forms.append(form) + for form in forms: + form.fixup() + return forms + + +class Label: + def __init__(self, attrs): + self.id = attrs.get("for") + self._text = attrs.get("__text").strip() + self._ctext = compress_text(self._text) + self.attrs = attrs + self._backwards_compat = False # maintained by 
HTMLForm + + def __getattr__(self, name): + if name == "text": + if self._backwards_compat: + return self._text + else: + return self._ctext + return getattr(Label, name) + + def __setattr__(self, name, value): + if name == "text": + # don't see any need for this, so make it read-only + raise AttributeError("text attribute is read-only") + self.__dict__[name] = value + + def __str__(self): + return "" % (self.id, self.text) + + +def _get_label(attrs): + text = attrs.get("__label") + if text is not None: + return Label(text) + else: + return None + +class Control: + """An HTML form control. + + An HTMLForm contains a sequence of Controls. The Controls in an HTMLForm + are accessed using the HTMLForm.find_control method or the + HTMLForm.controls attribute. + + Control instances are usually constructed using the ParseFile / + ParseResponse functions. If you use those functions, you can ignore the + rest of this paragraph. A Control is only properly initialised after the + fixup method has been called. In fact, this is only strictly necessary for + ListControl instances. This is necessary because ListControls are built up + from ListControls each containing only a single item, and their initial + value(s) can only be known after the sequence is complete. + + The types and values that are acceptable for assignment to the value + attribute are defined by subclasses. + + If the disabled attribute is true, this represents the state typically + represented by browsers by 'greying out' a control. If the disabled + attribute is true, the Control will raise AttributeError if an attempt is + made to change its value. In addition, the control will not be considered + 'successful' as defined by the W3C HTML 4 standard -- ie. it will + contribute no data to the return value of the HTMLForm.click* methods. To + enable a control, set the disabled attribute to a false value. 
+ + If the readonly attribute is true, the Control will raise AttributeError if + an attempt is made to change its value. To make a control writable, set + the readonly attribute to a false value. + + All controls have the disabled and readonly attributes, not only those that + may have the HTML attributes of the same names. + + On assignment to the value attribute, the following exceptions are raised: + TypeError, AttributeError (if the value attribute should not be assigned + to, because the control is disabled, for example) and ValueError. + + If the name or value attributes are None, or the value is an empty list, or + if the control is disabled, the control is not successful. + + Public attributes: + + type: string describing type of control (see the keys of the + HTMLForm.type2class dictionary for the allowable values) (readonly) + name: name of control (readonly) + value: current value of control (subclasses may allow a single value, a + sequence of values, or either) + disabled: disabled state + readonly: readonly state + id: value of id HTML attribute + + """ + def __init__(self, type, name, attrs, index=None): + """ + type: string describing type of control (see the keys of the + HTMLForm.type2class dictionary for the allowable values) + name: control name + attrs: HTML attributes of control's HTML element + + """ + raise NotImplementedError() + + def add_to_form(self, form): + self._form = form + form.controls.append(self) + + def fixup(self): + pass + + def is_of_kind(self, kind): + raise NotImplementedError() + + def clear(self): + raise NotImplementedError() + + def __getattr__(self, name): raise NotImplementedError() + def __setattr__(self, name, value): raise NotImplementedError() + + def pairs(self): + """Return list of (key, value) pairs suitable for passing to urlencode. + """ + return [(k, v) for (i, k, v) in self._totally_ordered_pairs()] + + def _totally_ordered_pairs(self): + """Return list of (key, value, index) tuples. 
+ + Like pairs, but allows preserving correct ordering even where several + controls are involved. + + """ + raise NotImplementedError() + + def _write_mime_data(self, mw, name, value): + """Write data for a subitem of this control to a MimeWriter.""" + # called by HTMLForm + mw2 = mw.nextpart() + mw2.addheader("Content-Disposition", + 'form-data; name="%s"' % name, 1) + f = mw2.startbody(prefix=0) + f.write(value) + + def __str__(self): + raise NotImplementedError() + + def get_labels(self): + """Return all labels (Label instances) for this control. + + If the control was surrounded by a