Skip to content Skip to sidebar Skip to footer

Complete The Relative Paths To Absolute Using Python

I have rewritten the complete code to fetch the href and src links, this time using BeautifulSoup instead of regex, at the request of many SO users. Here is the code: import os from b…

Solution 1:

I found a simpler solution with re.sub because it accepts a function as the replace-with parameter.

import re
from urllib.parse import urljoin

# Absolute URL of the page the markup came from; relative links are resolved
# against it.
abs_url = "https://sample.com/sample-page.html"
my_html = """
    <div class="sample-class">
        <a href="../new-page.html">New page</a>
        <img src="../sample-image.jpg" alt="">
        <img src="../sample-image2.jpg" alt="">
    </div>"""


def absolutize(m):
    """Rebuild a matched ` src="..."` attribute with its URL made absolute.

    m is a match object whose group 1 is the attribute value; the value is
    joined onto abs_url with urljoin.
    """
    return ' src="' + urljoin(abs_url, m.group(1)) + '"'


def absolutize2(m):
    """Rebuild a matched ` href="..."` attribute with its URL made absolute.

    m is a match object whose group 1 is the attribute value; the value is
    joined onto abs_url with urljoin.
    """
    return ' href="' + urljoin(abs_url, m.group(1)) + '"'


# re.sub accepts a function as the replacement, so each match is rewritten
# by the callbacks above.
# "src"
my_html = re.sub(r' src="([^"]+)"', absolutize, my_html)
# "href"
my_html = re.sub(r' href="([^"]+)"', absolutize2, my_html)

# my_html is now:
#
#     <div class="sample-class">
#         <a href="https://sample.com/new-page.html">New page</a>
#         <img src="https://sample.com/sample-image.jpg" alt="">
#         <img src="https://sample.com/sample-image2.jpg" alt="">
#     </div>

I haven't tested it with multi-level relative URLs (e.g. src="../../hello.jpg"), but it should work just as well.

Solution 2:

This is the final code, and everything is working perfectly thanks to akash karothiya's solution.

This code converts all kinds of relative links into absolute links in any given HTML code.

import os
import posixpath
import re
from urllib.parse import parse_qs, unquote, urljoin, urlparse

from bs4 import BeautifulSoup

# The proxy URL carries the real page address, percent-encoded, in its
# "link" query parameter.
_proxy_url = "http://webpy_server/?link=http%3A//www.example.com/dynamic/search.aspx%3Fsearchtype%3Dcat%26class_id%3D4520%26page%3D1"

# Fully unquoted form of the proxy URL.
unquoteURL = unquote(_proxy_url)

# Parse the query *before* decoding it, so the "&" characters that belong to
# the inner URL are not mistaken for separators of the outer query string.
# parse_qs percent-decodes the value; a missing "link" parameter raises
# KeyError.
path = urlparse(parse_qs(urlparse(_proxy_url).query)["link"][0])

# Directory part of the page's URL path. URL paths always use "/", so
# posixpath is used instead of the platform-dependent os.path.
lpath = posixpath.dirname(path.path)

html = u"\n<!DOCTYPE html class=\"\"><head id=\"pageHead\"><title>\n    Yarn Manufacturers &amp; Suppliers | Listings @ Phonebook Online\n</title>\n    <!--\n    <meta http-equiv=\"Cache-Control\" content=\"no-cache, no-store, must-revalidate\" /><meta http-equiv=\"Pragma\" content=\"no-cache\" /><meta http-equiv=\"Expires\" content=\"0\" />\n    -->\n    <meta name=\"viewport\" content=\"width=device-width, initial-scale=1\"><link rel=\"stylesheet\" href=\"../css_responsive/category.css\" type=\"text/css\" media=\"screen\">\n    <script async=\"\" src=\"//www.google-analytics.com/analytics.js\"></script><script async=\"\" src=\"//www.google.com/adsense/search/async-ads.js\"></script><script type=\"text/javascript\" src=\"../styles/scripts/jquery-1.9.1.min.js\"></script>\n    <link rel=\"shortcut icon\" type=\"image/png\" href=\"/PhoneBook.ico\">\n    <!-- #Begin Css Plugin -->\n    <link rel=\"stylesheet\" href=\"../css_responsive/fontsss.css\"><link rel=\"stylesheet\" href=\"../css_responsive/bootstrap-3.3.4-dist/css/bootstrap.css\" type=\"text/css\" media=\"screen\"><link rel=\"stylesheet\" href=\"../styles/scripts/fancybox/jquery.fancybox.css\" type=\"text/css\" media=\"screen\"><link rel=\"stylesheet\" href=\"../css_responsive/icon-detail.css\" type=\"text/css\" media=\"screen\">\n    <!-- #Finish Css Plugin-->\n    <!--<script src=\"http://www.google.com/adsense/search/ads.js\" type=\"text/javascript\"></script> -->\n    <script type=\"text/javascript\" charset=\"utf-8\">\n            (function (G, o, O, g, L, e) {\n                G[g] = G[g] || function () {\n                    (G[g]['q'] = G[g]['q'] || []).push(\n       arguments)\n                }, G[g]['t'] = 1 * new Date; L = o.createElement(O), e = o.getElementsByTagName(\n       O)[0]; L.async = 1; L.src = '//www.google.com/adsense/search/async-ads.js';\n                e.parentNode.insertBefore(L, e)\n            })(window, document, 'script', '_googCsa');\n    </script>\n    <!-- Script For Mobile 
Base Banner-->\n        <script async=\"\" src=\"//pagead2.googlesyndication.com/pagead/js/adsbygoogle.js\"></script>\n        <script>\n            (adsbygoogle = window.adsbygoogle || []).push({\n                google_ad_client: \"ca-pub-6517686434458516\",\n                enable_page_level_ads: true\n            });\n        </script>\n    <!-- Script For Mobile Base Banner END-->\n\n\n    <script type=\"text/javascript\">\n        function AddClass(Class, Element, HasPriority) {\n            if (HasPriority == 0) {\n                this.className = 'container ' + Class;\n            }\n        }\n    </script>\n\n<meta name=\"description\" content=\"Online Directory of Yarn Manufacturers &amp; Suppliers in Pakistan, providing list of names, contact numbers, addresses and reviews.\"><meta name=\"keywords\" content=\"Yarn Manufacturers &amp; Suppliers\"><style type=\"text/css\">.fancybox-margin{margin-right:17px;}</style></head>\n<body style=\"text-shadow: rgba(255, 255, 255, 0.4) 0px 1px 1px; background-color: rgb(240, 240, 240);\">\n    <!--Top Nav Bar Start -->\n\n<div class=\"wapper bg-h\">\n    <div class=\"container-fluid\">\n        <div class=\"col-xs-12 col-md-12\">\n            <div class=\"col-xs-12 col-md-12\">\n                <div class=\"ele-block text-right ele-color-white ele-pad-t-5 m-text-center cMobileTextCenter cfont-12\" style=\"padding-top:5px;\">\n                    <a class=\"\" href=\"../dynamic/free-basic-listing.aspx\"> Free basic listing</a> \n                    | \n                    <a class=\"\" href=\"/advertisement-center/\"> Advertise with us</a>\n                </div>\n            </div>\n        </div>\n    </div>\n    <div class=\"header\">\n        <div class=\"logo\">\n            <div class=\"cMobileHidden left cPad-b-t-25\">\n                <img alt=\"Slider\" height=\"26\" class=\"left\" src=\"../../images/list-icon-slvr.png\" onclick=\"DefaultSliderMenu()\" style=\"cursor:pointer;\">\n            </div>\n         
   <div class=\"cDesktopHidden cMobileShow\">\n                <img alt=\"Slider\" height=\"26\" class=\"ele-float-left\" src=\"../../images/list-icon-slvr.png\" onclick=\"SlideMenu()\" style=\"cursor:pointer;vertical-align: baseline !important; \">\n            </div>\n            <!--<span class=\"home-slide-icon icon-list2 cPad-b-t-10 cDesktopHidden\" onclick=\"SlideMenu()\"></span>-->\n            <a class=\"left ele-margin-t-b-15 cMobileFloatNone\" style=\"text-decoration:none !important\" href=\"../../\">\n                <img alt=\"Phonebook\" class=\"\" width=\"205\" src=\"../../images/final-logo2s.png\">\n            </a>\n            <div class=\"cDesktopHidden cMobileShow\">\n                <img alt=\"Slider\" width=\"38\" height=\"26\" class=\"ele-float-left\" src=\"/images/magnify-glass-2.png\" onclick=\"enableMobileSearchOption() \" style=\"cursor:pointer;vertical-align: baseline !important; \">\n            </div>\n            <!--<a href=\"../../default.aspx\"><img height=\"60\" alt=\"Phonebook\" src=\"../images/Phonebook-Online-Logo-Big-new2.png\" /></a>-->\n            <!--<h2 class=\"mColorWhite\">Your Online Search Engine</h2>-->\n        </div>\n        <div id=\"cHeader_sky_banner\" class=\"sky_banner\"><embed src=\"http://www.phonebook.com.pk/images/advertisement/swf/79042_8_160614_61864_1.swf\" pluginspage=\"http://www.adobe.com/shockwave/download/download.cgi?P1_Prod_Version=ShockwaveFlash\" width=\"700\" height=\"90\" quality=\"high\" value=\"autostart=true\" wmode=\"transparent\"></div>\n    </div>\n</div>\n<div class=\"wapper bg-h bg-fixed flow-visible m-on-mob-hide\" style=\"top: 0px;\">\n    <div class=\"header\">\n        <form method=\"POST\" action=\"../redirect.aspx?searchtype=kl\">\n            <input class=\"icon-search\" type=\"text\" name=\"keyword\" placeholder=\"What ? (Name or Keyword)\" autocomplete=\"off\" required=\"\">\n            <input class=\"icon-loc\" type=\"text\" name=\"location\" placeholder=\"Where ? 
(City or Area)\" autocomplete=\"off\">\n            <input class=\"submit\" type=\"submit\" value=\"Find\">\n        </form>\n    </div>\n    <i class=\"after icon-circle-up\"></i>\n</div>\n    <!--Top Nav Bar End -->\n    <div class=\"wapper\">\n        <div class=\"pagecontent search_width c-no-t-margin\">\n            <div class=\"cblock ele-margin-t-b-15 m-on-mob-hide\"><a href=\"../../default.aspx\">Home</a> &gt; <a href=\"../../dynamic/categories.aspx\">Search by category</a> &gt; <a href=\"../../dynamic/categories.aspx?class_id=19\">Industrial supplies &amp; services</a> &gt; <a href=\"../../dynamic/categories.aspx?class_id=234\">Textiles</a> &gt; Yarn Wholesale &amp; Manufacturers in Pakistan</div>\n\n\n\n            <div id=\"cResultMainControl\">\n                <div class=\"result_hldr\" id=\"cResultContainer\">\n\n\n                    <div class=\"cMobileHidden col-md-12 col-xs-12 text-center overflow-visible cheight-25 margin-t\" style=\"background-color: rgb(240, 240, 240);\">\n                        <script async=\"\" src=\"//pagead2.googlesyndication.com/pagead/js/adsbygoogle.js\"></script>\n                        <!-- New Line Link Ad -->\n                        <ins class=\"adsbygoogle\" style=\"display:inline-block;width:468px;height:15px;background-color: rgb(240, 240, 240);\" data-ad-client=\"ca-pub-6517686434458516\" data-ad-slot=\"4522680219\"></ins>\n                        <script>\n                            (adsbygoogle = window.adsbygoogle || []).push({});\n                        </script>\n                    </div>\n                    <div id=\"cAlpNav\" class=\"margin-t-10 cAlpNav m-on-mob-hide\">\n                    <div class=\"text-center\"><a href=\"search.aspx?searchtype=cat&amp;class_id=4520\">all</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=a\">a</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=b\">b</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=c\">c</a><a 
href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=d\">d</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=e\">e</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=f\">f</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=g\">g</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=h\">h</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=i\">i</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=j\">j</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=k\">k</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=l\">l</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=m\">m</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=n\">n</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=o\">o</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=p\">p</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=q\">q</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=r\">r</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=s\">s</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=t\">t</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=u\">u</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=v\">v</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=w\">w</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=x\">x</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=y\">y</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=z\">z</a></div></div>\n                    <div>\n                        <div id=\"cListingHldr\" class=\"listing\">\n\n<div class=\"container\">\n    <div class=\"comp_info\">\n        <h2><a href=\"../../company/77683-A-J-Apparels-Pvt-Ltd\">A &amp; J Apparels (Pvt) Ltd.</a></h2>\n        <!--<img class=\"margin-t\" alt=\"Comapny Rating\" 
src=\"../../images/Stars>.png\" />-->\n        <i class=\"cfont-12 cnoPad left icon-zero-star\"></i>\n\n            <span class=\"blue margin-t\">(No Review)</span>\n\n                <span class=\"cfontBold margin-t cColor-Black cColor-SilverDark\">\n                LA/6-A Block  22, F. B Area, Karachi\n            </span>\n\n        <div class=\"inline-block  cMobile-Right\">\n            <ul class=\"margin-t cMobile-Text-Align-Right\">\n                <li>\n                    <a data-fancybox-type=\"iframe\" href=\"../../dynamic/emailtocustomer.aspx?Request_ID=8127&amp;comp_name=A-J-Apparels-Pvt-Ltd&amp;isAdvertizer=0\" class=\"other_links fancybox\">Email</a>\n                </li>\n                 <li>\n                    <a title=\"Call Now\" href=\"tel:+92-21-36342521\" class=\"c_circle cMobileShow\"></a>\n                </li>\n                <li>\n                    <a class=\"other_links\" href=\"../../company/77683-A-J-Apparels-Pvt-Ltd\" title=\"Company Detail\">Detail</a>\n                </li>\n\n             </ul>\n        </div>\n    </div>\n    <div class=\"comp_info contact_info\">\n        <strong><a class=\"tel\" href=\"tel:+92-21-36342521\">+92-21-36342521</a></strong>\n\n    </div>\n</div>\n\n\n\n\n\n\n\n\n</div>\n                        <div id=\"cRecoredInfo\" class=\"listing dotted\">Displaying listings from 1 to 10 of 161</div>\n                        <div class=\"text-center m-pad-l-r-10\">\n                            <div id=\"related-suggestions\" class=\"listing inline-block text-center cPad-b-t-10\"><span class=\"left cfont-14\"><b>Related Searches:</b></span> <div class=\"newsssss left inline\" style=\"font-style: italic;font-weight:bold;\"><a href=\"search.aspx?searchtype=cat&amp;class_id=1030\" class=\"left ele-pad-r-l-20 text-underline cfont-14\">Importers</a></div><div class=\"newsssss left inline\" style=\"font-style: italic;font-weight:bold;\"><a href=\"search.aspx?searchtype=cat&amp;class_id=4499\" class=\"left 
ele-pad-r-l-20 text-underline cfont-14\">Textiles Wholesale &amp; Manufacturers</a></div><div class=\"newsssss left inline\" style=\"font-style: italic;font-weight:bold;\"><a href=\"search.aspx?searchtype=cat&amp;class_id=1029\" class=\"left ele-pad-r-l-20 text-underline cfont-14\">Exporters</a></div>\n                                <div class=\"text-left ele-margin-t-b-15 left inline\"><b>Need help with your search?</b> Browse by:<a class=\"text-left ele-pad-r-l-20 text-underline\" onclick=\"hide_show('#related-locations',this);$('#related-categories').addClass('hide');\" href=\"javascript:void(0)\">other locations <img alt=\"\" class=\"margin-l\" width=\"18\" src=\"../../images/plus.png\"></a><a class=\"text-left ele-pad-r-l-20 text-underline\" onclick=\"hide_show('#related-categories',this);$('#related-locations').addClass('hide');\" href=\"javascript:void(0)\">similar categories <img alt=\"\" class=\"margin-l\" width=\"18\" src=\"../../images/plus.png\"></a></div><ul id=\"related-locations\" class=\"col-xs-12 col-sm-12 sugesstion-box hide\">\n                                <li class=\"left cblock margin-l col-xs-3 col-sm-2\"><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;city_id=1\" class=\"left\">Karachi</a></li><li class=\"left cblock margin-l col-xs-3 col-sm-2\"><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;city_id=2\" class=\"left\">Lahore</a></li><li class=\"left cblock margin-l col-xs-3 col-sm-2\"><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;city_id=49\" class=\"left\">Faisalabad</a></li><li class=\"left cblock margin-l col-xs-3 col-sm-2\"><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;city_id=77\" class=\"left\">Multan</a></li><li class=\"left cblock margin-l col-xs-3 col-sm-2\"><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;city_id=81\" class=\"left\">Gujranwala</a></li><li class=\"left cblock margin-l col-xs-3 col-sm-2\"><a 
href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;city_id=15\" class=\"left\">Hub</a></li><li class=\"left cblock margin-l col-xs-3 col-sm-2\"><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;city_id=79\" class=\"left\">Rawalpindi</a></li><li class=\"left cblock margin-l col-xs-3 col-sm-2\"><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;city_id=76\" class=\"left\">Hyderabad</a></li><li class=\"left cblock margin-l col-xs-3 col-sm-2\"><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;city_id=62\" class=\"left\">Muzaffar Garh</a></li><li class=\"left cblock margin-l col-xs-3 col-sm-2\"><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;city_id=60\" class=\"left\">Layyah</a></li></ul>\n                                <ul id=\"related-categories\" class=\"col-xs-12 col-sm-12 sugesstion-box hide\">\n                                <li class=\"left cblock margin-l col-xs-4 col-sm-4 text-left\"><a href=\"search.aspx?searchtype=cat&amp;class_id=4470\" class=\"left\">Knitted Fabrics</a></li><li class=\"left cblock margin-l col-xs-4 col-sm-4 text-left\"><a href=\"search.aspx?searchtype=cat&amp;class_id=4489\" class=\"left\">Synthetic &amp; Blended Fabrics Wholesale &amp; Manufacturers</a></li><li class=\"left cblock margin-l col-xs-4 col-sm-4 text-left\"><a href=\"search.aspx?searchtype=cat&amp;class_id=2391\" class=\"left\">Aprons Wholesale &amp; Manufacturers</a></li><li class=\"left cblock margin-l col-xs-4 col-sm-4 text-left\"><a href=\"search.aspx?searchtype=cat&amp;class_id=2109\" class=\"left\">Linens Wholesale &amp; Manufacturers</a></li></ul>\n                            </div>\n                        </div>\n                        <div class=\"text-center\">\n                            <div id=\"cPagination\" class=\"listing\">\n                                <img class=\"left\" alt=\"\" src=\"../../images/page-1.png\">\n                            <a id=\"ctl39_cPageUrl\" class=\"pagi_anchor\">\n    <span 
id=\"ctl39_cAlp\">B</span>\n    <span id=\"ctl39_cPageNo\"></span>\n </a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;page=1\" id=\"ctl40_cPageUrl\" class=\"pagi_anchor\">\n    <span id=\"ctl40_cAlp\" style=\"color:red !important;;\">O</span>\n    <span id=\"ctl40_cPageNo\" style=\"color:red !important;;\">1</span>\n </a></div>\n                        </div>\n\n                    </div>\n                </div>\n            </div>\n\n\n            <div class=\"srch_banner\"> \n\n\n\n\n\n\n            </div>\n        </div>\n    </div><div style=\"height: 0px; visibility: hidden; font-weight: normal; text-align: center;\"><iframe frameborder=\"0\" marginwidth=\"0\" marginheight=\"0\" allowtransparency=\"true\" scrolling=\"no\" width=\"100%\" name=\"{&quot;name&quot;:&quot;master-2&quot;,&quot;slave-0-2&quot;:{&quot;container&quot;:&quot;adNewNTRSearchPagecontainer2&quot;,&quot;linkTarget&quot;:&quot;_top&quot;,&quot;lines&quot;:3,&quot;colorBackground&quot;:&quot;#e0e0e0&quot;,&quot;colorBorder&quot;:&quot;#0b0b0b&quot;,&quot;fontFamily&quot;:&quot;verdana&quot;,&quot;adIconLocation&quot;:&quot;ad-left&quot;,&quot;width&quot;:&quot;300px&quot;,&quot;type&quot;:&quot;ads&quot;,&quot;hl&quot;:&quot;en&quot;,&quot;columns&quot;:1,&quot;horizontalAlignment&quot;:&quot;left&quot;,&quot;resultsPageQueryParam&quot;:&quot;query&quot;},&quot;master-2&quot;:{&quot;linkTarget&quot;:&quot;_top&quot;,&quot;lines&quot;:3,&quot;colorBackground&quot;:&quot;#e0e0e0&quot;,&quot;colorBorder&quot;:&quot;#0b0b0b&quot;,&quot;fontFamily&quot;:&quot;verdana&quot;,&quot;adIconLocation&quot;:&quot;ad-left&quot;,&quot;width&quot;:&quot;300px&quot;,&quot;type&quot;:&quot;ads&quot;,&quot;hl&quot;:&quot;en&quot;,&quot;columns&quot;:1,&quot;horizontalAlignment&quot;:&quot;left&quot;,&quot;resultsPageQueryParam&quot;:&quot;query&quot;}}\" id=\"master-2\" 
src=\"https://www.google.com/afs/ads?q=Yarn%20Wholesale%20%26%20Manufacturers&amp;adpage=1&amp;r=m&amp;fexp=21404%2C7000107&amp;client=pub-6517686434458516&amp;channel=3589710218&amp;hl=en&amp;type=0&amp;oe=UTF-8&amp;ie=UTF-8&amp;jsei=3&amp;format=n2&amp;ad=n2&amp;nocache=3631469793737437&amp;num=0&amp;output=uds_ads_only&amp;v=3&amp;allwcallad=1&amp;preload=true&amp;adext=as1%2Csr1%2Cctc1&amp;bsl=10&amp;u_his=3&amp;u_tz=300&amp;dt=1469793737439&amp;u_w=1366&amp;u_h=768&amp;biw=1349&amp;bih=599&amp;psw=1349&amp;psh=1589&amp;frm=0&amp;uio=uv3vp1sl1sr1cc1-wi300ff1&amp;jsv=12350&amp;rurl=http%3A%2F%2Fwww.phonebook.com.pk%2Fdynamic%2Fsearch.aspx%3Fsearchtype%3Dcat%26class_id%3D4520#master-2\" style=\"visibility: hidden; height: 0px;\"></iframe></div>\n\n<div class=\"container-fluid bg-silver m-on-mob-hide\">\n    <div class=\"row cPad-b-t-10\" style=\"border-bottom:1px solid #ECECEC;\">\n            <!--\n            <div class=\"col-md-12 col-lg-12 col-xs-12\">\n            <img height=\"40\" alt=\"\" src=\"../images/Phonebook-Online-Logo-Big-new.png\" />\n            </div>\n            -->\n    </div>\n</div>\n<div class=\"wapper pad-top-10 footerBg bg-white m-pad-zero\">\n    <div class=\"width footer m-on-mob-hide cMobileHiddenblock \">\n        <ul class=\"list-unstyled col-sm-4 m-on-mob-hide cMobileHidden\">\n            <li class=\"\"><strong style=\"color:#37aef0;\">Popular Keywords :</strong></li>\n            <li>\n                <ul class=\"list-unstyled\">\n                    <li><a href=\"../../dynamic/search.aspx?searchtype=kl&amp;k=restaurants&amp;l=pakistan\">Restaurants</a>,</li>\n                    <li><a href=\"../../dynamic/search.aspx?searchtype=kl&amp;k=pizza&amp;l=pakistan\">Pizza</a>,</li>\n                    <li><a href=\"../../dynamic/search.aspx?searchtype=kl&amp;k=hajj+%26+umrah&amp;l=pakistan\">Hajj &amp; Umrah</a>,</li>\n\n\n\n\n\n\n\n\n\n                </ul>\n            </li>\n            <li class=\"margin-t\"><strong 
style=\"color:#37aef0;\">Popular Cities :</strong></li>\n            <li>\n                <ul class=\"list-unstyled\">\n                    <li><a href=\"../../dynamic/city_categories.aspx?city_id=1\">Karachi</a>,</li>\n                    <li><a href=\"../../dynamic/city_categories.aspx?city_id=2\">Lahore</a>,</li>\n\n\n\n\n\n\n\n\n\n                    <li><a href=\"../../dynamic/city_categories.aspx?city_id=75\">Sukkur</a></li>\n                </ul>\n            </li>\n        </ul>\n        <ul class=\"col-xs-6 col-sm-2 styled\">\n            <li class=\"\"><strong style=\"color:#37aef0;\">ADVERTISE :</strong></li>\n            <li><a href=\"/advertisement-center/\">Advertise with us</a></li>\n            <li><a href=\"../../dynamic/free-basic-listing.aspx\">Get a Free Listings</a></li>\n\n        </ul>\n        <ul class=\"col-xs-6 col-sm-2 styled\">\n            <li class=\"\"><strong style=\"color:#37aef0;\">QUICK LINKS :</strong></li>\n            <li><a href=\"../../dynamic/categories.aspx\">Search by Category</a>,</li>\n\n\n            <li><a href=\"javascript:void(0)\">Browse by Video</a></li>\n        </ul>\n        <ul class=\"col-xs-6 col-sm-2 styled\">\n            <li class=\"\"><strong style=\"color:#37aef0;\">ABOUT US:</strong></li>\n            <li><a href=\"../../static/contact-us.aspx\">Contact Us</a></li>\n            <li><a href=\"javscript:void(0)\">Report an Error</a></li>\n\n\n\n\n        </ul>\n        <ul class=\"col-xs-6 col-sm-2 styled\">\n            <li class=\"\"><strong style=\"color:#37aef0;\">PARTNERS:</strong></li>\n            <li><a href=\"http://jang.com.pk/\">Jang Group of Newspapers</a></li>\n\n\n            <li><a href=\"http://www.ptcl.com.pk/\">PTCL - White Page Telephone Directory Data</a></li>\n        </ul>\n    </div>\n    <div class=\"col-xs-12 m-footer-wapper m-hidden-on-desktop\">\n        <div class=\"col-xs-3\">\n            <a title=\"Home\" href=\"/\"><img class=\"col-xs-12 cNoPad ele-pad-zero\" alt=\"Home\" 
src=\"../images/footer-icon-home.png\"></a>\n        </div>\n        <div class=\"col-xs-3\">\n            <a title=\"Free Basic Listing\" href=\"/dynamic/free-basic-listing.aspx\"><img class=\"col-xs-12 cNoPad ele-pad-zero\" alt=\"Home\" src=\"../images/footer-icon-free-listing.png\"></a>\n        </div>\n        <div class=\"col-xs-3\">\n            <a title=\"Contact Us\" href=\"/static/contact-us.aspx\"><img class=\"col-xs-12 cNoPad ele-pad-zero\" alt=\"Home\" src=\"../images/footer-icon-contact.png\"></a>\n        </div>\n        <div class=\"col-xs-3\">\n            <a title=\"Free Basic Listing\" href=\"/advertisement-center\"><img class=\"col-xs-12 cNoPad ele-pad-zero\" alt=\"Home\" src=\"../images/footer-icon-advertisewithus.png\"></a>\n        </div>\n    </div>\n</div>\n\n\n<script>\n    (function (i, s, o, g, r, a, m) {\n        i['GoogleAnalyticsObject'] = r; i[r] = i[r] || function () {\n            (i[r].q = i[r].q || []).push(arguments)\n        }, i[r].l = 1 * new Date(); a = s.createElement(o),\n  m = s.getElementsByTagName(o)[0]; a.async = 1; a.src = g; m.parentNode.insertBefore(a, m)\n    })(window, document, 'script', '//www.google-analytics.com/analytics.js', 'ga');\n\n    ga('create', 'UA-2028280-1', 'auto');\n    ga('send', 'pageview');\n</script>\n\n    <div class=\"modal\" id=\"cSlideMenu\" onclick=\"SlideMenu2()\">\n    </div>\n\n\n<div class=\"slideMenu cfont-12 ie-ele-none\" id=\"defaultSliderMenu\" style=\"max-height: 599px; overflow: auto;\">\n    <ul>\n        <!--\n        <li class=\"ele-pad-t-b-30\"></li>\n        -->\n        <li>\n            <a class=\"icon-circle-down\" href=\"javascript:void(0)\" onclick=\"showSubMenu(this,'.menuSearchType')\">Business search </a>\n            <ul class=\"hide menuSearchType\">\n                <li><a href=\"../../dynamic/categories.aspx\">Search by category</a></li>\n                <li><a href=\"../../dynamic/city_select.aspx\">Search by city</a></li>\n                <li><a 
href=\"../../searchbyphone.aspx\">Search by phone</a></li>\n                <li><a href=\"../../searchbyaddress.aspx\">Search by address</a></li>\n                <li><a href=\"../../searchbybrand.aspx\">Search by brand</a></li>\n            </ul>\n        </li>\n        <li>\n            <a class=\"icon-circle-down\" href=\"javascript:void(0)\" onclick=\"showSubMenu(this,'.menuSearchFap')\">People search</a>\n            <ul class=\"hide menuSearchFap\">\n                <li><a href=\"../../findaperson/findaperson.aspx?type=name\">Search by name</a></li>\n                <li><a href=\"../../findaperson/findaperson.aspx?type=number\">Search by number</a></li>\n            </ul>\n        </li>\n        <li>\n            <a class=\"icon-circle-down\" href=\"javascript:void(0)\" onclick=\"showSubMenu(this,'.menuGuides')\">Specialized Guides</a>\n            <ul class=\"hide menuGuides\">\n                <li><a href=\"../../dynamic/search.aspx?searchtype=cat&amp;class_id=4710\">Development Sector &amp; NGOs</a></li>\n                <li><a href=\"../../dynamic/search.aspx?searchtype=cat&amp;class_id=863\">Associations &amp; Trade Bodies</a></li>\n                <li><a href=\"../../dynamic/search.aspx?searchtype=cat&amp;class_id=864\">Chambers of Commerce</a></li>\n                <li><a href=\"../../dynamic/search.aspx?SearchType=cat&amp;class_id=1514\">Embassies &amp; Foreign Missions</a></li>\n                <li><a href=\"../../dynamic/categories.aspx?class_Id=65\">Import &amp; Export</a></li>\n                <li><a href=\"../../dynamic/search.aspx?SearchType=cat&amp;class_id=1517\">Federal Government</a></li>\n                <li><a href=\"../../dynamic/categories.aspx?class_id=4638\">Emergency &amp; Complain</a></li>\n                <li><a href=\"../../static/nwdcode.aspx\">NWD Codes</a></li>\n            </ul>\n        </li>\n        <li><a href=\"/advertisement-center/\">Advertise with us</a></li>\n        <li><a href=\"javascript:void(0)\">Help</a></li>\n   
 </ul>\n</div>\n<div class=\"modal in\" id=\"cSlideMenu\" onclick=\"SlideMenu2()\" aria-hidden=\"false\" style=\"display:none; padding-right: 17px;\">\n</div>\n\n\n\n<script type=\"text/javascript\" src=\"../css_responsive/script/global_functions.js\"></script>\n<script type=\"text/javascript\" src=\"../styles/scripts/fancybox/jquery.fancybox.js?v=2.1.5\"></script>\n<script type=\"text/javascript\" src=\"../css_responsive/bootstrap-3.3.4-dist/js/bootstrap.js\"></script>\n\n\n</body></html>"

# Matches a double-quoted href/src/action attribute. The lookbehind keeps
# attributes that merely end in one of these names (e.g. data-src) from
# matching. Group 1 is the attribute name as written, group 2 its value.
_URL_ATTR_RE = re.compile(r'(?<![\w-])(href|src|action)="([^"]+)"', re.IGNORECASE)


def _absolutize(match):
    """Return the matched attribute with its URL resolved against the page URL.

    urljoin handles every relative form ("x", "/x", "../x", "../../x") and
    returns values that already carry a scheme (http:, https:, javascript:,
    tel:, ...) unchanged.
    """
    attr, value = match.group(1), match.group(2)
    # Protocol-relative URLs ("//host/...") already name their host; leave
    # them as written instead of forcing the page's scheme onto them.
    if value.startswith("//"):
        return match.group(0)
    return attr + '="' + urljoin(path.geturl(), value) + '"'


# One pass over the raw markup rewrites every href, src and action value;
# the rest of the text is left byte-for-byte as it was.
html = _URL_ATTR_RE.sub(_absolutize, html)

print(html)

Post a Comment for "Complete The Relative Paths To Absolute Using Python"