from urllib.parse import urljoin

from Common.PSoup import *


def _absolutize_attr(psoup, html, url, tag, attr):
    """Re-parse *html*, resolve *attr* on every *tag* element against *url*.

    Elements that do not carry the attribute are left untouched. Returns
    whatever the parsed document's ``html()`` yields after the rewrite.
    """
    doc = psoup.getPSoup(html)
    for element in doc.find(tag).items():
        current = element.attr(attr)
        if current is not None:
            element.attr(attr, urljoin(url, current))
    return doc.html()


class HtmlCommon:
    """Helpers for post-processing an HTML string."""

    def handleHtmlString(self, htmlString, url, dic=None):
        """Inject extra ``<div>`` blocks and make link/image URLs absolute.

        Args:
            htmlString: The HTML markup to process.
            url: Base URL that ``href``/``src`` values are resolved against
                with ``urllib.parse.urljoin``.
            dic: Optional mapping of ``id -> content``. Each pair is appended
                to the body as ``<div id='KEY'>VALUE</div>``. Keys and values
                must be strings (anything else raises ``TypeError`` during
                concatenation). Defaults to no extra blocks.

        Returns:
            The processed markup as produced by the parsed document's
            ``html()`` method.
        """
        # A ``{}`` default would be one dict shared by every call.
        if dic is None:
            dic = {}

        # NOTE(review): PSoup is project-local; it presumably wraps a
        # PyQuery-like API (find/append/attr/items/html) - confirm.
        psoup = PSoup()
        docBody = psoup.getPSoup(htmlString)
        bodyElement = docBody.find("body")

        # <editor-fold desc="Fill in the data from the dictionary parameter">
        # NOTE(review): keys and values are inserted without HTML escaping;
        # callers must not pass untrusted text here.
        extra_markup = "".join(
            "<div id='" + key + "'>" + value + "</div>"
            for key, value in dic.items()
        )
        if bodyElement is None:
            # No <body> found: wrap the input in one and parse it again.
            htmlString = "<body>" + htmlString + "</body>"
            docBody = psoup.getPSoup(htmlString)
            bodyElement = docBody.find("body")
        bodyElement.append(extra_markup)
        htmlString = docBody.html()
        # </editor-fold>

        # <editor-fold desc="Replace the paths of A tags and Img tags">
        # Each pass re-parses the previous pass's serialized output, in the
        # same order as before, so the resulting markup is unchanged.
        htmlString = _absolutize_attr(psoup, htmlString, url, "a", "href")
        htmlString = _absolutize_attr(psoup, htmlString, url, "img", "src")
        # </editor-fold>
        return htmlString