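'''
Small crawl demo: starting from a Baidu search-results URL, collect the URLs
that getPageUrl returns for that page (level 2) and for each of those pages
(level 3), then print every collected URL together with its level.
'''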
from SpiderUtil import *
# Every URL seen so far, in discovery order.
ulist = []
# Maps each seen URL to the crawl level (1 = start page) at which it was first found.
depthDict = {}


def recordNewUrls(urls, depth):
    """Register every not-yet-seen URL in ``urls`` at the given ``depth``.

    Updates the module-level ``depthDict`` and ``ulist`` in place. URLs that
    are already registered keep the depth at which they were first seen.

    Args:
        urls: iterable of URLs; ``None`` is treated as an empty iterable.
        depth: crawl level to store for the newly registered URLs.

    Returns:
        The URLs that were newly registered by this call, in input order.
    """
    fresh = []
    for link in urls or ():
        if link in depthDict:
            continue
        depthDict[link] = depth
        ulist.append(link)
        fresh.append(link)
    return fresh


if __name__ == "__main__":
    startUrl = "http://www.baidu.com/s?wd=%E5%B2%9B%E5%9B%BD%20%E9%82%AE%E7%AE%B1"
    recordNewUrls([startUrl], 1)

    # Level 2: URLs reported for the start page.
    html = getHtml(startUrl)
    secondLevel = recordNewUrls(getPageUrl(startUrl, html), 2)

    # Level 3: visit each level-2 URL once. Only newly registered URLs are
    # crawled, so duplicates and the start URL itself are not fetched again.
    for parentUrl in secondLevel:
        # NOTE(review): the start page is passed to getPageUrl together with
        # its HTML, but here only the URL is given -- confirm that getPageUrl
        # obtains the page itself when the second argument is omitted.
        recordNewUrls(getPageUrl(parentUrl), 3)

    # Report in discovery order; the tab prefix grows with the level.
    # The message text reads "crawled level-%d page: %s".
    for url in ulist:
        level = depthDict[url]
        print("\t\t\t" * level, "爬取%d级页面:%s" % (level, url))
    print("main over")