# Question from the original post (translated): Can any expert help me? Why
# does fetching a web page always show None? Also, when I fetch a page with
# the urllib library, IDLE never displays anything. I don't know what is
# going on.


def get_page(url):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The page content as a ``str``. On Python 3 the raw bytes are decoded
        as UTF-8 with undecodable bytes replaced. An empty string is returned
        when the page cannot be fetched.

    Any failure is reported on stderr before returning ``''``, so an
    unreachable page is not mistaken for a page that is simply empty.
    """
    import sys

    # urlopen lives in different modules on Python 2 and Python 3.
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    try:
        response = urlopen(url)
        try:
            data = response.read()
        finally:
            # Always release the connection, even if read() fails.
            response.close()
    except Exception as err:
        # Best-effort fetch: keep returning '' as before, but say why.
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate.
        sys.stderr.write("get_page: could not fetch %r: %s\n" % (url, err))
        return ''

    # Python 3 returns bytes; callers search the page with str patterns.
    if not isinstance(data, str):
        data = data.decode('utf-8', 'replace')
    return data


print(get_page("http://www.baidu.com"))

def get_next_target(page):
    """Find the first double-quoted ``<a href=`` target in *page*.

    Args:
        page: Text to search.

    Returns:
        A ``(url, end_quote)`` tuple, where ``url`` is the text between the
        first pair of double quotes at or after the first ``<a href=`` and
        ``end_quote`` is the index of the closing quote in *page*.
        ``(None, 0)`` is returned when there is no ``<a href=`` or when the
        opening or closing quote is missing.
    """
    no_target = (None, 0)

    start_link = page.find('<a href=')
    if start_link == -1:
        return no_target

    start_quote = page.find('"', start_link)
    if start_quote == -1:
        # Without this guard the -1 would be used as a real index and the
        # slice below would return unrelated text as the URL.
        return no_target

    end_quote = page.find('"', start_quote + 1)
    if end_quote == -1:
        # Unterminated attribute value: there is no complete URL to return.
        return no_target

    url = page[start_quote + 1:end_quote]
    return url, end_quote

# Demo: extract the first link from a sample snippet, first printing the
# returned pair as-is, then unpacking it and printing both parts.
_SAMPLE_PAGE = 'this is a <a href="http://udacity.com">link!</a>'

print(get_next_target(_SAMPLE_PAGE))

url, endpos = get_next_target(_SAMPLE_PAGE)

print("%s %s" % (url, endpos))

def print_all_links(page):
    """Print every non-empty link target found in *page*, one per line.

    Calls ``get_next_target`` repeatedly, each time continuing from the
    position it reported. Nothing is returned; the links are only printed.

    Args:
        page: Text to scan for links.
    """
    while True:
        url, endpos = get_next_target(page)
        # Stop when no link is left. Also stop if the reported position does
        # not move forward, so the loop is guaranteed to terminate.
        if url is None or endpos <= 0:
            break
        # An empty href="" is skipped but must not end the scan, otherwise
        # every link after it would be missed.
        if url:
            print(url)
        page = page[endpos:]

# Demo: the third anchor is written as "<a #href=", which does not contain
# the searched text '<a href=', so only test1 and test2 are printed.
print_all_links('this <a href="test1">link 1</a> is <a href="test2">link 2</a> is <a #href="test3">link 3</a>')

# print_all_links() prints the links itself and has no return statement, so
# wrapping the call in print would emit a stray "None" after the links.
print_all_links(get_page('https://www.nvovn.com/'))