我在阅读《编程集体智慧》一书,关于搜索引擎的那一章时,遇到了以下代码片段,并在实现时遇到了错误。请帮助我解决这个问题。
import urllib2from BeautifulSoup import *from urlparse import urljoinclass crawler:def __init__(self,dbname): passdef __del__(self): passdef dbcommit(self): passdef getentryid(self,table,field,value,createnew=True): return Nonedef addtoindex(self,url,soup): print 'Indexing %s' % urldef gettextonly(self,soup): return Nonedef seperatewords(self,text): return Nonedef isindexed(self,url): return Falsedef addlinkref(self,urlFrom,urlTo,linkText): passdef crawl(self,pages,depth=2): for i in range(depth): newpages=set() for page in pages: try: c=urllib2.urlopen(page) except: print 'Could not open %s'%page continue soup=BeautifulSoup(c.read()) self.addtoindex(page,soup) links=soup('a') for link in links: if('href' in dict(link.attrs)): url=urljoin(page,link['href']) if url.find("'")!=-1: continue url=url.split('#')[0] if url[0:4]=='http' and not self.isindexed(url): newpages.add(url) linkText=self.gettextonly(link) self.addlinkref(page,url,linkText) self.dbcommit() pages=newpagesdef createindextables(self): pass
我得到了以下错误:
>>cwlr.crawl(pagelist)Indexing http://en.wikipedia.org/wiki/Artificial_neural_network---------------------------------------------------------------------------NameError Traceback (most recent call last)<ipython-input-50-97778b0c0db8> in <module>()----> 1 cwlr.crawl(pagelist)C:\Users\Blue\Anaconda\searchengine.py in crawl(self, pages, depth) 47 url=urljoin(page,link['href']) 48 if url.find("'")!=-1: continue---> 49 url=url.split('#')[0] 50 if url[0:4]=='http' and not self.isindexed(url): 51 newpages.add(url)NameError: global name 'linkTest' is not defined
回答:
NameError: global name 'linkTest' is not defined

错误原因:您把变量名 linkText 拼写错误为 linkTest。crawl 方法中对应的两行应为:

    linkText = self.gettextonly(link)
    self.addlinkref(page, url, linkText)   # 原代码此处误写为 linkTest

把 linkTest 改回 linkText 即可消除该 NameError。