"""Print the text of the <title> element of an HTML file.

Usage: python SCRIPT FILE
"""
import sys

try:  # Python 3 location of the parser
    from html.parser import HTMLParser
except ImportError:  # Python 2 fallback (the module the script originally used)
    from HTMLParser import HTMLParser


class TitleParser(HTMLParser):
    """HTML parser that accumulates the text found between <title> tags.

    Attributes:
        title: Text collected so far from inside <title> elements.
        readingtitle: Truthy while the parser is between a <title> start
            tag and its matching end tag.
    """

    def __init__(self):
        # Explicit base-class call rather than super(): on Python 2 the
        # HTMLParser class is old-style, where super() does not work.
        HTMLParser.__init__(self)
        self.title = ''
        self.readingtitle = 0

    def handle_starttag(self, tag, attrs):
        """Start collecting text when a <title> start tag is seen."""
        if tag == 'title':
            self.readingtitle = 1

    def handle_data(self, data):
        """Append *data* to the title while inside a <title> element."""
        if self.readingtitle:
            self.title += data

    def handle_endtag(self, tag):
        """Stop collecting text when the </title> end tag is seen."""
        if tag == 'title':
            self.readingtitle = 0

    def gettitle(self):
        """Return the title text collected so far."""
        return self.title


def extract_title(markup):
    """Return the <title> text contained in the HTML string *markup*.

    Returns an empty string when the markup has no <title> element.
    """
    parser = TitleParser()
    parser.feed(markup)
    # close() forces processing of anything the parser is still buffering.
    parser.close()
    return parser.gettitle()


def main(argv=None):
    """Read the HTML file named by ``argv[1]`` and print its title.

    Args:
        argv: Argument vector; defaults to ``sys.argv``.

    Returns:
        0 on success, 2 when no file argument was supplied.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        prog = argv[0] if argv else 'titleparser'
        sys.stderr.write('usage: %s FILE\n' % prog)
        return 2

    # The context manager closes the file even if read() raises.
    with open(argv[1]) as fd:
        markup = fd.read()

    # Two spaces on purpose: the original Python 2 statement
    # `print "Title is: ", title` emitted the literal's trailing space
    # plus print's own separator. A single parenthesised argument prints
    # identically on Python 2 and Python 3.
    print('Title is:  ' + extract_title(markup))
    return 0


if __name__ == '__main__':
    sys.exit(main())
参考：《Python网络编程基础》。