import xml.sax
'''
ContentHandler类方法介绍
* characters(content)方法
调用时机:
从行开始,遇到标签之前,存在字符,content的值为这些字符串。
从一个标签,遇到下一个标签之前, 存在字符,content的值为这些字符串。
从一个标签,遇到行结束符之前,存在字符,content的值为这些字符串。
标签可以是开始标签,也可以是结束标签。
* startDocument()方法
文档启动的时候调用。
* endDocument()方法
解析器到达文档结尾时调用。
* startElement(name, attrs)方法
遇到XML开始标签时调用,name是标签的名字,attrs是标签的属性值字典。
* endElement(name)方法
遇到XML结束标签时调用。
'''
class MovieHandler(xml.sax.ContentHandler):
def __init__(self):
self.CurrentData = ""
self.type = ""
self.format = ""
self.year = ""
self.rating = ""
self.stars = ""
self.description = ""
def startElement(self, tag, attributes):
self.CurrentData = tag
if tag == "movie":
print("****movie***")
title = attributes["title"]
print("Title:", title)
def endElement(self, tag):
if self.CurrentData == "type":
print("Type:", self.type)
elif self.CurrentData == "format":
print("Format:", self.format)
elif self.CurrentData == "year":
print("Year:", self.year)
elif self.CurrentData == "rating":
print("Rating:", self.rating)
elif self.CurrentData == "stars":
print("Stars:", self.stars)
elif self.CurrentData == "description":
print("Description:", self.description)
self.CurrentData = ""
def characters(self, content):
if self.CurrentData == "type":
self.type = content
elif self.CurrentData == "format":
self.format = content
elif self.CurrentData == "year":
self.year = content
elif self.CurrentData == "rating":
self.rating = content
elif self.CurrentData == "stars":
self.stars = content
elif self.CurrentData == "description":
self.description = content
if __name__ == "__main__":
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_namespaces, 0)
Handler = MovieHandler()
parser.setContentHandler(Handler)
parser.parse("./doc/1.xml")