|
发表于 2006-9-6 17:35:22
|
显示全部楼层
A quick-and-dirty snippet (Python 2):
#!/usr/bin/env python
"""Print the text found inside <td> elements of a small HTML sample.

Written for Python 2: it relies on the ``sgmllib`` module.
"""
from sgmllib import SGMLParser

# Sample document; note that the outer <td> contains two nested <td> cells.
s = """
<html>
<head>what's in</head>
<td> hello
<td> table1 blahblah </td>
<td> table </td>
</td>
ok the end blah
</html>
"""


class Parse(SGMLParser):
    """SGML parser that prints each chunk of text seen inside a <td>."""

    def reset(self):
        """Zero the <td> nesting counter, then reset the base parser."""
        # Set the counter before delegating so it exists as soon as the
        # base class has finished its own reset.
        self.found_td = 0
        SGMLParser.reset(self)

    def start_td(self, attrs):
        """Handle an opening <td>: go one nesting level deeper.

        ``attrs`` is supplied by the parser and is not used here.
        """
        self.found_td += 1

    def end_td(self):
        """Handle a closing </td>: come back up one nesting level."""
        self.found_td -= 1

    def handle_data(self, text):
        """Print ``text`` when at least one <td> is currently open."""
        if self.found_td > 0:
            # Parenthesised single-argument form prints the same line on
            # Python 2 and stays valid syntax on later versions.
            print('Data: %s' % text)


p = Parse()
p.feed(s)
# Flush anything the parser is still buffering after the last feed().
p.close()
复制代码
For more info:
http://www.diveintopython.org/html_processing/index.html |
|