Un pequeño ejemplo de cómo parsear un XML (un feed RSS) utilizando SAX:
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Stack;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import com.blackthorne.common.newsfeed.model.Feed;
import com.blackthorne.common.newsfeed.model.FeedMessage;
import com.blackthorne.common.newsfeed.util.DateParser;
/**
 * Reads an RSS feed from a URL and parses it with SAX into a {@link Feed}.
 *
 * @author Alejandro Hernández Pérez
 * <p>
 * Created: 29th August 2013<br>
 * Modified (dd/mm/yy):
 * <ul>
 * <li>07/10/2013 - Using SAX to parse RSSFeed</li>
 * </ul>
 */
public class RSSFeedParser {

    /**
     * SAX / Xerces features switched off before parsing so that a remote
     * (untrusted) feed cannot make the parser fetch external entities or DTDs.
     */
    private static final String[] EXTERNAL_RESOURCE_FEATURES = {
        "http://xml.org/sax/features/external-general-entities",
        "http://xml.org/sax/features/external-parameter-entities",
        "http://apache.org/xml/features/nonvalidating/load-external-dtd"
    };

    /**
     * URL to get the feed
     */
    private final URL url;

    /**
     * Stores the URL the feed will be read from.
     *
     * @param source textual URL of the feed
     * @throws IllegalArgumentException if {@code source} is not a valid URL
     */
    RSSFeedParser(String source) {
        try {
            url = new URL(source);
        } catch (MalformedURLException e) {
            // IllegalArgumentException is a RuntimeException, so callers that
            // caught the previous RuntimeException keep working.
            throw new IllegalArgumentException("Invalid feed URL: " + source, e);
        }
    }

    /**
     * Opens the feed URL, parses the response and closes the stream.
     *
     * @return the parsed {@link Feed}, or {@code null} when the feed could not
     *         be read or parsed (the failure is printed to standard error) or
     *         when the document contains neither a channel nor an item
     */
    Feed readFeed() {
        SaxHandler handler = new SaxHandler();
        // try-with-resources: the network stream is closed on every path.
        try (InputStream rssInput = read()) {
            SAXParserFactory factory = SAXParserFactory.newInstance();
            disableExternalResources(factory);
            SAXParser saxParser = factory.newSAXParser();
            // The parser detects the encoding from the BOM / XML declaration;
            // forcing one here would mis-decode feeds declared otherwise.
            saxParser.parse(rssInput, handler);
            return handler.getFeed();
        } catch (Exception err) {
            // Best effort by design: report the problem and return null.
            // JVM Errors are deliberately no longer swallowed.
            err.printStackTrace();
        }
        return null;
    }

    /**
     * Turns off external entity / DTD resolution on the given factory.
     * A feature the underlying parser does not know is skipped, so parsing
     * still works on any JAXP implementation.
     */
    private static void disableExternalResources(SAXParserFactory factory) {
        for (String feature : EXTERNAL_RESOURCE_FEATURES) {
            try {
                factory.setFeature(feature, false);
            } catch (javax.xml.parsers.ParserConfigurationException | SAXException ignored) {
                // Feature not supported by this parser: nothing to disable.
            }
        }
    }

    /**
     * SAX handler that collects the channel header and every item of an RSS
     * document.
     * <p>
     * Text is buffered per element and only stored when the element closes,
     * because SAX may deliver the content of one element in several
     * {@code characters} calls. Only direct children of {@code channel}
     * (before the first item) and of {@code item} are stored, so nested
     * blocks such as {@code image} do not pollute the header.
     *
     * @author Alejandro Hernández Pérez
     * <p>
     * Created: 7th October 2013<br>
     */
    static class SaxHandler extends DefaultHandler {
        // Elements to read in XML
        static final String CHANNEL = "channel";
        static final String ITEM = "item";
        static final String TITLE = "title";
        static final String DESCRIPTION = "description";
        static final String LANGUAGE = "language";
        static final String COPYRIGHT = "copyright";
        static final String LINK = "link";
        static final String AUTHOR = "author";
        static final String PUB_DATE = "pubDate";
        static final String GUID = "guid";

        // Result object; stays null until a channel or item has been seen
        Feed feed = null;

        // Names of the currently open elements, innermost on top
        private final Stack<String> elementStack = new Stack<String>();
        // Text collected so far for each open element (parallel to elementStack)
        private final Stack<StringBuilder> textStack = new Stack<StringBuilder>();

        // Values of the header or of the item currently being read
        String title = "";
        String description = "";
        String language = "";
        String copyright = "";
        String link = "";
        String author = "";
        String pubdate = "";
        String guid = "";

        // True while the channel header has not been turned into a Feed yet
        boolean isFeedHeader = true;

        /**
         * Get {@link Feed}
         *
         * @return the feed built so far, or {@code null} if none was found
         */
        public Feed getFeed() {
            return feed;
        }

        /**
         * @return name of the innermost open element, or {@code null} when no
         *         element is open
         */
        private String currentElement() {
            return elementStack.isEmpty() ? null : elementStack.peek();
        }

        /**
         * Clears the collected values so the next item starts empty.
         */
        private void initValues() {
            description = "";
            title = "";
            link = "";
            language = "";
            copyright = "";
            author = "";
            pubdate = "";
            guid = "";
        }

        /**
         * Builds the {@link Feed} from the header values collected so far.
         * Does nothing when the header was already closed.
         */
        private void closeHeader() {
            if (isFeedHeader) {
                isFeedHeader = false;
                feed = new Feed(title, link, description, language,
                        copyright, pubdate);
                initValues();
            }
        }

        /**
         * Stores the complete, trimmed text of a closed element in the
         * matching field. Unknown element names are ignored.
         */
        private void store(String element, String value) {
            switch (element) {
            case TITLE:
                title = value;
                break;
            case DESCRIPTION:
                description = value;
                break;
            case LINK:
                link = value;
                break;
            case GUID:
                guid = value;
                break;
            case LANGUAGE:
                language = value;
                break;
            case AUTHOR:
                author = value;
                break;
            case PUB_DATE:
                // An empty element carries no date, so there is nothing to parse.
                // NOTE(review): assumes DateParser.parseDate never returns
                // null for non-empty input - confirm against DateParser.
                pubdate = value.isEmpty()
                        ? value
                        : DateParser.parseDate(value).toString();
                break;
            case COPYRIGHT:
                copyright = value;
                break;
            default:
                break;
            }
        }

        /**
         * Called when an element opens: tracks it and, on the first item,
         * closes the channel header.
         */
        @Override
        public void startElement(String uri, String localName, String qName,
                Attributes attributes) throws SAXException {
            elementStack.push(qName);
            textStack.push(new StringBuilder());
            if (ITEM.equals(qName)) {
                closeHeader();
            }
        }

        /**
         * Called when an element closes: stores its text, emits a message when
         * an item ends, and builds the feed when a channel ends without items.
         */
        @Override
        public void endElement(String uri, String localName, String qName)
                throws SAXException {
            String value = textStack.pop().toString().trim();
            elementStack.pop();
            if (ITEM.equals(qName)) {
                FeedMessage message = new FeedMessage();
                message.setAuthor(author);
                message.setDescription(description);
                message.setGuid(guid);
                message.setDate(pubdate);
                message.setLink(link);
                message.setTitle(title);
                feed.getMessages().add(message);
                initValues();
            } else if (CHANNEL.equals(qName)) {
                // A channel without items still yields a (message-less) feed.
                closeHeader();
            } else {
                String parent = currentElement();
                boolean itemField = ITEM.equals(parent);
                boolean headerField = isFeedHeader && CHANNEL.equals(parent);
                if (itemField || headerField) {
                    store(qName, value);
                }
            }
        }

        /**
         * Appends a chunk of character data to the innermost open element.
         * Chunks are kept raw; trimming happens once the element closes.
         */
        @Override
        public void characters(char ch[], int start, int length)
                throws SAXException {
            if (!textStack.isEmpty()) {
                textStack.peek().append(ch, start, length);
            }
        }
    }

    /**
     * Opens the stream to read the feed.
     *
     * @return an open stream on the feed URL; the caller must close it
     * @throws RuntimeException wrapping the {@link IOException} when the URL
     *         cannot be opened
     */
    private InputStream read() {
        try {
            return url.openStream();
        } catch (IOException e) {
            throw new RuntimeException("Cannot open feed " + url, e);
        }
    }
}
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Stack;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import com.blackthorne.common.newsfeed.model.Feed;
import com.blackthorne.common.newsfeed.model.FeedMessage;
import com.blackthorne.common.newsfeed.util.DateParser;
/**
 * Reads an RSS feed from a URL and parses it with SAX into a {@link Feed}.
 *
 * @author Alejandro Hernández Pérez
 * <p>
 * Created: 29th August 2013<br>
 * Modified (dd/mm/yy):
 * <ul>
 * <li>07/10/2013 - Using SAX to parse RSSFeed</li>
 * </ul>
 */
public class RSSFeedParser {

    /**
     * SAX / Xerces features switched off before parsing so that a remote
     * (untrusted) feed cannot make the parser fetch external entities or DTDs.
     */
    private static final String[] EXTERNAL_RESOURCE_FEATURES = {
        "http://xml.org/sax/features/external-general-entities",
        "http://xml.org/sax/features/external-parameter-entities",
        "http://apache.org/xml/features/nonvalidating/load-external-dtd"
    };

    /**
     * URL to get the feed
     */
    private final URL url;

    /**
     * Stores the URL the feed will be read from.
     *
     * @param source textual URL of the feed
     * @throws IllegalArgumentException if {@code source} is not a valid URL
     */
    RSSFeedParser(String source) {
        try {
            url = new URL(source);
        } catch (MalformedURLException e) {
            // IllegalArgumentException is a RuntimeException, so callers that
            // caught the previous RuntimeException keep working.
            throw new IllegalArgumentException("Invalid feed URL: " + source, e);
        }
    }

    /**
     * Opens the feed URL, parses the response and closes the stream.
     *
     * @return the parsed {@link Feed}, or {@code null} when the feed could not
     *         be read or parsed (the failure is printed to standard error) or
     *         when the document contains neither a channel nor an item
     */
    Feed readFeed() {
        SaxHandler handler = new SaxHandler();
        // try-with-resources: the network stream is closed on every path.
        try (InputStream rssInput = read()) {
            SAXParserFactory factory = SAXParserFactory.newInstance();
            disableExternalResources(factory);
            SAXParser saxParser = factory.newSAXParser();
            // The parser detects the encoding from the BOM / XML declaration;
            // forcing one here would mis-decode feeds declared otherwise.
            saxParser.parse(rssInput, handler);
            return handler.getFeed();
        } catch (Exception err) {
            // Best effort by design: report the problem and return null.
            // JVM Errors are deliberately no longer swallowed.
            err.printStackTrace();
        }
        return null;
    }

    /**
     * Turns off external entity / DTD resolution on the given factory.
     * A feature the underlying parser does not know is skipped, so parsing
     * still works on any JAXP implementation.
     */
    private static void disableExternalResources(SAXParserFactory factory) {
        for (String feature : EXTERNAL_RESOURCE_FEATURES) {
            try {
                factory.setFeature(feature, false);
            } catch (javax.xml.parsers.ParserConfigurationException | SAXException ignored) {
                // Feature not supported by this parser: nothing to disable.
            }
        }
    }

    /**
     * SAX handler that collects the channel header and every item of an RSS
     * document.
     * <p>
     * Text is buffered per element and only stored when the element closes,
     * because SAX may deliver the content of one element in several
     * {@code characters} calls. Only direct children of {@code channel}
     * (before the first item) and of {@code item} are stored, so nested
     * blocks such as {@code image} do not pollute the header.
     *
     * @author Alejandro Hernández Pérez
     * <p>
     * Created: 7th October 2013<br>
     */
    static class SaxHandler extends DefaultHandler {
        // Elements to read in XML
        static final String CHANNEL = "channel";
        static final String ITEM = "item";
        static final String TITLE = "title";
        static final String DESCRIPTION = "description";
        static final String LANGUAGE = "language";
        static final String COPYRIGHT = "copyright";
        static final String LINK = "link";
        static final String AUTHOR = "author";
        static final String PUB_DATE = "pubDate";
        static final String GUID = "guid";

        // Result object; stays null until a channel or item has been seen
        Feed feed = null;

        // Names of the currently open elements, innermost on top
        private final Stack<String> elementStack = new Stack<String>();
        // Text collected so far for each open element (parallel to elementStack)
        private final Stack<StringBuilder> textStack = new Stack<StringBuilder>();

        // Values of the header or of the item currently being read
        String title = "";
        String description = "";
        String language = "";
        String copyright = "";
        String link = "";
        String author = "";
        String pubdate = "";
        String guid = "";

        // True while the channel header has not been turned into a Feed yet
        boolean isFeedHeader = true;

        /**
         * Get {@link Feed}
         *
         * @return the feed built so far, or {@code null} if none was found
         */
        public Feed getFeed() {
            return feed;
        }

        /**
         * @return name of the innermost open element, or {@code null} when no
         *         element is open
         */
        private String currentElement() {
            return elementStack.isEmpty() ? null : elementStack.peek();
        }

        /**
         * Clears the collected values so the next item starts empty.
         */
        private void initValues() {
            description = "";
            title = "";
            link = "";
            language = "";
            copyright = "";
            author = "";
            pubdate = "";
            guid = "";
        }

        /**
         * Builds the {@link Feed} from the header values collected so far.
         * Does nothing when the header was already closed.
         */
        private void closeHeader() {
            if (isFeedHeader) {
                isFeedHeader = false;
                feed = new Feed(title, link, description, language,
                        copyright, pubdate);
                initValues();
            }
        }

        /**
         * Stores the complete, trimmed text of a closed element in the
         * matching field. Unknown element names are ignored.
         */
        private void store(String element, String value) {
            switch (element) {
            case TITLE:
                title = value;
                break;
            case DESCRIPTION:
                description = value;
                break;
            case LINK:
                link = value;
                break;
            case GUID:
                guid = value;
                break;
            case LANGUAGE:
                language = value;
                break;
            case AUTHOR:
                author = value;
                break;
            case PUB_DATE:
                // An empty element carries no date, so there is nothing to parse.
                // NOTE(review): assumes DateParser.parseDate never returns
                // null for non-empty input - confirm against DateParser.
                pubdate = value.isEmpty()
                        ? value
                        : DateParser.parseDate(value).toString();
                break;
            case COPYRIGHT:
                copyright = value;
                break;
            default:
                break;
            }
        }

        /**
         * Called when an element opens: tracks it and, on the first item,
         * closes the channel header.
         */
        @Override
        public void startElement(String uri, String localName, String qName,
                Attributes attributes) throws SAXException {
            elementStack.push(qName);
            textStack.push(new StringBuilder());
            if (ITEM.equals(qName)) {
                closeHeader();
            }
        }

        /**
         * Called when an element closes: stores its text, emits a message when
         * an item ends, and builds the feed when a channel ends without items.
         */
        @Override
        public void endElement(String uri, String localName, String qName)
                throws SAXException {
            String value = textStack.pop().toString().trim();
            elementStack.pop();
            if (ITEM.equals(qName)) {
                FeedMessage message = new FeedMessage();
                message.setAuthor(author);
                message.setDescription(description);
                message.setGuid(guid);
                message.setDate(pubdate);
                message.setLink(link);
                message.setTitle(title);
                feed.getMessages().add(message);
                initValues();
            } else if (CHANNEL.equals(qName)) {
                // A channel without items still yields a (message-less) feed.
                closeHeader();
            } else {
                String parent = currentElement();
                boolean itemField = ITEM.equals(parent);
                boolean headerField = isFeedHeader && CHANNEL.equals(parent);
                if (itemField || headerField) {
                    store(qName, value);
                }
            }
        }

        /**
         * Appends a chunk of character data to the innermost open element.
         * Chunks are kept raw; trimming happens once the element closes.
         */
        @Override
        public void characters(char ch[], int start, int length)
                throws SAXException {
            if (!textStack.isEmpty()) {
                textStack.peek().append(ch, start, length);
            }
        }
    }

    /**
     * Opens the stream to read the feed.
     *
     * @return an open stream on the feed URL; the caller must close it
     * @throws RuntimeException wrapping the {@link IOException} when the URL
     *         cannot be opened
     */
    private InputStream read() {
        try {
            return url.openStream();
        } catch (IOException e) {
            throw new RuntimeException("Cannot open feed " + url, e);
        }
    }
}
Comentarios
Publicar un comentario