001 /*
002 * Copyright 1999,2004 The Apache Software Foundation.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 * http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017 package org.apache.commons.feedparser.sax;
018
019 import java.util.HashMap;
020 import java.util.HashSet;
021
022 import org.apache.commons.feedparser.FeedParserException;
023 import org.apache.commons.feedparser.FeedParserListener;
024 import org.apache.commons.feedparser.FeedParserState;
025 import org.apache.commons.feedparser.FeedVersion;
026 import org.xml.sax.Attributes;
027 import org.xml.sax.SAXException;
028 import org.xml.sax.helpers.DefaultHandler;
029
030 /** *
031 * @author <a href="mailto:burton@apache.org">Kevin A. Burton (burtonator)</a>
032 * @version $Id: RSSFeedParser.java 373622 2006-01-30 22:53:00Z mvdb $
033 */
034 public class RSSFeedParser extends BaseDefaultHandler {
035
036 public FeedParserListener listener = null;
037
038 boolean onItem = false;
039
040 HashMap properties = new HashMap();
041
042 FeedParserState state = new FeedParserState();
043
044 static HashSet RSS_NAMESPACES = new HashSet();
045
046 static HashSet RDF_NAMESPACES = new HashSet();
047
048 static HashSet MOD_CONTENT_NAMESPACES = new HashSet();
049
050 static {
051
052 RSS_NAMESPACES.add( "http://purl.org/rss/1.0/" );
053
054 RDF_NAMESPACES.add( "http://www.w3.org/1999/02/22-rdf-syntax-ns#" );
055
056 MOD_CONTENT_NAMESPACES.add( "http://purl.org/rss/1.0/modules/content/" );
057
058 }
059
060 /**
061 *
062 * Create a new <code>RSSFeedParser</code> instance.
063 *
064 *
065 */
066 public RSSFeedParser() {
067
068 super( "FIXME" );
069
070 this.parser = this;
071
072 setNext( new ChannelTemplate( this ) );
073
074 }
075
076 public void startDocument() throws SAXException {
077
078 try {
079
080 FeedVersion v = new FeedVersion();
081 v.isRSS = true;
082 listener.onFeedVersion( v );
083
084 listener.init();
085
086 } catch ( FeedParserException f ) {
087 throw new SAXException( f );
088 }
089
090 }
091
092 public void endDocument() throws SAXException {
093
094 try {
095
096 listener.finished();
097
098 } catch ( FeedParserException f ) {
099 throw new SAXException( f );
100 }
101
102 }
103
104 /**
105 * Match rss:channel
106 */
107 class ChannelTemplate extends BaseDefaultHandler {
108
109 public ChannelTemplate( RSSFeedParser parser ) {
110
111 super( "channel", parser.RSS_NAMESPACES, parser );
112
113 setNext( new URLTemplate( parser ) );
114
115 }
116
117 public void beginFeedElement() throws FeedParserException {
118
119 parser.listener.onChannel( parser.state,
120 getProperty( "title" ),
121 getProperty( "link" ),
122 getProperty( "description" ) );
123
124 }
125
126 public void endFeedElement() throws FeedParserException {
127 parser.listener.onChannelEnd();
128 }
129
130 }
131
132 /**
133 * Match rss:url for images/etc
134 */
135 class URLTemplate extends BaseDefaultHandler {
136
137 public URLTemplate( RSSFeedParser parser ) {
138
139 super( "url", parser.RSS_NAMESPACES, parser );
140
141 setNext( new ModContentTemplate( parser ) );
142 //this.setNext( new RDFValueTemplate( parser ) );
143
144 }
145
146 }
147
148 /**
149 * Match the rdf:value for mod_content
150 *
151 *
152 */
153 class ModContentTemplate extends BaseDefaultHandler {
154
155 public ModContentTemplate( RSSFeedParser parser ) {
156
157 super( "items", parser.MOD_CONTENT_NAMESPACES, parser );
158
159 this.setNext( new RDFValueTemplate( parser ) );
160
161 }
162
163 }
164
165 /**
166 * Match the rdf:value for mod_content
167 *
168 *
169 */
170 class RDFValueTemplate extends BaseDefaultHandler {
171
172 public RDFValueTemplate( RSSFeedParser parser ) {
173
174 super( "value", parser.RDF_NAMESPACES, parser );
175
176 this.setIncludeContent( true );
177 this.setNext( new RSSImageFeedParser( parser ) );
178
179 }
180
181 public void endFeedElement() throws FeedParserException {
182 //System.out.println( " FIXME: (debug): " + getProperty( "value" ) );
183 }
184
185 }
186
187 }
188
189 class RSSImageFeedParser extends BaseDefaultHandler {
190
191 public RSSImageFeedParser( RSSFeedParser parser ) {
192
193 super( "image", parser.RSS_NAMESPACES, parser );
194
195 setNext( new RSSItemFeedParser( parser ) );
196
197 }
198
199 public void beginFeedElement() throws FeedParserException {
200
201 parser.listener.onImage( parser.state,
202 getProperty( "title" ),
203 getProperty( "link" ),
204 getProperty( "url" ) );
205
206 }
207
208 public void endFeedElement() throws FeedParserException {
209 parser.listener.onImageEnd();
210 }
211
212 }
213
214 class RSSItemFeedParser extends BaseDefaultHandler {
215
216 public RSSItemFeedParser( RSSFeedParser parser ) {
217
218 super( "item", parser );
219 this.namespaces = parser.RSS_NAMESPACES;
220
221 setNext( new RSSTitleFeedParser( parser ) );
222
223 }
224
225 public void beginFeedElement() throws FeedParserException {
226
227 parser.listener.onItem( parser.state,
228 getProperty( "title" ),
229 getProperty( "link" ),
230 getProperty( "description" ),
231 null );
232
233 }
234
235 public void endFeedElement() throws FeedParserException {
236 parser.listener.onItemEnd();
237 }
238
239 }
240
241 class RSSTitleFeedParser extends BaseDefaultHandler {
242
243 public RSSTitleFeedParser( RSSFeedParser parser ) {
244
245 super( "title", parser );
246
247 setNext( new RSSLinkFeedParser( parser ) );
248
249 }
250
251 }
252
253 class RSSLinkFeedParser extends BaseDefaultHandler {
254
255 public RSSLinkFeedParser( RSSFeedParser parser ) {
256 super( "link", parser );
257
258 setNext( new RSSDescriptionFeedParser( parser ) );
259 }
260
261 }
262
263 class RSSDescriptionFeedParser extends BaseDefaultHandler {
264
265 public RSSDescriptionFeedParser( RSSFeedParser parser ) {
266 super( "description", parser );
267 }
268
269 }
270
271 /**
272 * dc:subject support
273 */
274 class RSSDcSubjectFeedParser extends BaseDefaultHandler {
275
276 //MetaFeedParserListener metadataListener= null;
277
278 public RSSDcSubjectFeedParser( RSSFeedParser parser ) {
279 super( "subject", parser );
280 }
281
282 public void beginFeedElement() {
283
284 //only if it's dc:subject
285 //listener.onSubject( parser.state, parser.getProperty( "subject" ) );
286
287 }
288
289 public void endFeedElement() {
290
291 }
292
293 }
294
295 class BaseDefaultHandler extends DefaultHandler {
296
297 public static int STRING_BUFFER_CAPACITY = 100000;
298
299 //BUG: this will break on nested code:
300
301 // <foo>
302 // <foo>
303 //
304 // </foo>
305 //
306 // </foo>
307
308 // won't be smart enough to realize it's nested
309
310 /**
311 * The local name of the element
312 */
313 private String local = null;
314
315 //FIXME: move to a FastStringBuffer that's not synchronized.
316 private StringBuffer buff = null;
317
318 private boolean onElement = false;
319
320 private boolean includeContent = false;
321
322 BaseDefaultHandler next = null;
323
324 FeedParserListener listener = null;
325
326 RSSFeedParser parser = null;
327
328 static HashMap nsPrefixMapping = new HashMap();
329
330 /**
331 * Store a hashset of namespaces that the given URL supports.
332 *
333 */
334 HashSet namespaces = null;
335
336 public BaseDefaultHandler( String local ) {
337 this.local = local;
338 }
339
340 public BaseDefaultHandler( String local, RSSFeedParser parser ) {
341
342 this.local = local;
343 this.parser = parser;
344
345 }
346
347 public BaseDefaultHandler( String local,
348 HashSet namespaces,
349 RSSFeedParser parser ) {
350
351 this.local = local;
352 this.namespaces = namespaces;
353 this.parser = parser;
354
355 }
356
357 /**
358 * If true we include the RAW XML content from the parser.
359 *
360 *
361 */
362 public void setIncludeContent( boolean includeContent ) {
363 this.includeContent = includeContent;
364 }
365
366 /**
367 * Set the next template to process in this chain.
368 *
369 *
370 */
371 public void setNext( BaseDefaultHandler next ) {
372 this.next = next;
373 }
374
375 /**
376 * Return the value of character data forfor the element.
377 *
378 *
379 */
380 public String toString() {
381
382 if ( buff == null )
383 return null;
384
385 if ( buff.length() == 0 )
386 return null;
387
388 return buff.toString();
389 }
390
391 /**
392 * Return true if the namespace is valid and this class is handling the
393 * given element name
394 *
395 *
396 */
397 boolean isLocal( String namespace, String local ) {
398
399 //wee if we need to test forfor namespaces
400 if ( namespace != null && namespaces != null && ! namespaces.contains( namespace ) )
401 return false;
402
403 return this.local.equals( local );
404 }
405
406 /**
407 * Get the value of a string property we found whilewhile parsing
408 *
409 *
410 */
411 public String getProperty( String name ) {
412 return (String)parser.properties.get( name );
413 }
414
415 public boolean getBoolean( String name ) {
416
417 return "true".equals( getProperty( name ) );
418
419 }
420
421 /**
422 * Method to call when we're finished processing this element but BEFORE
423 * processing of the next element in the chain.
424 *
425 *
426 */
427 public void beginFeedElement() throws FeedParserException {}
428
429 /**
430 * Method to call when we're finished processing this element but AFTER
431 * processing of the next element in the chain.
432 *
433 *
434 */
435 public void endFeedElement() throws FeedParserException {}
436
437 private boolean includeContentPrefix( String namespace ) {
438
439 if ( namespace != null ) {
440
441 String prefix = (String)nsPrefixMapping.get( namespace );
442
443 if ( prefix != null ) {
444
445 buff.append( prefix );
446 buff.append( ":" );
447 return true;
448 }
449
450 }
451
452 return false;
453
454 }
455
456 // **** SAX DefaultHandler **************************************************
457
458 /**
459 * Keep track of namespaces.
460 *
461 *
462 */
463 public void startPrefixMapping( String prefix,
464 String namespace ) throws SAXException {
465
466 if ( prefix != null && ! "".equals( prefix ) ) {
467 //System.out.println( namespace + " -> " + prefix );
468
469 nsPrefixMapping.put( namespace, prefix );
470
471 }
472
473 }
474
475 //FIXME: it might be possible to call an item again without a member and the
476 //value from the LAST item is used... this needs to be a fatal error and we
477 //need to clear ...
478
479 public void startElement( String namespace,
480 String local,
481 String qName,
482 Attributes attributes ) throws SAXException {
483
484 if ( isLocal( namespace, local ) ) {
485
486 //FIXME: is there a more efficient way to clear a buffer than this?
487
488 //FIXME: also only do this ifif it's necessary and content has
489 //actually been added. This will save some performance.
490
491 //buff = new StringBuffer( STRING_BUFFER_CAPACITY );
492
493 //buff = new StringBuffer( 1000 );
494
495 if ( buff == null ) {
496 buff = new StringBuffer( 1000 );
497 } else {
498 buff.setLength( 0 );
499 }
500
501 onElement = true;
502 }
503
504 if ( next != null )
505 next.startElement( namespace, local, qName, attributes );
506
507 if ( includeContent && onElement ) {
508 buff.append( "<" );
509
510 boolean hasPrefix = includeContentPrefix( namespace );
511
512 buff.append( local );
513
514 if ( ! hasPrefix && namespace != null ) {
515 buff.append( " xmlns=\"" );
516 buff.append( namespace );
517 buff.append( "\"" );
518 }
519
520 //now include attributes
521
522 int length = attributes.getLength();
523
524 for ( int i = 0; i < length; ++i ) {
525
526 buff.append( " " );
527 buff.append( attributes.getQName( i ) );
528 buff.append( "=" );
529 buff.append( "\"" );
530 buff.append( attributes.getValue( i ) );
531 buff.append( "\"" );
532
533 }
534
535 buff.append( ">" );
536 }
537
538 }
539
540 public void characters( char[] ch,
541 int start,
542 int length ) throws SAXException {
543
544 if ( onElement ) {
545 buff.append( ch, start, length );
546 }
547
548 if ( next != null )
549 next.characters( ch, start, length );
550
551 }
552
553 public void endElement( String namespace,
554 String local,
555 String qName ) throws SAXException {
556
557 try {
558
559 if ( isLocal( namespace, local ) ) {
560
561 onElement = false;
562 parser.properties.put( local, toString() );
563
564 beginFeedElement();
565
566 }
567
568 if ( next != null )
569 next.endElement( namespace, local, qName );
570
571 if ( isLocal( namespace, local ) )
572 endFeedElement();
573
574 if ( includeContent && onElement ) {
575 buff.append( "</" );
576
577 includeContentPrefix( namespace );
578
579 buff.append( local );
580
581 buff.append( ">" );
582 }
583
584 } catch ( FeedParserException fpe ) {
585
586 throw new SAXException( fpe );
587
588 }
589
590 }
591
592 }
593