View Javadoc

1   
2   /*
3    * SmartCrawler
4    *
5    * $Id: PatternProvider.java,v 1.6 2005/08/05 15:55:53 vincool Exp $
6    * Copyright 2005 Davide Pozza
7    *
8    * This program is free software; you can redistribute it
9    * and/or modify it under the terms of the GNU General Public
10   * License as published by the Free Software Foundation;
11   * either version 2 of the License, or (at your option) any
12   * later version.
13   *
14   * This program is distributed in the hope that it will be
15   * useful, but WITHOUT ANY WARRANTY; without even the implied
16   * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
17   * PURPOSE. See the GNU General Public License for more
18   * details.
19   *
20   * You should have received a copy of the GNU General Public
21   * License along with this program; if not, write to the Free
22   * Software Foundation, Inc., 59 Temple Place, Suite 330,
23   * Boston, MA 02111-1307 USA
24   *
25   */
26  
27  package org.smartcrawler.extractor;
28  import java.io.File;
29  import java.io.FileInputStream;
30  import java.io.InputStream;
31  import java.net.URL;
32  import java.util.ArrayList;
33  import java.util.List;
34  import org.apache.commons.digester.Digester;
35  import org.apache.log4j.Logger;
36  import org.smartcrawler.common.SCLogger;
37  import org.smartcrawler.extractor.pattern.AbstractPattern;
38  import org.smartcrawler.extractor.pattern.ConcretePattern;
39  
40  /***
41   *
42   *
43   * @author <a href="mailto:pozzad@alice.it">Davide Pozza</a>
44   * @version <tt>$Revision: 1.6 $</tt>
45   */
46  public class PatternProvider {
47  
48      private static PatternProvider paProv = null;
49      private AbstractPattern[] patterns = null;
50      private static Logger log = SCLogger.getLogger(PatternProvider.class);
51  
52      /*** Creates a new instance of PatternProvider */
53      private PatternProvider() {
54          init();
55      }
56  
57      /***
58       *
59       * @return
60       */
61      public static synchronized PatternProvider instance() {
62          if (paProv == null) {
63              paProv = new PatternProvider();
64          }
65          return paProv;
66      }
67  
68      private void init() {
69          InputStream input = null;
70          try {
71              String path = System.getProperty("extractionPatterns.file.path");
72              File customPatternsXml = null;
73              if (path != null) {
74                  customPatternsXml = new File(System.getProperty("extractionPatterns.file.path"));
75              }
76              if (customPatternsXml != null && customPatternsXml.exists() && customPatternsXml.isFile()) {
77                  //use custom settings
78                  input = new FileInputStream(customPatternsXml);
79                  log.info("Loaded extraction patterns file: "
80                          + customPatternsXml.getAbsolutePath());
81              } else {
82                  //use default settings
83                  URL source = getClass().getResource("/extractPatterns.xml");
84                  input = source.openStream();
85                  log.info("Loaded default extraction patterns file");
86              }
87              Digester digester = new Digester();
88              // Push empty List onto Digester's Stack
89              List list = new ArrayList();
90              digester.push(list);
91              digester.addObjectCreate("extractionPatterns/pattern",
92                      ConcretePattern.class);
93              digester.addSetNext("extractionPatterns/pattern",
94                      "add",
95                      "java.lang.Object");
96              digester.addBeanPropertySetter("extractionPatterns/pattern/expression",
97                      "stringPattern");
98              digester.addSetProperties( "extractionPatterns/pattern", "group", "group" );
99              digester.addSetProperties( "extractionPatterns/pattern", "tagName", "tagName" );
100 
101 
102             digester.parse( input );
103             patterns = (AbstractPattern[]) list.toArray(new AbstractPattern[list.size()]);
104         } catch (Exception e) {
105             log.fatal("Unable to load extraction patterns", e);
106         } finally {
107             try {
108                 input.close();
109             }
110             catch (Exception e){}
111 
112         }
113     }
114 
115     /***
116      *
117      * @return
118      */
119     public AbstractPattern[] getPatterns() {
120         return patterns;
121 
122     }
123 }