View Javadoc

1   
2   /*
3    * SmartCrawler
4    *
5    * $Id: QuickTest.java,v 1.9 2005/08/05 15:55:53 vincool Exp $
6    * Copyright 2005 Davide Pozza
7    *
8    * This program is free software; you can redistribute it
9    * and/or modify it under the terms of the GNU General Public
10   * License as published by the Free Software Foundation;
11   * either version 2 of the License, or (at your option) any
12   * later version.
13   *
14   * This program is distributed in the hope that it will be
15   * useful, but WITHOUT ANY WARRANTY; without even the implied
16   * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
17   * PURPOSE. See the GNU General Public License for more
18   * details.
19   *
20   * You should have received a copy of the GNU General Public
21   * License along with this program; if not, write to the Free
22   * Software Foundation, Inc., 59 Temple Place, Suite 330,
23   * Boston, MA 02111-1307 USA
24   *
25   */
26  
27  package org.smartcrawler.examples;
28  
29  import org.smartcrawler.common.MalformedLinkException;
30  import org.smartcrawler.*;
31  
32  /***
33   * The engine thread which is started by the {@link org.smartcrawler.Crawler}
34   *
35   * @author <a href="mailto:pozzad@alice.it">Davide Pozza</a>
36   * @version <tt>$Revision: 1.9 $</tt>
37   */
38  public class QuickTest {
39  
40      /***
41       * Creates a new instance of QuickTest
42       */
43      public QuickTest() {
44  
45      }
46      /***
47       * The main method
48       *
49       * @param args the command line arguments
50       */
51      public static void main(String[] args) {
52          String urlStr = null;
53          String configFileName = null;
54  
55          //only for test purpose
56          //urlStr = "http://www.alice.it";
57          //configFileName = "src/bin/conf/smartcrawler-config.xml";
58  
59          urlStr = "http://images.google.it/images?q=casa&hl=it";
60          configFileName = "examples/googleImages/conf/google_images-config.xml";
61          System.setProperty("extractionPatterns.file.path", "examples/googleImages/conf/extractPatterns.xml");
62          //urlStr = "http://www.nytimes.com";
63          //configFileName = "src/bin/conf/nyt_rss-config.xml";
64  
65          //configFileName = "src/bin/conf/yellowPages-config.xml";
66          //urlStr = "http://www.paginegialle.it/pg/cgi/pgsearch.cgi?btt=1&ts=1&l=1&cb=0&ind=&nc=&qs=albergo&dv=vicenza&x=0&y=0";
67          //urlStr = "http://pgd.paginegialle.it/66/ct=66&cc=337100290&cl=1&iq=000212487235030529042997&cb=0";
68  
69          //urlStr = "http://www.photosig.com/go/photos/browse?sort=id-d&page=1&id=1";
70          urlStr = "http://www.photosig.com/go/photos/view;jsessionid=5CACE7874611EFEBE567706E1565D291?id=1577721&forward=browse";
71          configFileName = "examples/photosig/conf/photosig-config.xml";
72          System.setProperty("extractionPatterns.file.path", "examples/photosig/conf/extractPatterns.xml");
73          try {
74  
75              new Crawler(urlStr, configFileName).startEngines();
76  
77          } catch (MalformedLinkException e){
78              System.out.println("Invalid initial link! " + urlStr);
79          } catch (Exception e){
80              System.out.println("Generic error");
81              e.printStackTrace();
82          }
83      }
84  }