1
/*
 * SmartCrawler
 *
 * $Id: HtmlURL.java,v 1.3 2005/07/04 16:07:38 vincool Exp $
 * Copyright 2005 Davide Pozza
 *
 * This program is free software; you can redistribute it
 * and/or modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation;
 * either version 2 of the License, or (at your option) any
 * later version.
 *
 * This program is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 * PURPOSE. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the Free
 * Software Foundation, Inc., 59 Temple Place, Suite 330,
 * Boston, MA 02111-1307 USA
 *
 */
26
27 package org.smartcrawler.extractor;
28
29 /***
30 *
31 *
32 * @author <a href="mailto:pozzad@alice.it">Davide Pozza</a>
33 * @version <tt>$Revision: 1.3 $</tt>
34 */
35 public interface HtmlURL {
36
37 /*** Represents a link containing an absolute URL. */
38 public static final int LINK_ABSOLUTE_URL = 1;
39
40 /*** Represents a link containing an absolute URI. */
41 public static final int LINK_ABSOLUTE_URI = 2;
42
43 /*** Represents a link containing a relative URL. */
44 public static final int LINK_RELATIVE = 3;
45
46 /*** Represents the standard HTTP protocol prefix. */
47 public static final String PROTOCOL_PREF = "http://";
48
49 /*** Represents the URL path separator. */
50 public static final String PATH_SEP = "/";
51
52
53 /***
54 *
55 * @return
56 */
57 public boolean isValid();
58 /***
59 *
60 * @return
61 */
62 public int getType();
63 /***
64 *
65 * @return
66 */
67 public String getCleanedLinkAsString();
68 }