1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27 package org.smartcrawler.extractor;
28
/**
 * Contract for a hyperlink extracted from an HTML document.
 *
 * <p>Besides the accessor methods, this interface exposes the integer codes
 * used to classify a link and two string constants used when handling URLs.
 *
 * @author <a href="mailto:pozzad@alice.it">Davide Pozza</a>
 * @version <tt>$Revision: 1.3 $</tt>
 */
public interface HtmlURL {

    // Interface fields are implicitly public, static and final, so the
    // modifiers are omitted; the constants are still visible to every caller.

    /** Type code for a link that contains an absolute URL. */
    int LINK_ABSOLUTE_URL = 1;

    /** Type code for a link that contains an absolute URI. */
    int LINK_ABSOLUTE_URI = 2;

    /** Type code for a link that contains a relative URL. */
    int LINK_RELATIVE = 3;

    /** Prefix of the standard HTTP protocol. */
    String PROTOCOL_PREF = "http://";

    /** Separator placed between the segments of a URL path. */
    String PATH_SEP = "/";

    /**
     * Reports whether this link is valid.
     *
     * @return {@code true} if the link is considered valid, {@code false}
     *         otherwise; the validity criteria are left to the implementation
     */
    boolean isValid();

    /**
     * Returns the type code of this link.
     *
     * @return the link type; presumably one of {@link #LINK_ABSOLUTE_URL},
     *         {@link #LINK_ABSOLUTE_URI} or {@link #LINK_RELATIVE}
     *         (NOTE(review): confirm against the implementations)
     */
    int getType();

    /**
     * Returns the cleaned form of this link as a string.
     *
     * @return the cleaned link; the cleaning rules are left to the
     *         implementation
     */
    String getCleanedLinkAsString();
}