<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */


/**
 * A single token produced by text analysis.
 *
 * A token carries its term text, the start and end offsets of the
 * corresponding span in the source text, a lexical type, and a position
 * increment relative to the previous token. Everything except the position
 * increment is fixed at construction time; the position increment starts at
 * one and can be changed through setPositionIncrement().
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Analysis_Token
{
    /**
     * The text of the term.
     *
     * @var string
     */
    private $_termText;

    /**
     * Start of the token in the source text.
     *
     * @var integer
     */
    private $_startOffset;

    /**
     * End of the token in the source text.
     *
     * @var integer
     */
    private $_endOffset;

    /**
     * Lexical type.
     *
     * @var string
     */
    private $_type;

    /**
     * The position of this token relative to the previous Token.
     *
     * The default value is one.
     *
     * Some common uses for this are:
     *
     * Set it to zero to put multiple terms in the same position. This is
     * useful if, e.g., a word has multiple stems. Searches for phrases
     * including either stem will match. In this case, all but the first stem's
     * increment should be set to zero: the increment of the first instance
     * should be one. Repeating a token with an increment of zero can also be
     * used to boost the scores of matches on that token.
     *
     * Set it to values greater than one to inhibit exact phrase matches.
     * If, for example, one does not want phrases to match across removed stop
     * words, then one could build a stop word filter that removes stop words and
     * also sets the increment to the number of stop words removed before each
     * non-stop word. Then exact phrase queries will only match when the terms
     * occur with no intervening stop words.
     *
     * @var integer
     */
    private $_positionIncrement;


    /**
     * Object constructor.
     *
     * Stores the given values as-is (no validation or conversion is done
     * here) and initialises the position increment to one.
     *
     * @param string  $text  Term text of the token.
     * @param integer $start Start offset in the source text.
     * @param integer $end   End offset in the source text.
     * @param string  $type  Lexical type; defaults to 'word'.
     */
    public function __construct($text, $start, $end, $type = 'word')
    {
        $this->_termText    = $text;
        $this->_startOffset = $start;
        $this->_endOffset   = $end;
        $this->_type        = $type;

        // Every token starts one position after its predecessor unless a
        // caller overrides this via setPositionIncrement().
        $this->_positionIncrement = 1;
    }


    /**
     * positionIncrement setter.
     *
     * The value is stored without validation; see the $_positionIncrement
     * property for the meaning of zero and of values greater than one.
     *
     * @param integer $positionIncrement
     */
    public function setPositionIncrement($positionIncrement)
    {
        $this->_positionIncrement = $positionIncrement;
    }

    /**
     * Returns the position increment of this Token.
     *
     * @return integer
     */
    public function getPositionIncrement()
    {
        return $this->_positionIncrement;
    }

    /**
     * Returns the Token's term text.
     *
     * @return string
     */
    public function getTermText()
    {
        return $this->_termText;
    }

    /**
     * Returns this Token's starting offset, the position of the first character
     * corresponding to this token in the source text.
     *
     * Note:
     * The difference between getEndOffset() and getStartOffset() may not be equal
     * to strlen(Zend_Search_Lucene_Analysis_Token::getTermText()), as the term
     * text may have been altered by a stemmer or some other filter.
     *
     * @return integer
     */
    public function getStartOffset()
    {
        return $this->_startOffset;
    }

    /**
     * Returns this Token's ending offset, one greater than the position of the
     * last character corresponding to this token in the source text.
     *
     * @return integer
     */
    public function getEndOffset()
    {
        return $this->_endOffset;
    }

    /**
     * Returns this Token's lexical type. Defaults to 'word'.
     *
     * @return string
     */
    public function getType()
    {
        return $this->_type;
    }
}
