Initial commit
[moodle.git] / search / Zend / Search / Lucene / Analysis / Token.php
CommitLineData
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */


/**
 * Describes a single token: its term text, the start and end offsets of the
 * token in the source text, its lexical type, and its position increment
 * relative to the previous token.
 *
 * The text, offsets and type are fixed at construction time; only the
 * position increment can be changed afterwards.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Analysis_Token
{
    /**
     * The text of the term.
     *
     * @var string
     */
    private $_termText;

    /**
     * Start in source text.
     *
     * @var integer
     */
    private $_startOffset;

    /**
     * End in source text.
     *
     * @var integer
     */
    private $_endOffset;

    /**
     * Lexical type.
     *
     * @var string
     */
    private $_type;

    /**
     * The position of this token relative to the previous Token.
     *
     * The default value is one.
     *
     * Some common uses for this are:
     * Set it to zero to put multiple terms in the same position.  This is
     * useful if, e.g., a word has multiple stems.  Searches for phrases
     * including either stem will match.  In this case, all but the first stem's
     * increment should be set to zero: the increment of the first instance
     * should be one.  Repeating a token with an increment of zero can also be
     * used to boost the scores of matches on that token.
     *
     * Set it to values greater than one to inhibit exact phrase matches.
     * If, for example, one does not want phrases to match across removed stop
     * words, then one could build a stop word filter that removes stop words and
     * also sets the increment to the number of stop words removed before each
     * non-stop word.  Then exact phrase queries will only match when the terms
     * occur with no intervening stop words.
     *
     * @var integer
     */
    private $_positionIncrement;


    /**
     * Object constructor
     *
     * Stores the given values as-is and initialises the position increment
     * to one.
     *
     * @param string  $text  The text of the term.
     * @param integer $start Start of the token in the source text.
     * @param integer $end   End of the token in the source text.
     * @param string  $type  Lexical type; defaults to 'word'.
     */
    public function __construct($text, $start, $end, $type = 'word')
    {
        $this->_termText    = $text;
        $this->_startOffset = $start;
        $this->_endOffset   = $end;
        $this->_type        = $type;

        // Every new token starts with the default increment of one.
        $this->_positionIncrement = 1;
    }


    /**
     * positionIncrement setter
     *
     * The value is stored without validation.
     *
     * @param integer $positionIncrement
     */
    public function setPositionIncrement($positionIncrement)
    {
        $this->_positionIncrement = $positionIncrement;
    }

    /**
     * Returns the position increment of this Token.
     *
     * @return integer
     */
    public function getPositionIncrement()
    {
        return $this->_positionIncrement;
    }

    /**
     * Returns the Token's term text.
     *
     * @return string
     */
    public function getTermText()
    {
        return $this->_termText;
    }

    /**
     * Returns this Token's starting offset, the position of the first character
     * corresponding to this token in the source text.
     *
     * Note:
     * The difference between getEndOffset() and getStartOffset() may not be equal
     * to strlen(Zend_Search_Lucene_Analysis_Token::getTermText()), as the term text may have been altered
     * by a stemmer or some other filter.
     *
     * @return integer
     */
    public function getStartOffset()
    {
        return $this->_startOffset;
    }

    /**
     * Returns this Token's ending offset, one greater than the position of the
     * last character corresponding to this token in the source text.
     *
     * @return integer
     */
    public function getEndOffset()
    {
        return $this->_endOffset;
    }

    /**
     * Returns this Token's lexical type.  Defaults to 'word'.
     *
     * @return string
     */
    public function getType()
    {
        return $this->_type;
    }
}