Core  3.2
PHP API documentation
 All Data Structures Namespaces Files Functions Variables Pages
LexGeneralFilter.php
Go to the documentation of this file.
1 <?php
2 /*
3  * @author Anakeen
4  * @package FDL
5 */
6 
7 namespace Dcp\Lex;
8 /**
9  * Analyse a general filter string
10  * @package Dcp\Lex
11  */
13 {
// Token types returned by the lexer in the "token" key of each fragment.
const T_ESCAPE = "T_ESCAPE";
const T_QUOTE = "T_QUOTE";
const T_WHITESPACE = "T_WHITESPACE";
const T_STAR_BEGIN = "T_STAR_BEGIN";
const T_STAR_END = "T_STAR_END";
const T_OPEN_PARENTHESIS = "T_OPEN_PARENTHESIS";
const T_CLOSE_PARENTHESIS = "T_CLOSE_PARENTHESIS";
const T_OR = "T_OR";
const T_AND = "T_AND";
const T_WORD = "T_WORD";
const T_PUNCTUATION = "T_PUNCTUATION";
// Filter element kinds returned by _convertToken() in the "mode" key.
const MODE_STRING = "string";
const MODE_WORD = "word";
const MODE_OPEN_PARENTHESIS = "open_parenthesis";
const MODE_PARTIAL_BEGIN = "partial_begin";
const MODE_PARTIAL_END = "partial_end";
const MODE_PARTIAL_BOTH = "partial_both";
const MODE_CLOSE_PARENTHESIS = "close_parenthesis";
const MODE_OR = "or";
const MODE_AND = "and";
/**
 * List of tokens in priority order
 *
 * Maps an anchored regular expression to the token type it produces.
 * _match() tries the patterns in declaration order and keeps the first one
 * that matches, using capture group 1 as the matched text, so every pattern
 * must expose the consumed text as its first group.
 *
 * @var array
 */
protected static $_terminals = array(
    '/^(\\\)/' => self::T_ESCAPE,
    '/^(\")/' => self::T_QUOTE,
    '/^(\s+)/' => self::T_WHITESPACE,
    // OR / AND are operators only when they are not the beginning of a longer
    // word: without the lookahead "ORANGE" was split into OR + "ANGE" and
    // "ANDROID" into AND + "ROID".
    '/^(OR)(?![\p{L}\p{N}])/u' => self::T_OR,
    '/^(AND)(?![\p{L}\p{N}])/u' => self::T_AND,
    '/^(\()/' => self::T_OPEN_PARENTHESIS,
    '/^(\))/' => self::T_CLOSE_PARENTHESIS,
    // A star followed by whitespace or the end of input closes a word;
    // any other star is reported as a leading star.
    '/^(\*(?=\s|\z))/' => self::T_STAR_END,
    '/^(\*)/' => self::T_STAR_BEGIN,
    // '/^([\p{L}\p{N}-]+)/u' => self::T_WORD,
    '/^([\p{L}\']?[\p{L}\p{N}]+(?:-[\p{L}][\p{L}\p{N}]*)?)/u' => self::T_WORD, // 2013-45 is not a word, but sous-marin is a word
    '/^([\p{P}\p{S}])/u' => self::T_PUNCTUATION,
);
/**
 * Analyze a general filter string
 *
 * The source is consumed from left to right, one lexer fragment at a time,
 * until the whole string has been read.
 *
 * @param string $source the filter
 * @param bool $onlyToken use it if you only want the lexer token
 *
 * @return array
 * array of filter elements ("mode" => word, string, partial_begin, partial_end, partial_both, open_parenthesis, close_parenthesis, and, or, "word" => currentWord)
 * or array of token elements ("token" => token type (see $_terminals), "match" => matched string)
 *
 * @throws LexException when no terminal matches the remaining part of the source
 */
public static function analyze($source, $onlyToken = false)
{
    $length = strlen($source);
    $position = 0;
    $fragments = array();

    while ($position < $length) {
        $fragment = static::_match($source, $position);
        if ($fragment === false) {
            $message = sprintf(_("LEX_GENERAL_FILTER:Unable to parse %s") , $source);
            throw new LexException($message);
        }
        $fragments[] = $fragment;
        // Move past the text that has just been recognised.
        $position+= strlen($fragment['match']);
    }

    return $onlyToken ? $fragments : static::_convertToken($fragments);
}
/**
 * Analyze a fragment of source
 *
 * Tries each terminal pattern, in declaration order, against the part of the
 * line that starts at the given offset and reports the first one that matches.
 *
 * @param string $line current line
 * @param int $offset offset of the line
 * @return array|bool current fragment ("match" => matched text, "token" => token type) or false
 */
protected static function _match($line, $offset)
{
    $remaining = substr($line, $offset);

    foreach (static::$_terminals as $regex => $tokenName) {
        $captured = array();
        if (!preg_match($regex, $remaining, $captured)) {
            continue;
        }
        // Group 1 of every terminal pattern holds the consumed text.
        return array(
            'match' => $captured[1],
            'token' => $tokenName
        );
    }

    return false;
}
/**
 * Convert the tokens in filter element
 *
 * Walks the token list once. The text of the element being built is
 * accumulated in $currentWord and its kind in $currentMode; an element is
 * emitted when a whitespace, a closing quote, a closing parenthesis or the
 * end of the list is reached. Parentheses and the OR / AND operators are
 * emitted as elements of their own, without a "word" key.
 *
 * @param array $tokens array of token ("token" => token type, "match" => matched string)
 * @return array array of filter elements ("mode" => element kind, "word" => text when the element has one)
 */
protected static function _convertToken($tokens)
{
    // Keys are stored in this array
    $keys = array();
    // Mode are word, partial_begin, partial_end, partial_both, string, false
    $currentMode = false;

    $inEscape = false;
    $inQuote = false;
    $currentWord = "";
    foreach ($tokens as $value) {
        if ($inEscape) {
            // The token that follows a backslash is always taken literally.
            if ($currentMode === false) {
                $currentMode = self::MODE_STRING;
            }
            // Inside a plain word the backslash itself is kept.
            if ($currentMode == self::MODE_WORD) {
                $currentWord.= '\\';
            }
            $currentWord.= $value["match"];
            $inEscape = false;
            continue;
        }
        if ($value["token"] === self::T_ESCAPE) {
            $inEscape = true;
            continue;
        }
        if ($value["token"] === self::T_QUOTE) {
            $inQuote = !$inQuote;
            if ($currentMode === false) {
                // Nothing pending: the quote opens a string.
                $currentMode = self::MODE_STRING;
                continue;
            } else if ($currentMode === self::MODE_STRING) {
                // A quote met while building a string ends it.
                $keys[] = array(
                    "word" => $currentWord,
                    "mode" => self::MODE_STRING
                );
                $currentWord = "";
                $currentMode = false;
            } else {
                // In any other mode the quote is part of the text.
                $currentWord.= $value["match"];
            }
        }

        // Between quotes every token, whatever its type, is raw text.
        if ($currentMode === self::MODE_STRING && $inQuote) {
            $currentWord.= $value["match"];
            continue;
        }
        if ($value["token"] === self::T_WHITESPACE) {
            // Whitespace ends the element being built, if any.
            if ($currentWord !== "") {
                $keys[] = array(
                    "word" => $currentWord,
                    "mode" => $currentMode
                );
            }
            $currentWord = "";
            $currentMode = false;
            continue;
        }
        if ($value["token"] === self::T_STAR_BEGIN) {
            // A star with nothing pending marks a partial search;
            // otherwise it is kept as text.
            if ($currentMode === false) {
                $currentMode = self::MODE_PARTIAL_BEGIN;
            } else {
                $currentWord.= $value["match"];
            }
        }
        if ($value["token"] === self::T_STAR_END) {
            // The trailing star is never added to the text, it only changes the mode.
            if ($currentMode === false || $currentMode === self::MODE_WORD) {
                $currentMode = self::MODE_PARTIAL_END;
            } else if ($currentMode === self::MODE_PARTIAL_BEGIN) {
                $currentMode = self::MODE_PARTIAL_BOTH;
            }
        }
        if ($value["token"] === self::T_OPEN_PARENTHESIS) {
            $keys[] = array(
                "mode" => self::MODE_OPEN_PARENTHESIS
            );
            continue;
        }
        if ($value["token"] === self::T_CLOSE_PARENTHESIS) {
            // The parenthesis ends the element being built, then is emitted itself.
            if ($currentWord !== "") {
                $keys[] = array(
                    "word" => $currentWord,
                    "mode" => $currentMode ? $currentMode : self::MODE_WORD
                );
            }
            $currentWord = "";
            // Reset the mode as the whitespace branch does; otherwise the mode
            // of the element before ")" leaks into the next element when no
            // whitespace follows (e.g. "(*a)b" reported "b" as partial_begin).
            $currentMode = false;
            $keys[] = array(
                "mode" => self::MODE_CLOSE_PARENTHESIS
            );
            continue;
        }
        if ($value["token"] === self::T_OR) {
            $keys[] = array(
                "mode" => self::MODE_OR
            );
            continue;
        }
        if ($value["token"] === self::T_AND) {
            $keys[] = array(
                "mode" => self::MODE_AND
            );
            continue;
        }
        if ($value["token"] === self::T_WORD) {
            if ($currentMode === false) {
                $currentMode = self::MODE_WORD;
            }
            $currentWord.= $value["match"];
        }
        if ($value["token"] === self::T_PUNCTUATION) {
            // Punctuation turns a plain word into a string; partial modes are kept.
            if ($currentMode === false || $currentMode === self::MODE_WORD) {
                $currentMode = self::MODE_STRING;
            }
            $currentWord.= $value["match"];
        }
    }
    // Emit whatever is still pending once the tokens are exhausted.
    if ($currentWord !== "") {
        $keys[] = array(
            "word" => $currentWord,
            "mode" => $currentMode
        );
    }
    return $keys;
}
233 }
234 
236 {
237 }
Exception class that uses exceptionCode to correctly identify the exception.
Definition: exceptions.php:19
static analyze($source, $onlyToken=false)
static _convertToken($tokens)
static _match($line, $offset)
$value
← centre documentaire © anakeen