Core  3.2
PHP API documentation
 All Data Structures Namespaces Files Functions Variables Pages
Class.MailAddrParser.php
Go to the documentation of this file.
1 <?php
2 /*
3  * @author Anakeen
4  * @package FDL
5 */
6 
7 namespace Dcp\Mail;
8 
10 {
11 }
12 /**
13  * Class MailAddrParser
14  *
15  * Try to parse a UTF-8 string containing multiple mail addresses in
16  * RFC5322/RFC2822 notation, and extract them as a list of
17  * \Dcp\Mail\Address objects. For example, it parses:
18  *
19  * Jöhn Dôé <john.doe@example.net>, "Foo, Bar <ACME Corp.>" <foo.bar@acme.corp>, xyz@example.net
20  *
21  * and returns a list of \Dcp\Mail\Address objects with display names
22  * and mail addresses:
23  *
24  * array(
25  * new \Dcp\Mail\Address("john.doe@example.net", "Jöhn Dôé"),
26  * new \Dcp\Mail\Address("foo.bar@acme.corp", "Foo, Bar <ACME Corp.>"),
27  * new \Dcp\Mail\Address("xyz@example.net", "")
28  * )
29  *
30  * Note:
31  * - It will not validate the mail addresses.
32  *
33  * @package Dcp\Mail
34  */
36 {
37  private $debug = false; // when true, parse() traces every step through error_log()
38  private $lax = false; // when true, parse() skips what it cannot understand instead of throwing
39  private $state = self::ST_lookingForStartOfMail; // current parser state, one of the ST_* constants
40  private $esc = false; // true when the previous char was an unescaped backslash
41  private $encoding = 'UTF-8'; // encoding handed to mb_strlen()/mb_substr()
42  private $s = ''; // string being parsed
43  private $p = 0; // current position in $s, counted in characters
44  /*
45  * Parser's states (each value is the constant's own name, which keeps debug traces readable)
46  */
47  const ST_lookingForStartOfMail = 'ST_lookingForStartOfMail'; // skipping spaces/commas before an address
48  const ST_inAngleMail = 'ST_inAngleMail'; // inside <...>, collecting the mail address
49  const ST_inMail = 'ST_inMail'; // collecting bare text: a plain address or an unquoted display name
50  const ST_inQuotedDisplayName = 'ST_inQuotedDisplayName'; // inside "...", collecting the display name
51  const ST_lookingForAngleMail = 'ST_lookingForAngleMail'; // expecting '<'; NOTE(review): parse() as shown never switches to this state
52  const ST_lookingForSeparator = 'ST_lookingForSeparator'; // after '>', expecting a comma
53  const ST_end = 'ST_end'; // input fully consumed
54  /**
55  * Enable or disable lax mode. In lax mode parse() skips the elements
56  * it cannot understand instead of throwing an exception.
57  *
58  * The property defaults to bool(false); calling setLax() without argument enables lax mode.
59  *
60  * @param bool $lax Only the strict boolean true enables lax mode, any other value disables it.
61  */
62  public function setLax($lax = true)
63  {
64  $this->lax = ($lax === true);
65  }
66  /**
67  * Enable or disable debugging traces, which parse() emits with PHP's error_log().
68  *
69  * The property defaults to bool(false); calling setDebug() without argument enables traces.
70  *
71  * @param bool $debug Only the strict boolean true enables traces, any other value disables them.
72  */
73  public function setDebug($debug = true)
74  {
75  $this->debug = ($debug === true);
76  }
77  /**
78  * Set the multi-byte string encoding used to measure and slice the parsed string.
79  *
80  * Default is 'UTF-8'. NOTE(review): isSpace() always matches with the UTF-8 (/u) modifier.
81  *
82  * @param string $encoding Encoding name handed to mb_strlen()/mb_substr(); stored without validation.
83  */
84  public function setEncoding($encoding = 'UTF-8')
85  {
86  $this->encoding = $encoding;
87  }
88  /**
89  * Check if the current position is at or after the end of the string.
90  *
91  * @return bool true when no char is left to read at the current position
92  */
93  private function eos()
94  {
95  return ($this->p >= mb_strlen($this->s, $this->encoding));
96  }
97  /**
98  * Check if the next position would be at or after the end of the string.
99  *
100  * @return bool true when the current char is the last one (or the end is already passed)
101  */
102  private function eosNext()
103  {
104  return (($this->p + 1) >= mb_strlen($this->s, $this->encoding));
105  }
106  /**
107  * Peek the char at the current position, without advancing.
108  *
109  * @return string the single char extracted by mb_substr() with the configured encoding
110  */
111  private function peek()
112  {
113  return mb_substr($this->s, $this->p, 1, $this->encoding);
114  }
115  /**
116  * Advance the position in the string by 1 char (no bound check is done here).
117  */
118  private function next()
119  {
120  $this->p++;
121  }
122  /**
123  * Check if the given char is a space char (the pattern also accepts an empty string).
124  *
125  * @param string $c the char to test
126  * @return int|false preg_match() result: 1 if $c holds only whitespace, 0 if not, false on PCRE error
127  */
128  private function isSpace($c)
129  {
130  return preg_match('/^\s*$/u', $c);
131  }
132  /**
133  * Parse the given string with a char-by-char state machine and extract \Dcp\Mail\Address objects.
134  *
135  * @param string $s Comma-separated mail addresses, each one optionally preceded by a display name.
136  * @return \Dcp\Mail\Address[] Addresses in order of appearance; an empty array for an empty string.
137  * @throws MailAddrParserException On an unexpected char or an unterminated string when lax mode is off.
138  */
139  public function parse($s)
140  {
141  $this->s = $s;
142  $this->p = 0;
143  $this->state = self::ST_lookingForStartOfMail;
144  $this->esc = false;
145  $addresses = array();
146  $mail = '';
147  $name = '';
148  if ($this->eos()) {
149  // Empty string
150  return $addresses;
151  }
152  while (!$this->eos()) {
153  $c = $this->peek();
154  if ($this->debug) {
155  error_log(__METHOD__ . " " . sprintf("(p=%s, state=%s) char='%s' {'%s', '%s'}%s", $this->p, $this->state, $c, $mail, $name, ($this->esc) ? ' (ESC)' : ''));
156  }
157  if ($c == '\\') { // An unescaped backslash is dropped and flags the next char as escaped
158  if (!$this->esc) {
159  $this->esc = true;
160  $this->next();
161  continue;
162  }
163  }
164  switch ($this->state) {
165  case self::ST_lookingForStartOfMail:
166  if ($this->esc) {
167  $name.= $c;
168  $this->esc = false;
169  $this->next();
170  if ($this->eos()) {
171  $this->state = self::ST_end; // NOTE(review): a lone escaped char ending the input is dropped, never flushed as an address
172  } else {
173  $this->state = self::ST_inMail;
174  }
175  } elseif ($this->isSpace($c) || $c == ",") {
176  $this->next();
177  if ($this->eos()) {
178  $this->state = self::ST_end;
179  }
180  } else {
181  $name = '';
182  $mail = '';
183  $this->state = self::ST_inMail; // Position is not advanced: the same char is re-read in ST_inMail
184  }
185  break;
186 
187  case self::ST_lookingForSeparator:
188  if ($this->esc) {
189  $this->esc = false;
190  } elseif ($this->isSpace($c)) {
191  // Discard spaces
192 
193  } elseif ($c == ",") {
194  $this->state = self::ST_lookingForStartOfMail;
195  } else {
196  if ($this->lax) {
197  // Reset state to lookup next mail address
198  $mail = '';
199  $name = '';
200  } else {
201  throw new MailAddrParserException(sprintf("Unexpected char '%s' at position %d: '%s'\n", $c, $this->p, $s));
202  }
203  }
204  $this->next();
205  if ($this->eos()) {
206  $this->state = self::ST_end;
207  }
208  break;
209 
210  case self::ST_inMail: // Bare text is collected in $name: a plain address, or a display name before <...>
211  if ($this->esc) {
212  $name.= $c;
213  $this->esc = false;
214  } elseif ($c == '"') {
215  $this->state = self::ST_inQuotedDisplayName;
216  } elseif ($c == '<') {
217  $this->state = self::ST_inAngleMail;
218  } elseif ($c == ',' || $this->eosNext()) {
219  if ($c != ',') {
220  // Append the last char and flush the mail
221  $name.= $c;
222  }
223  $mail = trim($name);
224  if ($this->debug) {
225  error_log(__METHOD__ . " " . sprintf("Got {'%s'}", $mail));
226  }
227  $addresses[] = new Address($mail);
228  $mail = '';
229  $name = '';
230  if ($this->eosNext()) {
231  $this->state = self::ST_end;
232  } else {
233  $this->state = self::ST_lookingForStartOfMail;
234  }
235  } else {
236  $name.= $c;
237  }
238  $this->next();
239  break;
240 
241  case self::ST_inQuotedDisplayName:
242  if ($this->esc) {
243  $this->esc = false;
244  $name.= $c;
245  } elseif ($c == '"') {
246  $this->state = self::ST_inMail; // Closing quote: back to bare text, where '<' opens the address
247  } else {
248  $name.= $c;
249  }
250  $this->next();
251  break;
252 
253  case self::ST_lookingForAngleMail: // NOTE(review): no transition in this method assigns this state
254  if ($this->esc) {
255  $this->esc = false;
256  } elseif ($this->isSpace($c)) {
257  // Discard leading spaces
258 
259  } elseif ($c == '<') {
260  $this->state = self::ST_inAngleMail;
261  } else {
262  if ($this->lax) {
263  // Reset state to lookup next mail address
264  $mail = '';
265  $name = '';
266  $this->state = self::ST_lookingForSeparator;
267  } else {
268  throw new MailAddrParserException(sprintf("Unexpected char '%s' at position %d: '%s'", $c, $this->p, $s));
269  }
270  }
271  $this->next();
272  break;
273 
274  case self::ST_inAngleMail:
275  if ($this->esc) {
276  // Escaped char: keep it verbatim and clear the flag, else it leaks and eats the next separator
277  $mail.= $c;
278  $this->esc = false;
279  } elseif ($c == '>') {
280  $mail = trim($mail);
281  $name = trim($name);
282  if ($this->debug) {
283  error_log(__METHOD__ . " " . sprintf("Got {'%s', '%s'}", $mail, $name));
284  }
285  $addresses[] = new Address($mail, $name);
286  $mail = '';
287  $name = '';
288  $this->state = ($this->eosNext()) ? self::ST_end : self::ST_lookingForSeparator;
289  } elseif ($c == '<') {
290  if ($this->lax) {
291  // Reset state to lookup next mail address
292  $mail = '';
293  $name = '';
294  $this->state = self::ST_lookingForSeparator;
295  } else {
296  throw new MailAddrParserException(sprintf("Unexpected char '%s' at position %d: '%s'", $c, $this->p, $s));
297  }
298  } else {
299  $mail.= $c;
300  }
301  $this->next();
302  break;
303 
304  case self::ST_end:
305  break;
306 
307  default:
308  throw new MailAddrParserException(sprintf("Unknown state '%s'.", $this->state));
309  }
310  }
311  if (!$this->lax && $this->state != self::ST_end) {
312  throw new MailAddrParserException(sprintf("Unterminated string in state '%s': '%s'", $this->state, $s));
313  }
314  return $addresses;
315  }
316 }
Exception class that uses exceptionCode to correctly identify the exception.
Definition: exceptions.php:19
setEncoding($encoding= 'UTF-8')
← centre documentaire © anakeen