Core  3.2
PHP API documentation
 All Data Structures Namespaces Files Functions Variables Pages
Class.SearchHighlight.php
Go to the documentation of this file.
1 <?php
2 /*
3  * @author Anakeen
4  * @package FDL
5 */
6 /**
7  * Full Text Search document
8  *
9  * @author Anakeen
10  * @version $Id: fullsearch.php,v 1.10 2008/01/04 17:56:37 eric Exp $
11  * @package FDL
12  * @subpackage GED
13  */
14 /**
15  */
16 
17 include_once ("FDL/Class.SearchDoc.php");
18 include_once ("FDL/Class.DocSearch.php");
19 
20 include_once ("FDL/freedom_util.php");
21 
23 {
24 
25  private $dbid;
26  /**
27  * @var int maximum text size, in kilobytes, that highlight() accepts
28  */
29  private $limit = 200;
30 
31  public $beginTag = '<b>';
32  public $endTag = '</b>';
33 
/**
 * Resolve the database access with getDbAccess() and keep the handle
 * returned by getDbId() for the headline query issued by highlight().
 */
public function __construct()
{
    $access = getDbAccess();
    $this->dbid = getDbId($access);
}
38 
/**
 * Change the size threshold above which highlight() gives up and returns
 * a "document too big" message instead of an excerpt.
 *
 * @param int|float $limit maximum text size, compared against strlen($s) / 1024
 * @return void
 */
public function setLimit($limit)
{
    $this->limit = $limit;
}
/**
 * UTF-8 aware equivalent of the three-argument form of strtr().
 *
 * Each character of $c1 found in $s is replaced by the character at the
 * same position in $c2. As with strtr(), when $c1 and $c2 have different
 * lengths the extra characters of the longer one are ignored, and when a
 * character appears several times in $c1 its last occurrence wins.
 *
 * The translation works directly on UTF-8 sequences instead of going
 * through an ISO-8859-1 round trip, so characters that do not exist in
 * ISO-8859-1 are left untouched rather than being turned into '?'.
 *
 * @param string $s  UTF-8 text to translate
 * @param string $c1 UTF-8 list of characters to search for
 * @param string $c2 UTF-8 list of replacement characters
 * @return string translated text; $s unchanged when there is nothing to map
 *                or when a character list is not valid UTF-8
 */
public static function strtr8($s, $c1, $c2)
{
    // Split both lists into whole UTF-8 characters (not bytes).
    $from = preg_split('//u', $c1, -1, PREG_SPLIT_NO_EMPTY);
    $to = preg_split('//u', $c2, -1, PREG_SPLIT_NO_EMPTY);
    if ($from === false || $to === false) {
        // a character list is not valid UTF-8: no reliable mapping can be built
        return $s;
    }

    $pairs = array();
    $count = min(count($from) , count($to));
    for ($i = 0; $i < $count; $i++) {
        $pairs[$from[$i]] = $to[$i];
    }
    if (count($pairs) === 0) {
        return $s;
    }
    // Replacing whole UTF-8 sequences is safe byte-wise: a complete sequence
    // can never match in the middle of another character.
    return strtr($s, $pairs);
}
/**
 * Return the part of a text surrounding the first keyword match.
 *
 * Matches are located with a plain case-insensitive regular expression
 * replace and wrapped between $this->beginTag and $this->endTag. Before
 * matching, '£' is replaced by ' - ' and any tag already present in the
 * text is stripped, so the only tags in the result mark real matches.
 *
 * The excerpt keeps about 100 bytes of context on each side of the first
 * match and is cut on spaces when possible. It always contains the first
 * match and never splits a UTF-8 character.
 *
 * @param string $s original text
 * @param string $k keywords; inserted as-is in the search pattern
 * @return string excerpt with matches tagged, or "-" when nothing matches
 */
public function rawHighLight($s, $k)
{
    $offsetStart = 100; // number of bytes of context kept before and after the first match
    $replace = $this->beginTag . '$1' . $this->endTag;

    $clean = str_replace(array(
        '£',
        $this->beginTag,
        $this->endTag
    ) , array(
        ' - ',
        " ",
        ""
    ) , $s);
    // NOTE(review): $k is interpolated unescaped, so it is interpreted as a
    // regular expression and a "/" inside it breaks the pattern. Left as-is
    // because callers may rely on regex syntax - confirm before quoting it.
    $out = preg_replace("/($k)/iu", $replace, $clean);
    if (!is_string($out)) {
        // pattern or UTF-8 error: behave as if nothing was found
        return "-";
    }

    $first = strpos($out, $this->beginTag);
    if ($first === false) {
        return "-";
    }
    $length = strlen($out);
    $tagEnd = strpos($out, $this->endTag, $first);
    if ($tagEnd === false) {
        $tagEnd = $first;
    }

    // Start of the excerpt: first space inside the leading context window.
    $windowStart = max(0, $first - $offsetStart);
    $begin = strpos($out, " ", $windowStart);
    if ($begin === false || $begin > $first) {
        // No space before the match: starting on a later space (or at a fixed
        // position) would drop the match, so start at the window itself,
        // moved forward to the next UTF-8 character boundary.
        $begin = $windowStart;
        while ($begin < $first && (ord($out[$begin]) & 0xC0) === 0x80) {
            $begin++;
        }
    }

    // End of the excerpt: first space after the trailing context window.
    // The offset is clamped because strpos() rejects offsets past the end
    // of the string (warning before PHP 8, ValueError since PHP 8).
    $searchFrom = min($length, $tagEnd + $offsetStart);
    $end = strpos($out, " ", $searchFrom);
    if ($end === false) {
        // No space left: cut at the window end, moved back to a UTF-8
        // character boundary.
        $end = $searchFrom;
        while ($end < $length && $end > $tagEnd && (ord($out[$end]) & 0xC0) === 0x80) {
            $end--;
        }
    }

    return substr($out, $begin, $end - $begin);
}
/**
 * Return the part of a text where the keywords are found, with tags.
 *
 * The full-text vectors are built on unaccented text, so the headline is
 * computed by the ts_headline SQL function on an unaccented copy of the
 * text; the tags it places are then transposed onto the matching part of
 * the original (accented) text.
 *
 * Special cases:
 *  - empty keyword: returns the span between the first and the last space
 *    of the first 100 bytes of the text (empty when there is no space);
 *  - text larger than the configured limit: returns a translated
 *    "document too big" message;
 *  - failed query or no match: returns an empty string;
 *  - headline text not found in the unaccented text: returns the
 *    unaccented headline as produced by the database.
 *
 * @param string $s original text
 * @param string $k keywords, separated by white spaces
 * @return string HTML text with begin/end tags around the matches
 */
public function highlight($s, $k)
{
    $headline = '';
    $k = trim($k);
    if ($k == "") {
        $h = str_replace('£', ' - ', substr($s, 0, 100));
        // Byte offsets are used throughout: cutting on ASCII spaces is safe
        // in UTF-8 and also drops a character split by the 100-byte cut.
        $pos1 = strpos($h, ' ');
        $pos2 = strrpos($h, ' ');
        if ($pos1 !== false) {
            $headline = substr($h, $pos1, ($pos2 - $pos1));
        }
    } else if ((strlen($s) / 1024) > $this->limit) {
        $headline = sprintf(_("document too big (%dKo): no highlight") , (strlen($s) / 1024));
    } else {
        // keywords become an AND tsquery on their unaccented form
        $k = preg_replace('/\s+/u', '&', unaccent($k));

        // normalise the text: separators, repeated spaces, line breaks
        $s = self::strtr8($s, "£", ",");
        $s = preg_replace('/[ ]+ /u', ' ', $s);
        $s = str_replace(array(
            "<br />",
            " \r",
            "\n "
        ) , array(
            '',
            '',
            "\n"
        ) , $s);

        // strip HTML tags, normalise quotes, drop control and symbol characters
        $s = preg_replace('/<[a-z][^>]+>/i', '', $s);
        $s = preg_replace('/<\/[a-z]+\s*>/i', '', $s);
        $s = preg_replace('/<[a-z]+\/>/i', '', $s);
        $s = preg_replace('/«/u', '"', $s);
        $s = preg_replace('/»/u', '"', $s);
        $s = preg_replace('/\p{C}/u', '', $s); // delete control characters
        $s = preg_replace('/\p{S}/u', '', $s); // delete symbol characters
        $us = unaccent($s);

        // Escape with the connection the query runs on, so the escaping
        // follows that connection's settings.
        $q = sprintf("select ts_headline('french','%s',to_tsquery('french','%s'),'MaxFragments=1,StartSel=%s, StopSel=%s')", pg_escape_string($this->dbid, $us) , pg_escape_string($this->dbid, $k) , pg_escape_string($this->dbid, $this->beginTag) , pg_escape_string($this->dbid, $this->endTag));
        $result = pg_query($this->dbid, $q);
        // pg_query() returns false on error (e.g. keywords that are not a
        // valid tsquery); in that case the headline stays empty.
        if ($result !== false && pg_num_rows($result) > 0) {
            $arr = pg_fetch_array($result, 0, PGSQL_ASSOC);
            $headline = $arr["ts_headline"];
        }

        $pos = mb_strpos($headline, $this->beginTag);
        if ($pos !== false) {
            // headline without tags, to locate it in the unaccented text
            $sw = (str_replace(array(
                $this->beginTag,
                $this->endTag
            ) , array(
                '',
                ''
            ) , $headline));

            $offset = mb_strpos($us, $sw);

            if ($offset === false) return $headline; // case mismatch in characters
            // assumes unaccent() keeps the number of characters, so the same
            // character offsets apply to the accented text - TODO confirm
            $nh = mb_substr($s, $offset, mb_strlen($sw));

            // recompose headline with accents: insert each tag in $nh at the
            // character position it has in the database headline
            $beginLength = mb_strlen($this->beginTag);
            $endLength = mb_strlen($this->endTag);
            $bo = mb_strpos($headline, $this->beginTag, 0);
            while ($bo !== false) {
                $nh = mb_substr($nh, 0, $bo) . $this->beginTag . mb_substr($nh, $bo);
                // search after the tag just handled, so the loop always
                // advances even when both tags are the same string
                $bo = mb_strpos($headline, $this->endTag, $bo + $beginLength);
                if ($bo !== false) {
                    $nh = mb_substr($nh, 0, $bo) . $this->endTag . mb_substr($nh, $bo);
                    $bo = mb_strpos($headline, $this->beginTag, $bo + $endLength);
                }
            }
            $headline = $nh;
        }
    }
    return $headline;
}
173 }
174 ?>
static strtr8($s, $c1, $c2)
if($famId) $s
strtr8($s, $c1, $c2)
getDbAccess()
Definition: Lib.Common.php:368
unaccent($s)
Definition: Lib.Util.php:569
← centre documentaire © anakeen