Backport the UTF-8 (multibyte) fix for CakeText::tokenize().

This commit is contained in:
Mark Scherer 2015-07-13 08:23:05 +02:00
parent e07b64f4e3
commit 2eea245491
2 changed files with 17 additions and 11 deletions

View file

@ -304,6 +304,11 @@ class CakeTextTest extends CakeTestCase {
$expected = array('tagA', '"single tag"', 'tagB');
$this->assertEquals($expected, $result);
// Ideographic (full-width) space, U+3000, used as the separator.
$result = CakeText::tokenize("tagA\xe3\x80\x80\"single\xe3\x80\x80tag\"\xe3\x80\x80tagB", "\xe3\x80\x80", '"', '"');
$expected = array('tagA', '"single tag"', 'tagB');
$this->assertEquals($expected, $result);
$result = CakeText::tokenize('');
$expected = array();
$this->assertEquals($expected, $result);

View file

@ -115,15 +115,15 @@ class CakeText {
$offset = 0;
$buffer = '';
$results = array();
$length = strlen($data);
$length = mb_strlen($data);
$open = false;
while ($offset <= $length) {
$tmpOffset = -1;
$offsets = array(
strpos($data, $separator, $offset),
strpos($data, $leftBound, $offset),
strpos($data, $rightBound, $offset)
mb_strpos($data, $separator, $offset),
mb_strpos($data, $leftBound, $offset),
mb_strpos($data, $rightBound, $offset)
);
for ($i = 0; $i < 3; $i++) {
if ($offsets[$i] !== false && ($offsets[$i] < $tmpOffset || $tmpOffset == -1)) {
@ -131,22 +131,23 @@ class CakeText {
}
}
if ($tmpOffset !== -1) {
$buffer .= substr($data, $offset, ($tmpOffset - $offset));
if (!$depth && $data{$tmpOffset} === $separator) {
$buffer .= mb_substr($data, $offset, ($tmpOffset - $offset));
$char = mb_substr($data, $tmpOffset, 1);
if (!$depth && $char === $separator) {
$results[] = $buffer;
$buffer = '';
} else {
$buffer .= $data{$tmpOffset};
$buffer .= $char;
}
if ($leftBound !== $rightBound) {
if ($data{$tmpOffset} === $leftBound) {
if ($char === $leftBound) {
$depth++;
}
if ($data{$tmpOffset} === $rightBound) {
if ($char === $rightBound) {
$depth--;
}
} else {
if ($data{$tmpOffset} === $leftBound) {
if ($char === $leftBound) {
if (!$open) {
$depth++;
$open = true;
@ -157,7 +158,7 @@ class CakeText {
}
$offset = ++$tmpOffset;
} else {
$results[] = $buffer . substr($data, $offset);
$results[] = $buffer . mb_substr($data, $offset);
$offset = $length + 1;
}
}