Backport UTF-8 (multibyte) fix for CakeText::tokenize().

This commit is contained in:
Mark Scherer 2015-07-13 08:23:05 +02:00
parent e07b64f4e3
commit 2eea245491
2 changed files with 17 additions and 11 deletions

View file

@ -304,6 +304,11 @@ class CakeTextTest extends CakeTestCase {
$expected = array('tagA', '"single tag"', 'tagB'); $expected = array('tagA', '"single tag"', 'tagB');
$this->assertEquals($expected, $result); $this->assertEquals($expected, $result);
// Ideographic width space.
$result = CakeText::tokenize("tagA\xe3\x80\x80\"single\xe3\x80\x80tag\"\xe3\x80\x80tagB", "\xe3\x80\x80", '"', '"');
$expected = array('tagA', '"single　tag"', 'tagB');
$this->assertEquals($expected, $result);
$result = CakeText::tokenize(''); $result = CakeText::tokenize('');
$expected = array(); $expected = array();
$this->assertEquals($expected, $result); $this->assertEquals($expected, $result);

View file

@ -115,15 +115,15 @@ class CakeText {
$offset = 0; $offset = 0;
$buffer = ''; $buffer = '';
$results = array(); $results = array();
$length = strlen($data); $length = mb_strlen($data);
$open = false; $open = false;
while ($offset <= $length) { while ($offset <= $length) {
$tmpOffset = -1; $tmpOffset = -1;
$offsets = array( $offsets = array(
strpos($data, $separator, $offset), mb_strpos($data, $separator, $offset),
strpos($data, $leftBound, $offset), mb_strpos($data, $leftBound, $offset),
strpos($data, $rightBound, $offset) mb_strpos($data, $rightBound, $offset)
); );
for ($i = 0; $i < 3; $i++) { for ($i = 0; $i < 3; $i++) {
if ($offsets[$i] !== false && ($offsets[$i] < $tmpOffset || $tmpOffset == -1)) { if ($offsets[$i] !== false && ($offsets[$i] < $tmpOffset || $tmpOffset == -1)) {
@ -131,22 +131,23 @@ class CakeText {
} }
} }
if ($tmpOffset !== -1) { if ($tmpOffset !== -1) {
$buffer .= substr($data, $offset, ($tmpOffset - $offset)); $buffer .= mb_substr($data, $offset, ($tmpOffset - $offset));
if (!$depth && $data{$tmpOffset} === $separator) { $char = mb_substr($data, $tmpOffset, 1);
if (!$depth && $char === $separator) {
$results[] = $buffer; $results[] = $buffer;
$buffer = ''; $buffer = '';
} else { } else {
$buffer .= $data{$tmpOffset}; $buffer .= $char;
} }
if ($leftBound !== $rightBound) { if ($leftBound !== $rightBound) {
if ($data{$tmpOffset} === $leftBound) { if ($char === $leftBound) {
$depth++; $depth++;
} }
if ($data{$tmpOffset} === $rightBound) { if ($char === $rightBound) {
$depth--; $depth--;
} }
} else { } else {
if ($data{$tmpOffset} === $leftBound) { if ($char === $leftBound) {
if (!$open) { if (!$open) {
$depth++; $depth++;
$open = true; $open = true;
@ -157,7 +158,7 @@ class CakeText {
} }
$offset = ++$tmpOffset; $offset = ++$tmpOffset;
} else { } else {
$results[] = $buffer . substr($data, $offset); $results[] = $buffer . mb_substr($data, $offset);
$offset = $length + 1; $offset = $length + 1;
} }
} }