Added tests for TextHelper::truncate. Truncate now also fully respects HTML tags and character entities when $considerHtml is passed as true. The third parameter of truncate can now also be an associative array containing any of the remaining params (ending, exact, considerHtml).

git-svn-id: https://svn.cakephp.org/repo/branches/1.2.x.x@6711 3807eeeb-6ff5-0310-8944-8be069107fe0
This commit is contained in:
joelmoss 2008-04-23 13:23:12 +00:00
parent aee33deb3a
commit 8fa2d1cf9c
2 changed files with 127 additions and 13 deletions

View file

@ -148,28 +148,112 @@ class TextHelper extends AppHelper {
* Cuts a string to the length of $length and replaces the last characters
* with the ending if the text is longer than length.
*
* @param string $text String to truncate.
* @param string $text String to truncate.
* @param integer $length Length of returned string, including ellipsis.
* @param string $ending Ending to be appended to the trimmed string.
* @param mixed $ending If a string, it is appended to the trimmed string. May also be an associative array with any of the keys 'ending', 'exact' and 'considerHtml', which override the last three params of this method.
* @param boolean $exact If false, $text will not be cut mid-word
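* @param boolean $considerHtml If true, HTML tags are kept intact and not counted towards $length, entities count as one character, and tags left open by the cut are closed after the ending.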
* @return string Trimmed string.
* @access public
*/
function truncate($text, $length = 100, $ending = '...', $exact = true, $considerHtml = false) {
	// Cuts $text down to at most $length characters (ending included) and
	// appends $ending. Lengths are measured in bytes (strlen/substr).
	//
	// $ending may be an options array instead of a string. Only the keys
	// 'ending', 'exact' and 'considerHtml' are read, so an array can never
	// overwrite $text or $length, and a missing 'ending' key falls back to
	// the default ellipsis instead of leaving an array in $ending.
	if (is_array($ending)) {
		$options = $ending;
		$ending = '...';
		if (array_key_exists('ending', $options)) {
			$ending = $options['ending'];
		}
		if (array_key_exists('exact', $options)) {
			$exact = $options['exact'];
		}
		if (array_key_exists('considerHtml', $options)) {
			$considerHtml = $options['considerHtml'];
		}
	}

	// named, decimal and hexadecimal entities; each one counts as a single character
	$entityPattern = '/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i';

	if ($considerHtml) {
		// If the visible text (tags stripped, entities counted as one character)
		// already fits, return the input untouched. Measuring it the same way
		// as the loop below guarantees the ending is only added after a real cut.
		$visible = preg_replace($entityPattern, ' ', preg_replace('/<.*?>/s', '', $text));
		if (strlen($visible) <= $length) {
			return $text;
		}

		// split the markup into pairs of (optional tag, following text)
		preg_match_all('/(<.+?>)?([^<>]*)/s', $text, $lines, PREG_SET_ORDER);
		$total_length = strlen($ending);
		$truncate = '';

		foreach ($lines as $line_matchings) {
			// tags are copied to the output without being counted
			if (!empty($line_matchings[1])) {
				$truncate .= $line_matchings[1];
			}

			// length of the text part of this pair, entities counted as one character
			$content_length = strlen(preg_replace($entityPattern, ' ', $line_matchings[2]));

			if ($total_length + $content_length > $length) {
				// number of visible characters that still fit
				$left = $length - $total_length;
				$entities_length = 0;

				// every entity inside the remaining range takes one visible
				// character but several bytes; remember the bytes to copy
				if (preg_match_all($entityPattern, $line_matchings[2], $entities, PREG_OFFSET_CAPTURE)) {
					foreach ($entities[0] as $entity) {
						if ($entity[1] + 1 - $entities_length <= $left) {
							$left--;
							$entities_length += strlen($entity[0]);
						} else {
							// the entity starts beyond the remaining range
							break;
						}
					}
				}

				$truncate .= substr($line_matchings[2], 0, $left + $entities_length);
				// maximum length is reached, so leave the loop
				break;
			}

			$truncate .= $line_matchings[2];
			$total_length += $content_length;

			// if the maximum length is reached exactly, leave the loop
			if ($total_length >= $length) {
				break;
			}
		}
	} else {
		if (strlen($text) <= $length) {
			return $text;
		}
		$truncate = substr($text, 0, $length - strlen($ending));
	}

	// if the words shouldn't be cut in the middle...
	if (!$exact) {
		// ...search the last occurrence of a space...
		$spacepos = strrpos($truncate, ' ');

		if ($spacepos === false) {
			// No word boundary at all: the whole text is dropped and only the
			// ending is returned. This keeps the established behaviour explicit
			// instead of relying on substr() being handed false as a length.
			$truncate = '';
		} else {
			if ($considerHtml) {
				// The last space may sit between a tag's attributes. Cutting
				// there would leave half a tag behind, so keep the whole tag
				// and cut right after its closing bracket.
				$head = substr($truncate, 0, $spacepos);
				$lastOpen = strrpos($head, '<');
				$lastClose = strrpos($head, '>');
				if ($lastOpen !== false && ($lastClose === false || $lastOpen > $lastClose)) {
					$tagEnd = strpos($truncate, '>', $spacepos);
					if ($tagEnd !== false) {
						$spacepos = $tagEnd + 1;
					}
				}
			}
			// ...and cut the text in this position
			$truncate = substr($truncate, 0, $spacepos);
		}
	}

	// Collect the tags that are still open in the text that is actually
	// returned. Doing this after the word-boundary cut (and before the ending
	// is appended) means tags removed by the cut are not closed, and tags
	// inside $ending are ignored.
	$open_tags = array();
	if ($considerHtml) {
		preg_match_all('/<.+?>/s', $truncate, $tags);
		foreach ($tags[0] as $tag) {
			// "empty elements" with or without xhtml-conform closing slash (f.e. <br/>) never stay open
			if (preg_match('/^<(\s*.+?\/\s*|\s*(img|br|input|hr|area|base|basefont|col|frame|isindex|link|meta|param)(\s.+?)?)>$/is', $tag)) {
				continue;
			}
			if (preg_match('/^<\s*\/([^\s]+?)\s*>$/s', $tag, $tag_matchings)) {
				// closing tag (f.e. </b>): remove the most recently opened match;
				// names are lower-cased on both sides so </B> closes <b>
				$pos = array_search(strtolower($tag_matchings[1]), $open_tags, true);
				if ($pos !== false) {
					array_splice($open_tags, $pos, 1);
				}
			} elseif (preg_match('/^<\s*([^\s>!]+).*?>$/s', $tag, $tag_matchings)) {
				// opening tag (f.e. <b>): innermost tag goes to the front of the list
				array_unshift($open_tags, strtolower($tag_matchings[1]));
			}
		}
	}

	// add the defined ending to the text
	$truncate .= $ending;

	// close all unclosed html-tags, innermost first
	foreach ($open_tags as $tag) {
		$truncate .= '</' . $tag . '>';
	}

	return $truncate;
}
/**
* Alias for truncate().

View file

@ -40,6 +40,36 @@ class TextTest extends UnitTestCase {
$this->Text = new TextHelper();
}
function testTruncate() {
	// Fixtures: a plain sentence, a word made of entities, simple markup with
	// an entity, markup with mixed XHTML and non-XHTML empty elements, and
	// deeply nested inline tags.
	$sentence = 'The quick brown fox jumps over the lazy dog';
	$entityWord = 'Heiz&ouml;lr&uuml;cksto&szlig;abd&auml;mpfung';
	$markup = '<b>&copy; 2005-2007, Cake Software Foundation, Inc.</b><br />written by Alexander Wegener';
	$mixedTags = '<img src="mypic.jpg"> This image tag is not XHTML conform!<br><hr/><b>But the following image tag should be conform <img src="mypic.jpg" alt="Me, myself and I" /></b><br />Great, or?';
	$nested = '0<b>1<i>2<span class="myclass">3</span>4<u>5</u>6</i>7</b>8<b>9</b>0';

	// Plain mode: tags and entities are counted and cut like any other text.
	$this->assertIdentical($this->Text->truncate($sentence, 15), 'The quick br...');
	$this->assertIdentical($this->Text->truncate($sentence, 15, '...', false), 'The quick...');
	$this->assertIdentical($this->Text->truncate($sentence, 100), 'The quick brown fox jumps over the lazy dog');
	$this->assertIdentical($this->Text->truncate($entityWord, 10, '...'), 'Heiz&ou...');
	$this->assertIdentical($this->Text->truncate($entityWord, 10, '...', false), '...');
	$this->assertIdentical($this->Text->truncate($markup, 20), '<b>&copy; 2005-20...');
	$this->assertIdentical($this->Text->truncate($mixedTags, 15), '<img src="my...');
	$this->assertIdentical($this->Text->truncate($nested, 6, ''), '0<b>1<');

	// HTML mode, first with the options passed as an array...
	$this->assertIdentical($this->Text->truncate($sentence, 15, array('ending' => '...', 'exact' => true, 'considerHtml' => true)), 'The quick br...');

	// ...then with positional arguments.
	$this->assertIdentical($this->Text->truncate($sentence, 15, '...', true, true), 'The quick br...');
	$this->assertIdentical($this->Text->truncate($sentence, 15, '...', false, true), 'The quick...');
	$this->assertIdentical($this->Text->truncate($entityWord, 10, '...', true, true), 'Heiz&ouml;lr...');
	$this->assertIdentical($this->Text->truncate($entityWord, 10, '...', false, true), '...');
	$this->assertIdentical($this->Text->truncate($markup, 20, '...', true, true), '<b>&copy; 2005-2007, Cake...</b>');
	$this->assertIdentical($this->Text->truncate($mixedTags, 15, '...', true, true), '<img src="mypic.jpg"> This image ...');
	$this->assertIdentical($this->Text->truncate($mixedTags, 45, '...', true, true), '<img src="mypic.jpg"> This image tag is not XHTML conform!<br><hr/><b>But t...</b>');
	$this->assertIdentical($this->Text->truncate($mixedTags, 90, '...', true, true), '<img src="mypic.jpg"> This image tag is not XHTML conform!<br><hr/><b>But the following image tag should be conform <img src="mypic.jpg" alt="Me, myself and I" /></b><br />Grea...');
	$this->assertIdentical($this->Text->truncate($nested, 6, '', true, true), '0<b>1<i>2<span class="myclass">3</span>4<u>5</u></i></b>');
}
function testHighlight() {
$text = 'This is a test text';
$phrases = array('This', 'text');