class TEXT {
// Recognised BBCode tags mapped to the maximum number of attributes each accepts
// (0 = the tag takes no attribute, 1 = an optional [tag=attribute]).
private $ValidTags = array('b'=>0, 'u'=>0, 'i'=>0, 's'=>0, '*'=>0, '#'=>0, 'artist'=>0, 'user'=>0, 'n'=>0, 'inlineurl'=>0, 'inlinesize'=>1, 'headline'=>1, 'align'=>1, 'color'=>1, 'colour'=>1, 'size'=>1, 'url'=>1, 'img'=>1, 'quote'=>1, 'pre'=>1, 'code'=>1, 'tex'=>0, 'hide'=>1, 'plain'=>0, 'important'=>0, 'torrent'=>0, 'rule'=>0, 'mature'=>1,
);
// Smiley codes mapped to their image file names. The constructor rewrites the
// values in place.
// NOTE(review): full_format() passes text through display_str() before any
// further processing; if that helper HTML-escapes quotes and angle brackets,
// the keys containing a quote, < or > must be written in escaped form to
// match - confirm against display_str().
private $Smileys = array(
    ':angry:' => 'angry.gif',
    ':-D' => 'biggrin.gif',
    ':D' => 'biggrin.gif',
    ':|' => 'blank.gif',
    ':-|' => 'blank.gif',
    ':blush:' => 'blush.gif',
    ':cool:' => 'cool.gif',
    ':\'(' => 'crying.gif',
    ':crying:' => 'crying.gif',
    '>.>' => 'eyesright.gif',
    ':frown:' => 'frown.gif',
    '<3' => 'heart.gif',
    ':unsure:' => 'hmm.gif',
    //':\\' => 'hmm.gif',
    ':whatlove:' => 'ilu.gif',
    ':lol:' => 'laughing.gif',
    ':loveflac:' => 'loveflac.gif',
    ':flaclove:' => 'loveflac.gif',
    ':ninja:' => 'ninja.gif',
    ':no:' => 'no.gif',
    ':nod:' => 'nod.gif',
    ':ohno:' => 'ohnoes.gif',
    ':ohnoes:' => 'ohnoes.gif',
    ':omg:' => 'omg.gif',
    ':o' => 'ohshit.gif',
    ':O' => 'ohshit.gif',
    ':paddle:' => 'paddle.gif',
    ':(' => 'sad.gif',
    ':-(' => 'sad.gif',
    ':shifty:' => 'shifty.gif',
    ':sick:' => 'sick.gif',
    ':)' => 'smile.gif',
    ':-)' => 'smile.gif',
    ':sorry:' => 'sorry.gif',
    ':thanks:' => 'thanks.gif',
    ':P' => 'tongue.gif',
    ':p' => 'tongue.gif',
    ':-P' => 'tongue.gif',
    ':-p' => 'tongue.gif',
    ':wave:' => 'wave.gif',
    ';-)' => 'wink.gif',
    ':wink:' => 'wink.gif',
    ':creepy:' => 'creepy.gif',
    ':worried:' => 'worried.gif',
    ':wtf:' => 'wtf.gif',
    ':wub:' => 'wub.gif',
);
private $NoImg = 0; // If images should be turned into URLs

private $Levels = 0;

/**
 * The maximum amount of nesting allowed (exclusive)
 * In reality n-1 nests are shown.
 * @var int $MaximumNests
 */
private $MaximumNests = 10;

/**
 * Used to detect and disable parsing (e.g. TOC) within quotes
 * @var int $InQuotes
 */
private $InQuotes = 0;

/**
 * Used to [hide] quote trains starting with the specified depth (inclusive)
 * @var int $NestsBeforeHide
 *
 * This defaulted to 5 but was raised to 10 to effectively "disable" it until
 * an optimal number of nested [quote] tags is chosen. The variable $MaximumNests
 * effectively overrides this variable, if $MaximumNests is less than the value
 * of $NestsBeforeHide.
 */
private $NestsBeforeHide = 10;

/**
 * Array of headlines for Table Of Contents (TOC)
 * @var array $Headlines
 */
private $Headlines;

/**
 * Counter for making headline URLs unique
 * @var int $HeadlineID
 */
private $HeadlineID = 0;

/**
 * Depth
 * @var array $HeadlineLevels
 */
private $HeadlineLevels = array('1', '2', '3', '4');

/**
 * TOC enabler
 * @var bool $TOC
 */
private $TOC;
/**
 * Set the TOC flag and rewrite the smiley table in place.
 *
 * @param bool $TOC When used, will enable TOC
 */
public function __construct ($TOC = false) {
    $this->TOC = (bool)$TOC;
    foreach ($this->Smileys as $Key=>$Val) {
        // NOTE(review): every smiley is replaced by an empty string and $Val
        // (the image file name) is never used. Presumably image markup built
        // from $Val was lost from this literal - restore it from version
        // control; it cannot be reconstructed from this file alone.
        $this->Smileys[$Key] = '';
    }
}
/**
 * Output BBCode as XHTML
 * @param string $Str BBCode text
 * @param bool $OutputTOC Output TOC near (above) text
 * @param int $Min See {@link parse_toc}
 * @return string
 */
public function full_format ($Str, $OutputTOC = true, $Min = 3) {
// Pipeline: pass the text through display_str() (defined elsewhere), reset the
// collected headlines, normalise URL tags, rewrite ==heading== markup into
// [headline=N] or [inlinesize=N] tags, parse() the result into blocks, render
// the blocks with to_html(), convert newlines with nl2br() and, when both the
// instance TOC flag and $OutputTOC are set, prepend the output of parse_toc($Min).
$Str = display_str($Str);
$this->Headlines = array();
// Inline links: drop any whitespace that directly follows [url], [url=, [img= or [img]
$URLPrefix = '(\[url\]|\[url\=|\[img\=|\[img\])';
$Str = preg_replace('/'.$URLPrefix.'\s+/i', '$1', $Str);
// NOTE(review): the next statement is truncated - its pattern string is never
// terminated and the condition that the else branch further down pairs with
// (apparently a test of $this->TOC) has been swallowed. Restore the original
// statements from version control before relying on this method.
// In the branches below, 5/4/3/2 equals signs map to headline levels 4/3/2/1,
// and in the else branch 4/3/2 equals signs map to inlinesize 3/5/7.
$Str = preg_replace('/(?TOC) {
$Str = preg_replace('/(\={5})([^=].*)\1/i', '[headline=4]$2[/headline]', $Str);
$Str = preg_replace('/(\={4})([^=].*)\1/i', '[headline=3]$2[/headline]', $Str);
$Str = preg_replace('/(\={3})([^=].*)\1/i', '[headline=2]$2[/headline]', $Str);
$Str = preg_replace('/(\={2})([^=].*)\1/i', '[headline=1]$2[/headline]', $Str);
} else {
$Str = preg_replace('/(\={4})([^=].*)\1/i', '[inlinesize=3]$2[/inlinesize]', $Str);
$Str = preg_replace('/(\={3})([^=].*)\1/i', '[inlinesize=5]$2[/inlinesize]', $Str);
$Str = preg_replace('/(\={2})([^=].*)\1/i', '[inlinesize=7]$2[/inlinesize]', $Str);
$Str = $this->parse($Str);
$HTML = $this->to_html($Str);
$HTML = nl2br($HTML);
if ($this->TOC && $OutputTOC)
$HTML = $this->parse_toc($Min) . $HTML;
return $HTML;
public function strip_bbcode ($Str) {
// Returns $Str with its BBCode reduced to text: the input goes through
// display_str() (defined elsewhere), is parsed into blocks, flattened by
// raw_text() (defined elsewhere) and finally has newlines converted by nl2br().
$Str = display_str($Str);
//Inline links
// NOTE(review): the next statement is truncated - the pattern string is never
// terminated and whatever stood between the opening of the regex and the
// call to parse() is missing. Restore it from version control.
$Str = preg_replace('/(?parse($Str);
$Str = $this->raw_text($Str);
$Str = nl2br($Str);
return $Str;
/**
 * Check whether a string is a complete, well-formed URL accepted by the formatter.
 *
 * The pattern is anchored at both ends and case-insensitive. It accepts the
 * http(s), ftp(s) and irc schemes, optional credentials, an IPv4 address or a
 * dotted host name, an optional port, a path, a query string and an anchor.
 *
 * @param string $Str       Candidate URL
 * @param string $Extension Optional regex fragment that must follow the path,
 *                          e.g. '\.(jpe?g|gif|png)' to require an image extension
 * @param bool   $Inline    When true, square brackets in the query string are
 *                          only accepted in the form [digits]
 * @return int|false 1 on match, 0 on no match, false on a regex error
 *                   (the raw preg_match() result)
 */
private function valid_url ($Str, $Extension = '', $Inline = false) {
    $Regex = '/^';
    $Regex .= '(https?|ftps?|irc):\/\/'; // protocol
    $Regex .= '(\w+(:\w+)?@)?'; // user:pass@
    $Regex .= '(';
    $Regex .= '(([0-9]{1,3}\.){3}[0-9]{1,3})|'; // IP or...
    $Regex .= '(([a-z0-9\-\_]+\.)+\w{2,6})'; // ...a dotted host name
    $Regex .= ')';
    $Regex .= '(:[0-9]{1,5})?'; // port
    $Regex .= '\/?'; // slash?
    $Regex .= '(\/?[0-9a-z\-_.,&=@~%\/:;()+|!#]+)*'; // /file

    // The required extension sits between the path and the query string.
    if (!empty($Extension)) {
        $Regex .= $Extension;
    }

    // query string
    if ($Inline) {
        $Regex .= '(\?([0-9a-z\-_.,%\/\@~&=:;()+*\^$!#|?]|\[\d*\])*)?';
    } else {
        $Regex .= '(\?[0-9a-z\-_.,%\/\@[\]~&=:;()+*\^$!#|?]*)?';
    }

    $Regex .= '(#[a-z0-9\-_.,%\/\@[\]~&=:;()+*\^$!]*)?'; // #anchor
    $Regex .= '$/i';

    return preg_match($Regex, $Str);
}
/**
 * Convert an absolute URL that points at this site into a root-relative path.
 *
 * @param string $Str URL to inspect
 * @return string|false "/path?query#fragment" when the URL carries no explicit
 *                      port and its host is NONSSL_SITE_URL or a subdomain of it;
 *                      false when the URL cannot be parsed, has no host, names
 *                      another host or a port, or has no path, query or fragment
 */
public function local_url($Str) {
    $URLInfo = parse_url($Str);
    // Relative or malformed URLs carry no host and can never be identified as local.
    if (!$URLInfo || empty($URLInfo['host'])) {
        return false;
    }
    $Host = $URLInfo['host'];

    // If for some reason your site does not require subdomains or contains a directory in the SITE_URL, revert to the line below.
    //if ($Host == NONSSL_SITE_URL || $Host == SSL_SITE_URL || $Host == 'www.'.NONSSL_SITE_URL) {

    // The site name is escaped and the pattern anchored so that look-alike hosts
    // such as "<site>.evil.net" or "not<site>" are not mistaken for local ones.
    $SitePattern = '/^(\S+\.)*'.preg_quote(NONSSL_SITE_URL, '/').'$/';
    if (!empty($URLInfo['port']) || !preg_match($SitePattern, $Host)) {
        return false;
    }

    $URL = '';
    if (!empty($URLInfo['path'])) {
        $URL .= ltrim($URLInfo['path'], '/'); // Things break if the path starts with '//'
    }
    if (!empty($URLInfo['query'])) {
        $URL .= "?$URLInfo[query]";
    }
    if (!empty($URLInfo['fragment'])) {
        $URL .= "#$URLInfo[fragment]";
    }

    // Compare against '' rather than testing truthiness so that "/0" survives.
    return $URL !== '' ? "/$URL" : false;
}
/*
How parsing works

Parsing takes $Str, breaks it into blocks, and builds it into $Array.
Blocks start at the beginning of $Str, when the parser encounters a [, and after a tag has been closed.
This is all done in a loop.

1) Find the next tag (regex)
1a) If there aren't any tags left, write everything remaining to a block and return (done parsing)
1b) If the next tag isn't where the pointer is, write everything up to there to a text block.
2) See if it's a [[wiki-link]] or an ordinary tag, and get the tag name
3) If it's not a wiki link:
3a) check it against the $this->ValidTags array to see if it's actually a tag and not [bullshit]
If it's [not a tag], just leave it as plaintext and move on
3b) Get the attribute, if it exists [name=attribute]
4) Move the pointer past the end of the tag
5) Find out where the tag closes (beginning of [/tag])
5a) Different for different types of tag. Some tags don't close, others are weird like [*]
5b) If it's a normal tag, it may have versions of itself nested inside - e.g.:
[quote=bob]*
[quote=joe]I am a redneck!**[/quote]
Me too!
***[/quote]
If we're at the position *, the first [/quote] tag is denoted by **.
However, our quote tag doesn't actually close there. We must perform
a loop which checks the number of opening [quote] tags, and make sure
they are all closed before we find our final [/quote] tag (***).
5c) Get the contents between [open] and [/close] and call it the block.
In many cases, this will be parsed itself later on, in a new parse() call.
5d) Move the pointer past the end of the [/close] tag.
6) Depending on what type of tag we're dealing with, create an array with the attribute and block.
In many cases, the block may be parsed here itself. Stick them in the $Array.
7) Increment array pointer, start again (past the end of the [/close] tag)
*/
/**
 * Break a BBCode string into an ordered list of text and tag blocks.
 *
 * Plain text becomes string elements. Every recognised tag becomes an array
 * with a 'Type' key, a 'Val' key and, for some tags, an 'Attr' key; lists also
 * carry 'ListType' and 'Tag'. The contents of most tags are parsed recursively.
 * Tags missing from $this->ValidTags are kept as plain text.
 *
 * @param string $Str BBCode text
 * @return array Blocks in document order
 */
private function parse ($Str) {
    $i = 0; // Pointer to keep track of where we are in $Str
    $Len = strlen($Str);
    $Array = array();
    $ArrayPos = 0;

    while ($i < $Len) {
        $Block = '';

        // 1) Find the next tag (regex)
        // [name(=attribute)?]|[[wiki-link]]
        $IsTag = preg_match("/((\[[a-zA-Z*#]+)(=(?:[^\n'\"\[\]]|\[\d*\])+)?\])|(\[\[[^\n\"'\[\]]+\]\])/", $Str, $Tag, PREG_OFFSET_CAPTURE, $i);

        // 1a) If there aren't any tags left, write everything remaining to a block
        if (!$IsTag) {
            // No more tags
            $Array[$ArrayPos] = substr($Str, $i);
            break;
        }

        // 1b) If the next tag isn't where the pointer is, write everything up to there to a text block.
        $TagPos = $Tag[0][1];
        if ($TagPos > $i) {
            $Array[$ArrayPos] = substr($Str, $i, $TagPos - $i);
            $ArrayPos++; // the tag itself goes into the next slot
            $i = $TagPos;
        }

        // 2) See if it's a [[wiki-link]] or an ordinary tag, and get the tag name
        $MaxAttribs = 0;
        if (!empty($Tag[4][0])) { // Wiki-link
            $WikiLink = true;
            $TagName = substr($Tag[4][0], 2, -2);
            $Attrib = '';
        } else { // 3) If it's not a wiki link:
            $WikiLink = false;
            $TagName = strtolower(substr($Tag[2][0], 1));

            //3a) check it against the $this->ValidTags array to see if it's actually a tag and not [bullshit]
            if (!isset($this->ValidTags[$TagName])) {
                // Unknown tag: keep it verbatim as text and resume after it
                $Array[$ArrayPos] = substr($Str, $i, ($TagPos - $i) + strlen($Tag[0][0]));
                $i = $TagPos + strlen($Tag[0][0]);
                $ArrayPos++;
                continue;
            }

            $MaxAttribs = $this->ValidTags[$TagName];

            // 3b) Get the attribute, if it exists [name=attribute]
            if (!empty($Tag[3][0])) {
                $Attrib = substr($Tag[3][0], 1);
            } else {
                $Attrib = '';
            }
        }

        // 4) Move the pointer past the end of the tag
        $i = $TagPos + strlen($Tag[0][0]);

        // 5) Find out where the tag closes (beginning of [/tag])

        // Unfortunately, BBCode doesn't have nice standards like XHTML
        // [*], [img=...], and http:// follow different formats
        // Thus, we have to handle these before we handle the majority of tags

        //5a) Different for different types of tag. Some tags don't close, others are weird like [*]
        // Wiki links are tested first so that a link whose name happens to equal
        // a tag name (e.g. [[quote]]) is never treated as that tag.
        if ($WikiLink || $TagName === 'n') {
            // Don't need to do anything - empty tag with no closing
        } elseif ($TagName === 'img' && !empty($Tag[3][0])) { //[img=...]
            $Block = ''; // Nothing inside this tag
            // Don't need to touch $i
        } elseif ($TagName === 'inlineurl') { // We did a big replace early on to turn http:// into [inlineurl]http://
            // Let's say the block can stop at a newline or a space.
            // strcspn always returns a length, which already covers the case
            // where the URL runs to the end of the string.
            $CloseTag = strcspn($Str, " \n\r", $i);
            // Trailing punctuation belongs to the sentence, not to the URL
            if (preg_match('/[!,.?:]+$/', substr($Str, $i, $CloseTag), $Match)) {
                $CloseTag -= strlen($Match[0]);
            }

            $URL = substr($Str, $i, $CloseTag);
            if (substr($URL, -1) == ')' && substr_count($URL, '(') < substr_count($URL, ')')) {
                // An unbalanced closing parenthesis is left in the surrounding text
                $CloseTag--;
                $URL = substr($URL, 0, -1);
            }
            $Block = $URL; // Get the URL

            // strcspn returns the number of characters after the offset $i, not after the beginning of the string
            // Therefore, we use += instead of the = everywhere else
            $i += $CloseTag; // 5d) Move the pointer past the end of the [/close] tag.
        } elseif ($TagName === '*' || $TagName === '#') {
            // We're in a list. Find where it ends
            $NewLine = $i;
            do { // Look for \n[*]
                // strpos throws on PHP 8 when the offset lies beyond the string,
                // which happens when the list tag is the last thing in $Str
                $NewLine = ($NewLine + 1 <= $Len) ? strpos($Str, "\n", $NewLine + 1) : false;
            } while ($NewLine !== false && substr($Str, $NewLine + 1, 3) == "[$TagName]");

            $CloseTag = $NewLine;
            if ($CloseTag === false) { // block finishes with list
                $CloseTag = $Len;
            }
            $Block = substr($Str, $i, $CloseTag - $i); // Get the list
            $i = $CloseTag; // 5d) Move the pointer past the end of the [/close] tag.
        } else {
            //5b) If it's a normal tag, it may have versions of itself nested inside
            $CloseTag = $i - 1;
            $InTagPos = $i - 1;
            $NumInOpens = 0;
            $NumInCloses = -1;

            $InOpenRegex = '/\[('.$TagName.')';
            if ($MaxAttribs > 0) {
                $InOpenRegex .= "(=[^\n'\"\[\]]+)?";
            }
            $InOpenRegex .= '\]/i';

            // Every time we find an internal open tag of the same type, search for the next close tag
            // (as the first close tag won't do - it's been opened again)
            do {
                $CloseTag = stripos($Str, "[/$TagName]", $CloseTag + 1);
                if ($CloseTag === false) {
                    // Unclosed tag: it swallows the rest of the string
                    $CloseTag = $Len;
                    break;
                } else {
                    $NumInCloses++; // Majority of cases
                }

                // Is there another open tag inside this one?
                $OpenTag = preg_match($InOpenRegex, $Str, $InTag, PREG_OFFSET_CAPTURE, $InTagPos + 1);
                if (!$OpenTag || $InTag[0][1] > $CloseTag) {
                    break;
                } else {
                    $InTagPos = $InTag[0][1];
                    $NumInOpens++;
                }
            } while ($NumInOpens > $NumInCloses);

            // Find the internal block inside the tag
            $Block = substr($Str, $i, $CloseTag - $i); // 5c) Get the contents between [open] and [/close] and call it the block.

            $i = $CloseTag + strlen($TagName) + 3; // 5d) Move the pointer past the end of the [/close] tag.
        }

        // 6) Depending on what type of tag we're dealing with, create an array with the attribute and block.
        if ($WikiLink) {
            $Array[$ArrayPos] = array('Type'=>'wiki','Val'=>$TagName);
        } else {
            switch ($TagName) {
                case 'inlineurl':
                    $Array[$ArrayPos] = array('Type'=>'inlineurl', 'Attr'=>$Block, 'Val'=>'');
                    break;
                case 'url':
                    if (empty($Attrib)) { // [url]http://...[/url] - always set URL to attribute
                        $Array[$ArrayPos] = array('Type'=>'url', 'Attr'=>$Block, 'Val'=>'');
                    } else {
                        $Array[$ArrayPos] = array('Type'=>'url', 'Attr'=>$Attrib, 'Val'=>$this->parse($Block));
                    }
                    break;
                case 'quote':
                    $Array[$ArrayPos] = array('Type'=>'quote', 'Attr'=>$this->parse($Attrib), 'Val'=>$this->parse($Block));
                    break;
                case 'img':
                case 'image':
                    if (empty($Block)) {
                        $Block = $Attrib;
                    }
                    $Array[$ArrayPos] = array('Type'=>'img', 'Val'=>$Block);
                    break;
                case 'aud':
                case 'mp3':
                case 'audio':
                    if (empty($Block)) {
                        $Block = $Attrib;
                    }
                    $Array[$ArrayPos] = array('Type'=>'aud', 'Val'=>$Block);
                    break;
                case 'user':
                    $Array[$ArrayPos] = array('Type'=>'user', 'Val'=>$Block);
                    break;
                case 'artist':
                    $Array[$ArrayPos] = array('Type'=>'artist', 'Val'=>$Block);
                    break;
                case 'torrent':
                    $Array[$ArrayPos] = array('Type'=>'torrent', 'Val'=>$Block);
                    break;
                case 'tex':
                    $Array[$ArrayPos] = array('Type'=>'tex', 'Val'=>$Block);
                    break;
                case 'rule':
                    $Array[$ArrayPos] = array('Type'=>'rule', 'Val'=>$Block);
                    break;
                case 'pre':
                case 'code':
                case 'plain':
                    // These tags show their contents literally, so undo the
                    // tag markup that full_format() inserted before parsing.
                    $Block = strtr($Block, array('[inlineurl]' => ''));

                    $Callback = function ($matches) {
                        $n = $matches[2];
                        $text = '';
                        if ($n < 5 && $n > 0) {
                            $e = str_repeat('=', $matches[2] + 1);
                            $text = $e . $matches[3] . $e;
                        }
                        return $text;
                    };
                    $Block = preg_replace_callback('/\[(headline)\=(\d)\](.*?)\[\/\1\]/i', $Callback, $Block);

                    $Block = preg_replace('/\[inlinesize\=3\](.*?)\[\/inlinesize\]/i', '====$1====', $Block);
                    $Block = preg_replace('/\[inlinesize\=5\](.*?)\[\/inlinesize\]/i', '===$1===', $Block);
                    $Block = preg_replace('/\[inlinesize\=7\](.*?)\[\/inlinesize\]/i', '==$1==', $Block);

                    $Array[$ArrayPos] = array('Type'=>$TagName, 'Val'=>$Block);
                    break;
                case 'hide':
                    $Array[$ArrayPos] = array('Type'=>'hide', 'Attr'=>$Attrib, 'Val'=>$this->parse($Block));
                    break;
                case 'mature':
                    $Array[$ArrayPos] = array('Type'=>'mature', 'Attr'=>$Attrib, 'Val'=>$this->parse($Block));
                    break;
                case '#':
                case '*':
                    $Array[$ArrayPos] = array('Type'=>'list');
                    $Array[$ArrayPos]['Val'] = explode("[$TagName]", $Block);
                    $Array[$ArrayPos]['ListType'] = $TagName === '*' ? 'ul' : 'ol';
                    $Array[$ArrayPos]['Tag'] = $TagName;
                    foreach ($Array[$ArrayPos]['Val'] as $Key=>$Val) {
                        $Array[$ArrayPos]['Val'][$Key] = $this->parse(trim($Val));
                    }
                    break;
                case 'n':
                    // [n] emits no block; step back so the increment below
                    // leaves the array indices contiguous.
                    $ArrayPos--;
                    break; // n serves only to disrupt bbcode (backwards compatibility - use [pre])
                default:
                    // Basic tags, like [b] or [size=5]
                    $Array[$ArrayPos] = array('Type'=>$TagName, 'Val'=>$this->parse($Block));
                    if (!empty($Attrib) && $MaxAttribs > 0) {
                        $Array[$ArrayPos]['Attr'] = strtolower($Attrib);
                    }
            }
        }

        $ArrayPos++; // 7) Increment array pointer, start again (past the end of the [/close] tag)
    }
    return $Array;
}
/**
 * Generates a navigation list for TOC
 * @param int $Min Minimum number of headlines required for a TOC list
 */
public function parse_toc ($Min = 3) {
if (count($this->Headlines) > $Min) {
$list = '
'.$Block['Val'].''; break; case 'code': $Str .= '
case 'list':
$Str .= "<$Block[ListType] class=\"postlist\">";
foreach ($Block['Val'] as $Line) {
$Str .= ''; } if (!empty($Block['Attr'])) { $Exploded = explode('|', $this->to_html($Block['Attr'])); if (isset($Exploded[1]) && (is_numeric($Exploded[1]) || (in_array($Exploded[1][0], array('a', 't', 'c', 'r')) && is_numeric(substr($Exploded[1], 1))))) { // the part after | is either a number or starts with a, t, c or r, followed by a number (forum post, artist comment, torrent comment, collage comment or request comment, respectively) $PostID = trim($Exploded[1]); $Str .= ''.$Exploded[0].' wrote: '; } else { $Str .= ''.$Exploded[0].' wrote: '; } } $Str .= ''.$this->to_html($Block['Val']).''; if ($this->InQuotes == $this->NestsBeforeHide) { //Close quote the deeply nested quote [hide]. $Str .= '
'.$this->to_html($Block['Val']).''; break; case 'mature': if (G::$LoggedUser['EnableMatureContent']) { if (!empty($Block['Attr'])) { $Str .= 'Mature content: ' . $Block['Attr'] . '
'.$this->to_html($Block['Val']).''; } else { $Str .= 'Use of the [mature] tag requires a description. The correct format is as follows: [mature=description] ...content... [/mature], where "description" is a mandatory description of the post. Misleading descriptions will be penalized. For further information on our mature content policies, please refer to this wiki.'; } } else { $Str .= 'Mature content has been blocked. You can choose to view mature content by editing your settings.'; } break; case 'img': if ($this->NoImg > 0 && $this->valid_url($Block['Val'])) { $Str .= ''.$Block['Val'].' (image)'; break; } if (!$this->valid_url($Block['Val'], '\.(jpe?g|gif|png|bmp|tiff)')) { $Str .= '[img]'.$Block['Val'].'[/img]'; } else { $LocalURL = $this->local_url($Block['Val']); if ($LocalURL) { $Str .= '