User:Spencerk/source
Click 'edit' to view this source with its original line breaks and formatting.
<?php
///////////////hi
// Entry point. Reads the posted text, loads the word lists, tokenises the
// text and prints the annotated page. The stages talk to each other through
// globals ($text, $words, $wordcount, $changes, ...), so the call order below
// matters.
//
// Each call sits on its own line on purpose: a trailing "//" comment on a
// shared line would comment out every call written after it.

// A missing form field becomes an empty text instead of an undefined-index error.
$text = isset($_POST['text']) ? $_POST['text'] : '';
//$text="test place";
$debug = 0;

//here's the plan.
idiomparser();
translateparser(); //get language data
basicparser();
pronounparser();

slowexplode(); //prepare input data
joiner();

echohtml(); // now do it
// go nuts
brain();
hardwordslist();

//init
// NOTE(review): these two limits are applied after the pipeline has already
// run, which is where the original placed them. They probably belong before
// idiomparser() - confirm before moving, a 20M cap could abort large inputs.
set_time_limit(60);
ini_set("memory_limit", "20M");
/**
 * Loads idiom.xml into the globals $idiomdata, $unwind and $idiomcount.
 *
 * The text of every <IDIOM> element is appended to $idiomdata and the text of
 * every <SIMPLE> element to $unwind. $idiomcount is the number of closed
 * <SIMPLE> elements. Element names are compared in upper case because the
 * parser is created with its default case folding.
 *
 * Terminates the script with die() when the file cannot be opened or the
 * parser cannot be created. A malformed document stops parsing silently and
 * keeps whatever was read up to that point.
 */
function idiomparser(): void
{
    global $idiomdata, $unwind, $idiomcount;

    if (!($fp = @fopen("idiom.xml", "r"))) {
        die("Couldn't open XML.");
    }

    $idiomcount = 0;
    $idiomdata = array();
    $unwind = array();

    if (!($parser = xml_parser_create())) {
        fclose($fp);
        die("Couldn't create parser.");
    }

    // Handlers are closures rather than named functions declared in here, so
    // calling idiomparser() twice cannot trigger a "cannot redeclare" fatal.
    $current = null; // upper-cased name of the element whose text is being read
    $buffer = '';    // text collected for $current so far

    $onStart = function ($p, $name, $attrib) use (&$current, &$buffer) {
        $current = $name;
        $buffer = '';
    };

    // The parser may deliver one element's text in several pieces (around
    // entities, or where a read chunk ends), so the pieces are collected here
    // and only stored once the closing tag arrives.
    $onText = function ($p, $data) use (&$current, &$buffer) {
        if ($current === "SIMPLE" || $current === "IDIOM") {
            $buffer .= $data;
        }
    };

    $onEnd = function ($p, $name) use (&$current, &$buffer, &$idiomdata, &$unwind, &$idiomcount) {
        if ($name === $current && $buffer !== '') {
            // Appending keeps both lists 0-based and free of gaps.
            if ($name === "SIMPLE") {
                $unwind[] = $buffer;
            } elseif ($name === "IDIOM") {
                $idiomdata[] = $buffer;
            }
        }
        if ($name == "SIMPLE") {
            $idiomcount++;
        }
        // Whitespace between elements must not be attributed to this element.
        $current = null;
        $buffer = '';
    };

    xml_set_element_handler($parser, $onStart, $onEnd);
    xml_set_character_data_handler($parser, $onText);

    do {
        $chunk = fread($fp, 4096);
        if ($chunk === false) {
            break;
        }
        // Evaluated after the read so the last chunk is flagged as final even
        // when the file size is an exact multiple of the chunk size.
        $isFinal = feof($fp);
        if (!xml_parse($parser, $chunk, $isFinal)) {
            break;
        }
    } while (!$isFinal);

    xml_parser_free($parser);
    fclose($fp);
}//idiomparser
/**
 * Loads translate.xml into the globals $harddata, $simpledata and $simplecount.
 *
 * The text of every <HARD> element is appended to $harddata and the text of
 * every <SIMPLE> element to $simpledata. $simplecount is the number of closed
 * <SIMPLE> elements. Element names are compared in upper case because the
 * parser is created with its default case folding.
 *
 * Terminates the script with die() when the file cannot be opened or the
 * parser cannot be created. A malformed document stops parsing silently and
 * keeps whatever was read up to that point.
 */
function translateparser(): void
{
    global $simpledata, $harddata, $simplecount;

    if (!($fp = @fopen("translate.xml", "r"))) {
        die("Couldn't open XML.");
    }

    $simplecount = 0;
    $simpledata = array();
    $harddata = array();

    if (!($parser = xml_parser_create())) {
        fclose($fp);
        die("Couldn't create parser.");
    }

    // Handlers are closures rather than named functions declared in here, so
    // calling translateparser() twice cannot trigger a "cannot redeclare" fatal.
    $current = null; // upper-cased name of the element whose text is being read
    $buffer = '';    // text collected for $current so far

    $onStart = function ($p, $name, $attrib) use (&$current, &$buffer) {
        $current = $name;
        $buffer = '';
    };

    // The parser may deliver one element's text in several pieces (around
    // entities, or where a read chunk ends), so the pieces are collected here
    // and only stored once the closing tag arrives.
    $onText = function ($p, $data) use (&$current, &$buffer) {
        if ($current === "SIMPLE" || $current === "HARD") {
            $buffer .= $data;
        }
    };

    $onEnd = function ($p, $name) use (&$current, &$buffer, &$simpledata, &$harddata, &$simplecount) {
        if ($name === $current && $buffer !== '') {
            // Appending keeps both lists 0-based and free of gaps, so a loop
            // over 0 .. $simplecount-1 sees every pair, including the first.
            if ($name === "SIMPLE") {
                $simpledata[] = $buffer;
            } elseif ($name === "HARD") {
                $harddata[] = $buffer;
            }
        }
        if ($name == "SIMPLE") {
            $simplecount++;
        }
        // Whitespace between elements must not be attributed to this element.
        $current = null;
        $buffer = '';
    };

    xml_set_element_handler($parser, $onStart, $onEnd);
    xml_set_character_data_handler($parser, $onText);

    do {
        $chunk = fread($fp, 4096);
        if ($chunk === false) {
            break;
        }
        // Evaluated after the read so the last chunk is flagged as final even
        // when the file size is an exact multiple of the chunk size.
        $isFinal = feof($fp);
        if (!xml_parse($parser, $chunk, $isFinal)) {
            break;
        }
    } while (!$isFinal);

    xml_parser_free($parser);
    fclose($fp);
}//translateparser
/**
 * Loads basic.txt (one entry per line) into the globals $basicdata and
 * $basiccount.
 *
 * Entries are split on "\n"; carriage returns are removed so a file saved
 * with Windows line endings yields the same entries. Blank lines are kept as
 * empty entries. A missing, unreadable or empty file raises a warning (for
 * the unreadable case) and leaves an empty list instead of failing inside
 * fread().
 */
function basicparser(): void
{
    global $basicdata, $basiccount;

    // get contents of a file into a string
    $filename = "basic.txt";
    $contents = is_readable($filename) ? file_get_contents($filename) : false;
    if ($contents === false) {
        trigger_error("Couldn't read $filename.", E_USER_WARNING);
        $contents = "";
    }

    $basicdata = ($contents === "")
        ? array()
        : explode("\n", str_replace("\r", "", $contents));
    $basiccount = count($basicdata);
    //var_dump($basicdata);
}//simpleparser
/**
 * Loads pronoun.txt (one entry per line) into the globals $pronoundata and
 * $pronouncount.
 *
 * Entries are split on "\n"; carriage returns are removed so a file saved
 * with Windows line endings yields the same entries. Blank lines are kept as
 * empty entries. A missing, unreadable or empty file raises a warning (for
 * the unreadable case) and leaves an empty list instead of failing inside
 * fread().
 */
function pronounparser(): void
{
    global $pronoundata, $pronouncount;

    // get contents of a file into a string
    $filename = "pronoun.txt";
    $contents = is_readable($filename) ? file_get_contents($filename) : false;
    if ($contents === false) {
        trigger_error("Couldn't read $filename.", E_USER_WARNING);
        $contents = "";
    }

    $pronoundata = ($contents === "")
        ? array()
        : explode("\n", str_replace("\r", "", $contents));
    $pronouncount = count($pronoundata);
}//pronounparser
/**
 * Prints the page head (CSS and the select/toggle JavaScript), the opening
 * <body>, the posted title when there is one, and opens the <words> /
 * #selecto containers that brain() later closes.
 */
function echohtml(): void
{
    echo '<html>
<head> <title>everything is ok.</title>
<STYLE type="text/css">
a.words{ color: green; text-decoration: none }
words{ background: #EBEBEB; width: 750px; float: left; }
hardlist{ position:relative; top:5px; clear:both; float: left; width: 750px; background: #596F80; color:white; font-size: large; }
down { position:relative; top:5px; clear:both; float: left; width: 750px; color:white; font-size: small; }
pre {
 white-space: pre-wrap; /* css-3 */
 white-space: -moz-pre-wrap !important; /* Mozilla, since 1999 */
 white-space: -pre-wrap; /* Opera 4-6 */
 white-space: -o-pre-wrap; /* Opera 7 */
 word-wrap: break-word; /* Internet Explorer 5.5+ */
}
</STYLE>
<script type="text/javascript">
function fnSelect(objId) {
 fnDeSelect();
 if (document.selection) {
  var range = document.body.createTextRange();
  range.moveToElementText(document.getElementById(objId));
  range.select();
 } else if (window.getSelection) {
  var range = document.createRange();
  range.selectNode(document.getElementById(objId));
  window.getSelection().addRange(range);
 }
}
function fnDeSelect() {
 if (document.selection) document.selection.empty();
 else if (window.getSelection)
  window.getSelection().removeAllRanges();
}
function toggle(a){
 var div=document.getElementById(a);
 var tmp=div.id.substring(0,3);
 //document.write(tmp);
 if (tmp=="new"){
  var ntmp=div.id.replace(/^.../,"");
  var num=ntmp*1;
  var other="old"+num;
  document.getElementById(other).style.display="inline";
  div.style.display="none";
 }
 if (tmp=="old"){
  var ntmp=div.id.replace(/^.../,"");
  var num=ntmp*1;
  var other="new"+num;
  document.getElementById(other).style.display="inline";
  div.style.display="none";
 }
}
</script>
</head>
<body bgcolor="#647D8F" link="#003D6B" alink="#003D6B" vlink="#003D6B">
'; //css,blah

    $title = isset($_POST['title']) ? $_POST['title'] : null;
    if (is_string($title) && $title !== "") {
        // NOTE(review): the blanks around the title look like placeholders
        // for heading markup that is not present in this copy of the source.
        echo ' ';
        echo " "; //title
        // The title is user input: escape it before it reaches the page.
        echo htmlspecialchars($title, ENT_QUOTES, 'UTF-8');
        echo " ";
    }

    echo '<words>
<div id="selecto">';
}

//////////////////////////////
/**
 * Turns the posted text ($text) into the global token list $words.
 *
 * Also records the posted title in $pagetitle and resets $changes to 0.
 * Blanks are inserted around markup characters so that tags, bullets and
 * opening parentheses become separate tokens, every line break becomes a
 * " </br> " token, and the result is split on single blanks.
 */
function slowexplode(): void
{
    global $pagetitle, $text, $changes, $words;

    $pagetitle = isset($_POST['title']) ? $_POST['title'] : null;
    $bodytext = (string) $text; //from above

    // Treat "\r\n" and "\n" like "\r". Otherwise the "\n" stays glued to the
    // first word of each line and that word can never match a list entry.
    $bodytext = str_replace(array("\r\n", "\n"), "\r", $bodytext);

    //some punct
    $bodytext = str_replace("\'", "'", $bodytext);
    $bodytext = str_replace('\"', '"', $bodytext);
    $bodytext = str_replace("<", " <", $bodytext);
    $bodytext = str_replace(">", "> ", $bodytext);
    $bodytext = str_replace("*", "* ", $bodytext);
    $bodytext = str_replace("\r", " </br> ", $bodytext);
    $bodytext = str_replace("(", "( ", $bodytext); //dont ask.

    //formatting
    // Idiom replacement used to happen here; it was already disabled
    // (commented out) in the source this was reconstructed from.
    $changes = 0;

    //space explode
    $words = explode(" ", $bodytext); //spaces
    //potential length problems
}//slowexplode

/**
 * Appends the tokens after $words[$x] to it, blanking them, until the grown
 * chunk matches $pattern.
 *
 * Gives up ("panic") once more than $maxInc tokens were tried (null = no
 * such cap) or the chunk is longer than 4000 bytes. Past the end of the list
 * it keeps appending blanks: an unterminated construct therefore ends up
 * longer than 4000 bytes, which brain() echoes verbatim. Positions beyond
 * the end of $words are never created.
 *
 * @param array    $words   token list, modified in place
 * @param int      $x       index of the chunk being grown
 * @param string   $pattern regex that marks the chunk as complete
 * @param int|null $maxInc  cap on the number of tokens tried, or null
 * @param string   $suffix  text appended once the chunk is complete
 * @param string   $label   text of the debug marker printed on panic
 * @param mixed    $debug   truthy to print the debug marker
 */
function joiner_absorb(array &$words, int $x, string $pattern, ?int $maxInc, string $suffix, string $label, $debug): void
{
    $done = false;
    for ($inc = 1; !$done; $inc++) {
        $new = $x + $inc;
        $next = '';
        if (isset($words[$new])) {
            $next = $words[$new];
            $words[$new] = "";
        }
        $words[$x] .= " $next";

        if (preg_match($pattern, $words[$x])) {
            $words[$x] .= $suffix;
            $done = true;
        }
        if (($maxInc !== null && $inc > $maxInc) || strlen($words[$x]) > 4000) {
            if ($debug) {
                echo "**{$label}**";
            }
            $done = true; //panic
        }
    }
}

/**
 * Looks ahead from $words[$x] for the tokens that close a bracketed
 * construct and, when found, replaces the whole run by one wrapped chunk.
 *
 * $need starts as the number of $open characters in the first token. Every
 * later token subtracts its $close characters and, when $nested is true,
 * adds its $open characters. The run is accepted when the joined text
 * matches $pattern and $need is back at 0. Nothing is modified when the
 * search is abandoned (more than $maxInc tokens, $need below 0 for nested
 * constructs, or an over-long first token).
 *
 * @param array  $words   token list, modified in place on success
 * @param int    $x       index of the first token of the construct
 * @param string $pattern regex the joined text has to match
 * @param string $open    opening character
 * @param string $close   closing character
 * @param bool   $nested  whether later $open characters raise $need
 * @param int    $maxInc  cap on the number of tokens tried
 * @param string $before  markup put in front of the joined text
 * @param string $after   markup put behind the joined text
 * @param string $label   text of the debug marker printed on panic
 * @param mixed  $debug   truthy to print the debug marker
 */
function joiner_absorb_balanced(array &$words, int $x, string $pattern, string $open, string $close, bool $nested, int $maxInc, string $before, string $after, string $label, $debug): void
{
    $last = count($words) - 1;
    $need = substr_count($words[$x], $open);
    $temp = $words[$x];

    $done = false;
    for ($inc = 1; !$done; $inc++) {
        $new = $x + $inc;
        $next = isset($words[$new]) ? $words[$new] : '';

        if ($nested) {
            $need += substr_count($next, $open);
        }
        $need -= substr_count($next, $close);
        $temp .= " $next";

        if (preg_match($pattern, $temp) && $need == 0) {
            for ($i = $x; $i <= $new && $i <= $last; $i++) {
                $words[$i] = "";
            }
            $words[$x] = $before . $temp . $after;
            $done = true;
        }
        if ($inc > $maxInc || ($nested && $need < 0) || strlen($words[$x]) > 4000) {
            if ($debug) {
                echo "**{$label}**";
            }
            $done = true; //panic
        }
    }
}

/**
 * Re-joins tokens that slowexplode() split but that belong together: wiki
 * quote markup ('..'), double-quoted passages, <blockquote> and <ref>
 * elements, == headings ==, {{templates}} and [[links]].
 *
 * Sets the global $wordcount to the number of tokens. Wiki markup chunks are
 * wrapped in SteelBlue <font> tags; quoted passages and HTML elements are
 * only joined. The rules run in a fixed order on the same position, so a
 * chunk produced by one rule can still be picked up by a later one.
 */
function joiner(): void
{
    //rejoin some now
    global $x, $words, $wordcount, $debug;

    $wordcount = count($words);
    $steel = "</font><font color=\"SteelBlue\">";

    //bigloop
    // The rule for green (idiom) chunks is omitted: its glob started with a
    // blank, and tokens produced by explode(" ") never contain one, so it
    // could not fire.
    for ($x = 0; $x < $wordcount; $x++) {
        // ''' / '' / ' wiki quotes, longest marker first. Once a rule has
        // fired the chunk starts with "</font>", so the shorter markers skip it.
        foreach (array(3, 2, 1) as $quotes) {
            $marker = str_repeat("'", $quotes);
            if (!fnmatch($marker . "*", $words[$x])) {
                continue;
            }
            $pattern = "/'{" . $quotes . ",6}[^']*'{" . $quotes . ",6}.{0,3}/";
            if (preg_match($pattern, $words[$x])) {
                $words[$x] = $steel . $words[$x] . "</font>";
            } else {
                $words[$x] = $steel . $words[$x];
                joiner_absorb($words, $x, $pattern, 29, "</font>", $marker . "error", $debug);
            }
        }

        if (fnmatch("\"*", $words[$x])) {
            $pattern = "/\"{1,2}[^\"]*\"{1,2}/";
            if (!preg_match($pattern, $words[$x])) {
                joiner_absorb($words, $x, $pattern, null, "", "\"error", $debug);
            }
        }

        if (fnmatch("<blockquote>*", $words[$x])) {
            if (!preg_match('/<blockquote>{1,1}[^&]*<\/ ?blockquote ?>{1,1}/', $words[$x])) {
                joiner_absorb($words, $x, '/<blockquote>{1,1}.*<\/ ?blockquote ?>{1,1}/', null, "", "blockerror", $debug);
            }
        }

        if (fnmatch("<ref>*", $words[$x])) {
            $pattern = '/<ref>{1,1}[^&]*<\/ ?ref ?>{1,1}/';
            if (!preg_match($pattern, $words[$x])) {
                joiner_absorb($words, $x, $pattern, null, "", "ref error", $debug);
            }
        }

        if (fnmatch("==*", $words[$x])) {
            $pattern = '/={2,6}.*={2,6}.{0,4}/';
            if (preg_match($pattern, $words[$x])) {
                $words[$x] = $steel . "</br>" . $words[$x] . "</br></font>";
            } else {
                joiner_absorb_balanced($words, $x, $pattern, "=", "=", false, 49, $steel . "</br>", "</br></font>", "==error", $debug);
            }
        }

        if (fnmatch("{*", $words[$x])) {
            $pattern = '/\{{1,6}.*\}{1,6}.{0,4}/';
            if (preg_match($pattern, $words[$x])) {
                $words[$x] = $steel . $words[$x] . "</font>";
            } else {
                joiner_absorb_balanced($words, $x, $pattern, "{", "}", true, 129, $steel . "</br>", "</br></font>", "{}error", $debug);
            }
        }

        // Any token containing "[".
        if (fnmatch('*\[*', $words[$x])) {
            if (preg_match('/.{0,18}\[{1,6}.*\]{1,6}.{0,4}/', $words[$x])) {
                $words[$x] = $steel . $words[$x] . "</font>";
            } else {
                joiner_absorb_balanced($words, $x, '/\[{1,6}.*\]{1,6}.{0,4}/', "[", "]", true, 129, $steel, "</font>", "[]error", $debug);
            }
        }
    }//for
}//joins

/**
 * Switches the output to another <font> colour, unless the global $colour
 * already holds $state.
 *
 * @param string $state     value remembered in $colour
 * @param string $htmlColor value written into the color attribute
 */
function brain_colour(string $state, string $htmlColor): void
{
    global $colour;

    if ($colour !== $state) {
        echo '</font><font color="' . $htmlColor . '">';
        $colour = $state;
    }
}

/**
 * Prints every token of $words, coloured by category, and closes the
 * #selecto / <words> containers opened by echohtml().
 *
 * Categories are tried in this order; the first hit wins:
 *   - chunk longer than 4000 bytes: printed verbatim
 *   - empty token or line break: dropped, not counted as a word
 *   - "quoted" chunk, <ref> element, token with a digit or one of
 *     , ( ) + & % $ # = | * : SteelBlue (joiner()-wrapped chunks land here
 *     because their markup contains "=")
 *   - entry of $harddata: green, replaced by the $simpledata entry with a
 *     toggle back to the original; counted in $changes
 *   - entry of $basicdata: blue
 *   - the page title: SteelBlue
 *   - entry of $pronoundata: blue, printed with a capital first letter
 *   - anything else: Chocolate, tallied in $hardwords
 *
 * $wordcount is decremented for dropped tokens, <ref> elements and
 * digit/punctuation tokens.
 *
 * NOTE(review): tokens are echoed unescaped, so markup in the posted text
 * reaches the page as-is. This is how <ref>/<blockquote> pass through, but
 * it also means any posted script is emitted - confirm this is acceptable.
 */
function brain(): void
{
    global $words, $hardwords, $colour, $basicdata, $basiccount, $simpledata,
           $simplecount, $harddata, $pronoundata, $pronouncount, $pagetitle,
           $wordcount, $changes;

    //bigloop
    $total = is_array($words) ? count($words) : 0;
    for ($t = 0; $t < $total; $t++) {
        $word = $words[$t];

        if (strlen($word) > 4000) {
            echo $word;
            continue;
        }

        $theword = strtolower($word);
        if ($theword === "" || $theword === " " || $theword === "\r" || $theword === "</br>") {
            $wordcount--;
            continue;
        } //ok

        //for annoying doubling of '
        if (substr_count($theword, "'") == 4) {
            $theword = str_replace("''", "'", $theword);
        }
        if (substr_count($theword, "'") == 8) {
            $theword = str_replace("''''", "''", $theword);
        }
        if (substr_count($theword, "'") == 12) {
            $theword = str_replace("''''''", "'''", $theword);
        }

        //special text
        if (fnmatch("\"*", $theword) && fnmatch("*\"", $theword)) { // quote
            brain_colour("steel", "SteelBlue");
            echo "$word ";
            continue;
        }

        if (fnmatch("<ref*", $theword) && fnmatch("*\/ref>", $theword)) { // tag
            brain_colour("SteelBlue", "SteelBlue");
            echo "$word ";
            $wordcount--;
            continue;
        }

        //#s or punctuation
        // NOTE(review): the commas inside this character class are literal,
        // so every token containing a comma (e.g. "word,") is treated as
        // special and never translated. They look like intended separators -
        // confirm before removing them.
        if (preg_match("/[0-9,\(,\),\+,&,%,$,#,=,\|,\*]/", $theword)) {
            brain_colour("steel", "SteelBlue");
            echo "$word ";
            $wordcount--;
            // NOTE(review): tokens starting with "*" are printed a second
            // time behind a line break, as in the original - confirm intent.
            if (fnmatch('\**', $theword)) {
                echo "</br>$word";
            }
            continue;
        }

        //////////////////////
        //ok now lets get it on
        //////////////////
        // (An a/an fix-up in front of translated words used to sit here; it
        // was already disabled in the source this was reconstructed from.)

        //normaltranslate
        for ($i = 0; $i < $simplecount; $i++) {
            if (!isset($harddata[$i], $simpledata[$i])) {
                continue;
            }
            // Cheap glob test first; the regex is the slower, exact check.
            if (fnmatch("$harddata[$i]", $theword) || fnmatch("$harddata[$i]*", $theword)) {
                if (preg_match("/^{$harddata[$i]}[ ,!,\.,\?,\),s,\',:,;]?$/i", $theword)) {
                    $tail = str_replace("$harddata[$i]", "", $theword);
                    brain_colour("green", "green");
                    echo "<span id=\"new$changes\"style=\"display:inline;\"><a href=\"javascript:toggle('new$changes')\"class=\"words\"><b>$simpledata[$i]$tail </b></a></span><span id=\"old$changes\"style=\"display:none;\"><a href=\"javascript:toggle('old$changes')\"class=\"words\"><b>$theword </b></a></span>";
                    $changes++;
                    continue 2;
                }
            }
        }

        //basiccheck
        for ($i = 0; $i < $basiccount; $i++) {
            if (!isset($basicdata[$i])) {
                continue;
            }
            if (fnmatch($basicdata[$i], $theword) || fnmatch("$basicdata[$i]*", $theword)) {
                if (preg_match("/^{$basicdata[$i]}[\!,\.,\?,\),s,\',\:,]?$/i", $theword)) {
                    brain_colour("blue", "blue");
                    echo "$word ";
                    continue 2;
                }
            }
        }

        //or if it's the title
        // The title is user input, hence preg_quote(). The match is
        // case-insensitive because $theword is lower-cased.
        if (is_string($pagetitle) && $pagetitle !== ""
            && preg_match("/^" . preg_quote($pagetitle, "/") . "[,!,\.,\?,\),s,\',:,;]?$/i", $theword)) {
            brain_colour("steel", "SteelBlue");
            echo "$word ";
            continue;
        }

        //pronoun search (last resort)
        for ($ip = 0; $ip < $pronouncount; $ip++) {
            if (!isset($pronoundata[$ip])) {
                continue;
            }
            if (fnmatch("$pronoundata[$ip]", $theword)) {
                if (preg_match("/^{$pronoundata[$ip]}[\,,\!,\.,\?,\),s,\']?$/i", $theword)) {
                    brain_colour("blue", "blue");
                    echo ucfirst($pronoundata[$ip]) . " ";
                    continue 2;
                }
            }
        }

        //for remaining
        brain_colour("Chocolate", "Chocolate");
        echo "$word ";
        if (!isset($hardwords[$theword])) {
            $hardwords[$theword] = 0;
        }
        $hardwords[$theword]++;
        //echo "[[wikt:$theword]]"; //wikt:tag
    } //foreachwordloop

    echo " </div>
</words>";
}//brain
/**
 * Prints the <hardlist> summary: the colour legend, the number of
 * translations and, when hard words were tallied, the most frequent ones.
 *
 * Reads the globals $hardwords (token => count), $wordcount and $changes.
 * Up to five hard words are listed, most frequent first; among equal counts
 * the one tallied first wins. Percentages are truncated to integers.
 */
function hardwordslist(): void
{
    global $hardwords, $wordcount, $changes;

    echo "<hardlist>
<a href=\"javascript:fnSelect('selecto');\">(select)</a>";
    echo "
blue are simple,
(or special),
green have changed,
and these haven't.
";

    if ($changes > 0 && $wordcount > 0) {
        $numtrans = (int) (($changes / $wordcount) * 100);
        echo " $changes translations ($numtrans%)
";
    } else {
        echo "no translations
";
    }

    //hardcount
    if (is_array($hardwords) && count($hardwords) > 0 && $wordcount > 0) {
        // Pick the (at most) five highest counts, one per pass, skipping the
        // tokens already chosen.
        $big = array();
        for ($h = 0; $h <= 4; $h++) { //sort
            $bestKey = null;
            $bestCount = 0;
            foreach ($hardwords as $key => $the) {
                if ($the > $bestCount && !in_array($key, $big, true)) {
                    $bestKey = $key;
                    $bestCount = $the;
                }
            }
            if ($bestKey === null) {
                break; // fewer than five distinct hard words
            }
            $big[] = $bestKey;
        }

        echo "
Most common hard words:
";
        foreach ($big as $hard) {
            // Hard words come straight from the posted text: escape them.
            echo htmlspecialchars((string) $hard, ENT_QUOTES, 'UTF-8') . ", ";
        }
        $numhard = (int) ((count($hardwords) / $wordcount) * 100);
        echo "
$numhard% hard.";
    }//if

    echo "</hardlist>";
}
//
echo "
<down>
'; } else { echo'<a href="http://www.spencerwaterbed.com/soft/simple/randm.php">again</a> '; } ?>
"; //if was random if ($_POST['title']!==""&&$_POST['title']!==null){ echo '<a href="';echo "http://simple.wikipedia.org/w/index.php?title=";echo $_POST['title']; echo '&action=edit&redlink=1">put it in simple.wikipedia</a> '; echo" | ";
echo'<a href="http://www.spencerwaterbed.com/soft/simple/randm.php">simplify again</a> |
byspencer.opensource. <a href="http://www.spencerwaterbed.com/soft/simple/about.html"><about></a> |
</down>
</body> </html>