User:PeerInfinity/Scripts/SyncArticleLinks.php

From Lesswrongwiki

Jump to: navigation, search

<?php

// SyncArticleLinks.php // synchronize the list of article links on the concept pages with the list of concepts on the "All Articles" pages

// for the latest version of this script's output, see:

// /stderr output - the stderr output of this script

// /SyncArticleLinksOutput.txt - the output written to the text file, containing the Sync results

//
  $stderr = fopen( "php://stderr", "wt" );  // progress and error messages are written to this stream

// ---------------------------------------------------------------------------
// Global state shared by the parser callbacks defined further down
// (startElement / characterData / endElement import these with `global`).
// ---------------------------------------------------------------------------

//*

$XMLfile = "daily_XML_dump.xml";


// the arrays for the data read from the All Articles pages:

// it would be more proper to create a struct for this, but for now I'll just do it the quick and dirty way

$NextAllArticleIndex    = 0;

// these arrays are indexed by $NextAllArticleIndex

$ArrayAllArticleTitle              = array();
$ArrayAllArticleLink               = array();
$ArrayAllArticleIndexedConcepts    = array(); // this is an array of arrays!
$ArrayAllArticleFoundConcepts      = array(); // this is an array of arrays!
$ArrayAllArticleNotIndexedConcepts = array(); // this is an array of arrays!
$ArrayAllArticleAuthor             = array();
$ArrayAllArticleDate               = array(); // currently unused, This would need to be read by following the link to the article, which might be a good idea to implement eventually
$ArrayAllArticleOfficialSummaries  = array(); // this is an array of arrays!  //this one is indexed by article name!
$ArrayAllArticleUsedSummaries      = array(); // this is an array of arrays!


// the array for all Concepts found

// these arrays are indexed by concept title

$ConceptFound = array();
$ConceptNotInIndex = array();
$ConceptThatAreRedirects = array();
$ConceptSeeAlso = array(); // this is an array of arrays!

// these arrays are indexed by concept title

$PagesWithOvercomingBiasLinks = array();
$PagesWithComments = array();
$PagesWithOvercomingBiasArticlesHeader = array();
$PagesWithExternalReferences = array();
$PagesWithSeeAlso = array();
$PagesWithExternalAuthorLinks = array();
$PagesWithNewlineAfterWikiLink = array();
$PagesWithSeeAlsoBeforeBlogPosts = array();

// ugh... this is ugly:

// these arrays are indexed by the index variables below

$ArticleLinksWithoutEndingSlash = array();
$ArticleLinksWithWrongTitle = array();
$ArticleLinksWithoutAuthor = array();
$ArticleLinksWithAvailableSummary = array();

$ArticleLinksWithoutEndingSlashConcept = array();
$ArticleLinksWithWrongTitleConcept = array();
$ArticleLinksWithoutAuthorConcept = array();
$ArticleLinksWithAvailableSummaryConcept = array();

// Next free slot in each of the "ArticleLinks..." arrays above.
// The callbacks import these counters as $ArticleLinksWithoutAuthorNextIndex and
// $ArticleLinksWithAvailableSummaryNextIndex, so those exact names must start at 0;
// otherwise the first `global` import yields NULL instead of an integer index.
$ArticleLinksWithoutEndingSlashNextIndex = 0;
$ArticleLinksWithWrongTitleNextIndex = 0;
$ArticleLinksWithoutAuthorNextIndex = 0;
$ArticleLinksWithAvailableSummaryNextIndex = 0;

// Legacy spellings of the last two counters, kept so that any code still
// referring to the old names keeps seeing an initialised value.
$ArticleLinksWithoutAuthorConceptNextIndex = 0;
$ArticleLinksWithAvailableSummaryIndex = 0;


// title and accumulated text of the page currently being parsed
$CurrentTitle = "";
$CurrentBody = "";

// which XML element the parser is currently inside
$IsReadingTitle      = false;
$IsReadingText       = false;

// which kind of page the current pass over the dump is interested in
$IsReadingAllArticlesPages = false;
$IsReadingSummariesPages   = false;
$IsReadingConceptPages     = false;

// whether the page currently being parsed is one whose text should be collected
$IsReadingOneAllArticlesPage = false;
$IsReadingOneSummariesPage   = false;
$IsReadingOneConceptPage     = false;

$FirstYearToRead = 2006;
$FinalYearToRead = 2010; //todo - update this in 2011!!!

$NextYearToRead = $FirstYearToRead;

$YearCurrentlyBeingRead = $FirstYearToRead;

$SuccessfullyReadOneAllArticlesPage = false;
$SuccessfullyReadOneSummariesPage   = false;
$FailedToReadAllArticlesPage        = false;

$SuccessfullyReadOneConceptPage = false;
$FailedToReadConceptPage        = false;

$PagesRead = 0;

$Debug = true;


// for doxygen
$fontsize = 12;

// to make the script easier to read, pad each section to a specific number of characters, to make everything line up
$PaddingValue1 = 100;
$PaddingValue2 = 50;




//open a text file for the output of this script
//the script will send its output both to this text file and to stdout (or was it stderr?)

$fp = fopen('SyncArticleLinksOutput.txt', 'w');





// now for some code that I still haven't figured out a good way to untangle
// this processing currently needs to be done in these functions, and can't be moved to a more sensible place
//
// NOTE(review): the signatures below match PHP's XML parser element / character-data
// handlers; the registration itself is not in this part of the file - confirm.


/**
 * Element-start callback.
 *
 * Remembers whether the element being opened is a TITLE or a TEXT element,
 * so that characterData() knows what to do with the data it receives.
 *
 * @param mixed  $parser   parser handle (not used)
 * @param string $name     element name, compared against "TITLE" and "TEXT"
 * @param array  $attribs  element attributes (not used)
 */
function startElement($parser, $name, $attribs)
{
	global $IsReadingTitle;
	global $IsReadingText;

	$IsReadingTitle = ( $name == "TITLE" );
	$IsReadingText  = ( $name == "TEXT" );
}


/**
 * Character-data callback.
 *
 * While inside a TITLE element: stores the title, clears the collected body,
 * and decides - depending on which pass is active ($IsReadingAllArticlesPages,
 * $IsReadingSummariesPages or $IsReadingConceptPages) - whether the text of
 * this page should be collected.
 *
 * While inside a TEXT element: appends $data to $CurrentBody if the page was
 * selected. The collected text is processed later, in endElement().
 *
 * @param mixed  $parser  parser handle (not used)
 * @param string $data    chunk of character data for the current element
 */
function characterData($parser, $data)
{
	global $stderr;

	global $CurrentTitle;
	global $CurrentBody;

	global $IsReadingTitle;
	global $IsReadingText;

	global $IsReadingAllArticlesPages;
	global $IsReadingSummariesPages;
	global $IsReadingConceptPages;

	global $IsReadingOneAllArticlesPage;
	global $IsReadingOneSummariesPage;
	global $IsReadingOneConceptPage;

	global $FirstYearToRead;
	global $FinalYearToRead;
	global $NextYearToRead;
	global $YearCurrentlyBeingRead;

	global $PagesRead;

	// The output file is opened exactly once, at the top level of the script.
	// It must NOT be reopened here: mode 'w' truncates the file, and this
	// callback runs for every chunk of character data in the dump.

	// if we're reading the title, then remember the title, and check if we want to read the page content

	if( $IsReadingTitle )
	{
		$CurrentBody = "";

		// NOTE(review): only the first chunk of data is taken as the title, because the
		// flag is cleared right away; if the parser delivers a title in several chunks
		// the remainder is ignored - confirm whether that can happen for this dump.
		$CurrentTitle = $data;
		$IsReadingTitle = false;

		$IsReadingOneAllArticlesPage = false;
		$IsReadingOneSummariesPage   = false;
		$IsReadingOneConceptPage     = false;

		// skip category pages, template pages, etc.
		// (a marker anywhere in the title is enough, not only at the start)
		$SkippedTitleMarkers = array( "Category:", "Template:", "Talk:", "Category talk:" );

		// individual pages that are never processed
		$SkippedTitles = array(
			"Catch Phrases",
			"Categories",
			"Chat Logs/2009-04-11",
			"Disagreements on Less Wrong",
			"Series",
			"Using the wiki",
			"Acronyms used on Less Wrong",
			"Less Wrong/Errors from moving Eliezer's posts from OB to LW",
		);

		$SkipThisPage = in_array( $CurrentTitle, $SkippedTitles, true );

		foreach( $SkippedTitleMarkers as $Marker )
		{
			if( strpos( $CurrentTitle, $Marker ) !== false )
			{
				$SkipThisPage = true;
				break;
			}
		}

		if( !$SkipThisPage )
		{
			// title fragment of the "All Articles" page for the next year we want to read
			$NextYearPageTitle = "Less Wrong/" . $NextYearToRead . " Articles";

			if( $IsReadingAllArticlesPages && $NextYearToRead <= $FinalYearToRead )
			{
				// check if the page title is the next year we want to read
				// NOTE(review): this is a substring test, so the ".../Summaries" sub-page of
				// the same year also matches here - same as before, left unchanged.
				if( strpos( $CurrentTitle, $NextYearPageTitle ) !== false )
				{
					$YearCurrentlyBeingRead = $NextYearToRead;
					$NextYearToRead++;
					$IsReadingOneAllArticlesPage = true;

					fwrite( $stderr, "Processing the wikipage for $YearCurrentlyBeingRead \n\n" );
				}
			}
			else if( $IsReadingSummariesPages && $NextYearToRead <= $FinalYearToRead )
			{
				// check if the page title is the next year we want to read
				if( strpos( $CurrentTitle, $NextYearPageTitle . "/Summaries" ) !== false )
				{
					$YearCurrentlyBeingRead = $NextYearToRead;
					$NextYearToRead++;
					$IsReadingOneSummariesPage = true;

					fwrite( $stderr, "Processing the summaries page for $YearCurrentlyBeingRead \n\n" );
				}
			}
			else if( $IsReadingConceptPages )
			{
				// don't process the All Articles pages!
				$IsAnAllArticlesPage = false;

				for( $Year = $FirstYearToRead; $Year <= $FinalYearToRead; $Year++ )
				{
					if( strpos( $CurrentTitle, "Less Wrong/" . $Year . " Articles" ) !== false )
					{
						$IsAnAllArticlesPage = true;
						break;
					}
				}

				if( !$IsAnAllArticlesPage )
				{
					// no special processing here, just remember the title

					$IsReadingOneConceptPage = true;

					// progress report every 100 pages
					if( $PagesRead % 100 == 0 )
					{
						fwrite( $stderr, "Pages read: $PagesRead\n" );
					}

					//fwrite( $stderr, "Found the article $CurrentTitle \n\n" );

					$PagesRead++;
				}
			}
			else if( !$IsReadingAllArticlesPages && !$IsReadingSummariesPages )
			{
				// (when one of those two passes is active we are simply past the final year: nothing to do)
				fwrite( $stderr, "Error: not reading all articles, summaries, or concept pages\n\n" );
			}
		}
	}

	// if we're reading the text, then store the content of the article
	// we'll process the data in endElement()

	// there is probably a more efficient way to do this

	if( $IsReadingText )
	{
		if( $IsReadingOneAllArticlesPage )
		{
			$CurrentBody .= $data;
		}

		if( $IsReadingOneSummariesPage )
		{
			$CurrentBody .= $data;
		}

		if( $IsReadingOneConceptPage )
		{
			$CurrentBody .= $data;
		}
	}
}

function endElement($parser, $name)
{
	global $stderr;

	global $XMLfile;

	global $NextAllArticleIndex;

	global $ArrayAllArticleTitle             ;
	global $ArrayAllArticleLink              ;
	global $ArrayAllArticleIndexedConcepts   ;
	global $ArrayAllArticleFoundConcepts     ;
	global $ArrayAllArticleNotIndexedConcepts;
	global $ArrayAllArticleAuthor            ;
	global $ArrayAllArticleDate              ;
	global $ArrayAllArticleOfficialSummaries ;
	global $ArrayAllArticleUsedSummaries     ;

	global $ConceptFound;
	global $ConceptNotInIndex;
	global $ConceptThatAreRedirects;
	global $ConceptSeeAlso;

	global $PagesWithOvercomingBiasLinks;
	global $PagesWithComments;
	global $PagesWithOvercomingBiasArticlesHeader;
	global $PagesWithExternalReferences;
	global $PagesWithSeeAlso;
	global $PagesWithExternalAuthorLinks;
	global $PagesWithNewlineAfterWikiLink;
	global
$PagesWithSeeAlsoBeforeBlogPosts;  	global $ArticleLinksWithoutEndingSlash; 	global $ArticleLinksWithWrongTitle; 	global $ArticleLinksWithoutAuthor; 	global $ArticleLinksWithAvailableSummary;  	global $ArticleLinksWithoutEndingSlashConcept; 	global $ArticleLinksWithWrongTitleConcept; 	global $ArticleLinksWithoutAuthorConcept; 	global $ArticleLinksWithAvailableSummaryConcept;  	global $ArticleLinksWithoutEndingSlashNextIndex; 	global $ArticleLinksWithWrongTitleNextIndex; 	global $ArticleLinksWithoutAuthorNextIndex; 	global $ArticleLinksWithAvailableSummaryNextIndex;  	global $CurrentTitle; 	global $CurrentBody  ;  	global $IsReadingTitle; 	global $IsReadingText ;  	global $IsReadingAllArticlesPages; 	global $IsReadingSummariesPages  ; 	global $IsReadingConceptPages    ;  	global $IsReadingOneAllArticlesPage; 	global $IsReadingOneSummariesPage  ; 	global $IsReadingOneConceptPage    ;  	global $FirstYearToRead; 	global $FinalYearToRead;  	global $NextYearToRead;  	global $YearCurrentlyBeingRead;  	global $SuccessfullyReadOneAllArticlesPage; 	global $SuccessfullyReadOneSummariesPage ; 	global $FailedToReadAllArticlesPage      ;  	global $SuccessfullyReadOneConceptPage; 	global $FailedToReadConceptPage; 	 	global $PagesRead; 	 	global $Debug; 	 	 	// now process all the text that we read 	 	if( $name == "TEXT" ) 	{ 		// this check is probably unnecessary 		if( $IsReadingText ) 		{ 			if( $IsReadingOneAllArticlesPage ) 			{ 				//fwrite( $stderr, "-------------------------------------------reading body of year $YearCurrentlyBeingRead , $CurrentTitle \n\n" );  				//fwrite( $stderr, $CurrentBody ); 				 				// now parse the data from this page and store it into the arrays 				 				$CurrentGroupStartPos = 0; 				 				// skip to the first '|-' 				$NextGroupStartPos = strpos( $CurrentBody, "|-", $CurrentGroupStartPos+1 ); 				 				// keep going until there are no more groups 				while( $NextGroupStartPos !== FALSE && $CurrentGroupStartPos != $NextGroupStartPos ) 				{ 					
// find the start and end of teh current group 					$CurrentGroupStartPos = $NextGroupStartPos; 					 					$NextGroupStartPos = strpos( $CurrentBody, "|-", $CurrentGroupStartPos+1 ); 					 					// if there is no next group, then set the end of this group to the end of the whole string 					if( $NextGroupStartPos === FALSE ) 					{ 						$NextGroupStartPos = strlen( $CurrentBody ) - 1; 					} 					 					// now find and store the parts 					 					// there is probably a much simpler way to do all this using regexes 					 					$DataIsValid = true; 					 					$CurrentArticleConceptArray = array(); 					 					$TokenStartPos = strpos($CurrentBody, "[", $CurrentGroupStartPos) + 1; 					$TokenEndPos = strpos($CurrentBody, " ", $TokenStartPos); 					 					$CurrentArticleLink = substr( $CurrentBody, $TokenStartPos, $TokenEndPos-$TokenStartPos ); 					 					if( substr_count( $CurrentArticleLink, "http://" ) <= 0 ) 					{ 						$DataIsValid = false; 					}   					$TokenStartPos = $TokenEndPos + 1; 					$TokenEndPos = strpos($CurrentBody, "]", $TokenStartPos); 					 					$CurrentArticleTitle = substr( $CurrentBody, $TokenStartPos, $TokenEndPos-$TokenStartPos ); 					 					$CurrentArticleTitle = str_replace(""", "\"", $CurrentArticleTitle);  					 					$NextLineStartPos = strpos($CurrentBody, "|", $TokenEndPos); 					$NextLineEndPos = strpos($CurrentBody, "|", $NextLineStartPos+1); 					 					$ConceptLine = substr( $CurrentBody, $NextLineStartPos, $NextLineEndPos-$NextLineStartPos );  					//fwrite( $stderr, "concept line start: $NextLineStartPos \n concept line end: $NextLineEndPos \n concept line: $ConceptLine\n\n" ); 					 					$FinishedFindingConcepts = false; 					 					$ConceptIndex = 0;  					$TokenStartPos = 0; 					$TokenEndPos = 0; 					 					while( !$FinishedFindingConcepts ) 					{ 						$TokenStartPos = strpos($ConceptLine, "[[", $TokenEndPos); 						$TokenEndPos = strpos($ConceptLine, "]]", $TokenStartPos); 						 						$CurrentConcept = substr( $ConceptLine, 
$TokenStartPos+2, $TokenEndPos-$TokenStartPos-2 );  						if 							( 								$TokenEndPos > $TokenStartPos && 								substr_count($CurrentConcept, "[[") <= 0 && 								substr_count($CurrentConcept, "]]") <= 0 								) 						{ 							//fwrite( $stderr, "found concept: $CurrentConcept\n\n" ); 							 							$CurrentArticleConceptArray[$ConceptIndex] = $CurrentConcept; 							 							$ConceptIndex++; 							 							$FinishedFindingConcepts = false; 						} 						else 						{ 							$FinishedFindingConcepts = true; 						}  						//fwrite( $stderr, "concept start: $TokenStartPos \n concept end: $TokenEndPos \n concept index: $ConceptIndex \n concept line: $ConceptLine\n\n" ); 					}  					$NextLineStartPos = strpos($CurrentBody, "|", $NextLineEndPos); 					$NextLineEndPos = strpos($CurrentBody, "|", $NextLineStartPos+1);  					$TokenStartPos = strpos($CurrentBody, "[", $NextLineStartPos); 					$TokenEndPos = strpos($CurrentBody, "]", $TokenStartPos);  					$CurrentArticleAuthor = substr( $CurrentBody, $TokenStartPos, $TokenEndPos-$TokenStartPos+1 ); 					 					if( $DataIsValid ) 					{ 						$ArrayAllArticleTitle              [$NextAllArticleIndex] = $CurrentArticleTitle;  						$ArrayAllArticleLink               [$NextAllArticleIndex] = $CurrentArticleLink;  						//$CurrentArticleConceptArray[] = "concept1";  						$ArrayAllArticleIndexedConcepts    [$NextAllArticleIndex] = $CurrentArticleConceptArray;  						$ArrayAllArticleFoundConcepts      [$NextAllArticleIndex] = array();  						$ArrayAllArticleNotIndexedConcepts [$NextAllArticleIndex] = array();  						$ArrayAllArticleAuthor             [$NextAllArticleIndex] = $CurrentArticleAuthor;  						$ArrayAllArticleDate               [$NextAllArticleIndex] = "date";  						$ArrayAllArticleUsedSummaries      [$NextAllArticleIndex] = array();  						//$TempString = print_r($ArrayAllArticleIndexedConcepts[$NextAllArticleIndex]);  						//fwrite( $stderr, "start: $CurrentGroupStartPos \n end: $NextGroupStartPos \n 
index: $NextAllArticleIndex \n $ArrayAllArticleTitle[$NextAllArticleIndex] \n $ArrayAllArticleLink[$NextAllArticleIndex] \n $TempString \n $ArrayAllArticleAuthor[$NextAllArticleIndex] \n $ArrayAllArticleDate[$NextAllArticleIndex] \n\n" );  						$NextAllArticleIndex++; 					} 				}  				$SuccessfullyReadOneAllArticlesPage = true; 			} 			 			if( $IsReadingOneSummariesPage ) 			{ 				//fwrite( $stderr, "reading summaries page for $YearCurrentlyBeingRead \n\n" );  				$NextSummaryTitleStart = 0; 				$NextSummaryTitleEnd = 0; 				$NextSummaryTextBlockStart = 0; 				$NextSummaryTextBlockEnd = 0; 				$NextSummaryTextStart = 0; 				$NextSummaryTextEnd = 0; 				 				$AlternateSummaryCount = 0; 				 				$FinishedReadingSummaries = false;  				while( !$FinishedReadingSummaries ) 				{ 					$NextSummaryTitleStart = strpos($CurrentBody, "=====[", $NextSummaryTitleEnd);  					//fwrite( $stderr, "CurrentBody: $CurrentBody \n\n" ); 					//fwrite( $stderr, "NextSummaryTitleStart: $NextSummaryTitleStart \n\n" ); 				 					if( $NextSummaryTitleStart === FALSE ) 					{ 						$FinishedReadingSummaries = true; 					} 					else 					{ 						$NextSummaryTitleStart += 6; 						 						$NextSummaryTitleEnd = strpos($CurrentBody, "]=====", $NextSummaryTitleStart); 						 						$FirstSpacePos = strpos($CurrentBody, " ", $NextSummaryTitleStart); 						 						$CurrentSummaryLink = substr( $CurrentBody, $NextSummaryTitleStart, $FirstSpacePos-$NextSummaryTitleStart ); 						 						$CurrentSummaryTitle = substr( $CurrentBody, $FirstSpacePos+1, $NextSummaryTitleEnd-$FirstSpacePos-1 );  						$CurrentSummaryTitle = str_replace(""", "\"", $CurrentSummaryTitle);  						//fwrite( $stderr, "reading summary of $CurrentSummaryTitle \n\n" ); 						 						$NextSummaryTextBlockStart = $NextSummaryTitleEnd + 8; 						 						$NextSummaryTextBlockEnd = strpos($CurrentBody, "=====[", $NextSummaryTitleEnd); 						 						if( $NextSummaryTextBlockEnd === FALSE ) 						{ 							$NextSummaryTextBlockEnd = 
strlen($CurrentBody); 						} 						 						$NextSummaryTextBlock = substr( $CurrentBody, $NextSummaryTextBlockStart, $NextSummaryTextBlockEnd-$NextSummaryTextBlockStart );  						$AlternateSummaryCount = substr_count( $NextSummaryTextBlock, "(alternate summary:)" );  						$NextSummaryTextEnd = 0; 						 						$ArrayAllArticleOfficialSummaries[$CurrentSummaryTitle] = array(); 						 						for( $AlternateSummaryNum = 0; $AlternateSummaryNum < $AlternateSummaryCount; $AlternateSummaryNum++ ) 						{ 							$NextSummaryTextStart = $NextSummaryTextEnd; 							$NextSummaryTextEnd = strpos( $NextSummaryTextBlock, "\n\n(alternate summary:)\n\n", $NextSummaryTextStart ); 							 							$SummaryToAdd = substr( $NextSummaryTextBlock, $NextSummaryTextStart, $NextSummaryTextEnd-$NextSummaryTextStart ); 							 							$ArrayAllArticleOfficialSummaries[$CurrentSummaryTitle][$AlternateSummaryNum] = $SummaryToAdd; 							 							$NextSummaryTextEnd += strlen( "\n\n(alternate summary:)\n\n" ); 						}  						$NextSummaryTextStart = $NextSummaryTextEnd; 						 						$SummaryToAdd = substr( $NextSummaryTextBlock, $NextSummaryTextStart ); 						 						$SummaryToAdd = trim( $SummaryToAdd, "\n" ); 						 						if( strlen( $SummaryToAdd ) > 0 ) 						{ 							if( strpos( $SummaryToAdd, "__NOTOC__" ) !== FALSE ) 							{ 								//todo - decide how to deal with this case! 							
} 							else 							{ 								$ArrayAllArticleOfficialSummaries[$CurrentSummaryTitle][$AlternateSummaryCount] = $SummaryToAdd; 							} 						} 					} 				}    				$SuccessfullyReadOneSummariesPage = true; 			}  			if( $IsReadingOneConceptPage ) 			{ 				//fwrite( $stderr, "------reading body of $CurrentTitle \n\n" ); 				 				// don't bother processing the page unless it has a "Blog posts" section  				$TokenStartPos = 0; 				$TokenEndPos = 0;  	 				// keep track of which pages have links to OvercomingBias.com articles 				 				if( substr_count( $CurrentBody, "http://www.overcomingbias.com/20" ) > 0 ) 				{ 					// the following pages are "allowed" to have OvercomingBias.com articles - these pages were already checked manually for links that should point to lesswrong.com instead 					if 						( 							$CurrentTitle == "Aumann's agreement theorem" || 							$CurrentTitle == "Bias" || 							$CurrentTitle == "Bite the bullet" || 							$CurrentTitle == "Black swan" || 							$CurrentTitle == "Catch Phrases" || 							$CurrentTitle == "Chat Logs/2009-04-11" || 							$CurrentTitle == "Cognitive style" || 							$CurrentTitle == "Coherence" || 							$CurrentTitle == "Connotation" || 							$CurrentTitle == "Consistency" || 							$CurrentTitle == "Cryonics" || 							$CurrentTitle == "Dark arts" || 							$CurrentTitle == "Disagreement" || 							$CurrentTitle == "Disagreements on Less Wrong" || 							$CurrentTitle == "Emotion" || 							$CurrentTitle == "Extraordinary evidence" || 							$CurrentTitle == "Forecast" || 							$CurrentTitle == "Hypocrisy" || 							$CurrentTitle == "Impossible world" || 							$CurrentTitle == "Intellectual roles" || 							$CurrentTitle == "Likelihood ratio" || 							$CurrentTitle == "Meme lineage" || 							$CurrentTitle == "Modesty argument" || 							$CurrentTitle == "Near/far thinking" || 							$CurrentTitle == "Overcoming Bias" || 							$CurrentTitle == "Overconfidence" || 							$CurrentTitle == "Prediction market" || 				
			$CurrentTitle == "Scales of justice fallacy" || 							$CurrentTitle == "Series" || 							$CurrentTitle == "Signaling" || 							$CurrentTitle == "Signalling" || 							$CurrentTitle == "Status" || 							$CurrentTitle == "Stereotype" || 							false 						) 					{ 						// do nothing 					} 					else 					{ 						$PagesWithOvercomingBiasLinks[$CurrentTitle] = true; 					} 				}  				if( substr_count( $CurrentBody, "<!--" ) > 0 ) 				{ 					// the following pages are "allowed" to have comments 					if 						( 							$CurrentTitle == "LessWrong Wiki" 						) 					{ 						// do nothing 					} 					else 					{ 						$PagesWithComments[$CurrentTitle] = true; 					} 				}  				if( substr_count( $CurrentBody, "==Overcoming Bias Articles==" ) > 0 ) 				{ 					$PagesWithOvercomingBiasArticlesHeader[$CurrentTitle] = true; 				}  				if( substr_count( $CurrentBody, "External references" ) > 0 ) 				{ 					$PagesWithExternalReferences[$CurrentTitle] = true; 				}  				if( substr_count( $CurrentBody, "See Also" ) > 0 ) 				{ 					$PagesWithSeeAlso[$CurrentTitle] = true; 				} 				 				if( substr_count( $CurrentBody, "by [http" ) > 0 ) 				{ 					$PagesWithExternalAuthorLinks[$CurrentTitle] = true; 				} 				 				if( substr_count( $CurrentBody, "wikilink}}\n\n" ) > 0 ) 				{ 					$PagesWithNewlineAfterWikiLink[$CurrentTitle] = true; 				} 				 				$SeeAlsoPos = strpos($CurrentBody, "==See also=="); 				$BlogPostsPos = strpos($CurrentBody, "==Blog posts=="); 				 				if( $SeeAlsoPos !== FALSE && $BlogPostsPos !== FALSE ) 				{ 					if( $SeeAlsoPos < $BlogPostsPos ) 					{ 						$PagesWithSeeAlsoBeforeBlogPosts[$CurrentTitle] = true; 					} 				} 				  				if( substr_count( $CurrentBody, "#REDIRECT" ) > 0 ) 				{ 					if( array_key_exists( $CurrentTitle, $ConceptFound ) ) 					{ 						$ConceptThatAreRedirects[$CurrentTitle] = true; 					} 				}   				//todo - reconsider this!!! 				
//if( substr_count( $CurrentArticleLink, "==Blog posts==" ) > 0 ) 				if( substr_count( $CurrentBody, "http://lesswrong.com/lw/" ) > 0 ) 				{ 					// first check if the concept exists in the index  					$ConceptIsInIndex = false;  					if( array_key_exists( $CurrentTitle, $ConceptFound ) ) 					{ 						$ConceptIsInIndex = true; 					}  					if( $ConceptIsInIndex ) 					{ 						// if the concept exists in the index, then mark the concept as found 						 						$ConceptFound[$CurrentTitle] = true; 						 						//fwrite( $stderr, "++++++found: $CurrentTitle\n\n" ); 					} 					else 					{ 						// if the concept doesn't exist in the index, then mark the concept as not found 						 						$ConceptNotInIndex[$CurrentTitle] = true; 					} 					 					 					// now keep track of the "See also" links 					if( array_key_exists($CurrentTitle, $ConceptFound ) ) 					{ 						if( $ConceptFound[$CurrentTitle] == true ) 						{ 							$SeeAlsoStartPos = strpos($CurrentBody, "==See also==", 0);  							if( $SeeAlsoStartPos !== FALSE ) 							{ 								$SeeAlsoStartPos += 12; 							} 							else 							{ 								$SeeAlsoStartPos = strpos($CurrentBody, "==Related concepts==", 0);  								if( $SeeAlsoStartPos !== FALSE ) 								{ 									$SeeAlsoStartPos += 20; 								} 								else 								{ 								} 							}  							if( $SeeAlsoStartPos !== FALSE ) 							{ 								$SeeAlsoEndPos = strpos($CurrentBody, "==", $SeeAlsoStartPos); 								 								if( $SeeAlsoEndPos === FALSE ) 								{ 									$SeeAlsoEndPos = strlen($CurrentBody); 								}  								$TokenStartPos = $SeeAlsoStartPos; 								$TokenEndPos = $SeeAlsoStartPos;  								$DoneSeeAlso = false;  								$EntriesFound = 0;  								$ConceptSeeAlso[$CurrentTitle] = array();  								//fwrite( $stderr, "Concept: " . $CurrentTitle . 
"\n" );  								while( !$DoneSeeAlso ) 								{ 									$TokenStartPos = strpos($CurrentBody, "[[", $TokenEndPos); 									$TokenEndPos = strpos($CurrentBody, "]]", $TokenStartPos);  									if 										( 											$TokenStartPos < $SeeAlsoEndPos && 											$TokenStartPos >= $SeeAlsoStartPos && 											$TokenStartPos !== FALSE && 											$TokenEndPos !== FALSE 										) 									{ 										$CurrentSeeAlso = substr( $CurrentBody, $TokenStartPos+2, $TokenEndPos-$TokenStartPos-2 ); 										 										if( substr_count( $CurrentSeeAlso, "Category:") <= 0 ) 										{ 											$ConceptSeeAlso[$CurrentTitle][$EntriesFound] = $CurrentSeeAlso;  											//fwrite( $stderr, $ConceptSeeAlso[$CurrentTitle][$EntriesFound] . "\n" );  											$EntriesFound++; 										} 									} 									else 									{ 										$DoneSeeAlso = true; 									}  									//fwrite( $stderr, "SeeAlsoStartPos: $SeeAlsoStartPos SeeAlsoEndPos: $SeeAlsoEndPos TokenStartPos: $TokenStartPos TokenEndPos $TokenEndPos" . 
"\n" ); 								} 							} 						} 					} 					   					$TokenStartPos = strpos($CurrentBody, "http://lesswrong.com/lw/", $TokenEndPos); 					$TokenEndPos = strpos($CurrentBody, " ", $TokenStartPos);  					$NextTokenStartPos = strpos($CurrentBody, "http://lesswrong.com/lw/", $TokenEndPos); 					 					if( $NextTokenStartPos === FALSE ) 					{ 						$NextTokenStartPos = strlen($CurrentBody); 					}  					$CurrentLink = substr( $CurrentBody, $TokenStartPos, $TokenEndPos-$TokenStartPos ); 					 					$FinishedFindingLinks = false;  					while( !$FinishedFindingLinks ) 					{ 						// check if the link appears before or after the first header 						 						$FirstHeaderPos = strpos($CurrentBody, "==", 0);  						$LinkIsAfterHeader = false;  						if( $FirstHeaderPos === FALSE ) 						{ 							$LinkIsAfterHeader = false; 						} 						else 						{ 							if( $FirstHeaderPos < $TokenStartPos ) 							{ 								$LinkIsAfterHeader = true; 							} 							else 							{ 								$LinkIsAfterHeader = false; 							} 						}    						// ignore the following links: 						 						if 							( 								substr_count( $CurrentLink, "http://lesswrong.com/lw/b1/persuasiveness_vs_soundness/789") <= 0 && 								substr_count( $CurrentLink, "http://lesswrong.com/lw/14v/the_usefulness_of_correlations/11iu") <= 0 							)  						{ 							// check if the link ends with a /  							$LastCharInLink = substr( $CurrentLink, -1 ); 							 							$SlashCountBeforeAdd = substr_count( $CurrentLink, "/");  							if 								( 									strcmp( $LastCharInLink, "/" ) != 0 								) 							{ 								$CurrentLink .= "/"; 							}    							// check if the link's title is correct  							$TitleStartPos = $TokenEndPos+1; 							$TitleEndPos = strpos($CurrentBody, "]", $TitleStartPos);  							$CurrentArticleTitle = substr( $CurrentBody, $TitleStartPos, $TitleEndPos-$TitleStartPos );  							$CorrectArticleTitle = "(not found)"; 							$CorrectArticleAuthor = "(not found)";  							$SearchResult = array_search( 
$CurrentLink, $ArrayAllArticleLink );  							if( $SearchResult !== FALSE ) 							{ 								$CorrectArticleTitle  = $ArrayAllArticleTitle [$SearchResult]; 								$CorrectArticleAuthor = $ArrayAllArticleAuthor[$SearchResult]; 								 								 								 								//don't do any processing for summaries unless the link we're looking at is after the first header 								if( $LinkIsAfterHeader ) 								{ 									$LinkEndPos = strpos($CurrentBody, "]", $TokenStartPos) + 1;  									$NewLinePos = strpos($CurrentBody, "\n", $LinkEndPos);  									$NextArticleSummary = "";  									if( $NewLinePos === FALSE ) 									{ 										$NextArticleSummary = substr( $CurrentBody, $LinkEndPos ); 									} 									else 									{ 										//$NextArticleSummary .= "\nsummary:\n" + substr( $CurrentBody, $TokenStartPos, $NextTokenStartPos-$TokenStartPos ); 										$NextArticleSummary = substr( $CurrentBody, $LinkEndPos, $NewLinePos-$LinkEndPos ); 									}  									// search through the array of official summaries for this article, to see if any of them are found in the current article 									// if a match is found, then don't add the summary to the array of used summaries  									$SummaryExistsForThisArticle = false; 									$FoundAMatchingSummary = false; 									$FirstFoundSummary = "";  									if( array_key_exists( $CorrectArticleTitle, $ArrayAllArticleOfficialSummaries ) ) 									{ 										$SummaryExistsForThisArticle = true;  										foreach( $ArrayAllArticleOfficialSummaries[$CorrectArticleTitle] as $key => $val ) 										{ 											if( substr_count( $CurrentBody, $val ) > 0 ) 											{ 												if( !$FoundAMatchingSummary ) 												{ 													$FirstFoundSummary = $val; 												}  												$FoundAMatchingSummary = true; 											} 										} 									}  									$AddThisSummaryToTheArray = false;  									if( strpos( $NextArticleSummary, "by [[" ) > 0 ) 									{ 										//todo - consider adding a check for improperly 
formatted author links  										//todo - consider adding a check for an author link followed by a summary  										//todo - consider adding a check for a summary that doesn't appear until after the newline 									} 									else if( strlen( $NextArticleSummary ) > 0 ) 									{ 										if( $FoundAMatchingSummary ) 										{ 											// if we found a matching summary, then we don't need to do anything here 										} 										else 										{ 											$AddThisSummaryToTheArray = true; 										} 									} 									else if( $SummaryExistsForThisArticle ) 									{ 										// a summary is available for this article, but the summary isn't on this page. 										// report this to   										//$ArticleLinksWithAvailableSummary[$ArticleLinksWithAvailableSummaryNextIndex] = "*[$CurrentLink $CorrectArticleTitle] - " . $ArrayAllArticleOfficialSummaries[$CorrectArticleTitle][0]; 										//$ArticleLinksWithAvailableSummary[$ArticleLinksWithAvailableSummaryNextIndex] = "*[$CurrentLink $CorrectArticleTitle] - " . $FirstFoundSummary;  										// just accessing the array directly doesn't work, and neither did that $FirstFoundSummary trick.  for some reason, I need to use a foreach  										//fwrite( $stderr, "$FirstFoundSummary\n" ); 										//fwrite( $stderr, "$ArrayAllArticleOfficialSummaries[$CorrectArticleTitle][0]\n" );  										$IsFirstIteration = true;  										foreach( $ArrayAllArticleOfficialSummaries[$CorrectArticleTitle] as $key => $val ) 										{ 											if( $IsFirstIteration ) 											{ 												$ArticleLinksWithAvailableSummary[$ArticleLinksWithAvailableSummaryNextIndex] = "*[$CurrentLink $CorrectArticleTitle] - " . 
$val;  												$IsFirstIteration = false; 											} 											//fwrite( $stderr, "$val\n" ); 										}  										$ArticleLinksWithAvailableSummaryConcept[$ArticleLinksWithAvailableSummaryNextIndex] = $CurrentTitle;  										$ArticleLinksWithAvailableSummaryNextIndex++; 									}  									if( $AddThisSummaryToTheArray ) 									{ 										// ignore known invalid summaries 										if( strlen( $NextArticleSummary ) > 10 ) 										{ 											$NextArticleSummary = trim( $NextArticleSummary, " ,-—'" );  											if 												( 													substr_count( $NextArticleSummary, "'' and ''[http://lesswrong.com/lw/hm/new_improved_lottery/ New Improved Lottery]''" ) > 0 || 													substr_count( $NextArticleSummary, "(and [http://lesswrong.com/lw/ht/beware_the_unsurprised/ Beware the Unsurprised])" ) > 0 || 													substr_count( $NextArticleSummary, "In particular, the [[Litany of Tarski]]." ) > 0 || 													substr_count( $NextArticleSummary, "(but first read: [http://lesswrong.com/lw/m2/the_litany_against_gurus/ The Litany Against Gurus])" ) > 0 || 													substr_count( $NextArticleSummary, ", [http://lesswrong.com/lw/" ) > 0 || 													substr_count( $NextArticleSummary, "and [http://lesswrong.com/lw/" ) > 0 || 													substr_count( $NextArticleSummary, "'' (prerequisite: [http://lesswrong.com/lw" ) > 0 || 													substr_count( $NextArticleSummary, "'' and ''[http://lesswrong.com/lw" ) > 0 || 													strcmp( $NextArticleSummary, "setting up the problem." ) == 0 || 													strcmp( $NextArticleSummary, "[[Eliezer Yudkowsky]]" ) == 0 || 													strcmp( $NextArticleSummary, "by Salamon and Rayhawk." ) == 0 || 													strcmp( $NextArticleSummary, "by  [[Eliezer Yudkowsky]]" ) == 0 || 													strcmp( $NextArticleSummary, "by talisman" ) == 0 || 													strcmp( $NextArticleSummary, "(short story)" ) == 0 || 													strcmp( $NextArticleSummary, "of a particular study design. 
Debiasing [http://lesswrong.com/lw/jk/burdensome_details/ won't be as simple] as practicing specific questions, it requires certain general habits of thought." ) == 0 || 													strcmp( $NextArticleSummary, "as practicing specific questions, it requires certain general habits of thought." ) == 0 || 													strcmp( $NextArticleSummary, "'' their single principle; but if they were ''really'' following ''only'' that single principle, they would [http://lesswrong.com/lw/kz/fake_optimization_criteria/ choose other acts to justify]." ) == 0 || 													strcmp( $NextArticleSummary, "all of their complicated ''other'' preferences into their choice of ''exactly'' which acts they try to ''[http://lesswrong.com/lw/kq/fake_justification/ justify using]'' their single principle; but if they were ''really'' following ''only'' that single principle, they would [http://lesswrong.com/lw/kz/fake_optimization_criteria/ choose other acts to justify]." ) == 0 || 													strcmp( $NextArticleSummary, "to this post tries to explain the cognitive twists whereby people [http://lesswrong.com/lw/ld/the_hidden_complexity_of_wishes/ smuggle] all of their complicated ''other'' preferences into their choice of ''exactly'' which acts they try to ''[http://lesswrong.com/lw/kq/fake_justification/ justify using]'' their single principle; but if they were ''really'' following ''only'' that single principle, they would [http://lesswrong.com/lw/kz/fake_optimization_criteria/ choose other acts to justify]." ) == 0 || 													strcmp( $NextArticleSummary, "[http://lesswrong.com/lw/n1/allais_malaise/ followups]) - Offered choices between gambles, people make decision-theoretically inconsistent decisions." ) == 0 || 													strcmp( $NextArticleSummary, ") - Offered choices between gambles, people make decision-theoretically inconsistent decisions." ) == 0 || 													strcmp( $NextArticleSummary, "and ''[http://lesswrong.com/lw/oo/explaining_vs_explaining_away/ Explaining vs. 
Explaining Away]'' - elementary [[reductionism]]." ) == 0 || 													strcmp( $NextArticleSummary, "\" which essentially answered \"Not on the present state of the Art\"" ) == 0 || 													strcmp( $NextArticleSummary, "(and its [[Privileging the hypothesis | requisites]], like [[Locating the hypothesis]])" ) == 0 || 													strcmp( $NextArticleSummary, "and ''[http://lesswrong.com/lw/hm/new_improved_lottery/ New Improved Lottery]" ) == 0 || 													strcmp( $NextArticleSummary, "their single principle; but if they were ''really'' following ''only'' that single principle, they would [http://lesswrong.com/lw/kz/fake_optimization_criteria/ choose other acts to justify]." ) == 0 || 													strcmp( $NextArticleSummary, "[http://lesswrong.com/lw/w6/recursion_magic/ ...Recursion, Magic]" ) == 0 || 													strcmp( $NextArticleSummary, "[http://lesswrong.com/lw/wf/hard_takeoff/ Hard Takeoff]" ) == 0 || 													strcmp( $NextArticleSummary, "[http://lesswrong.com/lw/wg/permitted_possibilities_locality/ Permitted Possibilities, & Locality]" ) == 0 || 													strcmp( $NextArticleSummary, "(in the martial arts)" ) == 0 || 													strcmp( $NextArticleSummary, "(in both psychotherapy and martial arts)" ) == 0 || 													strcmp( $NextArticleSummary, "Description and account of the game." 
) == 0 || 													false 												) 											{ 												// don't add the invalid summary 											} 											else 											{ 												$ArrayAllArticleUsedSummaries[$SearchResult][] = $NextArticleSummary; 											} 										} 									} 								} 							}  							// for authors who have their own wikipages, link to the wikipage instead of the LW user page 							$CorrectArticleAuthor = str_replace("[http://lesswrong.com/user/Eliezer_Yudkowsky Eliezer_Yudkowsky]", "[[Eliezer Yudkowsky]]", $CorrectArticleAuthor);    							// check if the link ends with a /  							if 								( 									strcmp( $LastCharInLink, "/" ) != 0 && 									$SlashCountBeforeAdd < 6 								) 							{ 								$ArticleLinksWithoutEndingSlash[$ArticleLinksWithoutEndingSlashNextIndex] = "*[$CurrentLink $CorrectArticleTitle] by $CorrectArticleAuthor";  								$ArticleLinksWithoutEndingSlashConcept[$ArticleLinksWithoutEndingSlashNextIndex] = $CurrentTitle;  								$ArticleLinksWithoutEndingSlashNextIndex++; 							}    							if( strcmp($CurrentArticleTitle, $CorrectArticleTitle) != 0 && $LinkIsAfterHeader ) 							{ 								// don't report an error for the following links: 								if 									( 										substr_count( $CurrentArticleTitle, "ranges over anything, not just internal subjective experiences") <= 0 && 										substr_count( $CurrentArticleTitle, "sequence leading up") <= 0 && 										substr_count( $CurrentArticleTitle, "smuggle") <= 0 && 										substr_count( $CurrentArticleTitle, "justify using") <= 0 && 										substr_count( $CurrentArticleTitle, "choose other acts to justify") <= 0 && 										substr_count( $CurrentArticleTitle, "Timeless decision theory") <= 0 && 										substr_count( $CurrentArticleTitle, "philosophical majoritarianism") <= 0 && 										substr_count( $CurrentArticleTitle, "critical comments") <= 0 && 										substr_count( $CurrentArticleTitle, "Positive Bias") <= 0 && 										substr_count( $CurrentArticleTitle, "Hindsight 
Bias") <= 0 && 										substr_count( $CurrentArticleTitle, "not an isolated artifact") <= 0 && 										substr_count( $CurrentArticleTitle, "won't be as simple") <= 0 && 										substr_count( $CurrentArticleTitle, "Illusion of Transparency") <= 0 && 										substr_count( $CurrentArticleTitle, "Affect Heuristic") <= 0 && 										substr_count( $CurrentArticleTitle, "Evaluability") <= 0 && 										substr_count( $CurrentArticleTitle, "Unbounded Scales, Huge Jury Awards, and Futurism") <= 0 && 										substr_count( $CurrentArticleTitle, "subsequent") <= 0 && 										substr_count( $CurrentArticleTitle, "followups") <= 0 && 										substr_count( $CurrentArticleTitle, "Do We Believe <i>Everything</i> We're Told?") <= 0 && 										substr_count( $CurrentArticleTitle, "Quantum Physics") <= 0 && 										substr_count( $CurrentArticleTitle, "Shut Up and Do the Impossible") <= 0 && 										substr_count( $CurrentArticleTitle, "You ''Can'' Face Reality") <= 0 && 										substr_count( $CurrentArticleTitle, "Absence of Evidence ''Is'' Evidence of Absence") <= 0 && 										substr_count( $CurrentArticleTitle, "Doublethink: Choosing to be Biased") <= 0 && 										substr_count( $CurrentArticleTitle, "Anti-Epistemology") <= 0 && 										substr_count( $CurrentArticleTitle, "Is Humanism a Religion-Substitute?") <= 0 && 										substr_count( $CurrentArticleTitle, "Your Strength As A Rationalist") <= 0 && 										substr_count( $CurrentArticleTitle, "Absence of Evidence '''is''' Evidence of Absence") <= 0 && 										substr_count( $CurrentArticleTitle, "Reversed Stupidity is Not Intelligence") <= 0 && 										substr_count( $CurrentArticleTitle, "A Human's Guide to Words") <= 0 && 										substr_count( $CurrentArticleTitle, "here") <= 0 && 										true 									) 								{ 									$ArticleLinksWithWrongTitle[$ArticleLinksWithWrongTitleNextIndex] = "*[$CurrentLink $CorrectArticleTitle] by $CorrectArticleAuthor\n**(title was [$CurrentLink 
$CurrentArticleTitle])";  									$ArticleLinksWithWrongTitleConcept[$ArticleLinksWithWrongTitleNextIndex] = $CurrentTitle;  									$ArticleLinksWithWrongTitleNextIndex++;  									if( $Debug ) 									{ 										//fwrite( $stderr, "article title: $CurrentArticleTitle\ncorrect title: $CorrectArticleTitle\n\n" ); 									} 								} 							}  							//fwrite( $stderr, "article title: $CurrentArticleTitle\n\n" );  							$ByCharacters = substr( $CurrentBody, $TitleEndPos, 6 );  							$FirstHeaderPos = strpos($CurrentBody, "==", 0);      							// check if the link has an author  							// ignore missing authors if the link appears before any headers 							if( strcmp($ByCharacters, "] by [") != 0 && $LinkIsAfterHeader ) 							{ 								$LineUpToAuthor = substr( $CurrentBody, $TokenStartPos-1, ($TitleEndPos+6)-($TokenStartPos-1) ); 								$LineUpToAuthor = str_replace("\n", "\\n", $LineUpToAuthor);  								$ArticleLinksWithoutAuthor[$ArticleLinksWithoutAuthorNextIndex] = "*[$CurrentLink $CorrectArticleTitle] by $CorrectArticleAuthor\n**(was $LineUpToAuthor)";  								$ArticleLinksWithoutAuthorConcept[$ArticleLinksWithoutAuthorNextIndex] = $CurrentTitle;  								$ArticleLinksWithoutAuthorNextIndex++;  								if( $Debug ) 								{ 									//fwrite( $stderr, "Concept: $CurrentTitle, by characters: <$ByCharacters>\n\n" ); 								} 							}  							//todo - change this to check if the author is correct! 						
} 					 					 					 					 					 						//fwrite( $stderr, "link start: $TokenStartPos \n link end: $TokenEndPos \n current link: $CurrentLink\n\n" ); 					 						// find the current link in the array, if it exists  						$SearchResult = array_search( $CurrentLink, $ArrayAllArticleLink );  						if( $SearchResult !== FALSE ) 						{ 							// find if this concept exists in the array 							if( in_array( $CurrentTitle, $ArrayAllArticleIndexedConcepts[$SearchResult] ) ) 							{ 								// mark the link as found 								$ArrayAllArticleFoundConcepts     [$SearchResult][$CurrentTitle] = true; 							} 							else 							{ 								// add the concept to the array of unindexed concepts 								$ArrayAllArticleNotIndexedConcepts[$SearchResult][] = $CurrentTitle; 							} 						} 						 						$TokenStartPos = strpos($CurrentBody, "http://lesswrong.com/lw/", $TokenEndPos); 						$TokenEndPos = strpos($CurrentBody, " ", $TokenStartPos);  						$CurrentLink = substr( $CurrentBody, $TokenStartPos, $TokenEndPos-$TokenStartPos ); 						 						if 							( 								$TokenEndPos <= $TokenStartPos || 								$TokenStartPos === FALSE || 								substr_count( $CurrentLink, "http://lesswrong.com/lw/" ) <= 0 							) 						{ 							$FinishedFindingLinks = true; 						} 					} 				} 				 				 				//$IsReadingConceptPages = false;  				//fwrite( $stderr, $CurrentBody );  				//unfinished!!!  				//for each page:  				// first check if the page is in the $ConceptFound list  				// read the list of articles in the "Blog posts" section. 				
// report any links that appear in the All Articles page, for that concept, but don't appear in the wiki page
				// report any links that appear in the wiki page, but don't appear in the All Articles page, for that concept

				$SuccessfullyReadOneConceptPage = true;
			}
		}
	}
}

// defaultHandler
// registered by new_xml_parser() as the parser's default handler.
// it receives the parser and the data, and does nothing with them.
function defaultHandler($parser, $data)
{
}

// new_xml_parser
// opens $file for reading and creates an XML parser for it.
// the parser has case folding turned on, and uses the handlers named
// "startElement", "endElement", "characterData" and "defaultHandler".
// the file name is also recorded in the global $parser_file array.
//
// returns array( parser, file handle ), or false if the file could not be opened.
function new_xml_parser($file)
{
    global $parser_file;

    // open the file first, so that a failed open doesn't leave an unused parser behind.
    // the @ keeps the failure quiet; the caller is the one that reports it.
    $fp = @fopen($file, "r");

    if( $fp === false )
    {
        return false;
    }

    $xml_parser = xml_parser_create();
    xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING, 1);
    xml_set_element_handler($xml_parser, "startElement", "endElement");
    xml_set_character_data_handler($xml_parser, "characterData");
    xml_set_default_handler($xml_parser, "defaultHandler");

    if( !is_array($parser_file) )
    {
        settype($parser_file, "array");
    }

    // the parser itself can't be used as an array key when it is an object
    // (that's a fatal error), so use its object id in that case.
    // for a resource, use the same integer that PHP would cast it to anyway.
    if( is_object($xml_parser) )
    {
        $parser_key = spl_object_id($xml_parser);
    }
    else
    {
        $parser_key = (int) $xml_parser;
    }

    $parser_file[$parser_key] = $file;

    return array($xml_parser, $fp);
}

// ReadOnceThroughTheWholeXMLFile
// parses the file named by the global $XMLfile from start to end, in 4096 byte chunks.
// all of the real work is done by the handlers that new_xml_parser() registers.
// dies with a message if the file can't be opened or read, or if the parser reports an error.
function ReadOnceThroughTheWholeXMLFile()
{
	global $XMLfile;

	// create the XML parser

	$parser_and_file = new_xml_parser($XMLfile);

	if( !$parser_and_file )
	{
		die("could not open XML input");
	}

	list($xml_parser, $fp) = $parser_and_file;

	// read the XML file

	// the loop doesn't test the chunk for truthiness, because a chunk of "0" is falsy
	// and would end the loop early.
	// it also makes sure that the parser always gets a call with the "final" flag set,
	// even when the file size is an exact multiple of the chunk size.

	$is_first_chunk = true;

	do
	{
		$data = fread($fp, 4096);

		if( $data === false )
		{
			die("could not read XML input");
		}

		if( $is_first_chunk && $data === "" )
		{
			// the file is empty, so there is nothing to parse
			break;
		}

		$is_first_chunk = false;

		// an empty read means there is nothing left, so treat it as the final chunk too
		$is_final = feof($fp) || $data === "";

		if( !xml_parse($xml_parser, $data, $is_final) )
		{
			die( sprintf("XML error: %s at line %d\n",
				xml_error_string(xml_get_error_code($xml_parser)),
				xml_get_current_line_number($xml_parser)));
		}
	}
	while( !$is_final );

	// we're done with the XML file now, so close it

	fclose($fp);

	// this function gets called once per pass, so release the parser as well.
	// only a resource parser needs to be freed explicitly.
	if( is_resource($xml_parser) )
	{
		xml_parser_free($xml_parser);
	}
}


fwrite( $stderr, "Reading through the All Articles pages\n\n" );

// first read through the All Articles pages

$IsReadingAllArticlesPages = true;

$NextYearToRead = $FirstYearToRead;

$FailedToReadAllArticlesPage = false;

// keep looping through the whole XML file
// abort when we've successfully read all of the All Articles pages,
// or if we've looped through the whole file without finding the next one

// there's probably a more efficient way than looping repeatedly through the whole XML file,
//  but that would probably
// make the code even more tangled than it is now.

// this looping should be unnecessary, because the All Articles pages should be in order in the XML file, but I'll leave this code as it is for now.

// each pass reads the whole XML file once.
// the pass counts as a success only if $SuccessfullyReadOneAllArticlesPage was set during the read.

while( $NextYearToRead <= $FinalYearToRead && ! $FailedToReadAllArticlesPage )
{
	fwrite( $stderr, "processing the XML file, year $NextYearToRead \n\n" );

	$SuccessfullyReadOneAllArticlesPage = false;

	ReadOnceThroughTheWholeXMLFile();

	if( $SuccessfullyReadOneAllArticlesPage )
	{
		continue;
	}

	// nothing was read during this pass, so give up
	$FailedToReadAllArticlesPage = true;

	fwrite( $stderr, "error: failed to read the next All Articles page, year $NextYearToRead \n\n" );
}

$IsReadingAllArticlesPages = false;



fwrite( $stderr, "Reading through the Summaries pages\n\n" );

// second phase: the Summaries pages, starting again from the first year

$IsReadingSummariesPages = true;

$NextYearToRead = $FirstYearToRead;

$FailedToReadSummariesPage = false;

// same scheme as for the All Articles pages:
// keep re-reading the whole XML file until every year is done,
// or until a whole pass goes by without reading a Summaries page

while( $NextYearToRead <= $FinalYearToRead && ! $FailedToReadSummariesPage )
{
	fwrite( $stderr, "processing the XML file for summaries, year $NextYearToRead \n\n" );

	$SuccessfullyReadOneSummariesPage = false;

	ReadOnceThroughTheWholeXMLFile();

	if( $SuccessfullyReadOneSummariesPage )
	{
		continue;
	}

	// nothing was read during this pass, so give up
	$FailedToReadSummariesPage = true;

	fwrite( $stderr, "error: failed to read the next summaries page, year $NextYearToRead \n\n" );
}

$IsReadingSummariesPages = false;



fwrite( $stderr, "marking all concepts as not found yet\n\n" );

// every concept that is listed for any article starts out as "not found"

foreach( $ArrayAllArticleIndexedConcepts as $key => $val )
{
	foreach( $val as $key2 => $val2 )
	{
		$ConceptFound[$val2] = false;
	}
}



fwrite( $stderr, "Reading through the Concept pages\n\n" );

// third phase: the Concept pages

$IsReadingConceptPages     = true;

$FailedToReadConceptPage = false;

// one pass through the XML file is enough here,
// because the concept pages don't need to be read in any particular order

ReadOnceThroughTheWholeXMLFile();

fwrite( $stderr, "Pages read: $PagesRead\n" );

if( ! $SuccessfullyReadOneConceptPage )
{
	$FailedToReadConceptPage = true;

	fwrite( $stderr, "error: failed to read any Concept page \n\n" );
}



fwrite( $stderr, "Doing final processing, step 1 of 3...\n\n" );


// now go through the $ConceptFound array

// for any concept that doesn't already have a wikipage, output a template for a blank page, with the "Blog posts" section filled in with all of the blog posts that mention this topic


if( false )
{
	// this section was added just to clean up the pages that use the old OB link for Eliezer's post instead of the new LW link
	// disabled now because it gives too many false positives
	fwrite( $fp, "\n\n==The following concept pages link to OvercomingBias.com articles:==\n\n" );

	foreach( $PagesWithOvercomingBiasLinks as $key => $val )
	{
		fwrite( $fp, "*[[$key]]\n" );
	}
}



// one bullet per page that was flagged as containing an HTML comment

fwrite( $fp, "\n\n==The following concept pages have comments:==\n\n" );

foreach( $PagesWithComments as $key => $val )
{
	fwrite( $fp, "*[[$key]]\n" );
}



// this section was added just to clean up the pages that use the old OB link for Eliezer's post instead of the new LW link
// disabled now because it gives too many false positives
// NOTE(review): the report below is not wrapped in if( false ) and still runs, so the "disabled" remark doesn't match the code - confirm which one is intended
fwrite( $fp, "\n\n==The following concept pages have the \"Overcoming Bias Articles\" header:==\n\n" );

foreach( $PagesWithOvercomingBiasArticlesHeader as $key => $val )
{
	fwrite( $fp, "*[[$key]]\n" );
}



fwrite( $fp, "\n\n==The following concept pages have \"External references\" instead of \"References\":==\n\n" );

foreach( $PagesWithExternalReferences as $key => $val )
{
	fwrite( $fp, "*[[$key]]\n" );
}



fwrite( $fp, "\n\n==The following concept pages have a miscapitalized \"See Also\" header:==\n\n" );

foreach( $PagesWithSeeAlso as $key => $val )
{
	fwrite( $fp, "*[[$key]]\n" );
}



fwrite( $fp, "\n\n==The following concept pages have an author link that links to an external site:==\n\n" );

foreach( $PagesWithExternalAuthorLinks as $key => $val )
{
	fwrite( $fp, "*[[$key]]\n" );
}



fwrite( $fp, "\n\n==The following concept pages have an extra newline after the wikilink template:==\n\n" );

foreach( $PagesWithNewlineAfterWikiLink as $key => $val )
{
	fwrite( $fp, "*[[$key]]\n" );
}



fwrite( $fp, "\n\n==The following concept pages have the See Also section before the Blog Posts section:==\n\n" );

foreach( $PagesWithSeeAlsoBeforeBlogPosts as $key => $val )
{
	fwrite( $fp, "*[[$key]]\n" );
}


// disabled because of a weird bug that was introduced when I added the "see also" checking
/*
fwrite( $fp, "\n\n==The following article links are missing the / at the end, or aren't in the index:==\n\n" );

$PreviousConcept = "";

foreach( $ArticleLinksWithoutEndingSlash as $key => $val )
{
	if( strcmp( $PreviousConcept, $ArticleLinksWithoutEndingSlashConcept[$key] ) != 0 )
	{
		fwrite( $fp, "\n*[[$ArticleLinksWithoutEndingSlashConcept[$key]]]\n" );
	}
	fwrite( $fp, "*$val\n" );
	
	$PreviousConcept = $ArticleLinksWithoutEndingSlashConcept[$key];
}
*/


// the next reports are grouped by concept:
// the concept's own bullet is written only when the concept differs from the previous entry's concept

fwrite( $fp, "\n\n==The following article links have a wrong or improperly formatted title:==\n\n" );

$PreviousConcept = "";

foreach( $ArticleLinksWithWrongTitle as $key => $val )
{
	if( strcmp( $PreviousConcept, $ArticleLinksWithWrongTitleConcept[$key] ) != 0 )
	{
		fwrite( $fp, "\n*[[$ArticleLinksWithWrongTitleConcept[$key]]]\n" );
	}
	fwrite( $fp, "*$val\n" );

	$PreviousConcept = $ArticleLinksWithWrongTitleConcept[$key];
}


fwrite( $fp, "\n\n==The following article links have a summary available that was not added to the page:==\n\n" );

$PreviousConcept = "";

foreach( $ArticleLinksWithAvailableSummary as $key => $val )
{
	if( strcmp( $PreviousConcept, $ArticleLinksWithAvailableSummaryConcept[$key] ) != 0 )
	{
		fwrite( $fp, "\n*[[$ArticleLinksWithAvailableSummaryConcept[$key]]]\n" );
	}
	fwrite( $fp, "*$val\n" );

	$PreviousConcept = $ArticleLinksWithAvailableSummaryConcept[$key];
}




if( false )
{
	/// this section was disabled because there were way too many false positives
	fwrite( $fp, "\n\n==The following article links have a missing or improperly formatted author:==\n\n" );

	$PreviousConcept = "";

	foreach( $ArticleLinksWithoutAuthor as $key => $val )
	{
		if( strcmp( $PreviousConcept, $ArticleLinksWithoutAuthorConcept[$key] ) != 0 )
		{
			fwrite( $fp, "\n*[[$ArticleLinksWithoutAuthorConcept[$key]]]\n" );
		}
		fwrite( $fp, "*$val\n" );

		$PreviousConcept = $ArticleLinksWithoutAuthorConcept[$key];
	}
}



// concepts whose entry in $ConceptFound is still false

fwrite( $fp, "\n\n==The following concepts don't have wikipages with links to LessWrong.com articles yet:==\n\n" );

foreach( $ConceptFound as $key => $val )
{
	if( ! $val )
	{
		fwrite( $fp, "*[[$key]]\n" );
	}
}



fwrite( $fp, "\n\n==The following concepts are not in the All Articles pages:==\n\n" );

foreach( $ConceptNotInIndex as $key => $val )
{
	fwrite( $fp, "*[[$key]]\n" );
}



fwrite( $fp, "\n\n==The following concepts are in the All Articles page, but are redirects:==\n\n" );

foreach( $ConceptThatAreRedirects as $key => $val )
{
	fwrite( $fp, "*[[$key]]\n" );
}




fwrite( $stderr, "Doing final processing, step 2 of 3...\n\n" );



// now output the list of which articles in the All Articles index are missing an entry:

fwrite( $fp, "\n\n==The following articles in the [[Less Wrong/All Articles|All Articles]] index are missing an entry:==\n" );

foreach( $ArrayAllArticleNotIndexedConcepts as $key => $val )
{
	if( count( $val ) > 0 )
	{
		fwrite( $fp, "\n*[$ArrayAllArticleLink[$key] $ArrayAllArticleTitle[$key]] is missing the following concepts:\n" );

		foreach( $val as $key2 => $val2 )
		{
			fwrite( $fp, "**[[$val2]]\n" );
		}
	}
}



fwrite( $stderr, "Doing final processing, step 3 of 3...\n\n" );


// now output the list of which article links need to be added to the concept pages:

fwrite( $fp, "\n\n==The following article links need to be added to the concept pages:==\n" );

// for each concept
foreach( $ConceptFound as $key => $val )
{
	// make an array to store the missing links
	$MissingLinks = array();

	// for each article
	foreach( $ArrayAllArticleIndexedConcepts as $key2 => $val2 )
	{
		// check if the article contains the concept
		// array_search returns the index of the match, and index 0 is falsy,
		// so the result has to be compared against FALSE explicitly.
		// otherwise an article whose first concept is this one gets skipped.
		if( array_search($key, $val2) !== FALSE )
		{
			//fwrite( $stderr, "concept: $key \n link: $ArrayAllArticleLink[$key2] \n title: $ArrayAllArticleTitle[$key2]\n\n" );

			// check if the concept's link was not found
			// isset is used so that an article with no found concepts at all
			// (no entry for $key2) counts as "not found" instead of causing an error.
			// the entries that do exist are always set to true.
			if( ! isset($ArrayAllArticleFoundConcepts[$key2][$key]) )
			{
				// add it to the list of concepts whose links were not found
				$MissingLinks[] = "**[$ArrayAllArticleLink[$key2] $ArrayAllArticleTitle[$key2]] by $ArrayAllArticleAuthor[$key2]";

				//fwrite( $stderr, "concept: $key \n link: $ArrayAllArticleLink[$key2] \n title: $ArrayAllArticleTitle[$key2]\n\n" );
			}
		}
	}

	// if there are any not found article links, then output the list
	if( count($MissingLinks) > 0 )
	{
		fwrite( $fp, "\n*[[$key]] is missing the following article links:\n" );

		foreach( $MissingLinks as $key3 => $val3 )
		{
			fwrite( $fp, "$val3\n" );
		}
	}
}


// now output the list of "See also" links that aren't matched by a link in the other direction:

fwrite( $fp, "\n\n==The following See Also links only go one way:==\n" );

// for each concept
foreach( $ConceptSeeAlso as $key => $val )
{
	// for each of the concept's See Also links
	foreach( $val as $key2 => $val2 )
	{
		$Concept1 = $key;
		$Concept2 = $val2;

		// the link goes both ways if the linked concept has its own See Also list,
		// and that list contains this concept.
		// this looks up the other concept's list directly,
		// instead of searching through every list for every link.
		$MatchFound =
			isset( $ConceptSeeAlso[$Concept2] ) &&
			in_array( $Concept1, $ConceptSeeAlso[$Concept2] );

		if( !$MatchFound )
		{
			fwrite( $fp, "\n*[[$Concept1]] -> [[$Concept2]]" );
		}
	}
}



// the concepts that were found, sorted by title

fwrite( $fp, "\n\n==The following is a list of all concept pages:==\n\n" );

$SortedConcepts = $ConceptFound;

ksort($SortedConcepts);

foreach( $SortedConcepts as $key => $val )
{
	if( $val )
	{
		fwrite( $fp, "*[[$key]]\n" );
	}
}


fwrite( $fp, "\n\n==Links to the All Articles pages:==\n" );
fwrite( $fp, "*[[Less Wrong/All Articles]]\n" );
fwrite( $fp, "*[[Less Wrong/2006 Articles]]\n" );
fwrite( $fp, "*[[Less Wrong/2007 Articles]]\n" );
fwrite( $fp, "*[[Less Wrong/2008 Articles]]\n" );
fwrite( $fp, "*[[Less Wrong/2009 Articles]]\n" );
fwrite( $fp, "*[[Less Wrong/2010 Articles]]\n" );


fwrite( $fp, "\n\n==Links to the Summaries pages:==\n" );
fwrite( $fp, "*[[Less Wrong/Article summaries]]\n" );
fwrite( $fp, "*[[Less Wrong/2006 Articles/Summaries]]\n" );
fwrite( $fp, "*[[Less Wrong/2007 Articles/Summaries]]\n" );
fwrite( $fp, "*[[Less Wrong/2008 Articles/Summaries]]\n" );
fwrite( $fp, "*[[Less Wrong/2009 Articles/Summaries]]\n" );
fwrite( $fp, "*[[Less Wrong/2010 Articles/Summaries]]\n" );



fclose($fp);


// write the See Also relationships as a directed graph ("digraph") file
// NOTE(review): $fontsize, $PaddingValue1 and $PaddingValue2 are not set in
// this part of the script - presumably defined earlier; confirm.

$fp = fopen('ConceptGraph.dot.txt', 'w');

//{_COPYBLOCK1
fwrite($fp, "digraph G {\n\nnode [fontsize=\"$fontsize\"]\n\n");

// output the data for each concept
foreach( $ConceptSeeAlso as $key => $val )
{
	$NameWithSpaces = $key;
	$NameWithoutSpaces = str_replace(" ", "_", $NameWithSpaces);

	// quoted node id (spaces replaced by underscores), right-padded with spaces
	$curLine  = str_pad( "\"" . $NameWithoutSpaces . "\"", $PaddingValue1 );

	// the label keeps the spaces; its closing quote is part of the padded text
	$curLine .= "[label=\"";
	$curLine .= str_pad( $NameWithSpaces . "\"", $PaddingValue2 );

	$newURL = "http://wiki.lesswrong.com/wiki/" . $NameWithoutSpaces;

	$curLine .= ", URL=\"$newURL\"";

	$curLine .= "];\n";

	fwrite($fp, $curLine);
}

fwrite($fp, "\n\n");

// output the data for the edges
foreach( $ConceptSeeAlso as $key => $val )
{
	$Name1WithoutSpaces = str_replace(" ", "_", $key);

	foreach( $val as $key2 => $val2 )
	{
		$Name2WithoutSpaces = str_replace(" ", "_", $val2);

		$curLine  = str_pad( "\"" . $Name1WithoutSpaces . "\"", $PaddingValue1 );
		$curLine .= "-> ";
		$curLine .= "\"" . $Name2WithoutSpaces . "\"";
		$curLine .= "\n";

		fwrite($fp, $curLine);
	}
}

fwrite($fp, "\n\n}");

//}_COPYBLOCK1


fclose($fp);



// write one wiki table row per article: link, concept list, author

$fp = fopen('AllArticles.txt', 'w');


fwrite( $stderr, "Outputting new All Articles page...\n\n" );


$ProgressCounter = 0;

foreach( $ArrayAllArticleTitle as $key => $val )
{
	// collect the indexed concepts first, then the non-indexed ones,
	// keeping only the first occurrence of each name
	$TempConceptArray = array();
	$ConceptLinks = array();

	$ConceptLists = array(
		$ArrayAllArticleIndexedConcepts[$key],
		$ArrayAllArticleNotIndexedConcepts[$key]
	);

	foreach( $ConceptLists as $ConceptList )
	{
		foreach( $ConceptList as $val2 )
		{
			if( ! in_array($val2, $TempConceptArray) )
			{
				$TempConceptArray[] = $val2;
				$ConceptLinks[] = "[[$val2]]";
			}
		}
	}

	// comma-separated wiki links, with no trailing separator
	$ConceptString = implode( ", ", $ConceptLinks );

	fwrite( $fp, "|-valign=\"top\"\n" );
	fwrite( $fp, "| [$ArrayAllArticleLink[$key] $ArrayAllArticleTitle[$key]]\n" );
	fwrite( $fp, "| $ConceptString\n" );
	fwrite( $fp, "| $ArrayAllArticleAuthor[$key]\n" );

	$ProgressCounter++;
}

fwrite( $stderr, "Progress: $ProgressCounter\n" );



fclose($fp);




// write the used summaries; an article with no used summary gets no heading

$fp = fopen('ArticleSummaries.txt', 'w');


fwrite( $stderr, "Outputting article summaries...\n\n" );


$ProgressCounter = 0;

foreach( $ArrayAllArticleTitle as $key => $val )
{
	$IsFirstIteration = true;

	foreach( $ArrayAllArticleUsedSummaries[$key] as $key2 => $val2 )
	{
		if( $IsFirstIteration )
		{
			// the heading is written just before the article's first summary
			fwrite( $fp, "\n\n=====[$ArrayAllArticleLink[$key] $ArrayAllArticleTitle[$key]]=====\n\n" );

			$IsFirstIteration = false;
		}
		else
		{
			fwrite( $fp, "\n\n(alternate summary:)\n\n" );
		}

		fwrite( $fp, $val2 );
	}

	$ProgressCounter++;
}

fwrite( $stderr, "Progress: $ProgressCounter\n" );



fclose($fp);




// write a heading for every article, followed by its official summaries
// (looked up by article title) and then its used summaries

$fp = fopen('ArticleSummaries2.txt', 'w');


fwrite( $stderr, "Outputting article summaries...\n\n" );


$ProgressCounter = 0;

foreach( $ArrayAllArticleTitle as $key => $val )
{
	fwrite( $fp, "\n\n=====[$ArrayAllArticleLink[$key] $ArrayAllArticleTitle[$key]]=====\n\n" );

	// shared by both loops below, so that the separator is written before
	// every summary except the very first one for this article
	$IsFirstIteration = true;

	if( array_key_exists( $ArrayAllArticleTitle[$key], $ArrayAllArticleOfficialSummaries ) )
	{
		foreach( $ArrayAllArticleOfficialSummaries[$ArrayAllArticleTitle[$key]] as $key2 => $val2 )
		{
			if( $IsFirstIteration )
			{
				$IsFirstIteration = false;
			}
			else
			{
				fwrite( $fp, "\n\n(alternate summary:)\n\n" );
			}

			fwrite( $fp, $val2 );
		}
	}

	foreach( $ArrayAllArticleUsedSummaries[$key] as $key2 => $val2 )
	{
		if( $IsFirstIteration )
		{
			$IsFirstIteration = false;
		}
		else
		{
			fwrite( $fp, "\n\n(alternate summary:)\n\n" );
		}

		fwrite( $fp, $val2 );
	}

	$ProgressCounter++;
}

fwrite( $stderr, "Progress: $ProgressCounter\n" );



fclose($fp);


//*/


fwrite( $stderr, "Processing Recent Post List...\n\n" );



$handle = fopen('http://lesswrong.com/recentposts', 'r');

// fopen() returns false when the page cannot be opened; stop here instead of
// handing a non-resource to fread()
if( $handle === FALSE )
{
	fwrite( $stderr, "error reading file\n" );

	return "error reading file";
}

$RecentPostRawData = "";

// read in 8192-byte chunks until fread() returns an empty string or false
while( true )
{
	$buf = fread( $handle, 8192 );

	if( $buf === FALSE || $buf === '' )
	{
		break;
	}

	$RecentPostRawData .= $buf;
}

if( $buf === FALSE )
{
	// report the failure and release the handle before leaving the script
	fwrite( $stderr, "error reading file\n" );
	fclose($handle);

	return "error reading file";
}

fclose($handle);



$fp = fopen('RecentPosts.txt', 'w');

// read the data from the recent posts page, then output the data in reverse order

$ArrayRecentPostLines = array();


// parsing starts at the first <h3> and stops at the "View more:" paragraph
$PostLineStartPos  = strpos( $RecentPostRawData, "<h3>", 0);
$PostLineEndPos    = $PostLineStartPos;

$EndPos = strpos( $RecentPostRawData, "<p class=\"nextprev\">View more:", 0);
$NextArticleTitle = "";
$IterationCount = 0;
$PastTheEnd = false;

// Each post is read as two consecutive anchors: the first gives the article
// link and title, the second the author link and name.  For one anchor the
// pattern captures (1) the href value and (2) the text between the end of the
// opening tag and the next "<".
$AnchorPattern = '/<a href="([^"]*)"[^>]*>([^<]*)</';

// the iteration cap is a safety net against runaway parsing
while( $IterationCount < 1000 && ! $PastTheEnd )
{
	$ParsedAnchors = array();

	for( $AnchorIndex = 0; $AnchorIndex < 2; $AnchorIndex++ )
	{
		// a missing anchor ends the parse; adding to strpos()'s false result
		// would instead restart the scan near the top of the page
		$MatchCount = preg_match( $AnchorPattern, $RecentPostRawData, $AnchorMatch, PREG_OFFSET_CAPTURE, (int) $PostLineEndPos );

		if( $MatchCount !== 1 )
		{
			$PastTheEnd = true;
			break;
		}

		// start and end offsets of the anchor text; the next search resumes at the end
		$PostLineStartPos = $AnchorMatch[2][1];
		$PostLineEndPos   = $PostLineStartPos + strlen( $AnchorMatch[2][0] );

		$ParsedAnchors[] = array( $AnchorMatch[1][0], $AnchorMatch[2][0] );
	}

	if( $PastTheEnd )
	{
		break;
	}

	// $PostLineStartPos is now the offset of the author name.  When the
	// "View more:" marker was not found ($EndPos is false) nothing is accepted.
	if( $EndPos === false || $PostLineStartPos > $EndPos )
	{
		$PastTheEnd = true;
	}
	else
	{
		$NextArticleLink       = $ParsedAnchors[0][0];
		$NextArticleTitle      = str_replace( "&quot;", "\"", $ParsedAnchors[0][1] );
		$NextArticleAuthorLink = $ParsedAnchors[1][0];
		$NextArticleAuthorName = $ParsedAnchors[1][1];

		// one wiki table row: article link, empty concepts cell, author link
		$CurrentLineString  = "|-valign=\"top\"\n";
		$CurrentLineString .= "| [http://lesswrong.com$NextArticleLink $NextArticleTitle]\n";
		$CurrentLineString .= "| \n";
		$CurrentLineString .= "| [$NextArticleAuthorLink $NextArticleAuthorName]\n";

		$ArrayRecentPostLines[$IterationCount] = $CurrentLineString;

		$IterationCount++;
	}
}


// write the rows in the reverse of the order in which they were parsed
for( $index = $IterationCount-1; $index >= 0; $index-- )
{
	fwrite( $fp, $ArrayRecentPostLines[$index] );
}



fwrite( $stderr, "Done" );


fclose($fp);


// optional dump of the main data arrays to Debug.txt
if( $Debug )
{
	$fp = fopen('Debug.txt', 'w');

	// each ConceptFound entry is followed by an extra newline
	fwrite( $fp, "\n\n\n\n\n\n\n\n\n\n ConceptFound \n\n\n\n\n\n\n\n\n\n" );

	foreach( $ConceptFound as $key => $val )
	{
		fwrite( $fp, $key . "\n" . print_r($val, true) . "\n" );
	}

	// these arrays share one format: a banner, then key and print_r() dump per entry
	$DebugDumps = array(
		"ArrayAllArticleIndexedConcepts"    => $ArrayAllArticleIndexedConcepts,
		"ArrayAllArticleFoundConcepts"      => $ArrayAllArticleFoundConcepts,
		"ArrayAllArticleNotIndexedConcepts" => $ArrayAllArticleNotIndexedConcepts,
		"ConceptSeeAlso"                    => $ConceptSeeAlso,
		"ArrayAllArticleOfficialSummaries"  => $ArrayAllArticleOfficialSummaries
	);

	foreach( $DebugDumps as $DumpName => $DumpData )
	{
		fwrite( $fp, "\n\n\n\n\n\n\n\n\n\n $DumpName \n\n\n\n\n\n\n\n\n\n" );

		foreach( $DumpData as $key => $val )
		{
			fwrite( $fp, $key . "\n" . print_r($val, true) );
		}
	}

	// the used summaries are labelled with the article title instead of the key
	fwrite( $fp, "\n\n\n\n\n\n\n\n\n\n ArrayAllArticleUsedSummaries \n\n\n\n\n\n\n\n\n\n" );

	foreach( $ArrayAllArticleUsedSummaries as $key => $val )
	{
		fwrite( $fp, $ArrayAllArticleTitle[$key] . "\n" . print_r($val, true) );
	}


	fclose($fp);
}



fclose($stderr);



//

?>