WordPress/wp-includes/links-update-xml.php

<?php
// Links weblogs.com grabber
// Copyright (C) 2003 Mike Little -- mike@zed1.com

// Get the path of our parent directory:
$parentpath = dirname(dirname(__FILE__));

require_once($parentpath.'/wp-config.php');

// globals to hold state
// $updated_timestamp: starts at 0; startElement() overwrites it with the
// strtotime() of the WEBLOGUPDATES tag's 'UPDATED' attribute.
$updated_timestamp = 0;
// $all_links: filled by preload_links(); keyed by the transformed url, each
// value is array(link_id, update_time).
$all_links = array();

/**
 ** preload_links()
 ** Pre-load the visible, non-blank, links into the global associative
 ** array $all_links.
 ** key is the url after transform_url(), value is array(link_id, update_time)
 ** Note: update time is initialised to 0. That way we only have to update (in
 ** the db) the ones which have been updated (on weblogs.com).
 ** When the query hands back anything other than an array, $all_links is
 ** left untouched.
 **/
function preload_links() {
	global $all_links, $wpdb;
	$links = $wpdb->get_results("SELECT link_id, link_url FROM $wpdb->links WHERE link_visible = 'Y' AND link_url <> ''");
	// NOTE(review): get_results() is defined elsewhere; it presumably returns
	// a non-array (null) when no row matches. Iterating that would raise an
	// "invalid argument supplied for foreach" warning, so bail out instead.
	if (!is_array($links)) {
		return;
	}
	foreach ($links as $link) {
		// Index by the normalised url so feed entries can be matched with isset().
		$link_url = transform_url($link->link_url);
		$all_links[$link_url] = array($link->link_id, 0);
	}
}

/**
 ** update_links()
 ** Update in the db the links which have been updated ($all_links[url][1] != 0)
 ** Each entry of the global $all_links is array(link_id, update_time); one
 ** UPDATE query is issued per entry whose update_time is non-zero.
 **/
function update_links() {
	global $all_links, $wpdb;
	// foreach rather than reset()/each(): each() was deprecated in PHP 7.2 and
	// removed in PHP 8.0, and foreach always starts at the first element anyway.
	foreach ($all_links as $link) {
		if ($link[1]) {
			// link_id is numeric; cast it so nothing but an integer reaches the SQL.
			$link_id = (int) $link[0];
			$wpdb->query("UPDATE $wpdb->links SET link_updated = '$link[1]' WHERE link_id = $link_id");
		}
	}
}

/**
 ** get_weblogs_updatedfile()
 ** Retrieves and caches a copy of the weblogs.com changed blogs xml file.
 ** The cache lives at ABSPATH . 'wp-content/links-update-cache.xml'. A new
 ** copy is fetched when $ignore_weblogs_cache is set, when the cache file
 ** does not exist, or when it is more than 90 seconds old.
 ** Returns true only if a new copy has been fetched AND written (needs
 ** processing); returns false when the cache is still fresh, when the
 ** download came back empty, or when the cache file could not be written
 ** (nothing to do / nothing safe to process).
 **/
function get_weblogs_updatedfile() {
	global $ignore_weblogs_cache;
	$file = ABSPATH . 'wp-content/links-update-cache.xml';

	// Decide whether a refresh is due: forced, missing, or older than 1.5 minutes.
	$update = false;
	if ($ignore_weblogs_cache) {
		$update = true;
	} else if (!file_exists($file)) {
		$update = true;
	} else if ((time() - filemtime($file)) > (1.5 * 60)) {
		$update = true;
	}

	if (!$update) {
		return false;
	}

	// get a new copy; file() yields an array of lines or false on failure
	$a = @file(get_settings('weblogs_xml_url'));
	if (!is_array($a) || !count($a) || !$a[0]) {
		return false; //don't try to process
	}
	$contents = implode('', $a);

	// Clean up the input, because weblogs.com doesn't output clean XML:
	// apostrophes become the &#39; entity, then every character outside the
	// space/punct/alpha/digit classes is dropped.
	$contents = preg_replace("/'/",'&#39;',$contents);
	$contents = preg_replace('|[^[:space:][:punct:][:alpha:][:digit:]]|','',$contents);

	// Write to the same path that was age-checked above. If the cache cannot
	// be opened or written, report "nothing to process" rather than letting
	// the caller parse a missing or stale file.
	$cachefp = fopen($file, "w");
	if (!$cachefp) {
		return false;
	}
	$written = fwrite($cachefp, $contents);
	fclose($cachefp);

	return ($written !== false);
}

/**
 ** startElement()
 ** Callback function. Called at the start of a new xml tag.
 **
 ** $parser  the xml parser invoking the callback (unused)
 ** $tagName name of the tag being opened; compared against the upper-case
 **          names 'WEBLOGUPDATES' and 'WEBLOG'
 ** $attrs   associative array of the tag's attributes
 **
 ** WEBLOGUPDATES: stores strtotime() of the 'UPDATED' attribute in the global
 ** $updated_timestamp (0 when the attribute is missing or unparseable).
 ** WEBLOG: if the transformed 'URL' is a key of the global $all_links, its
 ** update time is set to $updated_timestamp minus 'WHEN', formatted
 ** 'YmdHis'. Entries lacking URL/WHEN, carrying a non-numeric WHEN, or
 ** arriving before a usable timestamp is known are skipped.
 **/
function startElement($parser, $tagName, $attrs) {
	global $updated_timestamp, $all_links;
	if ($tagName == 'WEBLOGUPDATES') {
		//convert 'updated' into php date variable
		$parsed = isset($attrs['UPDATED']) ? strtotime($attrs['UPDATED']) : false;
		// strtotime() signals failure with false (-1 on very old PHP); store 0
		// so the WEBLOG branch can tell that no usable timestamp exists.
		$updated_timestamp = ($parsed === false || $parsed < 0) ? 0 : $parsed;
	} else if ($tagName == 'WEBLOG') {
		// Without a base timestamp or the needed attributes there is nothing
		// sensible to compute; arithmetic on a non-numeric WHEN would throw a
		// TypeError on PHP 8.
		if (!$updated_timestamp
			|| !isset($attrs['URL'])
			|| !isset($attrs['WHEN'])
			|| !is_numeric($attrs['WHEN'])) {
			return;
		}
		// is this url in our links?
		$link_url = transform_url($attrs['URL']);
		if (isset($all_links[$link_url])) {
			// WHEN is subtracted from the file's timestamp to get the update time.
			$all_links[$link_url][1] = date('YmdHis', $updated_timestamp - (int) $attrs['WHEN']);
		}
	}
}

/**
 ** endElement()
 ** Callback function. Called at the end of an xml tag.
 ** Registered alongside startElement() via xml_set_element_handler() further
 ** down in this file; closing tags carry nothing this script uses, so the
 ** body is intentionally empty.
 **/
function endElement($parser, $tagName) {
	// nothing to do.
}

/**
 ** transform_url()
 ** Transforms a url to a minimal identifier.
 **
 ** Remove a leading www. host label (any letter case), remove a trailing
 ** index.* or default.* file name, remove trailing slash.
 **
 ** Only a "www." at the very start of the host is dropped, and index.* /
 ** default.* is dropped only when it is a whole path segment followed by the
 ** end of the url, a '?' or a '#'. Host names that merely contain those
 ** strings (blogindex.com, awww.example.com, default.example.com) are kept.
 **
 ** $url    the url to normalise
 ** returns the normalised url string
 **/
function transform_url($url) {
	// Leading "www." label, optionally preceded by a scheme, which is kept as $1.
	$url = preg_replace('~^([a-z][a-z0-9+.-]*://)?www\.~i', '$1', $url);

	// Directory-index file name. $1 captures scheme + host + directories up to
	// and including the last '/'. The scheme sits in an atomic group so the
	// engine cannot backtrack and treat "http:" as the host and the real host
	// as a file name.
	$url = preg_replace(
		'~^((?>(?:[a-z][a-z0-9+.-]*://)?)[^/?#]+/(?:[^?#]*/)?)(?:index|default)\.[a-z0-9]{2,}(?=$|[?#])~i',
		'$1',
		$url
	);

	if (substr($url, -1, 1) == '/') {
		$url = substr($url, 0, -1);
	}
	return $url;
} // end transform_url

// get/update the cache file.
// true return means a new copy was fetched and needs processing
if (get_weblogs_updatedfile()) {
	// pre-load the links into $all_links
	preload_links();

	// Create an XML parser. Case folding is left at the parser default, which
	// is why startElement() compares against upper-case tag/attribute names.
	$xml_parser = xml_parser_create();

	// Set the functions to handle opening and closing tags
	xml_set_element_handler($xml_parser, "startElement", "endElement");

	// Open the XML file for reading
	$fp = fopen(ABSPATH . 'wp-content/links-update-cache.xml', "r")
		  or die("Error reading XML data.");

	// Read the XML file 16KB at a time. The result is compared explicitly
	// against false/'' because a bare truthiness test would end the loop
	// early on a chunk that happens to be exactly "0".
	while (($data = fread($fp, 16384)) !== false && $data !== '') {
		// Feed each chunk to the parser; feof() marks the final chunk
		xml_parse($xml_parser, $data, feof($fp))
				or die(sprintf("XML error: %s at line %d",
					xml_error_string(xml_get_error_code($xml_parser)),
					xml_get_current_line_number($xml_parser)));
	}

	// Close the XML file
	fclose($fp);

	// Free up memory used by the XML parser
	xml_parser_free($xml_parser);

	// now update the db with latest times
	update_links();
} // end if updated cache file

?>
Moved wp-links files to wp-includes or wp-images where appropiate. Updated necessary paths. git-svn-id: http://svn.automattic.com/wordpress/trunk@630 1a063a9b-81f0-0310-95a4-ce76da25c4cd 2003-12-18 11:12:34 +01:00			`<?php`
			`// Links weblogs.com grabber`
			`// Copyright (C) 2003 Mike Little -- mike@zed1.com`

Fixed include path problem for links-update-xml git-svn-id: http://svn.automattic.com/wordpress/trunk@737 1a063a9b-81f0-0310-95a4-ce76da25c4cd 2004-01-07 22:02:26 +01:00			`// Get the path of our parent directory:`
			`$parentpath = dirname(dirname(__FILE__));`

			`require_once($parentpath.'/wp-config.php');`
Moved wp-links files to wp-includes or wp-images where appropiate. Updated necessary paths. git-svn-id: http://svn.automattic.com/wordpress/trunk@630 1a063a9b-81f0-0310-95a4-ce76da25c4cd 2003-12-18 11:12:34 +01:00
			`// globals to hold state`
			`$updated_timestamp = 0;`
			`$all_links = array();`

			`/**`
			`** preload_links()`
			`** Pre-load the visible, non-blank, links into an associative array $all_links`
			`** key is url, value is array of link_id and update_time`
			`** Note: update time is initialised to 0. That way we only have to update (in`
			`** the db) the ones which have been updated (on weblogs.com).`
			`**/`
			`function preload_links() {`
Giant commit, sorry mailing list people. Move all table names to new $wpdb versions. Works but the whole app needs thorough testing now. git-svn-id: http://svn.automattic.com/wordpress/trunk@1355 1a063a9b-81f0-0310-95a4-ce76da25c4cd 2004-05-24 10:22:18 +02:00			`global $all_links, $wpdb;`
			`$links = $wpdb->get_results("SELECT link_id, link_url FROM $wpdb->links WHERE link_visible = 'Y' AND link_url <> ''");`
Moved wp-links files to wp-includes or wp-images where appropiate. Updated necessary paths. git-svn-id: http://svn.automattic.com/wordpress/trunk@630 1a063a9b-81f0-0310-95a4-ce76da25c4cd 2003-12-18 11:12:34 +01:00			`foreach ($links as $link) {`
			`$link_url = transform_url($link->link_url);`
			`$all_links[$link_url] = array($link->link_id, 0);`
			`}`
			`}`

			`/**`
			`** update_links()`
			`** Update in the db the links which have been updated ($all_links[url][1] != 0)`
			`**/`
			`function update_links() {`
Giant commit, sorry mailing list people. Move all table names to new $wpdb versions. Works but the whole app needs thorough testing now. git-svn-id: http://svn.automattic.com/wordpress/trunk@1355 1a063a9b-81f0-0310-95a4-ce76da25c4cd 2004-05-24 10:22:18 +02:00			`global $all_links, $wpdb;`
Moved wp-links files to wp-includes or wp-images where appropiate. Updated necessary paths. git-svn-id: http://svn.automattic.com/wordpress/trunk@630 1a063a9b-81f0-0310-95a4-ce76da25c4cd 2003-12-18 11:12:34 +01:00			`reset($all_links);`
			`while (list($id, $val) = each($all_links)) {`
			`if ($val[1]) {`
Giant commit, sorry mailing list people. Move all table names to new $wpdb versions. Works but the whole app needs thorough testing now. git-svn-id: http://svn.automattic.com/wordpress/trunk@1355 1a063a9b-81f0-0310-95a4-ce76da25c4cd 2004-05-24 10:22:18 +02:00			`$wpdb->query("UPDATE $wpdb->links SET link_updated = '$val[1]' WHERE link_id = $val[0]");`
Moved wp-links files to wp-includes or wp-images where appropiate. Updated necessary paths. git-svn-id: http://svn.automattic.com/wordpress/trunk@630 1a063a9b-81f0-0310-95a4-ce76da25c4cd 2003-12-18 11:12:34 +01:00			`}`
			`} // end while`
			`}`

			`/**`
			`** get_weblogs_updatedfile()`
			`** Retrieves and caches a copy of the weblogs.com changed blogs xml file.`
			`** If the file exists check it's age, get new copy if old.`
			`** If a new or updated file has been written return true (needs processing)`
			`** otherwise return false (nothing to do)`
			`**/`
			`function get_weblogs_updatedfile() {`
Lots of fixes. git-svn-id: http://svn.automattic.com/wordpress/trunk@714 1a063a9b-81f0-0310-95a4-ce76da25c4cd 2004-01-04 08:40:15 +01:00			`global $ignore_weblogs_cache;`
Moved wp-links files to wp-includes or wp-images where appropiate. Updated necessary paths. git-svn-id: http://svn.automattic.com/wordpress/trunk@630 1a063a9b-81f0-0310-95a4-ce76da25c4cd 2003-12-18 11:12:34 +01:00			`$update = false;`
Don't need slash at the beginning. git-svn-id: http://svn.automattic.com/wordpress/trunk@1225 1a063a9b-81f0-0310-95a4-ce76da25c4cd 2004-05-05 08:44:59 +02:00			`$file = ABSPATH . 'wp-content/links-update-cache.xml';`
Moved wp-links files to wp-includes or wp-images where appropiate. Updated necessary paths. git-svn-id: http://svn.automattic.com/wordpress/trunk@630 1a063a9b-81f0-0310-95a4-ce76da25c4cd 2003-12-18 11:12:34 +01:00			`if ($ignore_weblogs_cache) {`
			`$update = true;`
			`} else {`
Lots of fixes. git-svn-id: http://svn.automattic.com/wordpress/trunk@714 1a063a9b-81f0-0310-95a4-ce76da25c4cd 2004-01-04 08:40:15 +01:00			`if (file_exists($file)) {`
Moved wp-links files to wp-includes or wp-images where appropiate. Updated necessary paths. git-svn-id: http://svn.automattic.com/wordpress/trunk@630 1a063a9b-81f0-0310-95a4-ce76da25c4cd 2003-12-18 11:12:34 +01:00			`// is it old?`
Lots of fixes. git-svn-id: http://svn.automattic.com/wordpress/trunk@714 1a063a9b-81f0-0310-95a4-ce76da25c4cd 2004-01-04 08:40:15 +01:00			`$modtime = filemtime($file);`
Massive options cleanup and another step of cleaning up the upgrade/install. git-svn-id: http://svn.automattic.com/wordpress/trunk@1599 1a063a9b-81f0-0310-95a4-ce76da25c4cd 2004-09-05 02:24:28 +02:00			`if ((time() - $modtime) > (1.5 * 60)) {`
Moved wp-links files to wp-includes or wp-images where appropiate. Updated necessary paths. git-svn-id: http://svn.automattic.com/wordpress/trunk@630 1a063a9b-81f0-0310-95a4-ce76da25c4cd 2003-12-18 11:12:34 +01:00			`$update = true;`
			`}`
			`} else { // doesn't exist`
			`$update = true;`
			`}`
			`}`

			`if ($update) {`
			`// get a new copy`
			`$a = @file(get_settings('weblogs_xml_url'));`
			`if ($a != false && count($a) && $a[0]) {`
			`$contents = implode('', $a);`

			`// Clean up the input, because weblogs.com doesn't output clean XML`
			`$contents = preg_replace("/'/",'&#39;',$contents);`
			`$contents = preg_replace('|[^[:space:][:punct:][:alpha:][:digit:]]|','',$contents);`

Don't need slash at the beginning. git-svn-id: http://svn.automattic.com/wordpress/trunk@1225 1a063a9b-81f0-0310-95a4-ce76da25c4cd 2004-05-05 08:44:59 +02:00			`$cachefp = fopen(ABSPATH . 'wp-content/links-update-cache.xml', "w");`
Moved wp-links files to wp-includes or wp-images where appropiate. Updated necessary paths. git-svn-id: http://svn.automattic.com/wordpress/trunk@630 1a063a9b-81f0-0310-95a4-ce76da25c4cd 2003-12-18 11:12:34 +01:00			`fwrite($cachefp, $contents);`
			`fclose($cachefp);`
			`} else {`
			`return false; //don't try to process`
			`}`
			`}`
			`return $update;`
			`}`

			`/**`
			`** startElement()`
			`** Callback function. Called at the start of a new xml tag.`
			`**/`
			`function startElement($parser, $tagName, $attrs) {`
			`global $updated_timestamp, $all_links;`
			`if ($tagName == 'WEBLOGUPDATES') {`
			`//convert 'updated' into php date variable`
			`$updated_timestamp = strtotime($attrs['UPDATED']);`
			`//echo('got timestamp of ' . gmdate('F j, Y, H:i:s', $updated_timestamp) . "\n");`
			`} else if ($tagName == 'WEBLOG') {`
			`// is this url in our links?`
			`$link_url = transform_url($attrs['URL']);`
			`if (isset($all_links[$link_url])) {`
			`$all_links[$link_url][1] = date('YmdHis', $updated_timestamp - $attrs['WHEN']);`
			`//echo('set link id ' . $all_links[$link_url][0] . ' to date ' . $all_links[$link_url][1] . "\n");`
			`}`
			`}`
			`}`

			`/**`
			`** endElement()`
			`** Callback function. Called at the end of an xml tag.`
			`**/`
			`function endElement($parser, $tagName) {`
			`// nothing to do.`
			`}`

			`/**`
			`** transform_url()`
			`** Transforms a url to a minimal identifier.`
			`**`
			`** Remove www, remove index.* or default.*, remove`
			`** trailing slash`
			`**/`
			`function transform_url($url) {`
			`//echo("transform_url(): $url ");`
			`$url = str_replace('www.', '', $url);`
			`$url = str_replace('WWW.', '', $url);`
			`$url = preg_replace('/(?:index|default)\.[a-z]{2,}/i', '', $url);`
			`if (substr($url, -1, 1) == '/') {`
			`$url = substr($url, 0, -1);`
			`}`
			`//echo(" now equals $url\n");`
			`return $url;`
			`} // end transform_url`

			`// get/update the cache file.`
			`// true return means new copy`
			`if (get_weblogs_updatedfile()) {`
			`//echo('<pre>');`
			`// pre-load the links`
			`preload_links();`

			`// Create an XML parser`
			`$xml_parser = xml_parser_create();`

			`// Set the functions to handle opening and closing tags`
			`xml_set_element_handler($xml_parser, "startElement", "endElement");`

			`// Open the XML file for reading`
Don't need slash at the beginning. git-svn-id: http://svn.automattic.com/wordpress/trunk@1225 1a063a9b-81f0-0310-95a4-ce76da25c4cd 2004-05-05 08:44:59 +02:00			`$fp = fopen(ABSPATH . 'wp-content/links-update-cache.xml', "r")`
Moved wp-links files to wp-includes or wp-images where appropiate. Updated necessary paths. git-svn-id: http://svn.automattic.com/wordpress/trunk@630 1a063a9b-81f0-0310-95a4-ce76da25c4cd 2003-12-18 11:12:34 +01:00			`or die("Error reading XML data.");`

			`// Read the XML file 16KB at a time`
			`while ($data = fread($fp, 16384)) {`
			`// Parse each 4KB chunk with the XML parser created above`
			`xml_parse($xml_parser, $data, feof($fp))`
			`or die(sprintf("XML error: %s at line %d",`
			`xml_error_string(xml_get_error_code($xml_parser)),`
			`xml_get_current_line_number($xml_parser)));`
			`}`

			`// Close the XML file`
			`fclose($fp);`

			`// Free up memory used by the XML parser`
			`xml_parser_free($xml_parser);`

			`// now update the db with latest times`
			`update_links();`

			`//echo('</pre>');`
			`} // end if updated cache file`

			`?>`