1 | <?php |
---|
2 | // Links weblogs.com grabber |
---|
3 | // Copyright (C) 2003 Mike Little -- [email protected] |
---|
4 | |
---|
// This script lives one directory below the WordPress root, so the root is
// the parent of the directory containing this file.
$parentpath = dirname(dirname(__FILE__));

// Bootstrap WordPress configuration from the root directory.
require_once($parentpath . '/wp-config.php');

// Globals holding state shared between the functions and XML callbacks below:
//   $all_links          normalised url => array(link_id, update_time)
//   $updated_timestamp  timestamp taken from the feed's root element
$all_links = array();
$updated_timestamp = 0;
---|
13 | |
---|
/**
 ** preload_links()
 ** Pre-load the visible, non-blank links into the global associative array
 ** $all_links. The key is the url after transform_url(); the value is
 ** array(link_id, update_time).
 ** Note: update_time is initialised to 0, so that update_links() only has to
 ** write back the entries that the feed parser subsequently changed.
 ** If the query yields nothing (null or an empty set), $all_links is left
 ** untouched instead of iterating over a non-array.
 **/
function preload_links() {
    global $all_links, $wpdb;

    $links = $wpdb->get_results("SELECT link_id, link_url FROM $wpdb->links WHERE link_visible = 'Y' AND link_url <> ''");

    // Nothing to preload: avoid running foreach over null/false.
    if (empty($links)) {
        return;
    }

    foreach ($links as $link) {
        // Key on the normalised url so feed urls can be matched after the
        // same normalisation is applied to them.
        $link_url = transform_url($link->link_url);
        $all_links[$link_url] = array($link->link_id, 0);
    }
}
---|
29 | |
---|
30 | /** |
---|
31 | ** update_links() |
---|
32 | ** Update in the db the links which have been updated ($all_links[url][1] != 0) |
---|
33 | **/ |
---|
34 | function update_links() { |
---|
35 | global $all_links, $wpdb; |
---|
36 | reset($all_links); |
---|
37 | while (list($id, $val) = each($all_links)) { |
---|
38 | if ($val[1]) { |
---|
39 | $wpdb->query("UPDATE $wpdb->links SET link_updated = '$val[1]' WHERE link_id = $val[0]"); |
---|
40 | } |
---|
41 | } // end while |
---|
42 | } |
---|
43 | |
---|
/**
 ** get_weblogs_updatedfile()
 ** Retrieves and caches a copy of the weblogs.com changed blogs xml file in
 ** wp-content/links-update-cache.xml.
 ** A new copy is fetched when the global $ignore_weblogs_cache is set, when
 ** no cache file exists, or when the cache file is older than the
 ** 'weblogs_cacheminutes' setting.
 ** Returns true if a new copy has been written (needs processing).
 ** Returns false if the cache is still fresh, if the download yielded
 ** nothing usable, or if the cache file could not be written.
 **/
function get_weblogs_updatedfile() {
    global $ignore_weblogs_cache;
    $file = ABSPATH . 'wp-content/links-update-cache.xml';

    if ($ignore_weblogs_cache) {
        $update = true;
    } else if (file_exists($file)) {
        // is it old?
        $modtime = filemtime($file);
        $update = (time() - $modtime) > (get_settings('weblogs_cacheminutes') * 60);
    } else {
        // doesn't exist
        $update = true;
    }

    if (!$update) {
        return false; // cache is fresh, nothing to do
    }

    // get a new copy
    $a = @file(get_settings('weblogs_xml_url'));
    if (!is_array($a) || empty($a[0])) {
        return false; // don't try to process
    }
    $contents = implode('', $a);

    // Clean up the input, because weblogs.com doesn't output clean XML:
    // apostrophes become a numeric character reference, and anything that is
    // not whitespace, punctuation, a letter or a digit is dropped.
    $contents = str_replace("'", '&#39;', $contents);
    $contents = preg_replace('|[^[:space:][:punct:][:alpha:][:digit:]]|', '', $contents);

    $cachefp = fopen($file, "w");
    if (!$cachefp) {
        return false; // cache not writable, so there is nothing to parse
    }
    $written = fwrite($cachefp, $contents);
    fclose($cachefp);

    return $written !== false;
}
---|
88 | |
---|
/**
 ** startElement()
 ** Callback function. Called at the start of a new xml tag.
 **
 ** WEBLOGUPDATES: its UPDATED attribute is parsed with strtotime() and kept
 ** in the global $updated_timestamp. A missing or unparseable attribute
 ** leaves the previous value in place.
 ** WEBLOG: its URL attribute is normalised with transform_url(); if that url
 ** is a key of $all_links, the entry's update time is set to
 ** $updated_timestamp minus the WHEN attribute, formatted as YmdHis.
 ** An element without URL is ignored; a missing WHEN counts as 0.
 ** Any other tag is ignored.
 **/
function startElement($parser, $tagName, $attrs) {
    global $updated_timestamp, $all_links;

    if ($tagName == 'WEBLOGUPDATES') {
        if (!isset($attrs['UPDATED'])) {
            return;
        }
        // convert 'updated' into a php timestamp
        $parsed = strtotime($attrs['UPDATED']);
        if ($parsed !== false) {
            $updated_timestamp = $parsed;
        }
    } else if ($tagName == 'WEBLOG') {
        if (!isset($attrs['URL'])) {
            return;
        }
        // is this url in our links?
        $link_url = transform_url($attrs['URL']);
        if (isset($all_links[$link_url])) {
            // The int cast keeps the subtraction safe when the attribute is
            // not a clean number.
            $when = isset($attrs['WHEN']) ? (int) $attrs['WHEN'] : 0;
            $all_links[$link_url][1] = date('YmdHis', $updated_timestamp - $when);
        }
    }
}
---|
108 | |
---|
/**
 ** endElement()
 ** Callback function. Called at the end of an xml tag.
 ** Intentionally does nothing: all work happens in startElement(). It is
 ** defined so that it can be named as the closing-tag handler.
 **/
function endElement($parser, $tagName) {
    return;
}
---|
116 | |
---|
/**
 ** transform_url()
 ** Reduces a url to a minimal identifier used as a lookup key.
 **
 ** Every occurrence of "www." or "WWW." is removed, every occurrence of
 ** index.* or default.* (any case, extension of two or more letters) is
 ** removed, and finally a single trailing slash is dropped.
 **/
function transform_url($url) {
    // The two spellings are removed in order, exactly as two successive
    // str_replace() calls would.
    $minimal = str_replace(array('www.', 'WWW.'), '', $url);
    $minimal = preg_replace('/(?:index|default)\.[a-z]{2,}/i', '', $minimal);

    $last_char = substr($minimal, -1, 1);
    return ($last_char == '/') ? substr($minimal, 0, -1) : $minimal;
} // end transform_url
---|
135 | |
---|
// Refresh the cached feed. A true return means a new copy was written and
// has to be processed; otherwise there is nothing to do.
if (get_weblogs_updatedfile()) {
    // Load the links that the feed entries will be matched against.
    preload_links();

    // Create the XML parser and register the tag callbacks.
    $xml_parser = xml_parser_create();
    xml_set_element_handler($xml_parser, "startElement", "endElement");

    // Open the cached XML file for reading.
    $fp = fopen(ABSPATH . 'wp-content/links-update-cache.xml', "r");
    if (!$fp) {
        die("Error reading XML data.");
    }

    // Feed the file to the parser in chunks of up to 16384 bytes. The last
    // argument tells the parser whether end of file has been reached.
    while ($data = fread($fp, 16384)) {
        $parsed_ok = xml_parse($xml_parser, $data, feof($fp));
        if (!$parsed_ok) {
            die(sprintf("XML error: %s at line %d",
                xml_error_string(xml_get_error_code($xml_parser)),
                xml_get_current_line_number($xml_parser)));
        }
    }

    // Release the file handle and the parser.
    fclose($fp);
    xml_parser_free($xml_parser);

    // Now update the db with the latest times.
    update_links();
} // end if updated cache file
---|
173 | |
---|
174 | ?> |
---|