= 0) { libxml_disable_entity_loader(true); }

// NOTE(review): this copy of the script looks like it went through a tag-stripping step: the
// opening of the statement above, the markup inside most string literals, the heredoc in css()
// and part of the loop in showArticles() are missing. Those fragments are kept verbatim below
// and have to be restored from the upstream file before the script can run.

$CONFIG=parse_ini_file('vvb.ini') or die('Missing or bad config file vvb.ini'); // Read config file.
$CONFIG['ARTICLES_PER_PAGE']=10;
$CONFIG['DOWNLOAD_MEDIA_TYPES']=array('jpeg','jpg','gif','png','pdf','txt','odt'); // Media types which will be downloaded.
$CONFIG['MEDIA_TO_DOWNLOAD']=array(); // List of media to download in background.

// ==================================================================================================

/* Callback for the preg_replace_callback() call in remapImageUrls(): remaps one src=/href=
   attribute so that it points to the local cache.
   Input:  $matches = regex match ([1] = attribute name, [2] = URL).
   Output: the attribute, rewritten to ?m=<url> when the media is authorized, unchanged otherwise.
   Side effect: authorized URLs not yet in the cache are queued in $CONFIG['MEDIA_TO_DOWNLOAD']. */
function remap_callback($matches)
{
    global $CONFIG;
    $attr = $matches[1];
    $url = $matches[2];

    // Not authorized: do not remap URL.
    if (!mediaAuthorized($url)) { return $attr.'="'.$url.'"'; }

    // If media not present in the cache, add URL to list of media to download in background.
    if (!file_exists('media/'.sanitize($url))) { $CONFIG['MEDIA_TO_DOWNLOAD'][] = $url; }

    return $attr.'="?m='.$url.'"'; // Return remapped URL.
}

/* Remaps image URLs to point to local cache (src= and href=)
   eg. src="http://toto.com/..."  -->  src="?m=http://toto.com/..." */
function remapImageUrls($html)
{
    return preg_replace_callback("@(src|href)=[\"\'](.+?)[\"\']@i",'remap_callback',$html);
}

/* updateFeed(): Update the articles database (file 'store') from the feed at $CONFIG['FEED_URL'].
   ATOM, RSS 1.0/RDF and RSS 2.0 layouts are recognized.
   Articles deleted from the feed are not deleted from the database.
   You can force the refresh by passing ?force_the_refresh in URL. */
function updateFeed()
{
    global $CONFIG;

    // Only update feed if last check was less than 60 minutes ago,
    // but you can force it with force_the_refresh in GET parameters.
    if (@filemtime('store')>time()-(3600) && !isset($_GET['force_the_refresh'])) { return; }

    // Read database from disk. A missing or unreadable/corrupted store is treated as an empty
    // database so the array operations below always work on an array.
    $feed_items = (file_exists('store') ? unserialize(file_get_contents('store')) : array());
    if (!is_array($feed_items)) { $feed_items = array(); }

    // Read the feed and update the database.
    // If loading fails ($xml === false) none of the branches below matches; the store is then
    // rewritten unchanged, which also refreshes its modification time (used by the check above).
    $xml = simplexml_load_file($CONFIG['FEED_URL']);

    if (isset($xml->entry)) // ATOM feed.
    {
        foreach ($xml->entry as $item)
        {
            $pubDate = $item->published;
            if (!$pubDate) { $pubDate = $item->updated; }
            $i = array('title'=>strval($item->title),'link'=>strval($item->link['href']),'guid'=>strval($item->id),'pubDate'=>strval($pubDate),
                       'description'=>'','content'=>remapImageUrls(strval($item->content)));
            $i['dateiso'] = date('Ymd_His', strtotime($i['pubDate']));
            $feed_items[$i['dateiso']] = $i; // Keyed by date: an article with the same date overwrites the previous one.
        }
    }
    elseif (isset($xml->item)) // RSS 1.0 /RDF
    {
        foreach ($xml->item as $item)
        {
            $guid = $item->attributes('http://www.w3.org/1999/02/22-rdf-syntax-ns#')->about;
            $date = $item->children('http://purl.org/dc/elements/1.1/')->date;
            $content = $item->children('http://purl.org/rss/1.0/modules/content/');
            $i = array('title'=>strval($item->title),'link'=>strval($item->link),'guid'=>strval($guid),'pubDate'=>strval($date),
                       'description'=>strval($item->description),'content'=>remapImageUrls(strval($content)));
            $i['dateiso'] = date('Ymd_His', strtotime($i['pubDate']));
            $feed_items[$i['dateiso']] = $i;
        }
    }
    elseif (isset($xml->channel->item)) // RSS 2.0
    {
        foreach ($xml->channel->item as $item)
        {
            $content = strval($item->children('http://purl.org/rss/1.0/modules/content/')); // Get the content module element.
            if (!$content) { $content = strval($item->description); } // Some feeds put content in the description.
            $pubDate = $item->pubDate;
            if (!$pubDate) { $pubDate = $item->children('http://purl.org/dc/elements/1.1/')->date; } // Fall back to the Dublin Core date.
            $i = array('title'=>strval($item->title),'link'=>strval($item->link),'guid'=>strval($item->guid),'pubDate'=>strval($pubDate),
                       'description'=>strval($item->description),'content'=>remapImageUrls($content));
            $i['dateiso'] = date('Ymd_His', strtotime($i['pubDate']));
            $feed_items[$i['dateiso']] = $i;
        }
    }

    krsort($feed_items); // Sort array, latest articles first.
    file_put_contents('store', serialize($feed_items)); // Write database to disk
}

/* feed(): Returns the feed as an associative array (latest articles first).
   Key is timestamp in compact iso format (eg. '20110628_073208')
   Value is an associative array (title,link,content,pubDate...)
   Returns an empty array when the store is missing, unreadable or does not contain an array. */
function feed()
{
    if (!is_file('store')) { return array(); }
    $data = file_get_contents('store');
    if ($data === FALSE) { return array(); }
    $feed_items = unserialize($data); // 'store' is written by updateFeed() only.
    return is_array($feed_items) ? $feed_items : array();
}

/* Remove accents (é-->e): the text is entity-encoded, accented-letter entities are reduced to
   their base letter, then the remaining entities are decoded again. */
function replace_accents($str)
{
    $str = htmlentities($str, ENT_COMPAT, "UTF-8");
    $str = preg_replace('/&([a-zA-Z])(uml|acute|grave|circ|tilde);/','$1',$str);
    return html_entity_decode($str);
}

/* Sanitize strings for use in filename or URLs.
   Accents are removed, then every byte that is not alphanumeric, '_', '.' or '-' becomes '_'.
   Produces the same result as the former str_split()/str_replace() construction, in one pass. */
function sanitize($name)
{
    $fname = replace_accents($name);
    return preg_replace('/[^[:alnum:]_.-]/', '_', $fname);
}

/* Tells if a string starts with a substring or not.
   $case = true for a case-sensitive comparison, false for a case-insensitive one. */
function startsWith($haystack,$needle,$case=true)
{
    $head = substr($haystack, 0, strlen($needle));
    if ($case) { return (strcmp($head,$needle)===0); }
    return (strcasecmp($head,$needle)===0);
}

/* Tells if a string ends with a substring or not.
   $case = true for a case-sensitive comparison, false for a case-insensitive one. */
function endsWith($haystack,$needle,$case=true)
{
    $tail = substr($haystack, strlen($haystack) - strlen($needle));
    if ($case) { return (strcmp($tail,$needle)===0); }
    return (strcasecmp($tail,$needle)===0);
}

/* Returns the CSS stylesheet to include in HTML document */
function css()
{
    // NOTE(review): the heredoc body appears to be missing from this copy; the statement is kept
    // verbatim and does not parse as-is — TODO restore from upstream.
    return << HTML;
}

/* Render a single article
   $article : the article itself (associative array with title,pubDate,content,dateiso keys.) */
function renderArticle($article)
{
    echo "\n";
    echo "\n\n".$article['title']."\n\n".$article['pubDate'];
    if ($article['link']!='') { echo ' - (source)'; }
    echo "\n".$article['content']."\n";
    echo "\n";
}

function rssHeaderLink() { return ''; }

function searchForm() { return ''; }

function powered()
{
    return "\nPowered by VroumVroumBlog 0.1.32 - RSS Feed\nDownload config articles\n";
}

function canonical_metatag($url) { return ''; }

/* Show a single article
   $articleid = article identifier (eg.'20110629_010334')
   Stops the script with 'Article not found.' if the identifier is not in the database. */
function showArticle($articleid)
{
    global $CONFIG;
    header('Content-Type: text/html; charset=utf-8');
    $feed = feed();
    if (!array_key_exists($articleid,$feed)) { die('Article not found.'); }
    $a = $feed[$articleid];
    echo ''.$a['title'].' - '.$CONFIG['SITE_TITLE'].''.canonical_metatag($a['link']).css().rssHeaderLink().'';
    echo "\n\n".$CONFIG['SITE_TITLE']."\n\n".$CONFIG['SITE_DESCRIPTION'].searchForm()."\n";
    renderArticle($a);
    echo ''.powered().'';
}

/* Show a list of articles, starting at a specific page.
   $page = start page. First page is page 1 (smaller values are treated as 1). */
function showArticles($page)
{
    global $CONFIG;
    header('Content-Type: text/html; charset=utf-8');
    $feed = feed();
    $keys = array_keys($feed);
    echo ''.$CONFIG['SITE_TITLE'].''.canonical_metatag($CONFIG['SITE_URL']).css().rssHeaderLink().'';
    echo "\n\n".$CONFIG['SITE_TITLE']."\n\n".$CONFIG['SITE_DESCRIPTION'].searchForm()."\n";

    $page = max(1, (int)$page); // ?page0 would otherwise give a negative start index.
    $i = ($page-1)*$CONFIG['ARTICLES_PER_PAGE']; // Start index.
    $end = $i+$CONFIG['ARTICLES_PER_PAGE'];

    // NOTE(review): the rest of the loop condition, the loop body and the opening of the next
    // echo are missing from this copy; presumably each $feed[$keys[$i]] was rendered here —
    // TODO restore from upstream. The fragment is kept verbatim.
    while ($i<$end && $i
';
    if ($i!=count($keys)) { echo ''; }
    echo '';
    if ($page>1) { echo ''; }
    echo "\n".powered().'';
}

/* Search for text in articles content and title.
   $text = text to search (URL-encoded). The comparison is case-insensitive.
   An empty search text matches nothing. */
function search($text)
{
    global $CONFIG;
    header('Content-Type: text/html; charset=utf-8');
    $txt = urldecode($text);
    echo ''.$CONFIG['SITE_TITLE'].''.css().rssHeaderLink().'';
    echo "\n\n".$CONFIG['SITE_TITLE']."\n\n".$CONFIG['SITE_DESCRIPTION'].searchForm()."\n";
    echo "\nSearch for ".htmlspecialchars($txt)." :\n";

    $feed = ($txt === '') ? array() : feed();
    foreach ($feed as $article)
    {
        // stripos() returns 0 for a match at the very beginning: compare against false explicitly.
        if (stripos($article['content'],$txt) !== false || stripos($article['title'],$txt) !== false)
        {
            renderArticle($article);
        }
    }
    echo ''.powered().'';
}

/* Tells if a media URL should be downloaded or not.
   Input:  $url = absolute URL of a media (jpeg,pdf...)
   Output: true = can download.
           false = should not download (no host, wrong host, wrong file extension)
   Authorized hosts come from the comma-separated $CONFIG['DOWNLOAD_MEDIA_FROM'] (spaces around
   entries are ignored); authorized extensions from $CONFIG['DOWNLOAD_MEDIA_TYPES']. */
function mediaAuthorized($url)
{
    global $CONFIG;

    // A URL without a host can never match: with a loose comparison a missing host would have
    // been equal to an empty entry in the host list.
    $srchost = parse_url($url,PHP_URL_HOST);
    if (!is_string($srchost) || $srchost === '') { return false; }

    $hosts = array_map('trim', explode(',',$CONFIG['DOWNLOAD_MEDIA_FROM']));
    if (!in_array($srchost,$hosts,true)) { return false; } // Wrong host.

    $ext = pathinfo($url, PATHINFO_EXTENSION); // Get file extension (eg.'png','gif'...)
    if (!in_array(strtolower($ext),$CONFIG['DOWNLOAD_MEDIA_TYPES'],true)) { return false; } // Not in authorized file extensions.
    return true;
}

// Returns the MIME type corresponding to a file extension.
// (I do not trust mime_content_type() because of some dodgy hosting providers with ill-configured magic.mime file.)
function mime_type($filename)
{
    // One entry per extension listed in $CONFIG['DOWNLOAD_MEDIA_TYPES'].
    $MIME_TYPES = array('.jpg'=>'image/jpeg','.jpeg'=>'image/jpeg','.png'=>'image/png','.gif'=>'image/gif',
                        '.pdf'=>'application/pdf',
                        '.txt'=>'text/plain','.odt'=>'application/vnd.oasis.opendocument.text');
    foreach ($MIME_TYPES as $extension=>$mime_type)
    {
        if (endsWith($filename,$extension,false)) { return $mime_type; }
    }
    return 'application/octet-stream'; // For an unknown extension.
}

// Returns a media from the local cache (and download it if not available).
// Answers 404 when the media is not authorized or could not be fetched.
function showMedia($imgurl)
{
    if (!mediaAuthorized($imgurl)) { header('HTTP/1.1 404 Not Found'); return; }
    downloadMedia($imgurl); // Will only download if necessary.
    $filename = 'media/'.sanitize($imgurl);
    if (!is_file($filename)) { header('HTTP/1.1 404 Not Found'); return; } // Download failed.
    header('Content-Type: '.mime_type($filename));
    readfile($filename);
}

// Download a media to local cache (if necessary).
// Nothing is written when the download fails, so a later request can try again.
function downloadMedia($imgurl)
{
    $filename = 'media/'.sanitize($imgurl);
    if (file_exists($filename)) { return; } // Only download image if not present

    if (!is_dir('media')) { mkdir('media',0705); file_put_contents('media/index.html',' '); }

    $data = file_get_contents($imgurl, false, null, 0, 4000000); // We download at most 4 Mb from source.
    if ($data === false) { return; }
    file_put_contents($filename, $data);
}

/* Output the whole feed in RSS 2.0 format with article content (BIG!) */
function outputFeed()
{
    global $CONFIG;
    header('Content-Type: application/xhtml+xml; charset=utf-8');
    echo '';
    echo ''.htmlspecialchars($CONFIG['SITE_TITLE']).''.htmlspecialchars($CONFIG['SITE_URL']).'';
    echo ''.htmlspecialchars($CONFIG['SITE_URL']).''."\n\n";
    $feed = feed();
    foreach ($feed as $a)
    {
        echo ''.$a['title'].''.$a['guid'].'http://'.$_SERVER["HTTP_HOST"].$_SERVER["SCRIPT_NAME"].'?'.$a['dateiso'].'_'.sanitize($a['title']).''.$a['pubDate'].'';
        echo ''."\n\n";
    }
    echo '';
}

// ==================================================================================================

// Update feed if necessary. (you can force refresh with ?force_the_refresh in URL)
updateFeed();

// Handle media download requests (eg. http://myserver.com/?m=http___anotherserver.net_images_myimage.jpg)
if (startsWith($_SERVER["QUERY_STRING"],'m=')) { showMedia(substr($_SERVER["QUERY_STRING"],2)); }

// Handle single article URI (eg. http://myserver.com/?20110506_224455-chit-chat)
elseif (preg_match('/^(\d{8}_\d{6})/',$_SERVER["QUERY_STRING"],$matches)) { showArticle($matches[1]); }

// Handle page URI (eg. http://myserver.com/?page5)
elseif (preg_match('/^page(\d+)/',$_SERVER["QUERY_STRING"],$matches)) { showArticles($matches[1]); }

// Handle RSS 2.0 feed request (http://myserver.com/?feed)
elseif (startsWith($_SERVER["QUERY_STRING"],'feed')) { outputFeed(); }

// Handle search request (eg. http://myserver.com/?s=tuto4pc)
elseif (startsWith($_SERVER["QUERY_STRING"],'s=')) { search(substr($_SERVER["QUERY_STRING"],2)); }

// Nothing ? Then render page1.
else { showArticles(1); }

// Force flush, rendered page is fully sent to browser.
ob_end_flush();
flush();

// Now we've finished rendering the page and sending to the user,
// it's time for some background tasks: Are there media to download ?
foreach ($CONFIG['MEDIA_TO_DOWNLOAD'] as $url) { downloadMedia($url); }
exit;
?>