A few weeks ago, I wrote a prototype for the meme tracking feature of RSS Bandit in IronPython. The code was included in my blog post A Meme Tracker In IronPython. The script was a port of Sam Ruby's original MeMeme script which shows the most recently popular links from a set of RSS feeds.
I was impressed with how succinct the code was in IronPython when compared to what the code eventually looked like when I ported it to C# 2.0 and integrated it into RSS Bandit. Looking over the list of new features in C# 3.0, it occurred to me that a C# 3.0 version of the script would be as concise or even more concise than the IronPython version. So I ported the script to C# 3.0 and learned a few things along the way.
I'll post something shortly that goes into some details on my perspectives on the pros and cons of the various C# 3.0 features when compared to various Python features. For now, here's the meme tracker script in C# 3.0. Comparing it to the IronPython version should provide some food for thought.
using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using System.IO;
using System.Xml;
using System.Xml.Linq;
using System.Xml.XPath;
using System.Globalization;
namespace Memetracker {
/// <summary>
/// Selects which feed items Program.Main takes into account when ranking links.
/// </summary>
enum MemeMode {
    /// <summary>Rank links found in items that have not been read yet.</summary>
    PopularInUnread,

    /// <summary>Rank links found in items dated within the past week.</summary>
    PopularInPastWeek
}
/// <summary>
/// A URL paired with the total score it received once all votes were tallied.
/// </summary>
class RankedLink
{
    /// <summary>The link target being ranked.</summary>
    public string Url { get; set; }

    /// <summary>Sum of the per-feed vote weights for <see cref="Url"/>.</summary>
    public double Score { get; set; }
}
/// <summary>
/// One vote for a link: the item that linked to it, the feed that item came
/// from, and how much the vote counts.
/// </summary>
class Vote
{
    /// <summary>How much this vote contributes to the link's score.</summary>
    public double Weight { get; set; }

    /// <summary>The feed item that contained the link.</summary>
    public RssItem Item { get; set; }

    /// <summary>Title of the feed <see cref="Item"/> was read from.</summary>
    public string FeedTitle { get; set; }
}
/// <summary>
/// The subset of an RSS item that the meme tracker works with.
/// </summary>
class RssItem
{
    /// <summary>Text of the item's title element.</summary>
    public string Title { get; set; }

    /// <summary>Publication date of the item.</summary>
    public DateTime Date { get; set; }

    /// <summary>Whether the item has been marked as read.</summary>
    public bool Read { get; set; }

    /// <summary>Text of the item's link element.</summary>
    public string Permalink { get; set; }

    /// <summary>Links found in the item's description: href value mapped to link text.</summary>
    public Dictionary<string, string> OutgoingLinks { get; set; }
}
class Program {
// Every outgoing URL seen so far, mapped to the votes cast for it (Main adds one vote per item that links to the URL).
static Dictionary<string, List<Vote>> all_links = new Dictionary<string, List<Vote>>();
// Seven days; Main uses it both as the age cutoff for items and as the scale for age-based vote weights.
static TimeSpan one_week = new TimeSpan(7, 0, 0, 0);
// Ranking mode; defaults to past-week popularity and can be overridden by args[1] in Main.
static MemeMode mode = MemeMode.PopularInPastWeek;
// Directory scanned for *.xml feed files; defaults to "Temp" under the LocalApplicationData folder and can be overridden by args[0] in Main.
static string cache_location = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData), "Temp");
// Pattern for HTML anchor tags: group 1 captures the href value, group 2 the text between <a ...> and </a>.
static string href_regex = @"<a[\s]+[^>]*?href[\s]?=[\s""']+(.*?)[\""']+.*?>([^<]+|.*?)?<\/a>";
// Regex built once from href_regex; MakeRssItem runs it over each item's description.
static Regex regex = new Regex(href_regex);
/// <summary>
/// Builds an <see cref="RssItem"/> from a single RSS &lt;item&gt; element.
/// </summary>
/// <param name="itemnode">The &lt;item&gt; element to read.</param>
/// <returns>
/// An item whose permalink and title default to the empty string, whose date
/// defaults to <see cref="DateTime.Now"/>, whose read flag defaults to false,
/// and whose outgoing links are the unique href values found in the
/// description mapped to their link text.
/// </returns>
static RssItem MakeRssItem(XElement itemnode) {
    XElement link_node = itemnode.Element("link");
    string permalink = (link_node == null ? "" : link_node.Value);

    XElement title_node = itemnode.Element("title");
    string title = (title_node == null ? "" : title_node.Value);

    // pubDate is RFC 822 text with English day/month names, so it is parsed with
    // the invariant culture rather than whatever culture the machine runs under.
    // A missing or unparseable date falls back to the current local time, so one
    // malformed entry does not abort the whole run. Successfully parsed values are
    // adjusted to UTC, as before.
    DateTime date = DateTime.Now;
    XElement date_node = itemnode.Element("pubDate");
    if (date_node != null) {
        DateTime parsed;
        if (DateTime.TryParse(date_node.Value, CultureInfo.InvariantCulture, DateTimeStyles.AdjustToUniversal, out parsed))
            date = parsed;
    }

    // Find the first attribute named "read" (in any namespace) on this item or on
    // one of its descendants. The previous XPathEvaluate(...) call returned an
    // enumerable, so casting it with "as XAttribute" always gave null, and its
    // "//" prefix searched the whole document instead of this item.
    XAttribute read_node = itemnode.DescendantsAndSelf().Attributes()
                                   .FirstOrDefault(a => a.Name.LocalName == "read");
    bool read = false;
    if (read_node != null) {
        // Accept "true"/"false" as well as the numeric form "1"/"0";
        // any other value is treated as unread.
        string raw = read_node.Value.Trim();
        if (!Boolean.TryParse(raw, out read))
            read = (raw == "1");
    }

    // Collect href value -> link text pairs from the description. Keying by href
    // keeps one entry per URL; when a URL is repeated the last link text wins.
    var outgoing_links = new Dictionary<string, string>();
    XElement desc_node = itemnode.Element("description");
    if (desc_node != null) {
        foreach (Match m in regex.Matches(desc_node.Value))
            outgoing_links[m.Groups[1].Value] = m.Groups[2].Value;
    }

    return new RssItem() { Permalink = permalink, Title = title, Date = date, Read = read, OutgoingLinks = outgoing_links };
}
/// <summary>
/// Scans a directory of RSS feed files, ranks the URLs their items link to,
/// and writes the top ten as an HTML list to standard output.
/// </summary>
/// <param name="args">
/// args[0] (optional): directory containing the *.xml feed files.
/// args[1] (optional): integer mode; 0 ranks links from unread items only,
/// any other value ranks links from items of the past week.
/// </param>
static void Main(string[] args) {
    if (args.Length > 0) //get directory of RSS feeds
        cache_location = args[0];
    if (args.Length > 1) //mode = 0 means use only unread items, mode != 0 means use all items from past week
        mode = (Int32.Parse(args[1]) != 0 ? MemeMode.PopularInPastWeek : MemeMode.PopularInUnread);
    Console.WriteLine("Processing items from {0} seeking items that are {1}", cache_location,
        (mode == MemeMode.PopularInPastWeek ? "popular in items from the past week" : "popular in unread items"));

    // MakeRssItem adjusts parsed dates to UTC but falls back to local DateTime.Now,
    // so both sides are normalized to UTC before subtracting; comparing against
    // local DateTime.Now skewed every age by the machine's UTC offset.
    Func<RssItem, TimeSpan> ageOf = x => DateTime.UtcNow - x.Date.ToUniversalTime();

    //decide what filter and vote functions to use depending on mode:
    //in mode = 0 each entry linking to an item counts as a full vote,
    //in mode != 0 the value of a vote shrinks linearly with the item's age
    Func<RssItem, bool> filterFunc;
    Func<RssItem, double> voteFunc;
    if (mode == MemeMode.PopularInPastWeek) {
        filterFunc = x => ageOf(x) < one_week;
        voteFunc = x => 1.0 - ageOf(x).Ticks * 1.0 / one_week.Ticks;
    }
    else {
        filterFunc = x => x.Read == false;
        voteFunc = x => 1.0;
    }

    var di = new DirectoryInfo(cache_location);
    foreach (var fi in di.GetFiles("*.xml")) {
        var doc = XElement.Load(fi.FullName);

        // for each item in feed
        // 1. Get permalink, title, read status and date
        // 2. Get list of outgoing links + link title pairs
        // 3. Convert above to RssItem object
        // 4. apply filter to pick candidate items
        var items = doc.Descendants("item")
                       .Select(itemnode => MakeRssItem(itemnode))
                       .Where(filterFunc);

        // Files without a channel/title element used to crash here with a
        // NullReferenceException; fall back to the file name so that each
        // such feed still gets its own identity when votes are tallied.
        XElement feed_title_node = doc.XPathSelectElement("channel/title");
        string feedTitle = (feed_title_node == null ? fi.Name : feed_title_node.Value);

        // calculate vote for each outgoing url
        foreach (RssItem item in items) {
            var vote = new Vote() { Weight = voteFunc(item), Item = item, FeedTitle = feedTitle };
            //add a vote for each of the URLs
            foreach (var url in item.OutgoingLinks.Keys) {
                List<Vote> value = null;
                if (!all_links.TryGetValue(url, out value))
                    value = all_links[url] = new List<Vote>();
                value.Add(vote);
            }
        }// foreach (RssItem item in items)
    }// foreach(var fi in di.GetFiles("*.xml"))

    //tally the votes: each feed contributes a single weight per link
    //(the smallest weight among its votes for that link)
    List<RankedLink> weighted_links = new List<RankedLink>();
    foreach (var link_n_votes in all_links) {
        Dictionary<string, double> site = new Dictionary<string, double>();
        foreach (var vote in link_n_votes.Value) {
            double oldweight;
            site[vote.FeedTitle] = site.TryGetValue(vote.FeedTitle, out oldweight) ?
                Math.Min(oldweight, vote.Weight) : vote.Weight;
        }
        weighted_links.Add(new RankedLink() { Score = site.Values.Sum(), Url = link_n_votes.Key });
    }
    //highest score first
    weighted_links.Sort((x, y) => y.Score.CompareTo(x.Score));

    //output the results, choose link text from first item we saw story linked from
    // NOTE(review): feed-supplied values are written into the HTML without escaping.
    Console.WriteLine("<html><body><ol>");
    // Take(10) yields at most ten entries; GetRange(0, 10) threw an
    // ArgumentException whenever fewer than ten links had been collected.
    foreach (var rankedlink in weighted_links.Take(10)) {
        List<Vote> votes = all_links[rankedlink.Url];
        var link_text = votes[0].Item.OutgoingLinks[rankedlink.Url];
        Console.WriteLine("<li><a href='{0}'>{1}</a> {2}", rankedlink.Url, link_text, rankedlink.Score);
        Console.WriteLine("<p>Seen on:");
        Console.WriteLine("<ul>");
        foreach (var vote in votes) {
            Console.WriteLine("<li>{0}: <a href='{1}'>{2}</a></li>", vote.FeedTitle, vote.Item.Permalink, vote.Item.Title);
        }
        Console.WriteLine("</ul></p></li>");
    }
    Console.WriteLine("</ol></body></html>");
    //keep the console window open until the user presses Enter
    Console.ReadLine();
}
}
}
Now Playing:
Lloyd Banks -
Boywonder