A few weeks ago, I wrote a prototype for the meme tracking feature of RSS Bandit in IronPython. The code was included in my blog post A Meme Tracker In IronPython. The script was a port of Sam Ruby's original MeMeme script which shows the most recently popular links from a set of RSS feeds.
I was impressed with how succinct the code was in IronPython when compared to what the code eventually looked like when I ported it to C# 2.0 and integrated it into RSS Bandit. Looking over the list of new features in C# 3.0, it occurred to me that a C# 3.0 version of the script would be as concise or even more concise than the IronPython version. So I ported the script to C# 3.0 and learned a few things along the way.
I'll post something shortly that goes into some details on my perspectives on the pros and cons of the various C# 3.0 features when compared to various Python features. For now, here's the meme tracker script in C# 3.0. Comparing it to the IronPython version should provide some food for thought.
using System;
using System.Collections;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Security;
using System.Text.RegularExpressions;
using System.Xml;
using System.Xml.Linq;
using System.Xml.XPath;

namespace Memetracker
{
    /// <summary>Selects which feed items take part in the ranking.</summary>
    enum MemeMode
    {
        /// <summary>Only items not yet marked as read; every vote weighs 1.0.</summary>
        PopularInUnread,

        /// <summary>Only items younger than one week; newer items weigh more.</summary>
        PopularInPastWeek
    }

    /// <summary>A linked URL together with its accumulated score.</summary>
    class RankedLink
    {
        public string Url { get; set; }
        public double Score { get; set; }
    }

    /// <summary>One feed item's vote for the URLs it links to.</summary>
    class Vote
    {
        public double Weight { get; set; }
        public RssItem Item { get; set; }
        public string FeedTitle { get; set; }
    }

    /// <summary>The parts of an RSS item that the meme tracker uses.</summary>
    class RssItem
    {
        public string Title { get; set; }
        public DateTime Date { get; set; }
        public bool Read { get; set; }
        public string Permalink { get; set; }

        /// <summary>Maps each distinct href found in the item's description to its link text.</summary>
        public Dictionary<string, string> OutgoingLinks { get; set; }
    }

    /// <summary>
    /// Console program that scans a directory of cached RSS feeds (*.xml) and prints
    /// an HTML list of the most linked-to URLs.
    /// </summary>
    class Program
    {
        // url -> every vote cast for that url, in the order the votes were encountered
        static readonly Dictionary<string, List<Vote>> all_links = new Dictionary<string, List<Vote>>();
        static readonly TimeSpan one_week = new TimeSpan(7, 0, 0, 0);
        static MemeMode mode = MemeMode.PopularInPastWeek;
        static string cache_location = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData), "Temp");

        // captures (1) the href value and (2) the link text of each anchor tag
        static readonly string href_regex = @"<a[\s]+[^>]*?href[\s]?=[\s""']+(.*?)[\""']+.*?>([^<]+|.*?)?<\/a>";
        static readonly Regex regex = new Regex(href_regex);

        /// <summary>Converts an RSS &lt;item&gt; element into an <see cref="RssItem"/>.</summary>
        /// <param name="itemnode">The &lt;item&gt; element to read.</param>
        /// <returns>
        /// The item's permalink, title, date (UTC), read flag and outgoing links.
        /// Missing link/title become empty strings; a missing or unparseable pubDate
        /// becomes the current UTC time; a missing read flag becomes false.
        /// </returns>
        internal static RssItem MakeRssItem(XElement itemnode)
        {
            XElement link_node = itemnode.Element("link");
            string permalink = (link_node == null ? "" : link_node.Value);

            XElement title_node = itemnode.Element("title");
            string title = (title_node == null ? "" : title_node.Value);

            // RFC 822 dates use English day/month names, so parse with the invariant
            // culture; one malformed date should not abort the whole run.
            DateTime date = DateTime.UtcNow;
            XElement date_node = itemnode.Element("pubDate");
            if (date_node != null)
            {
                DateTime parsed;
                if (DateTime.TryParse(date_node.Value, CultureInfo.InvariantCulture, DateTimeStyles.AdjustToUniversal, out parsed))
                    date = parsed;
            }

            // XPathEvaluate returns an enumerable for node sets, so casting its result to
            // XAttribute always yielded null. Look the attribute up directly, scoped to
            // this item (any namespace, local name "read").
            XAttribute read_node = itemnode.DescendantsAndSelf()
                                           .Attributes()
                                           .FirstOrDefault(a => a.Name.LocalName == "read");
            bool read = (read_node != null && ParseFlag(read_node.Value));

            // Collect href -> link text pairs. The dictionary keeps only unique hrefs;
            // when an href repeats, the text of its last occurrence wins.
            var outgoing_links = new Dictionary<string, string>();
            XElement desc_node = itemnode.Element("description");
            if (desc_node != null)
            {
                foreach (Match m in regex.Matches(desc_node.Value))
                    outgoing_links[m.Groups[1].Value] = m.Groups[2].Value;
            }

            return new RssItem()
            {
                Permalink = permalink,
                Title = title,
                Date = date,
                Read = read,
                OutgoingLinks = outgoing_links
            };
        }

        /// <summary>
        /// Interprets a flag written either as true/false or as an integer (non-zero = true).
        /// Anything else is treated as false.
        /// </summary>
        static bool ParseFlag(string text)
        {
            bool flag;
            if (Boolean.TryParse(text, out flag))
                return flag;

            int number;
            return Int32.TryParse(text, NumberStyles.Integer, CultureInfo.InvariantCulture, out number) && number != 0;
        }

        /// <summary>
        /// Loads every *.xml feed in <paramref name="directory"/> and records a vote in
        /// all_links for each URL linked from an item accepted by <paramref name="filterFunc"/>.
        /// </summary>
        static void CollectVotes(string directory, Func<RssItem, bool> filterFunc, Func<RssItem, double> voteFunc)
        {
            var di = new DirectoryInfo(directory);
            foreach (FileInfo fi in di.GetFiles("*.xml"))
            {
                XElement doc;
                try
                {
                    doc = XElement.Load(fi.FullName);
                }
                catch (XmlException e)
                {
                    // one broken cache file should not prevent ranking the others
                    Console.Error.WriteLine("Skipping {0}: {1}", fi.Name, e.Message);
                    continue;
                }

                // fall back to the file name when the feed carries no channel title
                XElement feed_title_node = doc.XPathSelectElement("channel/title");
                string feedTitle = (feed_title_node == null ? fi.Name : feed_title_node.Value);

                // convert each <item> to an RssItem and keep the candidates for this mode
                var items = doc.Descendants("item")
                               .Select(itemnode => MakeRssItem(itemnode))
                               .Where(filterFunc);

                foreach (RssItem item in items)
                {
                    var vote = new Vote() { Weight = voteFunc(item), Item = item, FeedTitle = feedTitle };

                    // the item casts the same vote for every URL it links to
                    foreach (string url in item.OutgoingLinks.Keys)
                    {
                        List<Vote> votes;
                        if (!all_links.TryGetValue(url, out votes))
                            votes = all_links[url] = new List<Vote>();
                        votes.Add(vote);
                    }
                }
            }
        }

        /// <summary>
        /// Scores every URL and returns the links ordered by descending score.
        /// Each feed contributes once per URL, using the smallest weight among its votes.
        /// </summary>
        internal static List<RankedLink> RankLinks(Dictionary<string, List<Vote>> links)
        {
            var weighted_links = new List<RankedLink>();
            foreach (var link_n_votes in links)
            {
                var site = new Dictionary<string, double>();
                foreach (Vote vote in link_n_votes.Value)
                {
                    double oldweight;
                    site[vote.FeedTitle] = site.TryGetValue(vote.FeedTitle, out oldweight)
                        ? Math.Min(oldweight, vote.Weight)
                        : vote.Weight;
                }
                weighted_links.Add(new RankedLink() { Score = site.Values.Sum(), Url = link_n_votes.Key });
            }

            weighted_links.Sort((x, y) => y.Score.CompareTo(x.Score));
            return weighted_links;
        }

        /// <summary>
        /// Writes the top <paramref name="maxLinks"/> ranked links as an HTML ordered list,
        /// each followed by the feed items it was seen on.
        /// </summary>
        internal static void WriteReport(TextWriter output, List<RankedLink> ranked, Dictionary<string, List<Vote>> links, int maxLinks)
        {
            output.WriteLine("<html><body><ol>");

            // Take() copes with fewer than maxLinks results; GetRange(0, 10) threw in that case
            foreach (RankedLink rankedlink in ranked.Take(maxLinks))
            {
                List<Vote> votes = links[rankedlink.Url];

                // choose the link text from the first item we saw the story linked from
                string link_text = votes[0].Item.OutgoingLinks[rankedlink.Url];

                // NOTE(review): the URL and link text are raw HTML fragments lifted from the
                // feed's description markup, so they are emitted as-is (escaping them would
                // double-encode entities). They are untrusted input; sanitize them before
                // serving this report anywhere sensitive.
                output.WriteLine("<li><a href='{0}'>{1}</a> {2}", rankedlink.Url, link_text, rankedlink.Score);
                output.WriteLine("<p>Seen on:");
                output.WriteLine("<ul>");
                foreach (Vote vote in votes)
                {
                    // these values are decoded XML text, so they must be escaped for HTML
                    output.WriteLine("<li>{0}: <a href='{1}'>{2}</a></li>",
                        SecurityElement.Escape(vote.FeedTitle),
                        SecurityElement.Escape(vote.Item.Permalink),
                        SecurityElement.Escape(vote.Item.Title));
                }
                output.WriteLine("</ul></p></li>");
            }

            output.WriteLine("</ol></body></html>");
        }

        /// <summary>
        /// Entry point. args[0] (optional) is the directory of cached RSS feeds;
        /// args[1] (optional) is the mode: 0 = rank links from unread items only,
        /// non-zero = rank links from items of the past week.
        /// </summary>
        static void Main(string[] args)
        {
            if (args.Length > 0) // get directory of RSS feeds
                cache_location = args[0];

            if (args.Length > 1)
            {
                int mode_arg;
                if (!Int32.TryParse(args[1], NumberStyles.Integer, CultureInfo.InvariantCulture, out mode_arg))
                {
                    Console.Error.WriteLine("Invalid mode '{0}': expected an integer (0 = unread items, non-zero = past week)", args[1]);
                    Environment.ExitCode = 1;
                    return;
                }
                mode = (mode_arg != 0 ? MemeMode.PopularInPastWeek : MemeMode.PopularInUnread);
            }

            Console.WriteLine("Processing items from {0} seeking items that are {1}", cache_location,
                (mode == MemeMode.PopularInPastWeek ? "popular in items from the past week" : "popular in unread items"));

            if (!Directory.Exists(cache_location))
            {
                Console.Error.WriteLine("Directory not found: {0}", cache_location);
                Environment.ExitCode = 1;
                return;
            }

            // Item dates are normalized to UTC, so measure age against UTC as well,
            // and use a single timestamp so every item is judged against the same instant.
            DateTime now = DateTime.UtcNow;

            Func<RssItem, bool> filterFunc;   // picks the candidate items for the mode
            Func<RssItem, double> voteFunc;   // weight of a single item's vote
            if (mode == MemeMode.PopularInPastWeek)
            {
                filterFunc = x => (now - x.Date < one_week);
                // weight falls linearly from 1.0 (brand new) towards 0.0 (one week old)
                voteFunc = x => 1.0 - (now.Ticks - x.Date.Ticks) * 1.0 / one_week.Ticks;
            }
            else
            {
                filterFunc = x => x.Read == false;
                voteFunc = x => 1.0;
            }

            CollectVotes(cache_location, filterFunc, voteFunc);
            List<RankedLink> weighted_links = RankLinks(all_links);
            WriteReport(Console.Out, weighted_links, all_links, 10);

            // wait for Enter before exiting
            Console.ReadLine();
        }
    }
}