Dare Obasanjo's weblog
"You can buy cars but you can't buy respect in the hood" - Curtis Jackson
Navigation for Creating RSS Feeds For K5 Diaries - Dare Obasanjo's weblog
Content
Sidebar
Footer
September 15, 2002
@ 12:58 AM
Comments [0]
Creating RSS Feeds For K5 Diaries
using System.Xml.XPath;
using System.Xml;
using System;
using System.IO;
using System.Diagnostics;
using System.Net;
using System.Text;
/// <summary>
/// Converts a Kuro5hin diary page to an RSS 0.91 or RSS 1.0 feed.
/// The page is downloaded to a temporary file, converted to XHTML with HTML Tidy
/// and the diary entries are then located with XPath queries.
/// </summary>
class K5Diary2RSS{

    /// <summary>Namespace of the XHTML elements produced by HTML Tidy.</summary>
    private const string XhtmlNamespace = "http://www.w3.org/1999/xhtml";

    /// <summary>Namespace of the RDF elements and attributes used by RSS 1.0.</summary>
    private const string RdfNamespace = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";

    /// <summary>Namespace of the RSS 1.0 elements.</summary>
    private const string Rss10Namespace = "http://purl.org/rss/1.0/";

    /// <summary>Value written to the rdf:about attribute of the RSS 1.0 channel.</summary>
    private const string Rss10ChannelAbout = "http://www.25hoursaday.com/rss10.xml";

    /// <summary>Prefix that turns the relative href of a diary entry into an absolute URL.</summary>
    private const string K5BaseUrl = "http://www.kuro5hin.org";

    /// <summary>Selects the font elements that hold the diary entry titles.</summary>
    private const string TitleXPath = "//xhtml:font[@color='#000000']";

    /// <summary>Selects, relative to a title node, the font element that holds the entry text.</summary>
    private const string DescriptionXPath =
        "./following::*[local-name() = 'font' and @size='2' and @color='#333333']";

    /// <summary>Separator placed between the messages of nested exceptions.</summary>
    private const string MessageSeparator = " ---> ";

    /// <summary>Maximum time, in milliseconds, to wait for HTML Tidy to finish.</summary>
    private const int TidyTimeoutMs = 60000;

    ///<summary>
    ///Builds a single error message from an exception and all of its inner exceptions.
    ///</summary>
    ///<param name="e">The outermost exception; may be null.</param>
    ///<param name="errStr">Text to prepend to the collected messages; may be null.</param>
    ///<returns><paramref name="errStr"/> unchanged when <paramref name="e"/> is null,
    ///otherwise <paramref name="errStr"/> followed by the message of every exception in
    ///the InnerException chain, outermost first.</returns>
    public static string PrintError(Exception e, string errStr){
        if(e == null){
            return errStr;
        }
        // A null errStr is treated as the empty string by StringBuilder.
        StringBuilder message = new StringBuilder(errStr);
        for(Exception current = e; current != null; current = current.InnerException){
            if(current != e){
                // keep the individual messages readable instead of running them together
                message.Append(MessageSeparator);
            }
            message.Append(current.Message);
        }
        return message.ToString();
    }

    /// <summary>
    /// Uses HTML Tidy available at http://tidy.sourceforge.net/ to convert the specified page
    /// to XHTML. The file is modified in place (tidy's -m option).
    /// </summary>
    /// <param name="htmlFile">Path of the HTML file to convert.</param>
    public static void TidyPage(string htmlFile){
        // using guarantees the process handles are released even if Start() throws
        using(Process tidyProc = new Process()){
            tidyProc.StartInfo.FileName = "tidy";
            // quote the file name so that a path containing spaces stays a single argument
            tidyProc.StartInfo.Arguments = "-asxhtml -im \"" + htmlFile + "\"";
            tidyProc.StartInfo.UseShellExecute = false;
            tidyProc.Start();
            //wait no longer than 60 seconds for tidy to convert the page
            if(!tidyProc.WaitForExit(TidyTimeoutMs)){
                // Do not leave tidy running against a file the caller is about to read and delete.
                try{
                    tidyProc.Kill();
                }catch(InvalidOperationException){
                    // tidy exited between the timeout and the kill; nothing is left to stop
                }
            }
        }
    }

    ///<summary>
    ///Retrieves a Kuro5hin diary page from the URL and writes it to the provided output file.
    ///The response is decoded as UTF-8.
    ///</summary>
    ///<param name="url">URL to the Kuro5hin Diary</param>
    ///<param name="outfile">Output file to write the page to. An existing file is overwritten.</param>
    public static void GetPage(string url, string outfile){
        Console.WriteLine("Connecting to {0}", url);
        /* Fetch the K5 diary page from the WWW */
        WebRequest request = WebRequest.Create(url);
        // The using blocks close the response, the reader and the output file on every path,
        // including when a read or write throws.
        using(WebResponse response = request.GetResponse())
        using(StreamReader readStream =
                  new StreamReader(response.GetResponseStream(), Encoding.GetEncoding("utf-8")))
        // File.Create truncates an existing file; File.OpenWrite would leave stale trailing bytes.
        using(StreamWriter writeStream = new StreamWriter(File.Create(outfile))){
            char[] buffer = new char[256];
            // Copy the page to the output file 256 characters at a time.
            int count = readStream.Read(buffer, 0, buffer.Length);
            while(count > 0){
                writeStream.Write(buffer, 0, count);
                count = readStream.Read(buffer, 0, buffer.Length);
            }
        }
    }

    /// <summary>
    /// Converts a K5 diary file as XHTML to RSS 0.91
    /// </summary>
    /// <param name="doc">The K5 Diary XHTML document</param>
    /// <param name="link">The link to the K5 diary.</param>
    /// <param name="title">The title of the diary</param>
    /// <returns>The RSS file as an XmlDocument object</returns>
    public static XmlDocument K5Xhtml2Rss091(XmlDocument doc, string link, string title){
        // The feed is built with the DOM API so that text such as '&' in a URL or a title
        // is escaped correctly and every element is guaranteed to be closed.
        XmlDocument rss = new XmlDocument();
        XmlElement root = rss.CreateElement("rss");
        root.SetAttribute("version", "0.91");
        rss.AppendChild(root);

        XmlElement channel = AppendElement(root, String.Empty, "channel", String.Empty, null);
        AppendElement(channel, String.Empty, "title", String.Empty, title);
        AppendElement(channel, String.Empty, "link", String.Empty, link);
        AppendElement(channel, String.Empty, "description", String.Empty,
                      title + " : The Kuro5hin Diary");
        AppendElement(channel, String.Empty, "language", String.Empty, "en");

        //Grab all the titles then use those to create <item>
        foreach(XmlNode node in SelectDiaryTitles(doc)){
            string diaryLink = GetDiaryLink(node);
            if(diaryLink == null){
                // a matching font element that is not inside a link is not a diary title
                continue;
            }
            XmlElement item = AppendElement(channel, String.Empty, "item", String.Empty, null);
            AppendElement(item, String.Empty, "title", String.Empty, node.InnerText);
            AppendElement(item, String.Empty, "link", String.Empty, diaryLink);
            XmlElement description =
                AppendElement(item, String.Empty, "description", String.Empty, null);
            // the entry text is carried over as markup, not as escaped text
            description.InnerXml = GetDiaryDescription(node);
        }
        return rss;
    }

    /// <summary>
    /// Converts a K5 diary file as XHTML to RSS 1.0
    /// </summary>
    /// <param name="doc">The K5 Diary XHTML document</param>
    /// <param name="link">The link to the K5 diary.</param>
    /// <param name="title">The title of the diary</param>
    /// <returns>The RSS file as an XmlDocument object</returns>
    public static XmlDocument K5Xhtml2Rss10(XmlDocument doc, string link, string title){
        XmlDocument rss = new XmlDocument();
        XmlElement root = rss.CreateElement("rdf", "RDF", RdfNamespace);
        // declare the rss prefix once on the root instead of on every element
        root.SetAttribute("xmlns:rss", Rss10Namespace);
        rss.AppendChild(root);

        XmlElement channel = AppendElement(root, "rss", "channel", Rss10Namespace, null);
        SetRdfAttribute(channel, "about", Rss10ChannelAbout);
        AppendElement(channel, "rss", "title", Rss10Namespace, title);
        AppendElement(channel, "rss", "link", Rss10Namespace, link);
        AppendElement(channel, "rss", "description", Rss10Namespace,
                      title + " : The Kuro5hin Diary");
        XmlElement items = AppendElement(channel, "rss", "items", Rss10Namespace, null);
        XmlElement seq = AppendElement(items, "rdf", "Seq", RdfNamespace, null);

        //Grab all the titles then use those to create <item>
        foreach(XmlNode node in SelectDiaryTitles(doc)){
            string diaryLink = GetDiaryLink(node);
            if(diaryLink == null){
                // a matching font element that is not inside a link is not a diary title
                continue;
            }
            // table of contents entry inside the channel
            XmlElement li = AppendElement(seq, "rdf", "li", RdfNamespace, null);
            SetRdfAttribute(li, "resource", diaryLink);

            // RSS 1.0 items are siblings of the channel, directly below rdf:RDF
            XmlElement item = AppendElement(root, "rss", "item", Rss10Namespace, null);
            SetRdfAttribute(item, "about", diaryLink);
            AppendElement(item, "rss", "title", Rss10Namespace, node.InnerText);
            AppendElement(item, "rss", "link", Rss10Namespace, diaryLink);
            XmlElement description =
                AppendElement(item, "rss", "description", Rss10Namespace, null);
            // the entry text is carried over as markup, not as escaped text
            description.InnerXml = GetDiaryDescription(node);
        }
        return rss;
    }

    /// <summary>
    /// Where the magic happens.
    /// </summary>
    /// <param name="args">Command line parameters: diary URL, RSS version, title, output file.</param>
    public static void Main(string[] args){
        if(args.Length != 4){
            Console.WriteLine("Usage: K5Diary2RSS <K5-diary-url> <0.91 or 1.0> <title> <outfile>");
            return;
        }
        string rssVersion = args[1];
        // Reject an unsupported version before doing any network or file work.
        if(rssVersion != "0.91" && rssVersion != "1.0"){
            Console.WriteLine("\n\n*** VERSION " + rssVersion + " IS AN UNSUPPORTED RSS VERSION***");
            return;
        }
        //used for naming temp files; unchecked because the truncation of Ticks is intentional
        uint now = unchecked((uint) DateTime.Now.Ticks);
        string fileName = now + ".html";
        try{
            GetPage(args[0], fileName);
            Console.WriteLine("Diary page retrieved from the web and saved as temp file[{0}.html]", now);
            /* Convert diary page to XML [requires HTML Tidy] */
            TidyPage(fileName);
            //Load the file.
            XmlDocument doc = new XmlDocument();
            doc.Load(fileName);
            /* Convert XHTML file to RSS */
            XmlDocument rss;
            if(rssVersion == "0.91"){
                rss = K5Xhtml2Rss091(doc, args[0], args[2]);
            }else{
                rss = K5Xhtml2Rss10(doc, args[0], args[2]);
            }
            rss.Save(args[3]);
        }catch(XmlException xmle){
            Console.WriteLine("ERROR: XML Parse error occured because " + PrintError(xmle, null));
        }catch(FileNotFoundException fnfe){
            Console.WriteLine("ERROR: " + PrintError(fnfe, null));
        }catch(XPathException xe){
            Console.WriteLine("ERROR: The following error occured while querying the document: " + PrintError(xe, null));
        }catch(Exception e){
            Console.WriteLine("UNEXPECTED ERROR: " + PrintError(e, null));
            Console.WriteLine(e.StackTrace);
        }finally{
            /* Delete temp file, also when the conversion failed */
            DeleteTempFile(fileName);
        }
    }

    /// <summary>Returns the font elements of the XHTML document that hold diary entry titles.</summary>
    private static XmlNodeList SelectDiaryTitles(XmlDocument doc){
        //create prefix<->namespace mappings
        XmlNamespaceManager nsMgr = new XmlNamespaceManager(doc.NameTable);
        nsMgr.AddNamespace("xhtml", XhtmlNamespace);
        return doc.SelectNodes(TitleXPath, nsMgr);
    }

    /// <summary>
    /// Returns the absolute URL of a diary entry, built from the href attribute of the
    /// title node's parent, or null when the parent has no href attribute.
    /// </summary>
    private static string GetDiaryLink(XmlNode titleNode){
        XmlNode parent = titleNode.ParentNode;
        if(parent == null || parent.Attributes == null){
            return null;
        }
        XmlAttribute href = parent.Attributes["href"];
        if(href == null){
            return null;
        }
        return K5BaseUrl + href.Value;
    }

    /// <summary>
    /// Returns the inner markup of the first description element that follows the title
    /// node, or the empty string when there is none.
    /// </summary>
    private static string GetDiaryDescription(XmlNode titleNode){
        XmlNode description = titleNode.SelectSingleNode(DescriptionXPath);
        if(description == null){
            return String.Empty;
        }
        return description.InnerXml;
    }

    /// <summary>
    /// Creates an element, optionally fills it with (automatically escaped) text and
    /// appends it to the parent. Pass null as text to create an empty element.
    /// </summary>
    private static XmlElement AppendElement(XmlNode parent, string prefix, string localName,
                                            string ns, string text){
        XmlElement child = parent.OwnerDocument.CreateElement(prefix, localName, ns);
        if(text != null){
            child.InnerText = text;
        }
        parent.AppendChild(child);
        return child;
    }

    /// <summary>Sets an attribute in the RDF namespace (rdf: prefix) on the element.</summary>
    private static void SetRdfAttribute(XmlElement element, string localName, string value){
        XmlAttribute attribute = element.OwnerDocument.CreateAttribute("rdf", localName, RdfNamespace);
        attribute.Value = value;
        element.SetAttributeNode(attribute);
    }

    /// <summary>
    /// Deletes the temp file if it exists. Failures are reported on the console rather than
    /// thrown because this runs from a finally block.
    /// </summary>
    private static void DeleteTempFile(string fileName){
        try{
            if(File.Exists(fileName)){
                File.SetAttributes(fileName, FileAttributes.Normal);
                File.Delete(fileName);
            }
        }catch(IOException ioe){
            Console.WriteLine("WARNING: could not delete temp file " + fileName + ": " + ioe.Message);
        }catch(UnauthorizedAccessException uae){
            Console.WriteLine("WARNING: could not delete temp file " + fileName + ": " + uae.Message);
        }
    }
}
Tweet
Categories:
« Ladies and Gentlemen, Start Your Engines...
|
Home
|
What Do You Want To Be When You Grow Up ... »
Comments are closed.
RSS/Subscribe
Archives
All dates
All Posts
Search
Latest Posts
Culture Eats Strategy for Breakfast
Some Thoughts on Paul Graham’s Essay on Income Inequality
How Facebook Knows Who You’re Talking to on Tinder and OKCupid
Fabric: Why Developers Can Trust Twitter Won’t Screw Them This Time
5 Account Security Features Every Online Service Should Implement But Doesn't
Facebook’s Newsfeed Experiment: Most people have grabbed the wrong end of the stick
Change is bad unless it’s great: Lessons from user revolts against Foursquare’s Swarm and the new Skype for iPhone
Facebook’s App Links: The Good, the Bad and the Ugly
The mobile web vs apps is another front on the battle between open and closed systems
How Facebook Knows What You Looked at on Amazon
Categories
Cloud Computing (5)
Comics (1)
Competitors/Web Companies (161)
Current Affairs (31)
Das Blog (3)
dasBlog (1)
Life in the B0rg Cube (197)
Mindless Link Propagation (185)
Movie Review (13)
MSN (127)
Music (20)
Office Live (8)
Personal (54)
Platforms (59)
Programming (58)
Ramblings (145)
Rants (7)
RSS Bandit (215)
Seattle Startup Shoutout (4)
Social Software (185)
Startup Shoutout (4)
Syndication Technology (108)
Technology (169)
Trip Report (63)
Video Games (6)
Web Development (198)
Windows Live (227)
XML (203)
XML Web Services (114)
Archives
January, 2016 (2)
October, 2015 (1)
October, 2014 (1)
September, 2014 (1)
July, 2014 (1)
June, 2014 (1)
May, 2014 (1)
April, 2014 (1)
February, 2014 (1)
January, 2014 (1)
December, 2013 (1)
November, 2013 (2)
July, 2013 (1)
March, 2013 (1)
November, 2012 (2)
September, 2012 (2)
July, 2012 (3)
June, 2012 (1)
April, 2012 (1)
February, 2012 (1)
January, 2012 (1)
November, 2011 (1)
September, 2011 (3)
July, 2011 (2)
June, 2011 (1)
April, 2011 (1)
March, 2011 (1)
January, 2011 (2)
November, 2010 (2)
October, 2010 (2)
August, 2010 (3)
July, 2010 (3)
June, 2010 (3)
April, 2010 (6)
March, 2010 (3)
February, 2010 (5)
January, 2010 (4)
December, 2009 (2)
November, 2009 (8)
October, 2009 (1)
September, 2009 (3)
August, 2009 (4)
June, 2009 (1)
May, 2009 (12)
April, 2009 (5)
March, 2009 (17)
February, 2009 (11)
January, 2009 (20)
December, 2008 (12)
November, 2008 (12)
October, 2008 (22)
September, 2008 (12)
August, 2008 (15)
July, 2008 (18)
June, 2008 (19)
May, 2008 (12)
March, 2008 (2)
February, 2008 (27)
January, 2008 (20)
December, 2007 (26)
November, 2007 (26)
October, 2007 (29)
September, 2007 (16)
August, 2007 (34)
July, 2007 (26)
June, 2007 (33)
May, 2007 (37)
April, 2007 (14)
March, 2007 (32)
February, 2007 (35)
January, 2007 (29)
December, 2006 (29)
November, 2006 (47)
October, 2006 (26)
September, 2006 (46)
August, 2006 (40)
July, 2006 (38)
June, 2006 (48)
May, 2006 (33)
April, 2006 (39)
March, 2006 (38)
February, 2006 (42)
January, 2006 (42)
December, 2005 (43)
November, 2005 (40)
October, 2005 (48)
September, 2005 (45)
August, 2005 (40)
July, 2005 (31)
June, 2005 (36)
May, 2005 (38)
April, 2005 (31)
March, 2005 (39)
February, 2005 (22)
January, 2005 (25)
December, 2004 (32)
November, 2004 (27)
October, 2004 (24)
September, 2004 (26)
August, 2004 (23)
July, 2004 (25)
June, 2004 (29)
May, 2004 (39)
April, 2004 (37)
March, 2004 (41)
February, 2004 (51)
January, 2004 (54)
December, 2003 (42)
November, 2003 (40)
October, 2003 (41)
September, 2003 (8)
August, 2003 (11)
July, 2003 (9)
June, 2003 (14)
May, 2003 (12)
April, 2003 (12)
March, 2003 (13)
February, 2003 (13)
January, 2003 (13)
December, 2002 (12)
November, 2002 (8)
October, 2002 (6)
September, 2002 (5)
August, 2002 (6)
July, 2002 (6)
June, 2002 (4)
May, 2002 (8)
April, 2002 (3)
March, 2002 (1)
February, 2002 (1)
January, 2002 (4)
November, 2001 (4)
October, 2001 (3)
September, 2001 (3)
August, 2001 (1)
July, 2001 (1)
May, 2001 (0)
April, 2001 (2)
March, 2001 (2)
BlogRoll
Alexander Groß
Anthony Bouch
Ben Scheirman
Clemens Vasters
Erv Walter
Gray's Matter
Harry Pierson
James Snape
John Forsythe
Joshua Flanagan
Kris van der Mast
Mark Michaelis
Michael Earls
Omar Shahine
Paulb Vanbrenk
Scott Hanselman
Stephen Forte
Steven Rockarts
Tom Mertens
Tom Watts
Tony Bunce
Vasanth Dharmaraj
Categories
Cloud Computing
Comics
Competitors/Web Companies
Current Affairs
Das Blog
dasBlog
Life in the B0rg Cube
Mindless Link Propagation
Movie Review
MSN
Music
Office Live
Personal
Platforms
Programming
Ramblings
Rants
RSS Bandit
Seattle Startup Shoutout
Social Software
Startup Shoutout
Syndication Technology
Technology
Trip Report
Video Games
Web Development
Windows Live
XML
XML Web Services
Contact the Author
Themes
Pick a theme:
BlogXP
business
calmBlue
Candid Blue
dasBlog
dasblogger
dasblueblog
dasEmerald
DirectionalRedux
Discreet Blog Blue
Elegante
essence
Just Html
MadsSimple
Mobile
Mono
Movable Radio Blue
Movable Radio Heat
nautica022
orangeCream
Portal
Project84
Project84Grass
Slate
Sound Waves
Tricoleur
useit.com
Voidclass2
Admin
Sign In