// September 15, 2002 @ 12:58 AM
using System.Xml.XPath;
using System.Xml;
using System;
using System.IO;
using System.Diagnostics;
using System.Net;
using System.Text;
/// <summary>
/// Converts a Kuro5hin diary page to an RSS 0.91 or RSS 1.0 feed.
/// </summary>
/// <remarks>
/// The page is downloaded to a temporary file, rewritten in place as XHTML by the
/// external HTML Tidy executable, loaded as XML and then queried with XPath for
/// the diary entries.
/// </remarks>
class K5Diary2RSS {

    private const string XhtmlNs = "http://www.w3.org/1999/xhtml";
    private const string RdfNs = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
    private const string RssNs = "http://purl.org/rss/1.0/";

    /// <summary>Prefix for the site-relative href values found on the diary page.</summary>
    private const string SiteRoot = "http://www.kuro5hin.org";

    /// <summary>Value of rdf:about on the RSS 1.0 channel (hard-coded, as before).</summary>
    private const string ChannelAbout = "http://www.25hoursaday.com/rss10.xml";

    /// <summary>Suffix appended to the diary title to form the channel description.</summary>
    private const string DescriptionSuffix = " : The Kuro5hin Diary";

    /// <summary>XPath selecting the font element that wraps each entry title.</summary>
    private const string EntryTitleQuery = "//xhtml:font[@color='#000000']";

    /// <summary>XPath, relative to an entry title, selecting the entry body.</summary>
    private const string EntryBodyQuery =
        "./following::*[local-name() = 'font' and @size='2' and @color='#333333']";

    /// <summary>Longest time, in milliseconds, to wait for HTML Tidy.</summary>
    private const int TidyTimeoutMs = 60000;

    ///<summary>
    ///Helper function that concatenates the messages of an exception and all of
    ///its nested inner exceptions, outermost first.
    ///</summary>
    ///<param name="e">The exception; may be null.</param>
    ///<param name="errStr">Text to prepend to the collected messages; may be null.</param>
    ///<returns>
    ///<paramref name="errStr"/> followed by every message in the chain. When
    ///<paramref name="e"/> is null, <paramref name="errStr"/> is returned unchanged.
    ///</returns>
    public static string PrintError(Exception e, string errStr) {
        string message = errStr;
        for (Exception current = e; current != null; current = current.InnerException) {
            message = message + current.Message;
        }
        return message;
    }

    /// <summary>
    /// Uses HTML Tidy, available at http://tidy.sourceforge.net/, to convert the
    /// specified page to XHTML. The file is modified in place.
    /// </summary>
    /// <param name="htmlFile">Path of the HTML file to convert.</param>
    /// <exception cref="ArgumentNullException"><paramref name="htmlFile"/> is null.</exception>
    public static void TidyPage(string htmlFile) {
        if (htmlFile == null) {
            throw new ArgumentNullException("htmlFile");
        }

        // The using block releases the process handle even when Start() throws.
        using (Process tidyProc = new Process()) {
            tidyProc.StartInfo.FileName = "tidy";
            tidyProc.StartInfo.Arguments = "-asxhtml -im " + QuoteArgument(htmlFile);
            tidyProc.StartInfo.UseShellExecute = false;
            tidyProc.Start();

            // Wait no longer than 60 seconds for tidy to convert the page. If it
            // is still running after that, stop it so it cannot keep rewriting a
            // file the caller is about to read and delete.
            if (!tidyProc.WaitForExit(TidyTimeoutMs)) {
                try {
                    tidyProc.Kill();
                } catch (InvalidOperationException) {
                    // The process exited between the timeout and the kill.
                } catch (System.ComponentModel.Win32Exception) {
                    // The process is already terminating or cannot be terminated.
                }
            }
        }
    }

    ///<summary>
    ///Retrieves a Kuro5hin diary page from the URL and writes it to the provided
    ///output file, replacing any previous content of that file.
    ///</summary>
    ///<param name="url">URL to the Kuro5hin Diary</param>
    ///<param name="outfile">Output file to write the page to.</param>
    public static void GetPage(string url, string outfile) {
        Console.WriteLine("Connecting to {0}", url);

        /* Fetch the K5 diary page from the WWW */
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

        // The using blocks close the response, reader and file on every path,
        // including when a read or write throws part-way through.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse()) {
            // The response body is always decoded as UTF-8.
            using (StreamReader reader =
                       new StreamReader(response.GetResponseStream(), Encoding.UTF8)) {
                // FileMode.Create truncates an existing file; File.OpenWrite would
                // leave stale bytes behind when the old content was longer.
                using (StreamWriter writer = new StreamWriter(
                           new FileStream(outfile, FileMode.Create, FileAccess.Write))) {
                    // Copy the page to the file, up to 256 characters at a time.
                    char[] buffer = new char[256];
                    int count;
                    while ((count = reader.Read(buffer, 0, buffer.Length)) > 0) {
                        writer.Write(buffer, 0, count);
                    }
                }
            }
        }
    }

    /// <summary>
    /// Converts a K5 diary file as XHTML to RSS 0.91
    /// </summary>
    /// <param name="doc">The K5 Diary XHTML document</param>
    /// <param name="link">The link to the K5 diary.</param>
    /// <param name="title">The title of the diary</param>
    /// <returns>The RSS file as an XmlDocument object</returns>
    /// <exception cref="ArgumentNullException"><paramref name="doc"/> is null.</exception>
    public static XmlDocument K5Xhtml2Rss091(XmlDocument doc, string link, string title) {
        if (doc == null) {
            throw new ArgumentNullException("doc");
        }

        // The feed is built through the DOM, not by concatenating markup, so
        // '&' and '<' in the title or link are escaped instead of breaking the XML.
        XmlDocument rss = new XmlDocument();
        XmlElement root = rss.CreateElement("rss");
        root.SetAttribute("version", "0.91");
        rss.AppendChild(root);

        XmlElement channel = AppendElement(root, "channel", string.Empty);
        AppendTextElement(channel, "title", string.Empty, title);
        AppendTextElement(channel, "link", string.Empty, link);
        AppendTextElement(channel, "description", string.Empty, title + DescriptionSuffix);
        AppendTextElement(channel, "language", string.Empty, "en");

        // Grab all the titles, then use those to create one <item> each.
        foreach (XmlNode titleNode in SelectEntryTitles(doc)) {
            string diaryLink = ReadEntryLink(titleNode);
            if (diaryLink == null) {
                // Not an entry title: the matched font is not wrapped in a link.
                continue;
            }

            XmlElement item = AppendElement(channel, "item", string.Empty);
            AppendTextElement(item, "title", string.Empty, titleNode.InnerText);
            AppendTextElement(item, "link", string.Empty, diaryLink);

            // The entry body keeps its XHTML markup as child nodes.
            XmlElement description = AppendElement(item, "description", string.Empty);
            description.InnerXml = ReadEntryBody(titleNode);
        }
        return rss;
    }

    /// <summary>
    /// Converts a K5 diary file as XHTML to RSS 1.0
    /// </summary>
    /// <param name="doc">The K5 Diary XHTML document</param>
    /// <param name="link">The link to the K5 diary.</param>
    /// <param name="title">The title of the diary</param>
    /// <returns>The RSS file as an XmlDocument object</returns>
    /// <exception cref="ArgumentNullException"><paramref name="doc"/> is null.</exception>
    public static XmlDocument K5Xhtml2Rss10(XmlDocument doc, string link, string title) {
        if (doc == null) {
            throw new ArgumentNullException("doc");
        }

        XmlDocument rss = new XmlDocument();
        XmlElement root = rss.CreateElement("rdf", "RDF", RdfNs);
        // Declare the rss prefix once on the root so children do not repeat it.
        root.SetAttribute("xmlns:rss", RssNs);
        rss.AppendChild(root);

        XmlElement channel = AppendElement(root, "rss:channel", RssNs);
        SetRdfAttribute(channel, "about", ChannelAbout);
        AppendTextElement(channel, "rss:title", RssNs, title);
        AppendTextElement(channel, "rss:link", RssNs, link);
        AppendTextElement(channel, "rss:description", RssNs, title + DescriptionSuffix);
        XmlElement items = AppendElement(channel, "rss:items", RssNs);
        XmlElement seq = AppendElement(items, "rdf:Seq", RdfNs);

        // Grab all the titles, then use those to create one <rss:item> each.
        foreach (XmlNode titleNode in SelectEntryTitles(doc)) {
            string diaryLink = ReadEntryLink(titleNode);
            if (diaryLink == null) {
                // Not an entry title: the matched font is not wrapped in a link.
                continue;
            }

            // The channel's table of contents lists every item by its URI.
            XmlElement li = AppendElement(seq, "rdf:li", RdfNs);
            SetRdfAttribute(li, "resource", diaryLink);

            // RSS 1.0 items are children of rdf:RDF (siblings of the channel),
            // not children of the channel.
            XmlElement item = AppendElement(root, "rss:item", RssNs);
            SetRdfAttribute(item, "about", diaryLink);
            AppendTextElement(item, "rss:title", RssNs, titleNode.InnerText);
            AppendTextElement(item, "rss:link", RssNs, diaryLink);

            // The entry body keeps its XHTML markup as child nodes.
            XmlElement description = AppendElement(item, "rss:description", RssNs);
            description.InnerXml = ReadEntryBody(titleNode);
        }
        return rss;
    }

    /// <summary>
    /// Where the magic happens.
    /// </summary>
    /// <param name="args">
    /// Command line parameters: the diary URL, the RSS version ("0.91" or "1.0"),
    /// the feed title and the output file.
    /// </param>
    public static void Main(string[] args) {
        if (args.Length != 4) {
            Console.WriteLine("Usage: K5Diary2RSS <K5-diary-url> <0.91 or 1.0> <title> <outfile>");
            return;
        }

        // Reject an unsupported version before downloading anything.
        string rssVersion = args[1];
        bool wantsRss091 = rssVersion.Equals("0.91");
        if (!wantsRss091 && !rssVersion.Equals("1.0")) {
            Console.WriteLine("\n\n*** VERSION " + rssVersion + " IS AN UNSUPPORTED RSS VERSION***");
            return;
        }

        // Used for naming temp files. Only the low 32 bits of the tick count are
        // kept; unchecked keeps the truncation from throwing under /checked.
        uint now = unchecked((uint)DateTime.Now.Ticks);
        string fileName = now + ".html";

        try {
            GetPage(args[0], fileName);
            Console.WriteLine("Diary page retrieved from the web and saved as temp file[{0}.html]", now);

            /* Convert diary page to XML [requires HTML Tidy] */
            TidyPage(fileName);

            // Load the file.
            XmlDocument doc = new XmlDocument();
            doc.Load(fileName);

            /* Convert XHTML file to RSS */
            XmlDocument rss = wantsRss091
                ? K5Xhtml2Rss091(doc, args[0], args[2])
                : K5Xhtml2Rss10(doc, args[0], args[2]);
            rss.Save(args[3]);
        } catch (XmlException xmle) {
            Console.WriteLine("ERROR: XML Parse error occured because " + PrintError(xmle, null));
        } catch (FileNotFoundException fnfe) {
            Console.WriteLine("ERROR: " + PrintError(fnfe, null));
        } catch (XPathException xe) {
            Console.WriteLine("ERROR: The following error occured while querying the document: " + PrintError(xe, null));
        } catch (Exception e) {
            Console.WriteLine("UNEXPECTED ERROR: " + PrintError(e, null));
            Console.WriteLine(e.StackTrace);
        } finally {
            /* Delete temp file, on failures as well as on success */
            DeleteTempFile(fileName);
        }
    }

    /// <summary>Wraps an argument containing spaces in double quotes unless it already starts with one.</summary>
    private static string QuoteArgument(string argument) {
        bool needsQuotes = argument.IndexOf(' ') >= 0 && argument[0] != '"';
        return needsQuotes ? "\"" + argument + "\"" : argument;
    }

    /// <summary>Selects the font elements that wrap the entry titles of the diary page.</summary>
    private static XmlNodeList SelectEntryTitles(XmlDocument doc) {
        // The manager is built from the name table of the document being queried.
        XmlNamespaceManager nsMgr = new XmlNamespaceManager(doc.NameTable);
        nsMgr.AddNamespace("xhtml", XhtmlNs);
        return doc.SelectNodes(EntryTitleQuery, nsMgr);
    }

    /// <summary>Returns the absolute entry URL from the title's parent href, or null when there is none.</summary>
    private static string ReadEntryLink(XmlNode titleNode) {
        XmlNode anchor = titleNode.ParentNode;
        if (anchor == null || anchor.Attributes == null) {
            return null;
        }
        XmlAttribute href = anchor.Attributes["href"];
        if (href == null) {
            return null;
        }
        return SiteRoot + href.Value;
    }

    /// <summary>Returns the inner markup of the entry body that follows a title, or an empty string when none is found.</summary>
    private static string ReadEntryBody(XmlNode titleNode) {
        XmlNode body = titleNode.SelectSingleNode(EntryBodyQuery);
        if (body == null) {
            return string.Empty;
        }
        return body.InnerXml;
    }

    /// <summary>Creates an element in the parent's document and appends it to the parent.</summary>
    private static XmlElement AppendElement(XmlElement parent, string qualifiedName, string namespaceUri) {
        XmlElement element = parent.OwnerDocument.CreateElement(qualifiedName, namespaceUri);
        parent.AppendChild(element);
        return element;
    }

    /// <summary>Appends an element whose content is the given text, escaped as needed; null is treated as empty.</summary>
    private static XmlElement AppendTextElement(XmlElement parent, string qualifiedName, string namespaceUri, string text) {
        XmlElement element = AppendElement(parent, qualifiedName, namespaceUri);
        element.InnerText = text == null ? string.Empty : text;
        return element;
    }

    /// <summary>Sets an attribute in the RDF namespace, written with the rdf prefix.</summary>
    private static void SetRdfAttribute(XmlElement element, string localName, string value) {
        XmlAttribute attribute = element.OwnerDocument.CreateAttribute("rdf", localName, RdfNs);
        attribute.Value = value;
        element.Attributes.Append(attribute);
    }

    /// <summary>Deletes the temporary page file if it exists, reporting a failure to the console rather than throwing.</summary>
    private static void DeleteTempFile(string fileName) {
        try {
            if (File.Exists(fileName)) {
                File.SetAttributes(fileName, FileAttributes.Normal);
                File.Delete(fileName);
            }
        } catch (IOException ioe) {
            Console.WriteLine("ERROR: " + PrintError(ioe, null));
        } catch (UnauthorizedAccessException uae) {
            Console.WriteLine("ERROR: " + PrintError(uae, null));
        }
    }
}