Jeff Atwood has a blog post entitled Sorting for Humans : Natural Sort Order where he writes
The default sort functions in almost every programming language are poorly suited for human consumption. What do I mean by that? Well, consider the difference between sorting filenames in Windows explorer, and sorting those very same filenames via Array.Sort() code: …Implementing a natural sort is more complex than it seems, and not just for the gnarly i20n issues I've hinted at, above. But the Python implementations are impressively succinct…I tried to come up with a clever, similarly succinct C# 3.0 natural sort implementation, but I failed. I'm not interested in a one-liner contest, necessarily, but it does seem to me that a basic natural sort shouldn't require the 40+ lines of code it takes in most languages.
Array.Sort()
Since I’m still in my “learning Python by mapping it to C#” phase I thought this should be a straightforward task. Below is the equivalent IronPython code for natural sort which is slightly modified from the code posted in Jeff’s post along with what I hoped to be a succint version in C# 2.0. It would definitely be shorter in C# 3.0 [which I don’t plan to start using for another year or so]. The Python snippet below takes advantage of some interesting rules around comparing lists of objects which don’t exist in C#. I’m sure I could reduce the size of the C# code while maintaining readability but my procrastination time is over and I need to get to work.
Natural Sort in IronPython
import re def sort_nicely( l ): """ Sort the given list in the way that humans expect. """ convert = lambda x: x.isdigit() and int(x) or x alphanum = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ] l.sort( key=alphanum ) #serious magic happens here return l print sort_nicely(["z22.txt", "z5.txt" , "z.txt", "z10.txt", "z300.txt", "z2.txt", "z11.txt", "y.txt", "z", "z4.txt", "za.txt" ])
import re
def sort_nicely( l ): """ Sort the given list in the way that humans expect. """ convert = lambda x: x.isdigit() and int(x) or x alphanum = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ] l.sort( key=alphanum ) #serious magic happens here return l
print sort_nicely(["z22.txt", "z5.txt" , "z.txt", "z10.txt", "z300.txt", "z2.txt", "z11.txt", "y.txt", "z", "z4.txt", "za.txt" ])
Natural Sort in C# 2.0
using System;using System.Collections.Generic;using System.Text.RegularExpressions; public class Test { ///<summary>Compare two lists of strings using Python rules and natural order semantics</summary> public static int NaturalCompare(IList<string> a, IList<string> b) { int y, z, len = (a.Count < b.Count ? a.Count : b.Count); for (int i = 0; i < len; i++) { if (a[i].Equals(b[i])) continue; bool w = Int32.TryParse(a[i], out y), x = Int32.TryParse(b[i], out z); bool bothNumbers = w && x, bothNotNumbers = !w && !x; if (bothNumbers) return y.CompareTo(z); else if (bothNotNumbers) return a[i].CompareTo(b[i]); else if (w) return -1; else return 1; //numbers always less than words or letters } return (a.Count.CompareTo(b.Count)); //subset list is considered smaller } public static List<string> SortNicely(List<string> list) { Regex re = new Regex("([0-9]+)"); list.Sort(delegate(string x, string y) { return NaturalCompare(re.Split(x), re.Split(y)); }); return list; } public static void Main(string[] args) { List<string> l = new List<string>(new string[] { "z.txt", "y.txt", "z22.txt", "z5.txt", "z10.txt", "z3.txt", "z2.txt", "za.txt", "z11.txt", "z400.txt" }); foreach (string s in SortNicely(l)) Console.WriteLine(s); }}
using System;using System.Collections.Generic;using System.Text.RegularExpressions;
public class Test {
///<summary>Compare two lists of strings using Python rules and natural order semantics</summary> public static int NaturalCompare(IList<string> a, IList<string> b) { int y, z, len = (a.Count < b.Count ? a.Count : b.Count); for (int i = 0; i < len; i++) { if (a[i].Equals(b[i])) continue; bool w = Int32.TryParse(a[i], out y), x = Int32.TryParse(b[i], out z); bool bothNumbers = w && x, bothNotNumbers = !w && !x; if (bothNumbers) return y.CompareTo(z); else if (bothNotNumbers) return a[i].CompareTo(b[i]); else if (w) return -1; else return 1; //numbers always less than words or letters } return (a.Count.CompareTo(b.Count)); //subset list is considered smaller } public static List<string> SortNicely(List<string> list) { Regex re = new Regex("([0-9]+)"); list.Sort(delegate(string x, string y) { return NaturalCompare(re.Split(x), re.Split(y)); }); return list; } public static void Main(string[] args) { List<string> l = new List<string>(new string[] { "z.txt", "y.txt", "z22.txt", "z5.txt", "z10.txt", "z3.txt", "z2.txt", "za.txt", "z11.txt", "z400.txt" }); foreach (string s in SortNicely(l)) Console.WriteLine(s); }}
Now playing: Notorious B.I.G. - Real N*ggas Do Real Things