As you all know, blogs.msdn.com was updated to Community Server, and some minor changes in the feeds caused a flood of dupes in my SharpReader. Here's a snippet to remove these dupes while keeping the "Have I read this" flag from the old post. Backing up your SR cache before running this on it is a good idea.

(made on VS Feb CTP)
Edit: Just noticed that if an exception is thrown, the file being processed has already been renamed to a temporary name - bad me!
using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
using System.Xml;
namespace srduperem
{
/// <summary>
/// Console tool that removes duplicated posts from SharpReader cache files while
/// carrying the "IsRead" flag of the removed copy over to the copy that is kept.
/// </summary>
class Program
{
    // Suffix of the scratch file written next to the cache file being processed.
    // It does not end in ".xml", so a scratch file left behind by a crashed run
    // is not picked up as a cache file by the "*.xml" scan in Main.
    private const string TempFileSuffix = ".rdupetemp";

    // Separates title and link inside the duplicate-detection key. U+0000 is not a
    // legal character in XML 1.0 text, so it cannot occur in either part and the
    // key stays unambiguous ("ab" + "c" can no longer collide with "a" + "bc").
    private const char KeySeparator = '\0';

    /// <summary>
    /// Removes duplicate posts from a single cache file and rewrites the file in place.
    /// </summary>
    /// <remarks>
    /// The document is loaded from the original file, the cleaned result is written to
    /// a scratch file in the same directory, and only then is the original swapped for
    /// the scratch file. If loading, processing, or saving fails, the original file is
    /// left untouched under its own name. The passed <see cref="FileInfo"/> is not modified.
    /// </remarks>
    /// <param name="sharpReaderCacheXml">The cache file to clean.</param>
    /// <exception cref="ArgumentNullException"><paramref name="sharpReaderCacheXml"/> is null.</exception>
    /// <exception cref="XmlException">The file is not well-formed XML.</exception>
    /// <exception cref="IOException">The file could not be read, written, or replaced.</exception>
    public static void RemoveDupes(FileInfo sharpReaderCacheXml)
    {
        if (sharpReaderCacheXml == null)
        {
            throw new ArgumentNullException("sharpReaderCacheXml");
        }

        string cacheFilePath = sharpReaderCacheXml.FullName;
        // Same directory as the cache file, so the final swap stays on one volume.
        string tempFilePath = cacheFilePath + TempFileSuffix;

        XmlDocument cacheDocument = new XmlDocument();
        cacheDocument.Load(cacheFilePath);

        RemoveDuplicatePosts(cacheDocument);

        try
        {
            // Write the result to the side first; the original is replaced only
            // after the complete document has been saved successfully.
            cacheDocument.Save(tempFilePath);
            File.Replace(tempFilePath, cacheFilePath, null);
        }
        finally
        {
            // After a successful Replace the scratch file is already gone; this only
            // cleans up a partially written scratch file when Save or Replace failed.
            if (File.Exists(tempFilePath))
            {
                File.Delete(tempFilePath);
            }
        }
    }

    // Drops every post whose title and link match an earlier post in the document,
    // moving the dropped post's IsRead element onto the earlier post that is kept.
    private static void RemoveDuplicatePosts(XmlDocument cacheDocument)
    {
        // Snapshot the matches so that removing nodes from the document below
        // cannot disturb the iteration.
        XmlNodeList matches = cacheDocument.SelectNodes("/rss/Items");
        List<XmlNode> posts = new List<XmlNode>(matches.Count);
        foreach (XmlNode match in matches)
        {
            posts.Add(match);
        }

        Dictionary<string, XmlNode> foundPosts = new Dictionary<string, XmlNode>();
        foreach (XmlNode currentPost in posts)
        {
            XmlNode currentPostTitle = currentPost.SelectSingleNode("Title");
            XmlNode currentPostLink = currentPost.SelectSingleNode("Link");
            if (currentPostTitle == null || currentPostLink == null)
            {
                // Without both title and link the post cannot be identified; keep it as is.
                continue;
            }

            string postId = currentPostTitle.InnerText + KeySeparator + currentPostLink.InnerText;

            XmlNode firstDupePost;
            if (!foundPosts.TryGetValue(postId, out firstDupePost))
            {
                foundPosts.Add(postId, currentPost); // The post seems unique so far
                continue;
            }

            // The post is a dupe of an earlier one in the file (per the original
            // author's notes the earlier entry is the newer copy).
            Console.WriteLine("- " + currentPostTitle.InnerText); // show what gets removed

            // "hack": the kept post's own IsRead is always discarded, even when the
            // dupe being removed has no IsRead element to replace it with.
            XmlNode firstDupePostIsRead = firstDupePost.SelectSingleNode("IsRead");
            if (firstDupePostIsRead != null)
            {
                firstDupePost.RemoveChild(firstDupePostIsRead);
            }

            // Move the removed post's IsRead element after the kept post's ParseDate.
            // When the kept post has no ParseDate the reference node is null and
            // InsertAfter places the element first among the children.
            XmlNode currentPostIsRead = currentPost.SelectSingleNode("IsRead");
            if (currentPostIsRead != null)
            {
                firstDupePost.InsertAfter(currentPostIsRead, firstDupePost.SelectSingleNode("ParseDate"));
            }

            currentPost.ParentNode.RemoveChild(currentPost);
        }
    }

    /// <summary>
    /// Runs <see cref="RemoveDupes"/> on every "*.xml" file of the cache directory.
    /// </summary>
    /// <param name="args">
    /// Optional: the first argument is the cache directory (Application Data\SharpReader\cache).
    /// Without arguments the current directory is used.
    /// </param>
    static void Main(string[] args)
    {
        string srcachePath = @"."; // assume we are in the SharpReader cache dir
        if (args.Length > 0)
        {
            srcachePath = Path.GetFullPath(args[0]);
        }

        DirectoryInfo cacheDirectory = new DirectoryInfo(srcachePath);
        foreach (FileInfo cacheFile in cacheDirectory.GetFiles("*.xml"))
        {
            Console.WriteLine(cacheFile.Name);
            RemoveDupes(cacheFile);
        }
    }
}
}