The Sandbox Thread

C# snippet to remove duplicate posts from SharpReader

  • androidi

    As you all know, blogs.msdn.com updated to Community Server, and some minor changes in the feeds caused a flood of dupes in my SharpReader. Here's a snippet to remove those dupes while keeping the "Have I read this" flag from the old post. Backing up your SR cache before running this on it is a good idea :) (made on VS Feb CTP)

    edit: Just noticed that if an exception was thrown, the file being processed had already been renamed to its temporary name - bad me! The try/finally below now moves the temp file back if the save never happens.
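
    Before running it, you can copy the cache files somewhere safe with a few lines like these (a minimal sketch; it assumes the default Application Data\SharpReader\cache location mentioned in the comments below, so adjust the path if your setup differs):

    using System;
    using System.IO;
    class CacheBackup
    {
        static void Main()
        {
            // Assumed default cache location (taken from the comments in the dupe remover below)
            string cacheDir = Path.Combine(
                Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData),
                @"SharpReader\cache");
            // Timestamped sibling directory, e.g. ...\SharpReader\cache-backup-20060301123456
            string backupDir = cacheDir + "-backup-" + DateTime.Now.ToString("yyyyMMddHHmmss");
            Directory.CreateDirectory(backupDir);
            foreach (string file in Directory.GetFiles(cacheDir, "*.xml"))
                File.Copy(file, Path.Combine(backupDir, Path.GetFileName(file)));
            Console.WriteLine("Copied cache to " + backupDir);
        }
    }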

    using System;
    using System.Collections.Generic;
    using System.IO;
    using System.Xml;
    namespace srduperem
    {
        class Program
        {
            public static void RemoveDupes(FileInfo sharpReaderCacheXml)
            {
                string tempfileName = "rdupetemp.xml";
                string sharpReaderCachedFileFullName = sharpReaderCacheXml.FullName;
                if (File.Exists(tempfileName)) File.Delete(tempfileName);
                
                sharpReaderCacheXml.MoveTo(tempfileName);
                try
                {
                    // Maps "title + link" to the first post seen with that combination
                    Dictionary<string, XmlNode> foundPosts = new Dictionary<string, XmlNode>();
                    XmlDocument sharpReaderCachedXmlDocument = new XmlDocument();
                    sharpReaderCachedXmlDocument.Load(tempfileName);
                    XmlNodeList list = sharpReaderCachedXmlDocument.SelectNodes("/rss/Items");
                    for (int i = 0; i < list.Count; i++)
                    {
                        XmlNode currentPost = list.Item(i);
                        XmlNode currentPostTitle = currentPost.SelectSingleNode("Title");
                        XmlNode currentPostLink = currentPost.SelectSingleNode("Link");
                        XmlNode currentPostIsRead = currentPost.SelectSingleNode("IsRead");
                        if ((currentPostLink != null) && (currentPostTitle != null))
                        {
                            string pID = currentPostTitle.InnerText + currentPostLink.InnerText;
                            XmlNode firstDupePost; // the post is a dupe if the first (newer) copy is already in the dictionary
                            if (foundPosts.TryGetValue(pID, out firstDupePost))
                            {
                                Console.WriteLine("- " + currentPostTitle.InnerText); // show what gets removed
                                XmlNode firstDupePostIsRead = firstDupePost.SelectSingleNode("IsRead");
                                // "hack": if you want the IsRead of the (new) dupes to remain instead, change this ;)
                                if (firstDupePostIsRead != null) firstDupePost.RemoveChild(firstDupePostIsRead); // remove the flag
                                // "hack": the older (original) of the dupes had the read flag set, so insert it after the newer dupe's ParseDate
                                // "hack": it could probably just be appended, disregarding the original position
                                if (currentPostIsRead != null) firstDupePost.InsertAfter(currentPostIsRead, firstDupePost.SelectSingleNode("ParseDate"));
                                currentPost.ParentNode.RemoveChild(currentPost);
                            }
                            else
                            {
                                foundPosts.Add(pID, currentPost); // the post seems unique so far
                            }
                        }
                    }
                    sharpReaderCachedXmlDocument.Save(sharpReaderCachedFileFullName);
                }
                finally
                {
                    if (File.Exists(sharpReaderCachedFileFullName))
                    {
                        // The save succeeded, so the temporary copy is no longer needed
                        if (File.Exists(tempfileName)) File.Delete(tempfileName);
                    }
                    else if (File.Exists(tempfileName))
                    {
                        // Something failed before the save; give the file its original name back
                        File.Move(tempfileName, sharpReaderCachedFileFullName);
                    }
                }
            }
            static void Main(string[] args)
            {
                string srcachePath = @"."; // assume we are in SharpReader cache dir Application Data\SharpReader\cache
                if (args.Length > 0) srcachePath = Path.GetFullPath(args[0]); // or give the Application Data\SharpReader\cache as argument
                DirectoryInfo di = new DirectoryInfo(srcachePath);
                foreach (FileInfo fi in di.GetFiles("*.xml"))
                {
                    Console.WriteLine(fi.Name);
                    RemoveDupes(fi);
                }
            }
        }
    }
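
    If you don't want to create a project for it, the snippet should build with the command-line compiler and can then be pointed at the cache directory, roughly like this (the file and profile names here are just examples, not from the original post):

        csc /out:srduperem.exe srduperem.cs
        srduperem.exe "C:\Documents and Settings\you\Application Data\SharpReader\cache"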
    
    
