Wednesday, July 13, 2011

C#: Mass search and replace text in Word documents

Usage (always call Dispose() when you are done, ideally from a finally block or a using statement, so that the WINWORD.EXE instance started by this object is shut down):
 
// Creating the helper starts one Word application instance (see the constructor).
DocxSearchAndReplace docxSearchAndReplace = new DocxSearchAndReplace();
// One call per document; the same helper object can be reused for many files.
DocxSearchAndReplace.SearchAndReplaceResult result = docxSearchAndReplace.SearchAndReplace(parameters...);
// Quits that Word instance and releases its COM object.
docxSearchAndReplace.Dispose();

And finally here is the DocxSearchAndReplace.cs class:

using System;
using System.Collections.Generic;
using System.IO;
using Word = Microsoft.Office.Interop.Word;
 
namespace KodeSharp.MassDocx
{
    public class DocxSearchAndReplace : IDisposable
    {
        // Boxed constants handed to the Word interop calls in this class:
        // objMissing stands in for arguments that are left unspecified and objTrue is
        // passed to Find.Execute. objFalse is not referenced anywhere else in this class.
        public object objMissing = System.Reflection.Missing.Value;
        public object objFalse = false;
        public object objTrue = true;
 
        // A single Word application instance is kept for the lifetime of this object so that
        // repeated SearchAndReplace() calls (e.g. in a loop over many files) reuse it.
        // It is created in the constructor and quit/released in Dispose().
        public Word.Application wordApp;
 
        public struct SearchAndReplaceResult
        {
            public string FileName;
            public int NumberOfChanges;
            public string Message;
            public SearchAndReplaceResult(string fileName, int numberOfChanges, string message)
            {
                FileName = fileName;
                NumberOfChanges = numberOfChanges;
                Message = message;
            }
        }
 
        public DocxSearchAndReplace()
        {
            wordApp = new Word.Application();
        }
 
        //Dispose must be called to dispose the Word COM objects
        public void Dispose()
        {
            ((Word._Application)wordApp).Quit();
            System.Runtime.InteropServices.Marshal.ReleaseComObject(wordApp);
            wordApp = null;
        }
 
        public SearchAndReplaceResult SearchAndReplace(FileInfo wordFile, string searchText, string replaceText,
            bool matchCase, bool matchWholeWord, bool matchWildcard, bool matchSoundsLike, bool matchAllWordForms,
            string sourceFamily, string sourceSize, bool sourceStyleCheck, bool sourceBold, bool sourceItalic, bool sourceUnderline,
            string targetFamily, string targetSize, bool targetStyleCheck, bool targetBold, bool targetItalic, bool targetUnderline,
            bool dummySave, bool trackChanges)
        {
            //Ignore temporary files that Word creates during a session and are left alone when Word is closed improperly or forcibly
            if (wordFile.Name.StartsWith("~$") || wordFile.Extension.ToLower() != ".docx")
                return new SearchAndReplaceResult(wordFile.Name, 0, "Invalid file");
 
            Word.Document wordDoc = wordApp.Documents.Open(wordFile.FullName);
 
            wordApp.Selection.Find.ClearFormatting();
            wordApp.Selection.Find.Replacement.ClearFormatting();
 
            if (!String.IsNullOrEmpty(sourceFamily)) wordApp.Selection.Find.Font.Name = sourceFamily;
            if (!String.IsNullOrEmpty(sourceSize)) wordApp.Selection.Find.Font.Size = (float)Convert.ToDouble(sourceSize);
            if (sourceStyleCheck)
            {
                wordApp.Selection.Find.Font.Bold = sourceBold ? 1 : 0;
                wordApp.Selection.Find.Font.Italic = sourceItalic ? 1 : 0;
                wordApp.Selection.Find.Font.Underline = sourceUnderline ? Word.WdUnderline.wdUnderlineSingle : Word.WdUnderline.wdUnderlineNone;
            }
 
            if (!String.IsNullOrEmpty(targetFamily)) wordApp.Selection.Find.Replacement.Font.Name = targetFamily;
            if (!String.IsNullOrEmpty(targetSize)) wordApp.Selection.Find.Replacement.Font.Size = (float)Convert.ToDouble(targetSize);
            if (targetStyleCheck)
            {
                wordApp.Selection.Find.Replacement.Font.Bold = targetBold ? 1 : 0;
                wordApp.Selection.Find.Replacement.Font.Italic = targetItalic ? 1 : 0;
                wordApp.Selection.Find.Replacement.Font.Underline = targetUnderline ? Word.WdUnderline.wdUnderlineSingle : Word.WdUnderline.wdUnderlineNone;
            }
 
            List<bool> trackChangeStatus = GetTrackChangeStatus(wordDoc);
            if (!dummySave)
                SetTrackChangeStatus(wordDoc, trackChanges);
 
            int numChanged = 0;
 
            //Body
            wordApp.Selection.WholeStory();
            Word.Selection wordSelection = wordApp.Selection;
            numChanged += MakeChanges(ref wordApp, ref wordDoc, searchText, replaceText, matchCase, matchWholeWord, matchWildcard, matchSoundsLike, matchAllWordForms, dummySave);
 
            //TextBoxes within body
            Word.Shapes wordShapes = wordDoc.Shapes;
            numChanged += MakeChangesWithinShapes(ref wordApp, ref wordDoc, ref wordShapes, searchText, replaceText, matchCase, matchWholeWord, matchWildcard, matchSoundsLike, matchAllWordForms, dummySave);
 
            //Headers, Footers and Textboxes within
            foreach (Word.Section wordSection in wordDoc.Sections)
            {
                //Headers
                wordSection.Headers[Word.WdHeaderFooterIndex.wdHeaderFooterEvenPages].Range.Select();
                numChanged += MakeChanges(ref wordApp, ref wordDoc, searchText, replaceText, matchCase, matchWholeWord, matchWildcard, matchSoundsLike, matchAllWordForms, dummySave);
                wordSection.Headers[Word.WdHeaderFooterIndex.wdHeaderFooterFirstPage].Range.Select();
                numChanged += MakeChanges(ref wordApp, ref wordDoc, searchText, replaceText, matchCase, matchWholeWord, matchWildcard, matchSoundsLike, matchAllWordForms, dummySave);
                wordSection.Headers[Word.WdHeaderFooterIndex.wdHeaderFooterPrimary].Range.Select();
                numChanged += MakeChanges(ref wordApp, ref wordDoc, searchText, replaceText, matchCase, matchWholeWord, matchWildcard, matchSoundsLike, matchAllWordForms, dummySave);
 
                //Footers
                wordSection.Footers[Word.WdHeaderFooterIndex.wdHeaderFooterEvenPages].Range.Select();
                numChanged += MakeChanges(ref wordApp, ref wordDoc, searchText, replaceText, matchCase, matchWholeWord, matchWildcard, matchSoundsLike, matchAllWordForms, dummySave);
                wordSection.Footers[Word.WdHeaderFooterIndex.wdHeaderFooterFirstPage].Range.Select();
                numChanged += MakeChanges(ref wordApp, ref wordDoc, searchText, replaceText, matchCase, matchWholeWord, matchWildcard, matchSoundsLike, matchAllWordForms, dummySave);
                wordSection.Footers[Word.WdHeaderFooterIndex.wdHeaderFooterPrimary].Range.Select();
                numChanged += MakeChanges(ref wordApp, ref wordDoc, searchText, replaceText, matchCase, matchWholeWord, matchWildcard, matchSoundsLike, matchAllWordForms, dummySave);
 
                //Textboxes within Headers
                wordShapes = wordSection.Headers[Word.WdHeaderFooterIndex.wdHeaderFooterEvenPages].Shapes;
                numChanged += MakeChangesWithinShapes(ref wordApp, ref wordDoc, ref wordShapes, searchText, replaceText, matchCase, matchWholeWord, matchWildcard, matchSoundsLike, matchAllWordForms, dummySave);
                wordShapes = wordSection.Headers[Word.WdHeaderFooterIndex.wdHeaderFooterFirstPage].Shapes;
                numChanged += MakeChangesWithinShapes(ref wordApp, ref wordDoc, ref wordShapes, searchText, replaceText, matchCase, matchWholeWord, matchWildcard, matchSoundsLike, matchAllWordForms, dummySave);
                wordShapes = wordSection.Headers[Word.WdHeaderFooterIndex.wdHeaderFooterPrimary].Shapes;
                numChanged += MakeChangesWithinShapes(ref wordApp, ref wordDoc, ref wordShapes, searchText, replaceText, matchCase, matchWholeWord, matchWildcard, matchSoundsLike, matchAllWordForms, dummySave);
 
                //Textboxes within Footers
                wordShapes = wordSection.Footers[Word.WdHeaderFooterIndex.wdHeaderFooterEvenPages].Shapes;
                numChanged += MakeChangesWithinShapes(ref wordApp, ref wordDoc, ref wordShapes, searchText, replaceText, matchCase, matchWholeWord, matchWildcard, matchSoundsLike, matchAllWordForms, dummySave);
                wordShapes = wordSection.Footers[Word.WdHeaderFooterIndex.wdHeaderFooterFirstPage].Shapes;
                numChanged += MakeChangesWithinShapes(ref wordApp, ref wordDoc, ref wordShapes, searchText, replaceText, matchCase, matchWholeWord, matchWildcard, matchSoundsLike, matchAllWordForms, dummySave);
                wordShapes = wordSection.Footers[Word.WdHeaderFooterIndex.wdHeaderFooterPrimary].Shapes;
                numChanged += MakeChangesWithinShapes(ref wordApp, ref wordDoc, ref wordShapes, searchText, replaceText, matchCase, matchWholeWord, matchWildcard, matchSoundsLike, matchAllWordForms, dummySave);
            }
 
            if (!dummySave)
                SetTrackChangeStatus(wordDoc, trackChangeStatus);
 
            ((Word._Document)wordDoc).Close(!dummySave, objMissing, objMissing);
            wordDoc = null;
 
            //Log
            string message;
            if (numChanged > 0)
                message = "Matches " + (dummySave ? "intercepted" : "successfully replaced");
            else
                message = "No match found";
 
            return new SearchAndReplaceResult(wordFile.Name, numChanged, message);
        }
 
        public int MakeChangesWithinShapes(ref Word.Application wordApp, ref Word.Document wordDoc, ref Word.Shapes wordShapes,
            string searchText, string replaceText,
            bool matchCase, bool matchWholeWord, bool matchWildcard, bool matchSoundsLike, bool matchAllWordForms,
            bool dummySave)
        {
            int numChanged = 0;
            for (int i = 0; i < wordShapes.Count; i++)
            {
                object idx = i + 1;
                Word.Shape wordShape = wordShapes.get_Item(ref idx);
                if (wordShape.Type == Microsoft.Office.Core.MsoShapeType.msoTextBox)
                {
                    wordShape.TextFrame.TextRange.Select();
                    numChanged += MakeChanges(ref wordApp, ref wordDoc, searchText, replaceText, matchCase, matchWholeWord, matchWildcard, matchSoundsLike, matchAllWordForms, dummySave);
                }
            }
            return numChanged;
        }
 
        public int MakeChanges(ref Word.Application wordApp, ref Word.Document wordDoc,
            string searchText, string replaceText,
            bool matchCase, bool matchWholeWord, bool matchWildcard, bool matchSoundsLike, bool matchAllWordForms,
            bool dummySave)
        {
            //Make dummy change to get count first
            wordApp.Selection.WholeStory();
            int numCharsBefore = wordApp.Selection.Characters.Count;
            string tempReplaceText = "@" + searchText;
            wordApp.Selection.Find.Execute(searchText, matchCase,
                matchWholeWord, matchWildcard, matchSoundsLike, matchAllWordForms, objTrue, Word.WdFindWrap.wdFindContinue, objMissing,
                tempReplaceText, Word.WdReplace.wdReplaceAll,
                objMissing, objMissing, objMissing, objMissing);
            wordApp.Selection.WholeStory();
            int numCharsAfter = wordApp.Selection.Characters.Count;
            int numChanged = numCharsAfter - numCharsBefore;
 
            //Undo changes if dummy mode
            if (dummySave)
            {
                wordDoc.Undo();
                wordDoc.UndoClear();
            }
            //Make actual change if otherwise
            else
            {
                wordApp.Selection.Find.Execute(tempReplaceText, matchCase,
                    matchWholeWord, matchWildcard, matchSoundsLike, matchAllWordForms, objTrue, Word.WdFindWrap.wdFindContinue, objMissing,
                    replaceText, Word.WdReplace.wdReplaceAll,
                    objMissing, objMissing, objMissing, objMissing);
            }
 
            return numChanged;
        }
 
        public List<bool> GetTrackChangeStatus(Word.Document wordDoc)
        {
            List<bool> trackChangeStatus = new List<bool>();
            trackChangeStatus.Add(wordDoc.TrackMoves);
            trackChangeStatus.Add(wordDoc.TrackFormatting);
            trackChangeStatus.Add(wordDoc.TrackRevisions);
            return trackChangeStatus;
        }
 
        public void SetTrackChangeStatus(Word.Document wordDoc, List<bool> trackChangeStatus)
        {
            wordDoc.TrackMoves = trackChangeStatus[0];
            wordDoc.TrackFormatting = trackChangeStatus[1];
            wordDoc.TrackRevisions = trackChangeStatus[2];
        }
 
        public void SetTrackChangeStatus(Word.Document wordDoc, bool trackChangeStatus)
        {
            wordDoc.TrackMoves = trackChangeStatus;
            wordDoc.TrackFormatting = trackChangeStatus;
            wordDoc.TrackRevisions = trackChangeStatus;
        }
    }
}