How to replace text in a Word file using Apache POI

Over the last couple of days, I ran into an issue replacing text in a Microsoft Word file using the Apache POI library, version 3.15.

The existing solution I followed to replace the text is shown below:

package org.kodejava.example.poi;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Section;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URL;

/**
 * Command-line example that loads a legacy {@code .doc} file from the classpath, replaces text in it
 * with Apache POI's HWPF API and writes the result to a new file.
 */
public class WordReplaceText {
    public static final String SOURCE_FILE = "lipsum.doc";
    public static final String OUTPUT_FILE = "new-lipsum.doc";

    /**
     * Opens {@link #SOURCE_FILE}, replaces every {@code "o"} with {@code "0"} and saves the result
     * to {@link #OUTPUT_FILE}.
     *
     * @param args unused
     * @throws Exception if the source document cannot be opened
     */
    public static void main(String[] args) throws Exception {
        WordReplaceText instance = new WordReplaceText();
        HWPFDocument doc = instance.openDocument(SOURCE_FILE);
        if (doc == null) {
            // Report the missing resource instead of exiting without any output.
            System.err.println("Resource not found on classpath: " + SOURCE_FILE);
            return;
        }
        doc = instance.replaceText(doc, "o", "0");
        instance.saveDocument(doc, OUTPUT_FILE);
    }

    /**
     * Replaces {@code findText} with {@code replaceText} in every character run of the document.
     * Each run is inspected on its own, so an occurrence of {@code findText} that is split across
     * two character runs is not matched by this method.
     *
     * @param doc         document to modify in place
     * @param findText    text to look for
     * @param replaceText text to insert instead
     * @return the same {@code doc} instance that was passed in
     */
    private HWPFDocument replaceText(HWPFDocument doc, String findText, String replaceText) {
        Range r = doc.getRange();
        for (int i = 0; i < r.numSections(); ++i) {
            Section s = r.getSection(i);
            for (int j = 0; j < s.numParagraphs(); j++) {
                Paragraph p = s.getParagraph(j);
                for (int k = 0; k < p.numCharacterRuns(); k++) {
                    CharacterRun run = p.getCharacterRun(k);
                    String text = run.text();
                    if (text.contains(findText)) {
                        run.replaceText(findText, replaceText);
                    }
                }
            }
        }
        return doc;
    }

    /**
     * Opens a {@code .doc} file that is available as a classpath resource.
     *
     * @param file resource name, resolved through this class's class loader
     * @return the opened document, or {@code null} when the resource does not exist
     * @throws Exception if the resource is not a plain file on disk or cannot be read as a document
     */
    private HWPFDocument openDocument(String file) throws Exception {
        URL res = getClass().getClassLoader().getResource(file);
        if (res == null) {
            return null;
        }
        // URL#getPath() keeps percent-escapes (e.g. %20 for a space), so the path it yields may not
        // exist on disk. Going through a URI decodes them before the File is created.
        return new HWPFDocument(new POIFSFileSystem(new File(res.toURI())));
    }

    /**
     * Writes the document to {@code file}. An I/O failure is printed to standard error and not
     * propagated to the caller.
     *
     * @param doc  document to write
     * @param file path of the output file
     */
    private void saveDocument(HWPFDocument doc, String file) {
        try (FileOutputStream out = new FileOutputStream(file)) {
            doc.write(out);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

The problem is that when the document contains several occurrences of a “key”, in paragraphs or in tables, the function above cannot replace all of them: some keys are left untouched. I found another solution on GitHub, submitted by deividasstr, which solved my issue. Note that it works on .docx files (the XWPF API) rather than on .doc files. You can explore it at: https://github.com/deividasstr/docx-word-replacer. These are the main Java files:

  • OnWordFoundCallback.java
  • TextReplacer.java
  • WordFinder.java
  • WordReplacer.java
package com.xandryex.utils;

import org.apache.poi.xwpf.usermodel.XWPFRun;

import java.util.List;

/**
 * Callbacks through which {@code WordFinder} reports where the searched bookmark was found.
 */
interface OnWordFoundCallback {

    /**
     * Called when the text of a single run contains the bookmark.
     *
     * @param run the run whose text contains the bookmark
     */
    void onWordFoundInRun(XWPFRun run);

    /**
     * Called when the bookmark is not contained in the current run but is contained in the
     * concatenated text of the previous, current and next runs.
     *
     * @param runs       runs of the paragraph being scanned
     * @param currentRun index of the current (middle) run within {@code runs}
     */
    void onWordFoundInPreviousCurrentNextRun(List<XWPFRun> runs, int currentRun);
}
package com.xandryex.utils;

import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFRun;

import java.util.List;
import java.util.regex.Pattern;

/**
 * {@link WordFinder} that replaces every bookmark it is notified about with a replacement string.
 * Both the bookmark and the replacement are treated as literal text.
 */
public class TextReplacer extends WordFinder {

    private static final int DEFAULT_TEXT_POS = 0;

    private String replacement;
    private String bookmark;

    /**
     * Replaces the bookmark in the paragraphs the document returns from {@code getParagraphs()}.
     *
     * @param document    document to modify in place
     * @param bookmark    literal text to search for
     * @param replacement literal text to insert instead
     */
    public void replaceInText(XWPFDocument document, String bookmark, String replacement) {
        this.replacement = replacement;
        this.bookmark = bookmark;
        findWordsInText(document, bookmark);
    }

    /**
     * Replaces the bookmark in the paragraphs of the cells of the document's tables.
     *
     * @param document    document to modify in place
     * @param bookmark    literal text to search for
     * @param replacement literal text to insert instead
     */
    public void replaceInTable(XWPFDocument document, String bookmark, String replacement) {
        this.replacement = replacement;
        this.bookmark = bookmark;
        findWordsInTable(document, bookmark);
    }

    @Override
    public void onWordFoundInRun(XWPFRun run) {
        replaceWordInRun(run);
    }

    @Override
    public void onWordFoundInPreviousCurrentNextRun(List<XWPFRun> runs, int currentRun) {
        replaceWordInPreviousCurrentNextRuns(runs, currentRun);
    }

    /**
     * Replaces the first occurrence of the bookmark in the concatenated text of the previous, current
     * and next runs. Only the characters that belong to the bookmark are removed from each run; the
     * replacement is written into the first run that held part of the bookmark. Text before and after
     * the bookmark is kept, including other occurrences of bookmark fragments in the same runs.
     */
    private void replaceWordInPreviousCurrentNextRuns(List<XWPFRun> runs, int currentRun) {
        XWPFRun[] window = {runs.get(currentRun - 1), runs.get(currentRun), runs.get(currentRun + 1)};
        String[] texts = new String[window.length];
        StringBuilder combined = new StringBuilder();
        for (int i = 0; i < window.length; i++) {
            String text = window[i].getText(DEFAULT_TEXT_POS);
            // A run without text contributes nothing and therefore never overlaps the match.
            texts[i] = (text == null) ? "" : text;
            combined.append(texts[i]);
        }

        int matchStart = combined.indexOf(bookmark);
        if (matchStart < 0) {
            return;
        }
        int matchEnd = matchStart + bookmark.length();

        boolean replacementWritten = false;
        int runStart = 0;
        for (int i = 0; i < window.length; i++) {
            int runEnd = runStart + texts[i].length();
            // Part of the match that falls inside this run, in combined-text offsets.
            int from = Math.max(matchStart, runStart);
            int to = Math.min(matchEnd, runEnd);
            if (from < to) {
                String before = texts[i].substring(0, from - runStart);
                String after = texts[i].substring(to - runStart);
                String inserted = replacementWritten ? "" : replacement;
                replacementWritten = true;
                window[i].setText(before + inserted + after, DEFAULT_TEXT_POS);
            }
            runStart = runEnd;
        }
    }

    /**
     * Replaces every occurrence of the bookmark inside a single run.
     * String.replace(CharSequence, CharSequence) takes both arguments literally, so neither regex
     * metacharacters in the bookmark nor '$' or '\' in the replacement need escaping.
     */
    private void replaceWordInRun(XWPFRun run) {
        String replacedText = run.getText(DEFAULT_TEXT_POS).replace(bookmark, replacement);
        run.setText(replacedText, DEFAULT_TEXT_POS);
    }
}
package com.xandryex.utils;

import org.apache.poi.xwpf.usermodel.*;

import java.util.*;

/**
 * Scans the paragraphs of an XWPFDocument for a bookmark string and reports each hit through the
 * {@link OnWordFoundCallback} methods, which subclasses implement.
 *
 * A hit is reported either when one run contains the bookmark, or when the run before it, the run
 * itself and the run after it contain the bookmark once their texts are joined. The second case is
 * only considered for a run that is not the first run of the paragraph and that is followed by a run
 * with non-empty text.
 */
abstract class WordFinder implements OnWordFoundCallback {

    private static final int DEFAULT_POS = 0;
    private static final int NO_RUN_USED = -1;

    private String bookmark;

    /**
     * Searches the tables of the document for the given word. Within every paragraph of every cell,
     * each run is checked on its own and together with its two neighbouring runs. Text that is
     * scattered over separate paragraphs is not found.
     *
     * @param doc  XWPFDocument
     * @param word to be searched
     */
    void findWordsInTable(XWPFDocument doc, String word) {
        this.bookmark = word;
        for (XWPFTable table : doc.getTables()) {
            scanTable(table);
        }
    }

    /**
     * Searches the paragraphs of the document for the given word. Within every paragraph, each run
     * is checked on its own and together with its two neighbouring runs. Text that is scattered over
     * separate paragraphs is not found.
     *
     * @param doc  XWPFDocument
     * @param word to be searched
     */
    void findWordsInText(XWPFDocument doc, String word) {
        this.bookmark = word;
        scanParagraphs(doc.getParagraphs());
    }

    /** Walks rows and cells of one table and scans the paragraphs of each cell. */
    private void scanTable(XWPFTable table) {
        List<XWPFTableRow> rows = table.getRows();
        if (rows == null) {
            return;
        }
        for (XWPFTableRow row : rows) {
            List<XWPFTableCell> cells = row.getTableCells();
            if (cells == null) {
                continue;
            }
            for (XWPFTableCell cell : cells) {
                List<XWPFParagraph> paragraphs = cell.getParagraphs();
                if (paragraphs != null) {
                    scanParagraphs(paragraphs);
                }
            }
        }
    }

    /** Scans every non-null paragraph that has at least one run. */
    private void scanParagraphs(List<XWPFParagraph> paragraphs) {
        for (XWPFParagraph paragraph : paragraphs) {
            if (paragraph != null && !paragraph.getRuns().isEmpty()) {
                scanRuns(paragraph);
            }
        }
    }

    /** Checks each run of the paragraph and fires the matching callback. */
    private void scanRuns(XWPFParagraph paragraph) {
        List<XWPFRun> runs = paragraph.getRuns();
        int lastSingleRunHit = NO_RUN_USED;
        for (int index = 0; index < runs.size(); index++) {
            XWPFRun run = runs.get(index);
            String text = (run == null) ? null : run.getText(DEFAULT_POS);
            if (text == null) {
                continue;
            }
            //System.out.println(index + " " + text);  //Uncomment for printing the runs
            if (text.contains(bookmark)) {
                onWordFoundInRun(run);
                lastSingleRunHit = index;
                continue;
            }
            // The next run is skipped here when it holds the whole bookmark itself; it is then
            // reported as a single-run hit on its own iteration.
            if (followingRunHasText(runs, index)
                    && !textAt(runs, index + 1).contains(bookmark)
                    && bookmarkSpansNeighbours(runs, lastSingleRunHit, index)) {
                onWordFoundInPreviousCurrentNextRun(runs, index);
            }
        }
    }

    /**
     * Tells whether the joined text of the runs at index - 1, index and index + 1 contains the
     * bookmark, provided a previous run exists, has text, and was not the last single-run hit.
     */
    private boolean bookmarkSpansNeighbours(List<XWPFRun> runs, int lastSingleRunHit, int index) {
        if (index <= 0) {
            return false;
        }
        if (!isNonEmpty(textAt(runs, index - 1))) {
            return false;
        }
        if (lastSingleRunHit == index - 1) {
            return false;
        }
        String joined = textAt(runs, index - 1) + textAt(runs, index) + textAt(runs, index + 1);
        return joined.contains(bookmark);
    }

    /** True when a run exists after the given index and its text is neither null nor empty. */
    private boolean followingRunHasText(List<XWPFRun> runs, int index) {
        return index + 1 < runs.size() && isNonEmpty(textAt(runs, index + 1));
    }

    private String textAt(List<XWPFRun> runs, int index) {
        return runs.get(index).getText(DEFAULT_POS);
    }

    private boolean isNonEmpty(String text) {
        return text != null && !text.isEmpty();
    }
}
package com.xandryex;

import org.apache.poi.xwpf.usermodel.XWPFDocument;
import com.xandryex.utils.TextReplacer;

import java.io.*;

public class WordReplacer {

    private XWPFDocument document;
    private TextReplacer replacer;

    /**
     * Creates WordReplacer with file to modify.
     *
     * @param docxFile file of type docx.
     * @throws IOException thrown if file is not found or is not required type.
     */
    public WordReplacer(File docxFile) throws IOException {
        InputStream inputStream = new FileInputStream(docxFile);
        init(new XWPFDocument(inputStream));
    }

    /**
     * Creates WordReplacer with XWPFDocument to modify.
     * @param xwpfDoc to modify.
     */
    public WordReplacer(XWPFDocument xwpfDoc) {
        init(xwpfDoc);
    }

    private void init(XWPFDocument xwpfDoc) {
        if (xwpfDoc == null) throw new NullPointerException();
        document = xwpfDoc;
        replacer = new TextReplacer();
    }

    /**
     * Replaces all occurrences of a bookmark only in the text of the file with a replacement string.
     * @param bookmark word to replace.
     * @param replacement word of replacement.
     */
    public void replaceWordsInText(String bookmark, String replacement) {
        replacer.replaceInText(document, bookmark, replacement);
    }

    /**
     * Replaces all occurrences of a bookmark only in tables of the file with a replacement string.
     * @param bookmark word to replace.
     * @param replacement word of replacement.
     */
    public void replaceWordsInTables(String bookmark, String replacement) {
        replacer.replaceInTable(document, bookmark, replacement);
    }

    /**
     * Most of the time we want our template files untouched. Creates file from path, saves the modified document to it and returns it.
     * @param path filepath (dirs + filename).
     * @return modified file.
     * @throws Exception thrown if some issues while saving occur - mostly due to unavailable file or permissions.
     */
    public File saveAndGetModdedFile(String path) throws Exception {
        File file = new File(path);
        return saveToFile(file);
    }

    /**
     * Most of the time we want our template files untouched. Saves the modified document to the given file and returns it.
     * @param file to save to.
     * @return modified file.
     * @throws Exception thrown if some issues while saving occur - mostly due to unavailable file or permissions.
     */
    public File saveAndGetModdedFile(File file) throws Exception {
        return saveToFile(file);
    }

    public XWPFDocument getModdedXWPFDoc() {
        return document;
    }

    private File saveToFile(File file) throws Exception {
        FileOutputStream out = null;
        try {
            out = new FileOutputStream(file, false);
            document.write(out);
            document.close();
            return file;
        } catch (Exception e) {
            throw e;
        } finally {
            if (out != null) {
                out.flush();
                out.close();
            }
        }
    }
}

Usage:

// Replacement strings; REPLACED_WORD is the one passed to the replacer calls at the end of this snippet.
public static final String REPLACED_WORD = "pogo";
public static final String REPLACED_WORD_2 = "stick";

public static final String TEXT_WITHOUT_BOOKMARK = "Something here";

// Objects used by the example; WordCounter and WordCounterTest are not listed in this article.
private static File docxFile;
private static TextReplacer replacer;
private static WordCounter wordCounter;
// NOTE(review): the assignment of document is elided below - presumably loaded from docxFile; confirm.
private XWPFDocument document;

......
// Set-up: point at the .docx file and create the helpers.
docxFile = new File("./src/test/resources/docxfile.docx");
replacer = new TextReplacer();
wordCounter = new WordCounter();

......
// Replace the bookmark first in the document's text, then in its tables.
replacer.replaceInText(document, WordCounterTest.TEST_DOC_TEST_CASE, REPLACED_WORD);
replacer.replaceInTable(document, WordCounterTest.TEST_DOC_TEST_CASE, REPLACED_WORD);

Many thanks to deividasstr.

Leave a comment

Email của bạn sẽ không được hiển thị công khai. Các trường bắt buộc được đánh dấu *