''' <summary>
''' Handles the Run button click: extracts the plain text of the sample Word
''' document and writes it to a text file, then reports the outcome in a
''' message box.
''' </summary>
''' <param name="sender">The control that raised the event.</param>
''' <param name="e">Event data for the click.</param>
Private Sub btnRun_Click(sender As Object, e As RoutedEventArgs) Handles btnRun.Click
    ' Sample input and output locations used by this demo.
    Const sourcePath As String = "C:\sample-file.docx"
    Const targetPath As String = "C:\Sample.txt"

    Try
        Dim extractor As New ExtractText
        Dim documentText As String = extractor.ReadWordDocument(sourcePath)

        Using writer As New System.IO.StreamWriter(targetPath)
            writer.Write(documentText)
        End Using

        ' Success is reported with the information icon; the exclamation
        ' (warning) icon would wrongly suggest that something went wrong.
        MessageBox.Show("Done!", "Result", MessageBoxButton.OK, MessageBoxImage.Information)
    Catch ex As Exception
        ' Top-level UI handler: surface any failure to the user instead of
        ' letting it crash the application.
        MessageBox.Show(ex.Message, "Error!", MessageBoxButton.OK, MessageBoxImage.Error)
    End Try
End Sub
------------------------------------------------------------------------------------
Before building the sample, please make sure you have installed the Open XML SDK 2.5.
Imports System.Text
Imports DocumentFormat.OpenXml
Imports DocumentFormat.OpenXml.Packaging
''' <summary>
''' Extracts the plain text of a Word (.docx) document using the Open XML SDK.
''' </summary>
Public Class ExtractText

    ''' <summary>
    ''' Reads the body of a Word document and returns it as plain text.
    ''' </summary>
    ''' <param name="filePath">Path of the Word document to read.</param>
    ''' <returns>
    ''' The plain text of the document body, or an empty string when the
    ''' package has no main document part, no document, or no body.
    ''' </returns>
    Public Function ReadWordDocument(filePath As String) As String
        ' The document is only read, so open it read-only (isEditable:=False).
        ' Opening it as editable would demand write access to the file and
        ' risks modifying it.
        Using wpd As WordprocessingDocument = WordprocessingDocument.Open(filePath, False)
            Dim mainPart As MainDocumentPart = wpd.MainDocumentPart
            If mainPart Is Nothing OrElse mainPart.Document Is Nothing Then
                Return String.Empty
            End If

            ' GetPlainText returns an empty string when Body is Nothing.
            Return GetPlainText(mainPart.Document.Body)
        End Using
    End Function

    ''' <summary>
    ''' Recursively collects the plain text contained in the child elements
    ''' of the given Open XML element.
    ''' </summary>
    ''' <param name="element">Element whose descendants are converted to text.</param>
    ''' <returns>
    ''' The plain text found below <paramref name="element"/>, or an empty
    ''' string when <paramref name="element"/> is Nothing.
    ''' </returns>
    Public Function GetPlainText(element As OpenXmlElement) As String
        Dim plainText As New StringBuilder()
        AppendPlainText(element, plainText)
        Return plainText.ToString()
    End Function

    ''' <summary>
    ''' Walks the children of <paramref name="element"/> and appends their text
    ''' to <paramref name="output"/>. A single builder is shared across the
    ''' whole recursion so no intermediate strings are created per element.
    ''' </summary>
    Private Shared Sub AppendPlainText(element As OpenXmlElement, output As StringBuilder)
        If element Is Nothing Then Return

        For Each child As OpenXmlElement In element.Elements()
            Select Case child.LocalName
                Case "t"
                    ' Text run content.
                    output.Append(child.InnerText)
                Case "cr", "br"
                    ' Carriage return or break (including page breaks).
                    output.Append(Environment.NewLine)
                Case "tab"
                    output.Append(vbTab)
                Case "p"
                    ' Paragraph: emit its content, then two line terminators
                    ' (AppendLine adds one more after the explicit NewLine),
                    ' leaving a blank line between paragraphs.
                    AppendPlainText(child, output)
                    output.AppendLine(Environment.NewLine)
                Case Else
                    ' Any other container (runs, tables, ...): descend into it.
                    AppendPlainText(child, output)
            End Select
        Next
    End Sub
End Class