Convert Word documents to different formats
0
Convert-Word.ps1
function Convert-Word {
    <#
    .SYNOPSIS
        Converts Word Documents
    .DESCRIPTION
        Converts Word Documents (`.doc`, `.docx`, `.rtf`) to a few formats:

        * `docx`
        * `doct`
        * `html`
        * `pdf`
        * `rtf`
        * `txt`
        * `xml`
        * `xps`

        Each converted file is written next to its source, using the same base
        name and the target format as the extension.  A file is never converted
        onto itself.
    .NOTES
        This uses the Word COM Object Model for conversion.
        This requires Word to be installed.

        Some documents may not open due to trust settings.

        Word is started only after all input has been collected, and only when
        there is at least one document to convert.  It is always asked to quit
        afterwards, even if the conversion is interrupted by an error.
    .EXAMPLE
        Get-ChildItem ./OldWord/ -File |
            Where-Object Extension -in '.rtf', '.doc' |
            Convert-Word
    .EXAMPLE
        Convert-Word -FilePath ./Report.docx -To pdf
    .OUTPUTS
        The item (as returned by Get-Item) for each file that was written.
    #>
    param(
    # The file path containing word documents
    [Parameter(Mandatory,ValueFromPipelineByPropertyName)]
    [Alias('Fullname')]
    [string]
    $FilePath,

    # The output document type.
    # The aliases are retained for backward compatibility; the ValidateSet is
    # what actually restricts the accepted values.
    [ValidateSet('docx', 'doct', 'html', 'pdf', 'rtf', 'txt', 'xml', 'xps')]
    [Alias('docx', 'doct', 'html', 'pdf', 'rtf', 'xml', 'xps')]
    [string]
    $To = 'docx'
    )

    begin {
        # Create a queue to hold our files.
        # Word itself is not started here: if the pipeline fails before `end`
        # runs, nothing would be left to shut it down again.
        $convertQueue = [Collections.Queue]::new()
    }

    process {
        # Pipeline input arrives as a literal full name, which may contain
        # wildcard characters such as '[' and ']'.  Prefer the literal path when
        # it exists and only fall back to wildcard expansion when it does not.
        $resolvedItems =
            if (Test-Path -LiteralPath $FilePath) {
                Get-Item -LiteralPath $FilePath
            } else {
                Get-Item -Path $FilePath
            }

        # Get all of the files
        foreach ($fileInfo in $resolvedItems) {
            # check their extension
            if ($fileInfo.Extension -notin '.rtf', '.doc','.docx') {
                # and warn if it's not a file we can convert.
                Write-Warning "$($fileInfo.FullName) is not a word document"
                continue
            }
            # Otherwise, add it to the queue.
            $convertQueue.Enqueue($fileInfo)
        }
    }

    end {
        # Turn our queue into an array
        $queue = $convertQueue.ToArray()

        # Nothing to convert: do not start Word at all.
        if (-not $queue.Count) { return }

        # We have to use an old enum to convert to different file formats
        # [See This Reference](https://learn.microsoft.com/en-us/office/vba/api/word.wdsaveformat)
        # Currently not allowing anywhere near the whole list, for the sake of sanity and security.
        $saveFormats = @{
            docx = 16 # wdFormatDocumentDefault
            doct = 14 # wdFormatXMLTemplate
            html = 8  # wdFormatHTML
            pdf  = 17 # wdFormatPDF
            rtf  = 6  # wdFormatRTF
            txt  = 5  # wdFormatDOSTextLineBreaks
            xml  = 11 # wdFormatXML
            xps  = 18 # wdFormatXPS
        }
        [int]$saveFormat = $saveFormats[$To]

        # Try to open word
        $wordApp = $null
        try {
            $wordApp = New-Object -ComObject Word.Application
        } catch {
            throw "Word not installed"
        }
        if (-not $wordApp) { throw "Word not installed"}

        # and prepare our progress bars
        $progress = @{id=Get-Random;Activity='Converting Word'}

        try {
            # Go over each document
            for ($docNumber = 0 ; $docNumber -lt $queue.Count; $docNumber++) {
                # get the file
                $fileInfo = $queue[$docNumber]
                # adjust our progress bar
                $progress.PercentComplete = $docNumber / $queue.Count * 100
                $progress.Status = $fileInfo.Fullname

                # and write progress.
                Write-Progress @progress

                # Figure out where the file is going to go.
                $destination =
                    $fileInfo.FullName.Substring(
                        0, $fileInfo.FullName.Length - $fileInfo.Extension.Length
                    ) + ".$($To.ToLower())"

                # Warn if the files are the same
                if ($destination -eq $fileInfo.FullName) {
                    Write-Warning "Will not overwrite $($fileInfo.Fullname)"
                    continue
                }

                $openedFile = $null
                try {
                    # Try to open the file
                    $openedFile = $wordApp.Documents.Open($fileInfo.FullName)
                } catch {
                    # and error out and continue if we cannot
                    Write-Error "Could not open '$($fileInfo.Fullname)': $_"
                    continue
                }

                $saved = $false
                try {
                    $openedFile.SaveAs($destination, [ref]$saveFormat)
                    $saved = $true
                } catch {
                    # Report the failed save instead of falling through to a
                    # misleading "path not found" error from Get-Item.
                    Write-Error "Could not save '$destination': $_"
                } finally {
                    # Close the file we've opened, whether or not the save worked.
                    $openedFile.Close()
                }

                # and output the file we've exported.
                if ($saved) {
                    Get-Item -LiteralPath $destination
                }
            }
        } finally {
            # Make sure we complete our progress bar so it doesn't leave something on the screen
            $progress.Remove('PercentComplete')
            $progress.Completed = $true
            Write-Progress @progress

            # and quit word, so we don't have an open `winword.exe` hanging around.
            # This sits in `finally` so it also runs when a conversion fails
            # with a terminating error.
            $wordApp.Quit()
        }
    }
}