|  | 
function Get-OpenXML
{
    <#
    .SYNOPSIS
        Gets Open Office XML files (Excel, PowerPoint, and Word)
    .DESCRIPTION
        Gets Open Office XML files (Excel, PowerPoint, and Word) as a structured object.

        The object contains the file path, parts, and relationships of the OpenXML document.

        This cmdlet can be used to read the contents of .docx, .pptx, .xps, .xlsx files
        (or any files that are readable with [`IO.Packaging.Package`](https://learn.microsoft.com/en-us/dotnet/api/system.io.packaging.package?wt.mc_id=MVP_321542))

        When no file path is provided, a new empty package backed by an in-memory stream is returned.
    .EXAMPLE
        # Get an OpenXML document
        Get-OpenXML -FilePath './Sample.docx'
    .EXAMPLE
        # Create a new, empty in-memory package
        Get-OpenXML
    .OUTPUTS
        System.IO.Packaging.Package

        The package is decorated with the `OpenXML` type name and a `MemoryStream` note property.
        Packages read from a file are also decorated with `OpenXML.File`, a `FilePath` note property,
        and (when matching parts are found) a format-specific type name such as `OpenXML.Word.File`.
    .NOTES
        The package is opened over an in-memory copy of the file, so changes made to the
        returned package are not written back to the original file by this command.
    #>
    [CmdletBinding()]
    [Alias('OpenXML')]
    param(
    # The path to the OpenXML file to read.
    # If this is not provided, an empty package is created in memory.
    [Parameter(ValueFromPipelineByPropertyName=$true)]
    [Alias('Fullname')]
    [string]
    $FilePath
    )

    begin {
        # Reads one file path from the pipeline and outputs the decorated package.
        filter openXMLFromFile {
            $packagePath = $_

            # By reading the file with Get-Content -AsByteStream, we avoid locking the file
            # (or the file being locked by another process)
            [byte[]]$packageBytes = Get-Content -Path $packagePath -AsByteStream -Raw

            # If there were no bytes, return
            if (-not $packageBytes) { return }

            # Copy the bytes into an expandable memory stream.
            # A MemoryStream constructed directly from a byte array cannot grow,
            # which would make a package opened as ReadWrite fail as soon as an edit
            # needed more room than the original file occupied.
            $memoryStream = [IO.MemoryStream]::new()
            $memoryStream.Write($packageBytes, 0, $packageBytes.Length)
            $memoryStream.Position = 0

            # Open the package from the memory stream
            $filePackage = [IO.Packaging.Package]::Open($memoryStream, "Open", "ReadWrite")
            # If that did not work, return.
            if (-not $filePackage) { return }

            # Decorate the package, most specific type name first.
            $filePackage.pstypenames.insert(0,'OpenXML')
            $filePackage.pstypenames.insert(0,'OpenXML.File')
            $openXMLObject = $filePackage |
                Add-Member NoteProperty FilePath $packagePath -Force -PassThru |
                Add-Member NoteProperty MemoryStream $memoryStream -Force -PassThru

            # Now we can get more specific about what type of OpenXML file this is.
            # By looking for certain key parts, we can determine if this is a
            # PowerPoint, Excel, Word, or XPS file.
            # The part URIs are read once and reused for every check.
            $partUris = @($filePackage.Parts.Keys)

            # The table is ordered: each matching type name is inserted at position 0,
            # so a later match takes precedence over an earlier one.
            $typeNameByPartPattern = [Ordered]@{
                '/ppt/'       = 'OpenXML.PowerPoint.File'
                '/xl/'        = 'OpenXML.Excel.File'
                '/word/'      = 'OpenXML.Word.File'
                '/Documents/' = 'OpenXML.XPS.File'
            }

            foreach ($partPattern in $typeNameByPartPattern.Keys) {
                if ($partUris -match $partPattern) {
                    $openXMLObject.pstypenames.insert(0, $typeNameByPartPattern[$partPattern])
                }
            }

            # Now we output our openXML object
            $openXMLObject
        }
    }

    process {
        if ($FilePath) {
            # Turn the path into a provider path (this does not require the file to exist)
            $resolvedPath = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath($FilePath)
            # If we could not resolve the path, exit
            if (-not $resolvedPath ) { return }

            $resolvedPath | openXMLFromFile
        } else {
            # No path was provided: create an empty package over a new (expandable) memory stream.
            $memoryStream = [IO.MemoryStream]::new()
            $EmptyPackage = [io.packaging.package]::Open($memoryStream ,'Create')
            $EmptyPackage | Add-Member NoteProperty -Name MemoryStream -Value $memoryStream -Force
            $EmptyPackage.pstypenames.insert(0, 'OpenXML')
            $EmptyPackage
        }
    }
}
0 commit comments