C# .NET 使用 OpenXML 提取 PPT 中的音頻、視頻、圖片、文本


C# .NET 使用 OpenXML 提取 PPT 中的音頻、視頻、圖片、文本

 

名稱空間:

using System;
using DocumentFormat.OpenXml.Packaging;
using System.IO;
using System.Linq;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Presentation;
using A = DocumentFormat.OpenXml.Drawing;
using P14 = DocumentFormat.OpenXml.Office2010.PowerPoint;

  

代碼如下:

  

        /// <summary>
        /// Opens the presentation at <paramref name="path"/> read-only and, slide by slide,
        /// locates its media (audio/video), plain pictures and text bodies.
        /// </summary>
        /// <remarks>
        /// Demonstration routine: every value is read into a local so it can be inspected or
        /// forwarded by code added at the marked places; nothing is returned or written.
        /// Shapes that do not have the expected structure are skipped instead of throwing.
        /// </remarks>
        /// <param name="path">Path of the presentation file to inspect.</param>
        public void PptInfo(string path)
        {
            using (var doc = PresentationDocument.Open(path, false))
            {
                var presentationPart = doc.PresentationPart;
                var presentation = presentationPart?.Presentation;
                var slideIdList = presentation?.SlideIdList;
                if (slideIdList == null)
                {
                    return;
                }

                // Number of the first slide; falls back to 1 when the attribute is absent.
                // Resolved once, outside the loop, so the fallback cannot be swallowed by
                // operator precedence ("a ?? 1 + i" parses as "a ?? (1 + i)").
                int firstSlideNumber = presentation.FirstSlideNum?.Value ?? 1;
                int slideIndex = -1;

                // Elements<SlideId>() ignores any non-SlideId child instead of failing on a cast.
                foreach (SlideId slideId in slideIdList.Elements<SlideId>())
                {
                    // Incremented before any "continue" so skipped slides still occupy a position.
                    slideIndex++;

                    var relationshipId = slideId.RelationshipId?.Value;
                    if (relationshipId == null)
                    {
                        continue;
                    }

                    SlidePart slidePart = presentationPart.GetPartById(relationshipId) as SlidePart;
                    if (slidePart == null || slidePart.Slide == null)
                    {
                        continue;
                    }

                    // Slide number as displayed in PowerPoint.
                    var slideNumber = firstSlideNumber + slideIndex;

                    Slide slide = slidePart.Slide;
                    var shapeTree = slide.CommonSlideData?.ShapeTree;

                    // Audio timing nodes.
                    var audioList = slide.Descendants<Audio>();
                    // Video timing nodes.
                    var videoList = slide.Descendants<Video>();
                    // Pictures whose application properties are empty, i.e. not media placeholders.
                    var picList = shapeTree == null
                        ? Enumerable.Empty<Picture>()
                        : shapeTree.Descendants<Picture>()
                            .Where(o => o.NonVisualPictureProperties?.ApplicationNonVisualDrawingProperties?.Any() != true);
                    // Text bodies.
                    var txBodyList = shapeTree == null
                        ? Enumerable.Empty<TextBody>()
                        : shapeTree.Descendants<TextBody>();

                    // Extract media. Replacing videoList with audioList is sufficient to process
                    // audio: the file-link lookup below handles both kinds.
                    foreach (var media in videoList)
                    {
                        // Shape the media node is attached to.
                        var spTgt = media.CommonMediaNode?.TargetElement?.ShapeTarget;
                        var targetShapeId = spTgt?.ShapeId?.InnerText;
                        if (targetShapeId == null)
                        {
                            continue;
                        }

                        // Drawing properties of that shape (ids compared by their textual form).
                        var cNvPr = slide.Descendants<NonVisualDrawingProperties>()
                            .FirstOrDefault(o => o.Id?.InnerText == targetShapeId);

                        // Parent and grandparent; skip when the target is not a picture shape.
                        var nvPicPr = cNvPr?.Parent as NonVisualPictureProperties;
                        var pic = nvPicPr?.Parent as Picture;
                        if (pic == null)
                        {
                            continue;
                        }

                        // Shape information.
                        var shapeId = cNvPr.Id?.Value;
                        var shapeName = cNvPr.Name?.Value;
                        var shapeDescr = cNvPr.Description?.Value;

                        // Relationship id of the linked media file (video first, then audio).
                        var nvPr = nvPicPr.ApplicationNonVisualDrawingProperties;
                        var fileLink = nvPr?.Elements<A.VideoFromFile>().FirstOrDefault()?.Link?.Value
                                       ?? nvPr?.Elements<A.AudioFromFile>().FirstOrDefault()?.Link?.Value;

                        // External relationship first; a null fileLink simply matches nothing.
                        var externalRelationship = slidePart.ExternalRelationships.FirstOrDefault(o => o.Id == fileLink);
                        var uri = externalRelationship?.Uri;
                        if (uri == null || string.Equals(uri.OriginalString, "NULL", StringComparison.OrdinalIgnoreCase))
                        {
                            // No usable external target: fall back to the embedded media.
                            var media14 = nvPr?.Descendants<P14.Media>().FirstOrDefault();

                            // Trim information of the media.
                            var mediaStart = media14?.MediaTrim?.Start?.Value;
                            var mediaEnd = media14?.MediaTrim?.End?.Value;

                            // Internal (data part) relationship.
                            var embeddedId = media14?.Embed?.Value;
                            var dataPartReferenceRelationship = slidePart.DataPartReferenceRelationships
                                .FirstOrDefault(o => o.Id == embeddedId);
                            uri = dataPartReferenceRelationship?.Uri;
                            if (dataPartReferenceRelationship != null)
                            {
                                using (var mediaStream = dataPartReferenceRelationship.DataPart.GetStream())
                                {
                                    // Consume mediaStream here (e.g. copy it to a file); it is disposed on exit.
                                }
                            }
                        }

                        // Poster image attached to the media shape.
                        var posterEmbed = pic.BlipFill?.Blip?.Embed?.Value;
                        if (posterEmbed != null)
                        {
                            var part = slidePart.GetPartById(posterEmbed);
                            var imgUri = part.Uri;
                            using (var imgStream = part.GetStream())
                            {
                                // Consume imgStream here; it is disposed on exit.
                            }
                        }
                    }

                    // Extract pictures.
                    foreach (var pic in picList)
                    {
                        var cNvPr = pic.NonVisualPictureProperties?.NonVisualDrawingProperties;

                        // Shape information.
                        var shapeId = cNvPr?.Id?.Value;
                        var shapeName = cNvPr?.Name?.Value;
                        var shapeDescr = cNvPr?.Description?.Value;

                        // Embedded image; skipped when the blip carries no embed id.
                        var embed = pic.BlipFill?.Blip?.Embed?.Value;
                        if (embed == null)
                        {
                            continue;
                        }

                        var part = slidePart.GetPartById(embed);
                        var imgUri = part.Uri;
                        using (var imgStream = part.GetStream())
                        {
                            // Consume imgStream here; it is disposed on exit.
                        }
                    }

                    // Extract text.
                    foreach (var txBody in txBodyList)
                    {
                        // Owning shape and its drawing properties; skip anything not hosted by a Shape.
                        var sp = txBody.Parent as Shape;
                        var cNvPr = sp?.NonVisualShapeProperties?.NonVisualDrawingProperties;
                        if (cNvPr == null)
                        {
                            continue;
                        }

                        // Shape information.
                        var shapeId = cNvPr.Id?.Value;
                        var shapeName = cNvPr.Name?.Value;

                        // Option 1: all inner text of the body.
                        var innerText = txBody.InnerText;
                        // Option 2: concatenate the text of every run.
                        var runText = string.Concat(txBody.Descendants<A.Text>().Select(o => o.Text));
                        // Option 3: one line per paragraph.
                        var paragraphText = string.Join(
                            Environment.NewLine,
                            txBody.Descendants<A.Paragraph>().Select(o => o.InnerText));
                    }
                }
            }
        }

  

  

 

ppt文檔的形狀結構大概為:

 

 

 

 

完畢

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM