C# .NET 使用 Open XML SDK 提取 PPT 中的音頻、視頻、圖片、文本
需要引用的命名空間:
using System; using DocumentFormat.OpenXml.Packaging; using System.IO; using System.Linq; using DocumentFormat.OpenXml; using DocumentFormat.OpenXml.Presentation; using A = DocumentFormat.OpenXml.Drawing; using P14 = DocumentFormat.OpenXml.Office2010.PowerPoint;
代碼如下:
/// <summary>
/// Walks every slide of a presentation and locates its video media, its plain
/// pictures and its text bodies, showing how to reach each item's shape
/// metadata and backing package part through the Open XML SDK.
/// </summary>
/// <param name="path">Path of the presentation file; it is opened read-only.</param>
/// <exception cref="ArgumentNullException"><paramref name="path"/> is null.</exception>
/// <remarks>
/// This is sample code: every extracted value is read into a local and then
/// discarded. Replace the marked spots with whatever should consume the data.
/// </remarks>
public void PptInfo(string path)
{
    if (path == null)
    {
        throw new ArgumentNullException(nameof(path));
    }

    using (var doc = PresentationDocument.Open(path, false))
    {
        var presentationPart = doc.PresentationPart;
        var presentation = presentationPart?.Presentation;
        var slideIdList = presentation?.SlideIdList;
        if (slideIdList == null)
        {
            // Without a slide id list there is nothing to walk.
            return;
        }

        // Number shown for the first slide; 1 when the attribute is absent.
        int firstSlideNumber = presentation.FirstSlideNum?.Value ?? 1;

        // Position of the current slide id inside the list. It is advanced for
        // every id (even skipped ones) so displayed numbers stay aligned.
        int slideIndex = -1;

        foreach (SlideId slideId in slideIdList.Elements<SlideId>())
        {
            slideIndex++;

            string slideRelationshipId = slideId.RelationshipId?.Value;
            if (string.IsNullOrEmpty(slideRelationshipId))
            {
                continue;
            }

            var slidePart = presentationPart.GetPartById(slideRelationshipId) as SlidePart;
            if (slidePart == null || slidePart.Slide == null)
            {
                continue;
            }

            // Slide number as displayed in PowerPoint. The default must be
            // applied BEFORE adding the index: "a ?? 1 + i" parses as
            // "a ?? (1 + i)" because + binds tighter than ??.
            int slideNumber = firstSlideNumber + slideIndex;

            Slide slide = slidePart.Slide;
            var shapeTree = slide.CommonSlideData?.ShapeTree;
            if (shapeTree == null)
            {
                continue;
            }

            // Audio timing nodes.
            var audioList = slide.Descendants<Audio>();
            // Video timing nodes.
            var videoList = slide.Descendants<Video>();
            // Pictures whose application properties have no child elements;
            // media posters carry their audio/video reference there and are
            // handled by the media loop instead.
            var picList = shapeTree.Descendants<Picture>()
                .Where(o => o.NonVisualPictureProperties?.ApplicationNonVisualDrawingProperties?.Any() != true);
            // Text bodies.
            var txBodyList = shapeTree.Descendants<TextBody>();

            // Extract video. For audio, iterate audioList and look up
            // A.AudioFromFile instead of A.VideoFromFile.
            foreach (var media in videoList)
            {
                // Shape the media node is attached to.
                var spTgt = media.CommonMediaNode?.TargetElement?.ShapeTarget;
                string targetShapeId = spTgt?.ShapeId?.Value;
                if (targetShapeId == null)
                {
                    continue;
                }

                // Drawing properties of that shape. The shape id is numeric on
                // the shape and textual on the target, so compare as text.
                var cNvPr = slide.Descendants<NonVisualDrawingProperties>()
                    .FirstOrDefault(o => o.Id != null && o.Id.InnerText == targetShapeId);

                // Parent and grandparent; anything that is not a picture is skipped.
                var nvPicPr = cNvPr?.Parent as NonVisualPictureProperties;
                var mediaPicture = nvPicPr?.Parent as Picture;
                if (mediaPicture == null)
                {
                    continue;
                }

                // Shape information.
                var shapeId = cNvPr.Id?.Value;
                var shapeName = cNvPr.Name?.Value;
                var shapeDescr = cNvPr.Description?.Value;

                var appProps = nvPicPr.ApplicationNonVisualDrawingProperties;

                // Reference to the video file (absent when the media is audio).
                var videoFile = appProps?.Elements<A.VideoFromFile>().FirstOrDefault();
                string linkId = videoFile?.Link?.Value;

                // Linked (external) media is described by an external relationship.
                var externalRelationship = linkId == null
                    ? null
                    : slidePart.ExternalRelationships.FirstOrDefault(o => o.Id == linkId);
                var uri = externalRelationship?.Uri;

                bool hasExternalUri = uri != null
                    && !string.Equals(uri.OriginalString, "NULL", StringComparison.OrdinalIgnoreCase);
                if (!hasExternalUri)
                {
                    // Embedded media is described by the p14:media extension element.
                    var media14 = appProps?.Descendants<P14.Media>().FirstOrDefault();
                    if (media14 != null)
                    {
                        // Trim information of the media.
                        var mediaStart = media14.MediaTrim?.Start?.Value;
                        var mediaEnd = media14.MediaTrim?.End?.Value;

                        // Internal relationship pointing at the media data part.
                        string embedId = media14.Embed?.Value;
                        var dataPartReferenceRelationship = embedId == null
                            ? null
                            : slidePart.DataPartReferenceRelationships.FirstOrDefault(o => o.Id == embedId);
                        if (dataPartReferenceRelationship != null)
                        {
                            uri = dataPartReferenceRelationship.Uri;
                            using (Stream mediaStream = dataPartReferenceRelationship.DataPart.GetStream())
                            {
                                // Consume the media bytes here.
                            }
                        }
                    }
                }

                // Image attached to the media shape (the original notes: first
                // frame for video, a speaker icon for audio).
                string posterId = mediaPicture.BlipFill?.Blip?.Embed?.Value;
                if (!string.IsNullOrEmpty(posterId))
                {
                    var part = slidePart.GetPartById(posterId);
                    var imgUri = part.Uri;
                    using (Stream imgStream = part.GetStream())
                    {
                        // Consume the poster image bytes here.
                    }
                }
            }

            // Extract pictures.
            foreach (var pic in picList)
            {
                var cNvPr = pic.NonVisualPictureProperties?.NonVisualDrawingProperties;
                if (cNvPr == null)
                {
                    continue;
                }

                // Shape information.
                var shapeId = cNvPr.Id?.Value;
                var shapeName = cNvPr.Name?.Value;
                var shapeDescr = cNvPr.Description?.Value;

                // Image part referenced by the picture's blip.
                string embed = pic.BlipFill?.Blip?.Embed?.Value;
                if (string.IsNullOrEmpty(embed))
                {
                    continue;
                }

                var part = slidePart.GetPartById(embed);
                var imgUri = part.Uri;
                using (Stream imgStream = part.GetStream())
                {
                    // Consume the image bytes here.
                }
            }

            // Extract text.
            foreach (var txBody in txBodyList)
            {
                // Owning shape; text bodies with any other parent are skipped.
                var sp = txBody.Parent as Shape;
                var cNvPr = sp?.NonVisualShapeProperties?.NonVisualDrawingProperties;
                if (cNvPr == null)
                {
                    continue;
                }

                // Shape information.
                var shapeId = cNvPr.Id?.Value;
                var shapeName = cNvPr.Name?.Value;

                // Option 1: the concatenated inner text of the whole body.
                var text = txBody.InnerText;

                // Option 2: join the individual text runs.
                var texts = txBody.Descendants<A.Text>();
                text = string.Join(string.Empty, texts.Select(o => o.Text));

                // Option 3: keep paragraph boundaries, one line per paragraph.
                var paragraphs = txBody.Descendants<A.Paragraph>();
                text = string.Join(Environment.NewLine, paragraphs.Select(o => o.InnerText));
            }
        }
    }
}
ppt文檔的形狀結構大概為:
完畢