C# .NET 使用 Open XML SDK 提取 PPT 中的音頻、視頻、圖片、文本
名稱空間:
using System; using DocumentFormat.OpenXml.Packaging; using System.IO; using System.Linq; using DocumentFormat.OpenXml; using DocumentFormat.OpenXml.Presentation; using A = DocumentFormat.OpenXml.Drawing; using P14 = DocumentFormat.OpenXml.Office2010.PowerPoint;
代碼如下:
/// <summary>
/// Opens a presentation read-only and walks every slide, locating its
/// audio/video media, plain pictures and text bodies.
/// </summary>
/// <remarks>
/// The method returns nothing: the locals inside each loop (shape id/name,
/// URIs, streams, text) mark the places where a caller would consume the
/// extracted data. Elements that lack the expected structure are skipped
/// instead of raising a <see cref="NullReferenceException"/>.
/// </remarks>
/// <param name="path">File-system path of the .pptx file to inspect.</param>
public void PptInfo(string path)
{
    using (var doc = PresentationDocument.Open(path, false))
    {
        var presentation = doc.PresentationPart?.Presentation;
        var slideIdList = presentation?.SlideIdList;
        if (slideIdList == null)
        {
            return;
        }

        // Number shown by PowerPoint for the first slide (defaults to 1).
        int firstSlideNumber = presentation.FirstSlideNum?.Value ?? 1;
        int slideIndex = -1;

        foreach (SlideId slideId in slideIdList.Elements<SlideId>())
        {
            // Count every slide id, including ones skipped below, so numbering stays aligned.
            slideIndex++;

            SlidePart slidePart = doc.PresentationPart.GetPartById(slideId.RelationshipId) as SlidePart;
            if (slidePart == null || slidePart.Slide == null)
            {
                continue;
            }

            // Real slide number as displayed in PowerPoint.
            // The first-slide offset must be resolved BEFORE adding the index:
            // "a ?? 1 + i" parses as "a ?? (1 + i)".
            int slideNumber = firstSlideNumber + slideIndex;

            Slide slide = slidePart.Slide;
            var shapeTree = slide.CommonSlideData?.ShapeTree;
            if (shapeTree == null)
            {
                continue;
            }

            // Audio and video timing nodes are handled uniformly through their common media node.
            var mediaNodes = slide.Descendants<Audio>().Select(a => a.CommonMediaNode)
                .Concat(slide.Descendants<Video>().Select(v => v.CommonMediaNode));

            // Pictures whose application properties are empty, i.e. not media placeholders.
            var picList = shapeTree.Descendants<Picture>()
                .Where(o => o.NonVisualPictureProperties?.ApplicationNonVisualDrawingProperties?.Any() != true);

            // Text bodies of shapes.
            var txBodyList = shapeTree.Descendants<TextBody>();

            // ---- Extract audio / video ----
            foreach (var mediaNode in mediaNodes)
            {
                // Shape the media is attached to.
                string targetShapeId = mediaNode?.TargetElement?.ShapeTarget?.ShapeId?.Value;
                if (string.IsNullOrEmpty(targetShapeId))
                {
                    continue;
                }

                // Drawing properties of that shape (ids are compared by their text form).
                var cNvPr = slide.Descendants<NonVisualDrawingProperties>()
                    .FirstOrDefault(o => o.Id?.InnerText == targetShapeId);

                // Parent and grand-parent: media shapes are expected to be pictures.
                var nvPicPr = cNvPr?.Parent as NonVisualPictureProperties;
                var pic = nvPicPr?.Parent as Picture;
                var appProps = nvPicPr?.ApplicationNonVisualDrawingProperties;
                if (pic == null || appProps == null)
                {
                    continue;
                }

                // Shape information.
                var shapeId = cNvPr.Id?.Value;
                var shapeName = cNvPr.Name?.Value;
                var shapeDescr = cNvPr.Description?.Value;

                // Relationship id of the media file reference (video first, then audio).
                string linkId = appProps.Elements<A.VideoFromFile>().FirstOrDefault()?.Link?.Value
                    ?? appProps.Elements<A.AudioFromFile>().FirstOrDefault()?.Link?.Value;

                // External (linked) media, if any.
                var externalRelationship = linkId == null
                    ? null
                    : slidePart.ExternalRelationships.FirstOrDefault(o => o.Id == linkId);
                var uri = externalRelationship?.Uri;

                if (uri == null || string.Equals(uri.OriginalString, "NULL", StringComparison.OrdinalIgnoreCase))
                {
                    // No usable external link: fall back to the embedded media part.
                    var media14 = appProps.Descendants<P14.Media>().FirstOrDefault();
                    if (media14 != null)
                    {
                        // Media trim information.
                        var mediaStart = media14.MediaTrim?.Start?.Value;
                        var mediaEnd = media14.MediaTrim?.End?.Value;

                        // Internal relationship pointing at the media data part.
                        string embedId = media14.Embed?.Value;
                        var dataPartRelationship = slidePart.DataPartReferenceRelationships
                            .FirstOrDefault(o => o.Id == embedId);
                        if (dataPartRelationship != null)
                        {
                            uri = dataPartRelationship.Uri;
                            using (var mediaStream = dataPartRelationship.DataPart.GetStream())
                            {
                                // Consume the media bytes here (e.g. copy to a file).
                            }
                        }
                    }
                }

                // Poster image of the media shape; skipped when the blip is not embedded.
                string posterEmbedId = pic.BlipFill?.Blip?.Embed?.Value;
                if (!string.IsNullOrEmpty(posterEmbedId))
                {
                    var posterPart = slidePart.GetPartById(posterEmbedId);
                    var imgUri = posterPart.Uri;
                    using (var imgStream = posterPart.GetStream())
                    {
                        // Consume the poster image bytes here.
                    }
                }
            }

            // ---- Extract pictures ----
            foreach (var pic in picList)
            {
                var cNvPr = pic.NonVisualPictureProperties?.NonVisualDrawingProperties;
                if (cNvPr == null)
                {
                    continue;
                }

                // Shape information.
                var shapeId = cNvPr.Id?.Value;
                var shapeName = cNvPr.Name?.Value;
                var shapeDescr = cNvPr.Description?.Value;

                // Linked (non-embedded) pictures carry no r:embed id and are skipped.
                string embedId = pic.BlipFill?.Blip?.Embed?.Value;
                if (string.IsNullOrEmpty(embedId))
                {
                    continue;
                }

                var part = slidePart.GetPartById(embedId);
                var imgUri = part.Uri;
                using (var imgStream = part.GetStream())
                {
                    // Consume the picture bytes here.
                }
            }

            // ---- Extract text ----
            foreach (var txBody in txBodyList)
            {
                // Owning shape and its drawing properties.
                var sp = txBody.Parent as Shape;
                var cNvPr = sp?.NonVisualShapeProperties?.NonVisualDrawingProperties;
                if (cNvPr == null)
                {
                    continue;
                }

                // Shape information.
                var shapeId = cNvPr.Id?.Value;
                var shapeName = cNvPr.Name?.Value;

                // Option 1: raw inner text of the whole body.
                var text = txBody.InnerText;

                // Option 2: concatenate the individual text runs.
                text = string.Concat(txBody.Descendants<A.Text>().Select(o => o.Text));

                // Option 3: keep paragraph boundaries as line breaks.
                text = string.Join(Environment.NewLine, txBody.Descendants<A.Paragraph>().Select(o => o.InnerText));
            }
        }
    }
}
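ppt文檔的形狀結構大概為(依上述代碼的取值路徑):
p:pic(圖片/媒體形狀)
 ├─ p:nvPicPr
 │   ├─ p:cNvPr(Id、Name、Descr)
 │   └─ p:nvPr(a:videoFile / a:audioFile,以及擴展中的 p14:media)
 └─ p:blipFill → a:blip(r:embed 指向圖片部件)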

完畢
