C# Npoi如何读取单元格图片并获取所在单元格位置
目录
- C# Npoi读取单元格图片并获取所在单元格位置
- 主要的代码逻辑
- 以下是封装完成的代码
- 总结
C# Npoi读取单元格图片并获取所在单元格位置
C#在excel中读取图片 获取图片的单元格位置信息(仅限于xlsx)
主要的代码逻辑
- 将excel使用zip直接打开
- 压缩包内图片信息主要存放在两个文件夹中(xl/media,xl/drawings)前者是图片本身,后者是图片的主要信息
- 在drawings文件夹下会存在若干以drawing开头的.XML文件,一个sheet页对应一个drawing.xml后缀按照数字序号递增(例如:第一页为drawing1.xml,第二页为drawing2.xml以此类推)
- 打开drawing.xml后,读取文档节点,节点含义对照表在末尾
- 在pic标签中获取到图片的rid后通过该id在同级目录下的_rels文件夹下找到对应的drawing.xml.rels中读取图片的存放地址(例如drawing1.xml对应drawing1.xml.rels)
twoCellAnchor | 图片信息的节点 节点下包含了图片的起始单元格位置 图片的id即[r:embed]的内容 存在多少个[twoCellAnchor]即存在多少个图片 |
twoCellAnchor/from | 图片左上角开始位置单元格信息 |
twoCellAnchor/to | 图片右下角结束位置单元格信息 (当前需求需要尽量保证一张图片存在一个单元格中,不允许跨单元格 因为不会对其做其他的处理) |
twoCellAnchor/pic | 图片的文件信息 主要读取 [blipFill->blip->r:embed]此信息为图片的id信息可以读取到文件在media文件夹中存放的位置 |
twoCellAnchor/xfrm | 待确定该信息,主要猜想为图片的大小? |
twoCellAnchor/prstGeom | 图片的插入方式 |
以下是封装完成的代码
public class ExcelImgHelper { #region 常量 /// <summary> /// 文件id与文件路径文件夹路径 /// </summary> private const string DrawingRels = "xl/drawings/_rels/drawing_id_.xml.rels"; /// <summary> /// 图片信息文件 /// </summary> private const string Drawing = "xl/drawings/drawing_id_.xml"; #region 图片信息主要标签 private const string twoCellAnchor = "twoCellAnchor"; private const string embed = "embed"; private const string link = "link"; private const string prst = "prst"; #endregion 图片信息主要标签 #endregion 常量 #region 路径信息 /// <summary> /// excel文件地址 /// </summary> private string ExcelPath { get; } /// <summary> /// 解压的文件夹 /// </summary> private string ExcelZipPath { get; } /// <summary> /// 压缩包 *注意 与上方需要区分开当前路劲是压缩文件包.zip /// </summary> private string ExcelZipFilePath { get; } #endregion 路径信息 private List<ExcelImgInfo> ExcelImgInfos = new List<ExcelImgInfo>(); public ExcelImgHelper(string filePath) { if (string.IsNullOrEmpty(filePath)) { throw new ArgumentNullException(nameof(filePath)); } //解压后文件夹存放的位置 与源文件同目录 var dir = Path.GetDirectoryName(filePath); //获取文件名 var fileName = Path.GetFileNameWithoutExtension(filePath); //压缩包路径 var zipFilePath = dir + "\\" + fileName + ".zip"; //复制为压缩包 File.Copy(filePath, zipFilePath); //解压文件 if (UnZipFile(filePath, out string UnZipFilePath)) { ExcelPath = filePath; ExcelZipPath = UnZipFilePath; ExcelZipFilePath = zipFilePath; ExcelImgInfos = Analysis(); } else { throw new Exception("解压失败"); } } /// <summary> /// 解析excel中的图片 /// </summary> /// <returns></returns> private List<ExcelImgInfo> Analysis() { List<ExcelImgPathAndId> imgs = new List<ExcelImgPathAndId>(); List<ExcelImgInfo> excelImgInfos = new List<ExcelImgInfo>(); //读取所有图片以及位置信息 FindPicPathByID(ref imgs); //默认命名空间 XNamespace xdr_namespace = "http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing"; XNamespace a_namespace = "http://schemas.openxmlformats.org/drawingml/2006/main"; XNamespace r_namespace = "http://schemas.openxmlformats.org/officeDocument/2006/relationships"; //加载图片信息文档xml(替换的文件名与页数对应 此处可以优化为传入指定id) XDocument xdoc = XDocument.Load(ExcelZipPath + Drawing.Replace("_id_", "1")); //加载文档中的默认命名空间 var root = xdoc.Root; foreach (var item in root.Attributes()) { if (item.Name.LocalName == "xdr") { xdr_namespace = item.Value; } else if (item.Name.LocalName == "a") { a_namespace = item.Value; } } //读取twoCellAnchor标签中的内容 **核心部分** foreach (var node in xdoc.Descendants(xdr_namespace + twoCellAnchor)) { //twoCellAnchor标签中子标签内容顺序永远为:from->to->pic //所以此处顺序读取即可 var NodeFrom = (XElement)node.FirstNode; var NodeTo = (XElement)NodeFrom.NextNode; var NodePic = (XElement)NodeTo.NextNode; //找到blipFill节点,并找到r节点的命名空间 var blipFill = (XElement)(((XElement)NodePic.FirstNode.NextNode).FirstNode); r_namespace = blipFill.FirstAttribute.IsNamespaceDeclaration ? blipFill.FirstAttribute.Value : r_namespace; //找到spPr节点 var spPr = (XElement)NodePic.FirstNode.NextNode.NextNode; //获取图片Id var ImgId = (blipFill.Attribute(r_namespace + embed) != null ? blipFill.Attribute(r_namespace + embed) : blipFill.Attribute(r_namespace + link)).Value.ToString(); //获取from var From = new Position() { Col = int.Parse(((XElement)NodeFrom.FirstNode).Value), ColOff = int.Parse(((XElement)NodeFrom.FirstNode.NextNode).Value), Row = int.Parse(((XElement)NodeFrom.FirstNode.NextNode.NextNode).Value), RowOff = int.Parse(((XElement)NodeFrom.FirstNode.NextNode.NextNode.NextNode).Value) }; //获取to var To = new Position() { Col = int.Parse(((XElement)NodeTo.FirstNode).Value), ColOff = int.Parse(((XElement)NodeTo.FirstNode.NextNode).Value), Row = int.Parse(((XElement)NodeTo.FirstNode.NextNode.NextNode).Value), RowOff = int.Parse(((XElement)NodeTo.FirstNode.NextNode.NextNode.NextNode).Value) }; //获取图片插入方式 var PrstGeom = ((XElement)spPr.FirstNode.NextNode).Attribute(prst).Value.ToString(); // var xfrm = ((XElement)spPhttp://www.devze.comr.FirstNode); var xfrm_off = ((XElement)xfrm.FirstNode); var xfrm_ext = ((XElement)xfrm.FirstNode.NextNode); List<int> xfrm_offData = new List<int> { int.Parse(xfrm_off.Attribute("x").Value.ToString()), int.Parse(xfrm_off.Attribute("y").Value.ToString()) }; List<int> xfrm_extData = new List<int> { int.Parse(xfrm_ext.Attribute("cx").Value.ToString()), int.Parse(xfrm_ext.Attribute("cy").Value.ToString()) }; //获取图片实际位置 var PathOfPicture = imgs.FirstOrDefault(e => e.Id == ImgId)?.Path; //此处图片为相对位置需要处理成为绝对路径 PathOfPicture = PathOfPicture.Replace("../", ExcelZipPath + "xl\\").Replace("/", "\\"); //至此 所有需要使用的节点全部取出 开始组装数据 ExcelImgInfo excelImgInfo = new ExcelImgInfo( imgId: ImgId, from: From, to: To, prstGeom: PrstGeom, xfrm_off: xfrm_offData, xfrm_ext: xfrm_extData, pathOfPicture: PathOfPicture); excelImgInfos.Add(excelImgInfo); } //Dispose(); return excelImgInfos; } /// <summary> /// 解压文件 /// </summary> /// <param name="zipFilePath">压缩文件路径</param> /// <param name="path">返回压缩文件夹路径</param> /// <param name="unZipDir">解压文件存放路径,为空时默认与压缩文件同一级目录下,跟压缩文件同名的文件夹</param> /// <returns></returns> private bool UnZipFile(string zipFilePath, out string path, string unZipDir = null) { if (zipFilePath == string.Empty) { path = null; return false; } if (!System.IO.File.Exists(zipFilePath)) { path = null; return false; } //解压文件夹为空时默认与压缩文件同一级目录下,跟压缩文件同名的文件夹 if (string.IsNullOrWhiteSpace(unZipDir)) unZipDir = zipFilePath.Replace(Path.GetFileName(zipFilePath), Path.GetFileNameWithoutExtension(zipFilePath)); if (!unZipDir.EndsWith("\\")) unZipDir += "\\"; if (!Directory.Exists(unZipDir)) Directory.CreateDirectory(unZipDir); try { using (ZipInputStream s = new ZipInputStream(System.IO.File.OpenRead(zipFilePath))) { ZipEntry theEntry; while ((theEntry = s.GetNextEntry()) != null) { string directoryName = Path.GetDirectoryName(theEntry.Name); string fileName = Path.GetFileName(theEntry.Name); if (directoryName.Length > 0) { Directory.CreateDirectory(unZipDir + directoryName); } if (!directoryName.EndsWith("\\")) directoryName += "\\"; if (fileName != String.Empty) { using (FileStream streamWriter = System.IO.File.Create(unZipDir + theEntry.Name)) { int size = 2048; byte[] data = new byte[2048]; while (true) { size = s.Read(data, 0, data.Length); if (size > 0) { streamWriter.Write(data, 0, size); 编程客栈 } else { break; } } } } } } } catch { path = null; return false; } path = unZipDir; return true; } /// <summary> /// 获取全部文件id与路径 /// </summary> /// <param name="imgs"></param> /// <param name="_id"></param> private void FindPicPathByID(ref List<ExcelImgPathAndId> imgs, int _id = 1) { string _file = Path.Combine(ExcelZipPath + DrawingRels.Replace("_id_", _id.ToString())); if (!File.Exists(_file)) { throw new DirectoryNotFoundException(_file); } XDocument xDoc = XDocument.Load(_file); var root = xDoc.Root; foreach (XElement node in root.Nodes()) { var attrs = node.Attributes(); string Id = ""; string Target = ""; foreach (var attr in attrs) { if (attr.Name == "Id") Id = attr.Value.ToString(); else if (attr.Name == "Target") python Target = attr.Value.ToString(); } imgs.Add(new ExcelImgPathAndId() { Id = Id, Path = Target }); } } /// <summary> /// 获取excel图片以及位置信息 /// </summary> /// <returns></returns> public List<ExcelImgInfo> GetAllImgs() { return ExcelImgInfos; } /// <summary> /// 删除解压的文件 /// </summary> public void Dispose() { File.Delete(ExcelZipFilePath); DirectoryInfo di = new DirectoryInfo(ExcelZipPath); di.Delete(true); } }
需要用到的辅助类
/// <summary> /// 提取出来的图片信息类 /// </summary> public class ExcelImgInfo { public ExcelImgInfo() { } public ExcelImgInfo(string imgId, Position from, Position to, string prstGeom, List<int> xfrm_off, List<int> xfrm_ext, string pathOfPicture) { try 编程客栈 { ImgId = imgId; From = from; To = to; PrstGeom = prstGeom; this.xfrm_off = xfrm_off; this.xfrm_ext = xfrm_ext; PathOfPicture = pathOfPicture; if (File.Exists(PathOfPicture)) { //将图片读取到内存中并且不锁定文件 FileStream fileStream = new FileStream(PathOfPicture, FileMode.Open, FileAccess.Read); int byteLength = (int)fileStream.Length; byte[] fileBytes = new byte[byteLength]; fileStream.Read(fileBytes, 0, byteLength); fileStream.Close(); using (MemoryStream ms = new MemoryStream(fileBytes)) { ms.Write(fileBytes, 0, fileBytes.Length); ExcelImage = Image.FromStream(ms, true); } imgByteArray = fileBytes; } else { throw new FileNotFoundException("图片位置错误"); } } catch (Exception e) { throw new Exception($" 图片对象初始化时错误:[{imgId}]\n{e.Message}\n{e.StackTrace}"); } } /// <summary> /// 图片Id /// </summary> public string ImgId { get; protected set; } /// <summary> /// 开始的单元格 /// </summary> public Position From { get; protected set; } /// <summary> /// 结束的单元格 /// </summary> public Position To { get; protected set; } /// <summary> /// 图片插入方式 /// </summary> public string PrstGeom { get; protected set; } /// <summary> /// [0]:x /// [1]:y /// </summary> public List<int> xfrm_off { get; protected set; } = new List<int>(); 编程客栈/// <summary> /// [0]:cx /// [1]:cy /// </summary> public List<int> xfrm_ext { get; protected set; } = new List<int>(); /// <summary> /// 图片地址 /// </summary> public string PathOfPicture { get; protected set; } /// <summary> /// 图片数据 /// </summary> public Image ExcelImage { get; protected set; } /// <summary> /// 图片数组 /// </summary> public byte[] imgByteArray { get; protected set; } public void Dispose() { //ExcelImage.Dispose(); } }
/// <summary> /// 文件Id与路径 /// </summary> public class ExcelImgPathAndId { public string Id { get; set; } public string Path { get; set; } }
/// <summary> /// 位置信息 /// </summary> public class Position { public int Col { get; set; } public int ColOff { get; set; } public int Row { get; set; } public int RowOff { get; set; } }
总结
以上为个人经验,希望能给大家一个参考,也希望大家多多支持编程客栈(www.devze.com)。
精彩评论