- 修复<>分隔符周围的空行问题 - 添加UTF-8 BOM标记与示例文件保持一致 - 解决特殊字符和编码显示问题 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
268 lines
9.8 KiB
C#
268 lines
9.8 KiB
C#
using System;
|
||
using System.Collections.Generic;
|
||
using System.IO;
|
||
using System.Linq;
|
||
using System.Text;
|
||
|
||
namespace SlideCombine
|
||
{
|
||
/// <summary>
/// Outcome of merging one group of bookmark files that share a base file name.
/// </summary>
public class ProcessResult
{
    /// <summary>Base name of the group; <c>FileMerger.SaveResults</c> uses it as the output file name (plus ".txt").</summary>
    public string BaseFileName { get; set; }

    /// <summary>
    /// Bookmark files that make up the group. Starts as an empty list so that
    /// results created without it (e.g. error results) can still be enumerated.
    /// </summary>
    public List<string> SourceFiles { get; set; } = new List<string>();

    /// <summary>Combined, formatted text produced for the group.</summary>
    public string OutputContent { get; set; }

    /// <summary>True when the group was processed without an exception.</summary>
    public bool Success { get; set; }

    /// <summary>Error description when <see cref="Success"/> is false.</summary>
    public string ErrorMessage { get; set; }

    /// <summary>
    /// Metadata documents built for the group, one per processed bookmark file.
    /// Starts as an empty list for the same reason as <see cref="SourceFiles"/>.
    /// </summary>
    public List<DocumentMetadata> MetadataDocuments { get; set; } = new List<DocumentMetadata>();
}
|
||
|
||
public class FileMerger
|
||
{
|
||
/// <summary>
/// Recursively scans <paramref name="pdfRootPath"/> for FreePic2Pdf bookmark files, groups them by the
/// base name of their containing folder, and merges every group into one <see cref="ProcessResult"/>.
/// </summary>
/// <param name="pdfRootPath">Root directory to search; all sub-directories are included.</param>
/// <param name="txtOutputPath">Not read by this method; kept so existing callers continue to compile.</param>
/// <returns>
/// One result per group. If no bookmark file is found, or scanning/grouping throws, a failed result
/// carrying the error message is appended instead of propagating the exception.
/// </returns>
public static List<ProcessResult> ProcessAllFolders(string pdfRootPath, string txtOutputPath)
{
    var results = new List<ProcessResult>();

    try
    {
        // Bookmark files are accepted both without an extension and with ".txt".
        var bkmkFiles = new List<string>();
        bkmkFiles.AddRange(Directory.GetFiles(pdfRootPath, "FreePic2Pdf_bkmk", SearchOption.AllDirectories));
        bkmkFiles.AddRange(Directory.GetFiles(pdfRootPath, "FreePic2Pdf_bkmk.txt", SearchOption.AllDirectories));

        if (bkmkFiles.Count == 0)
        {
            // Report the condition directly rather than throwing just to catch it below.
            results.Add(new ProcessResult
            {
                Success = false,
                ErrorMessage = $"在路径 {pdfRootPath} 下未找到任何 FreePic2Pdf_bkmk 或 FreePic2Pdf_bkmk.txt 文件"
            });
            return results;
        }

        // Group by base name (the part of the folder name before the first space).
        var fileGroups = new Dictionary<string, List<string>>();

        // A folder that holds both file variants must contribute only once, otherwise the same
        // document is emitted twice. The extension-less variant wins because it is listed first.
        var seenFolders = new HashSet<string>(StringComparer.Ordinal);

        foreach (var bkmkFile in bkmkFiles)
        {
            var folderPath = Path.GetDirectoryName(bkmkFile);
            if (!seenFolders.Add(folderPath))
            {
                continue;
            }

            var folderNameOnly = new DirectoryInfo(folderPath).Name;
            var baseName = GetBaseFileName(folderNameOnly);

            List<string> groupFiles;
            if (!fileGroups.TryGetValue(baseName, out groupFiles))
            {
                groupFiles = new List<string>();
                fileGroups[baseName] = groupFiles;
            }

            groupFiles.Add(bkmkFile);
        }

        // Process every group with its files in sorted path order.
        foreach (var group in fileGroups)
        {
            var result = ProcessFileGroup(group.Key, group.Value.OrderBy(f => f).ToList());
            results.Add(result);
        }
    }
    catch (Exception ex)
    {
        // Results gathered so far are kept; the failure is appended as its own entry.
        results.Add(new ProcessResult
        {
            Success = false,
            ErrorMessage = ex.Message
        });
    }

    return results;
}
|
||
|
||
/// <summary>
/// Returns the part of <paramref name="folderName"/> that precedes its first space.
/// A name without a space, or one whose first character is a space, is returned unchanged.
/// </summary>
private static string GetBaseFileName(string folderName)
{
    int firstSpace = folderName.IndexOf(' ');

    // -1 means "no space"; 0 would give an empty prefix, so the full name is kept in both cases.
    if (firstSpace <= 0)
    {
        return folderName;
    }

    return folderName.Substring(0, firstSpace);
}
|
||
|
||
/// <summary>
/// Builds the merged output for one group of bookmark files.
/// </summary>
/// <param name="baseName">Base name shared by the group; stored on the result and used in error messages.</param>
/// <param name="bkmkFiles">Bookmark files belonging to the group, in the order they should be merged.</param>
/// <returns>
/// A result that is successful with combined content, or failed with an error message when any
/// step throws. Documents collected before the failure stay in <c>MetadataDocuments</c>.
/// </returns>
private static ProcessResult ProcessFileGroup(string baseName, List<string> bkmkFiles)
{
    var documents = new List<DocumentMetadata>();

    var outcome = new ProcessResult();
    outcome.BaseFileName = baseName;
    outcome.SourceFiles = bkmkFiles;
    outcome.Success = true;
    outcome.MetadataDocuments = documents;

    try
    {
        for (int index = 0; index < bkmkFiles.Count; index++)
        {
            string bookmarkPath = bkmkFiles[index];

            // Look up the matching TXT file, then build the metadata document from both files.
            string metadataPath = GetCorrespondingTxtFile(bookmarkPath);
            DocumentMetadata document = CreateMetadataFromFiles(metadataPath, bookmarkPath);

            if (document == null)
            {
                continue;
            }

            documents.Add(document);
        }

        // Merge all collected documents into a single output text.
        outcome.OutputContent = ContentFormatter.CombineFormattedMetadataDocuments(documents);
    }
    catch (Exception ex)
    {
        outcome.ErrorMessage = $"处理文件组 {baseName} 时出错: {ex.Message}";
        outcome.Success = false;
    }

    return outcome;
}
|
||
|
||
/// <summary>
/// Locates the TXT metadata file that belongs to a bookmark file. The file is expected at
/// <c>&lt;grandparent of the bookmark's folder&gt;/TXT/&lt;bookmark folder name&gt;.txt</c>.
/// </summary>
/// <param name="bkmkFile">Path of the bookmark file.</param>
/// <returns>
/// The TXT file path when it exists; otherwise null. Null is also returned when the bookmark path
/// has no directory part or its folder has fewer than two ancestor directories.
/// </returns>
private static string GetCorrespondingTxtFile(string bkmkFile)
{
    var directory = Path.GetDirectoryName(bkmkFile);
    if (string.IsNullOrEmpty(directory))
    {
        // No containing folder to derive a name from.
        return null;
    }

    var folderName = new DirectoryInfo(directory).Name;

    // The TXT folder lives two levels above the bookmark folder. Close to the drive/filesystem
    // root those levels do not exist, so treat that as "no TXT file" rather than dereferencing null.
    var grandParent = Directory.GetParent(directory)?.Parent;
    if (grandParent == null)
    {
        return null;
    }

    var txtDirectory = Path.Combine(grandParent.FullName, "TXT");
    var txtFile = Path.Combine(txtDirectory, $"{folderName}.txt");

    return File.Exists(txtFile) ? txtFile : null;
}
|
||
|
||
/// <summary>
/// Creates a metadata document from an optional TXT metadata file and a bookmark file.
/// </summary>
/// <param name="txtFile">TXT metadata file; skipped when it does not exist (including null).</param>
/// <param name="bkmkFile">Bookmark file; its bookmarks become the table of contents when the file exists.</param>
/// <returns>A new document; fields stay at their defaults for any input file that is missing.</returns>
private static DocumentMetadata CreateMetadataFromFiles(string txtFile, string bkmkFile)
{
    DocumentMetadata document = new DocumentMetadata();

    // Descriptive fields come from the TXT file.
    bool hasMetadataFile = File.Exists(txtFile);
    if (hasMetadataFile)
    {
        ReadMetadataFromTxt(txtFile, document);
    }

    // Bookmarks come from the bkmk file.
    if (!File.Exists(bkmkFile))
    {
        return document;
    }

    document.TableOfContents = BookmarkExtractor.ExtractBookmarksFromBkmk(bkmkFile);
    return document;
}
|
||
|
||
/// <summary>
/// Parses "key: value" lines from a TXT metadata file into <paramref name="metadata"/>.
/// Lines without a colon and lines with an unrecognised key are ignored.
/// </summary>
/// <param name="txtFile">Path of the TXT metadata file.</param>
/// <param name="metadata">Document whose properties are filled from the recognised keys.</param>
/// <exception cref="Exception">
/// Reading or parsing failed; the original exception is available as <c>InnerException</c>.
/// </exception>
private static void ReadMetadataFromTxt(string txtFile, DocumentMetadata metadata)
{
    try
    {
        foreach (var line in ReadTxtLines(txtFile))
        {
            // Split at the first colon only, so values such as URLs keep their own colons.
            var parts = line.Split(new[] { ':' }, 2);
            if (parts.Length != 2)
            {
                continue;
            }

            var key = parts[0].Trim();
            var value = parts[1].Trim();

            switch (key)
            {
                case "title":
                    metadata.Title = value;
                    break;
                case "Other titles":
                    metadata.OtherTitles = value;
                    break;
                case "Volume":
                    metadata.Volume = value;
                    break;
                case "ISBN":
                    metadata.ISBN = value;
                    break;
                case "creator":
                    metadata.Creator = value;
                    break;
                case "contributor":
                    metadata.Contributor = value;
                    break;
                case "issuedDate":
                    metadata.IssuedDate = value;
                    break;
                case "publisher":
                    metadata.Publisher = value;
                    break;
                case "place":
                    metadata.Place = value;
                    break;
                case "Classification number":
                    metadata.ClassificationNumber = value;
                    break;
                case "page":
                    metadata.Page = value;
                    break;
                case "subject":
                    metadata.Subject = value;
                    break;
                case "date":
                    metadata.Date = value;
                    break;
                case "spatial":
                    metadata.Spatial = value;
                    break;
                case "Other ISBN":
                    metadata.OtherISBN = value;
                    break;
                case "Other time":
                    metadata.OtherTime = value;
                    break;
                case "url":
                    metadata.Url = value;
                    break;
            }
        }
    }
    catch (Exception ex)
    {
        // Keep the original exception as InnerException so the stack trace is not lost.
        throw new Exception($"读取TXT文件 {txtFile} 失败: {ex.Message}", ex);
    }
}

/// <summary>
/// Reads all lines of a text file that is either UTF-8 or GBK encoded.
/// </summary>
/// <remarks>
/// GBK decoding never fails on invalid input (bad bytes are silently replaced), so trying GBK first
/// would never reach a UTF-8 fallback and BOM-less UTF-8 files would be garbled. Strict UTF-8 is
/// therefore tried first; text that is not valid UTF-8 is re-read as GBK.
/// </remarks>
private static string[] ReadTxtLines(string txtFile)
{
    try
    {
        // throwOnInvalidBytes: true makes invalid UTF-8 raise DecoderFallbackException.
        return File.ReadAllLines(txtFile, new UTF8Encoding(false, true));
    }
    catch (DecoderFallbackException)
    {
        // Not valid UTF-8; fall through to the legacy encoding.
    }

    Encoding legacyEncoding;
    try
    {
        legacyEncoding = Encoding.GetEncoding("GBK");
    }
    catch (ArgumentException)
    {
        // GBK is not available on this runtime; decode leniently as UTF-8 instead.
        legacyEncoding = Encoding.UTF8;
    }
    catch (NotSupportedException)
    {
        legacyEncoding = Encoding.UTF8;
    }

    return File.ReadAllLines(txtFile, legacyEncoding);
}
|
||
|
||
/// <summary>
/// Writes the combined content of every successful, non-empty result to
/// <c>&lt;outputPath&gt;/&lt;BaseFileName&gt;.txt</c>, creating the directory when needed.
/// </summary>
/// <param name="results">Results to save; failed results and results without content are skipped.</param>
/// <param name="outputPath">Directory that receives the output files.</param>
public static void SaveResults(List<ProcessResult> results, string outputPath)
{
    // CreateDirectory does nothing when the directory already exists.
    Directory.CreateDirectory(outputPath);

    // The file starts with a UTF-8 BOM, so the payload must be UTF-8 as well. Encoding the text with
    // the ANSI code page behind a UTF-8 BOM makes BOM-aware readers mis-decode it and turns
    // characters outside that code page into '?'.
    var utf8WithBom = new UTF8Encoding(true);

    foreach (var result in results)
    {
        if (!result.Success || string.IsNullOrEmpty(result.OutputContent))
        {
            continue;
        }

        var outputFilePath = Path.Combine(outputPath, $"{result.BaseFileName}.txt");

        // Overwrites an existing file and emits the BOM followed by the UTF-8 encoded text.
        File.WriteAllText(outputFilePath, result.OutputContent, utf8WithBom);
    }
}
|
||
}
|
||
} |