package com.zhengqing.demo;
import cn.hutool.core.util.StrUtil;
import cn.hutool.http.HttpUtil;
import cn.hutool.json.JSON;
import cn.hutool.json.JSONUtil;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.*;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
/**
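* @description: Batch driver that converts project Word documents to HTML and then parses the generated HTML (see main).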
* @author: azhou
* @create: 2024/6/27 11:56
**/
@Slf4j
public class AzhouTest {
/**
 * Entry point: collects the project list under a hard-coded root folder and runs
 * the two processing stages over it in order.
 *
 * @param args command-line arguments (not used)
 * @throws Exception if any stage propagates a failure
 */
public static void main(String[] args) throws Exception {
    String path = "E:\\project\\file";
    // Collect every project that needs to be processed.
    List<FileEntity> projectFileList = getProjectFileList(path, false);
    // Stage 1: convert the Word documents to HTML.
    coverWordToHtml(projectFileList, true);
    // Stage 2 (as described by the original author): parse the HTML, upload its images to
    // Aliyun, extract the HTML chapters as article content, swap in the Aliyun image links,
    // zip the project, upload the zip to Baidu Netdisk and obtain the share link.
    parseHtml(projectFileList, true);
}
/**
 * Converts one Word document by delegating to {@code WordHandleTest.parseWord}.
 * <p>
 * Any exception thrown by the conversion is caught here and its stack trace is
 * printed, so a failure never propagates to the caller.
 *
 * @param filePath forwarded unchanged as the first argument of {@code parseWord}
 * @param name     forwarded unchanged as the second argument of {@code parseWord}
 */
public static void coverWordToHtml(String filePath, String name) {
    try {
        WordHandleTest.parseWord(filePath, name);
    } catch (Exception conversionFailure) {
        // Deliberately best-effort: report the failure and carry on.
        conversionFailure.printStackTrace();
    }
}
/**
 * Tells whether {@code directoryPath} has a direct sub-directory named exactly
 * {@code folderName}.
 *
 * @param directoryPath path of the directory whose immediate children are inspected
 * @param folderName    name to look for (compared with {@code String.equals})
 * @return {@code true} only when a direct child is a directory with that name;
 *         {@code false} when the path is not a directory, cannot be listed, or has no match
 */
public static boolean checkFolderExists(String directoryPath, String folderName) {
    File parent = new File(directoryPath);
    // Only list the children when the path really is a directory.
    File[] children = parent.isDirectory() ? parent.listFiles() : null;
    if (children == null) {
        return false;
    }
    return Arrays.stream(children)
            .anyMatch(child -> child.isDirectory() && child.getName().equals(folderName));
}
/**
 * Checks whether a file name is exactly one of the two accepted document names,
 * {@code 论文.doc} or {@code 论文.docx}.
 *
 * @param title file name to test; may be {@code null}
 * @return {@code true} when {@code title} equals one of the accepted names,
 *         {@code false} otherwise (including for {@code null})
 */
public static boolean checkTitle(String title) {
    // Constant-first comparison so a null title yields false instead of a NullPointerException.
    return "论文.doc".equals(title) || "论文.docx".equals(title);
}
/**
 * Runs {@code htmlToTxt} for every project in the list and collects the projects
 * that could not be processed.
 * <p>
 * A project is recorded as failed when its Word path is empty or when
 * {@code htmlToTxt} returns {@code false}. The id passed to {@code htmlToTxt}
 * only advances after a successful conversion.
 *
 * @param fileEntities projects to process
 * @param printExcel   when {@code TRUE}, the failed projects are exported to
 *                     {@code errorHtml.xlsx}; {@code null} is treated as {@code false}
 * @throws IOException propagated from {@code htmlToTxt} or {@code exportToExcel}
 */
public static void parseHtml(List<FileEntity> fileEntities, Boolean printExcel) throws IOException {
    log.info("html解析开始,待解析项目总数:{}", fileEntities.size());
    // NOTE(review): the starting id 178 is hard-coded; its origin is not visible here - confirm.
    int projectId = 178;
    List<FileEntity> errorList = new ArrayList<>();
    for (FileEntity fileEntity : fileEntities) {
        String wordPath = fileEntity.getWordPath();
        log.info("当前解析地址:{}", wordPath);
        if (StrUtil.isEmpty(wordPath)) {
            errorList.add(fileEntity);
            continue;
        }
        // Literal replace: the article name is a file name, not a regular expression, so
        // characters such as '.', '(' or '+' must not be interpreted as regex syntax.
        String htmlFilePath = wordPath.replace(fileEntity.getArticleName(), "") + "\\html";
        // Drop a leading "springboot<digits>" prefix from the project name.
        String projectName = fileEntity.getProjectName().replaceAll("^springboot\\d+", "");
        boolean converted = htmlToTxt(htmlFilePath, projectName, projectId);
        if (converted) {
            projectId++;
        } else {
            errorList.add(fileEntity);
        }
    }
    // Boolean.TRUE.equals avoids a NullPointerException from unboxing a null flag.
    if (Boolean.TRUE.equals(printExcel)) {
        String outputPath = "errorHtml.xlsx";
        exportToExcel(errorList, outputPath);
    }
}
/**
 * Deletes every regular file whose name ends with {@code .txt} directly inside
 * the given folder. Sub-directories are neither entered nor removed.
 * <p>
 * Does nothing when the path does not exist, is not a directory, or cannot be listed.
 *
 * @param folderPath path of the folder to clean
 */
public static void deleteTxt(String folderPath) {
    File folder = new File(folderPath);
    // listFiles() returns null for a missing path, a non-directory, or an I/O error;
    // iterating over that result directly would throw a NullPointerException.
    File[] files = folder.listFiles();
    if (files == null) {
        return;
    }
    for (File file : files) {
        if (file.isFile() && file.getName().endsWith(".txt")) {
            // Best-effort cleanup: a file that cannot be deleted is left in place.
            file.delete();
        }
    }
}
public static boolean htmlToTxt(String htmlFilePath, String projectName, Integer projectId) throws IOException {
// 删除之前的
deleteTxt(htmlFilePath);
String htmlPath = htmlFilePath + "\\b.html";
File file = new File(htmlPath);
if (!file.exists()) {
return false;
}
// 上传图片
File imgFile = new File(htmlFilePath);
List