From e4451cbe7eea81c397353e8d5649e52dcbd3b7d1 Mon Sep 17 00:00:00 2001 From: guyue <1721849008@qq.com> Date: 星期五, 05 九月 2025 13:14:00 +0800 Subject: [PATCH] 平台筛选 --- src/main/java/com/linghu/controller/CollectController.java | 1344 --------------------------------------------------------- 1 files changed, 16 insertions(+), 1,328 deletions(-) diff --git a/src/main/java/com/linghu/controller/CollectController.java b/src/main/java/com/linghu/controller/CollectController.java index 1cafe88..5ccdb76 100644 --- a/src/main/java/com/linghu/controller/CollectController.java +++ b/src/main/java/com/linghu/controller/CollectController.java @@ -1,45 +1,23 @@ package com.linghu.controller; -import java.time.Duration; -import java.time.LocalDateTime; -import java.time.format.DateTimeFormatter; -import java.util.*; -import java.util.concurrent.*; -import java.util.concurrent.locks.ReentrantLock; -import java.util.stream.Collectors; + import javax.servlet.http.HttpServletRequest; +import javax.validation.Valid; import com.fasterxml.jackson.core.JsonProcessingException; import com.linghu.model.dto.*; -import com.linghu.model.entity.*; + import com.linghu.service.*; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.beans.factory.annotation.Value; -import org.springframework.core.ParameterizedTypeReference; -import org.springframework.dao.DuplicateKeyException; -import org.springframework.http.*; -import org.springframework.transaction.annotation.Transactional; -import org.springframework.web.reactive.function.client.ExchangeStrategies; -import org.springframework.web.reactive.function.client.WebClient; - -import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; -import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper; import com.linghu.model.common.ResponseResult; -import com.linghu.utils.JwtUtils; - -import io.jsonwebtoken.lang.Collections; import io.swagger.annotations.Api; import io.swagger.annotations.ApiOperation; -import reactor.core.publisher.Flux; import reactor.core.publisher.Mono; - import org.springframework.web.bind.annotation.* ; -import org.springframework.http.HttpStatus; -import com.linghu.model.dto.TaskResultResponse.QuestionResult; -import com.linghu.model.dto.TaskResultResponse.UserResult; -import reactor.core.scheduler.Schedulers; + + @RestController @RequestMapping("/collect") @@ -47,1334 +25,49 @@ @Slf4j public class CollectController { - @Autowired - private ReferenceService referenceService; - - @Value("${linghu.url}") - private String baseUrl; @Autowired - private WebClient webClient; + private CollectionService collectionService; - @Autowired - private JwtUtils jwtUtils; - @Autowired - private KeywordService keywordService; - @Autowired - private QuestionService questionService; - @Autowired - private KeywordTaskService keywordTaskService; - @Autowired - private PlatformService platformService; - @Autowired - private TypeService typeService; - @Autowired - private UserService userService; - @Autowired - private OrderService orderService; - @Autowired - private QuestionResultService questionResultService; - // 替换为线程安全队列 - private static final Queue<SearchTaskRequest> taskQueue = new ConcurrentLinkedQueue<>(); - // 全局映射:关键词ID -> 批次队列 - private static final ConcurrentMap<Integer, Queue<List<UserDto>>> batchQueues = new ConcurrentHashMap<>(); - -// private static boolean isProcessing = false; - private static volatile boolean isProcessing = false; // 添加 volatile @PostMapping("/search") @ApiOperation(value = "开始采集") public Mono<ResponseResult<?>> createSearchTask( - @RequestBody SearchTaskRequest searchTaskRequest, + @Valid @RequestBody SearchTaskRequest searchTaskRequest, HttpServletRequest request) throws JsonProcessingException { - // 首先检查服务器资源 - return getServerResource() - .flatMap(resourceResponse -> { - double cpuUsage = parseUsage(resourceResponse.getCpu_usage_percent()); - double memoryUsage = parseUsage(resourceResponse.getMemory_usage_percent()); - - if (cpuUsage >= 90.0 || memoryUsage >= 90.0) { - - String errorMsg = String.format("服务器资源不足,请稍后再试"); - - log.warn(errorMsg); - return Mono.just(ResponseResult.error(503, errorMsg)); - } - - // 将新的任务请求加入队列 - taskQueue.add(searchTaskRequest); - - // 如果当前没有任务在处理中,则启动任务队列的处理 - if (!isProcessing) { - processNextTaskInQueue(); - } - - // 返回响应,通知用户任务已开始 - return Mono.just(ResponseResult.success("任务已加入队列,正在处理...")); - }) - .onErrorResume(e -> { - log.error("检查服务器资源失败: {}", e.getMessage(), e); - return Mono.just(ResponseResult.error("检查服务器资源失败: " + e.getMessage())); - }); + return collectionService.getResponseResultMono(searchTaskRequest); } - private void processNextTaskInQueue() { - // 设置为正在处理 - isProcessing = true; - - // 从队列中取出下一个任务 - SearchTaskRequest nextTaskRequest = taskQueue.poll(); - - if (nextTaskRequest != null) { - Integer keywordId = nextTaskRequest.getKeyword_id(); - log.info("开始处理任务队列,keywordId: {}", keywordId); - - executeBatchTask(nextTaskRequest) - .doFinally(signal -> { - isProcessing = false; - if (!taskQueue.isEmpty()) { - processNextTaskInQueue(); - } - }) - .subscribe( - result -> log.info("任务处理完成,keywordId: {}", keywordId), // 成功日志 - error -> { // 关键:添加错误处理 - log.error("任务队列处理异常,keywordId: {}", keywordId, error); - } - ); - } else { - isProcessing = false; // 无任务时重置状态 - } - } - private Mono<ResponseResult<String>> executeBatchTask(SearchTaskRequest searchTaskRequest) { - Integer keywordId = searchTaskRequest.getKeyword_id(); - // - int maxConcurrentUsers = searchTaskRequest.getConfig() != null ? - searchTaskRequest.getConfig().getMax_concurrent_users() : 3; - List<List<UserDto>> userBatches = splitUsersIntoBatches(searchTaskRequest.getUsers(), maxConcurrentUsers, keywordId,searchTaskRequest.getIs_first()); - - - // 创建批次队列并存入全局映射 - Queue<List<UserDto>> batchQueue = new ConcurrentLinkedQueue<>(userBatches); - batchQueues.put(keywordId, batchQueue); // 存储到全局映射 - - return Mono.just(ResponseResult.success("第一个批次已开始")) - .doOnTerminate(() -> { - executeBatchTask(batchQueue, searchTaskRequest, keywordId) - .subscribe( - result -> log.info("批次任务启动成功,keywordId: {}", keywordId), - error -> { // 处理批次执行异常 - log.error("批次任务执行异常,keywordId: {}", keywordId, error); - // 可选:异常时清理资源 - batchQueues.remove(keywordId); - } - ); - }); - } - private Mono<ResponseResult<?>> executeBatchTask(Queue<List<UserDto>> batchQueue, SearchTaskRequest searchTaskRequest, Integer keywordId) { - // 如果队列为空,说明所有批次已经完成 - - if (batchQueue == null || batchQueue.isEmpty()) { - // 清理资源 - batchQueues.remove(keywordId); - return Mono.just(ResponseResult.success("所有批次已完成")); - } - - List<UserDto> currentBatch = batchQueue.poll(); // 从队列中获取当前批次 - SearchTaskRequest batchRequest = new SearchTaskRequest(); - batchRequest.setUsers(currentBatch); - batchRequest.setQuestions(searchTaskRequest.getQuestions()); - batchRequest.setConfig(searchTaskRequest.getConfig()); - batchRequest.setSave_to_database(searchTaskRequest.getSave_to_database()); - batchRequest.setWebhook_url(searchTaskRequest.getWebhook_url()); - batchRequest.setKeyword_id(keywordId); - - return createSingleBatchTask(batchRequest) - .flatMap(taskResponse -> { - if (taskResponse != null && taskResponse.getTask_id() != null) { - // 直接等待任务完成,不再保存任务关联信息 - return waitForTaskCompletion(taskResponse.getTask_id(), batchQueue, searchTaskRequest, keywordId); - } else { - return Mono.just(ResponseResult.error("创建批次任务失败")); - } - }) - .onErrorResume(e -> { - log.error("调用第三方接口失败: {}", e.getMessage(), e); // 关键日志 - return Mono.error(new RuntimeException("调用第三方接口失败: " + e.getMessage())); - }) - .doFinally(signal -> { - // 任务完成时清理资源 - if (batchQueue.isEmpty()) { - batchQueues.remove(keywordId); - } - }); - } - - private Mono<ResponseResult<?>> waitForTaskCompletion(String taskId, Queue<List<UserDto>> batchQueue, SearchTaskRequest searchTaskRequest, Integer keywordId) { - // 查询任务状态 - return getTaskStatus(taskId) - .flatMap(statusResponse -> { - // 检查任务是否被取消 - if ("cancelled".equalsIgnoreCase(statusResponse.getStatus())) { - batchQueues.remove(keywordId); // 清理资源 - return Mono.just(ResponseResult.success("任务已被取消")); - } - // 如果任务状态是"submitted"或"running",继续轮询 - if (!"completed".equalsIgnoreCase(statusResponse.getStatus()) && !"failed".equalsIgnoreCase(statusResponse.getStatus()) && !"cancelled".equalsIgnoreCase(statusResponse.getStatus()) && !("ERROR".equalsIgnoreCase(statusResponse.getStatus()) && statusResponse.getMessage().contains("Task not found")) ) { - return Mono.delay(Duration.ofSeconds(5)) // 延迟 5 秒后再次查询 - .flatMap(aLong -> waitForTaskCompletion(taskId, batchQueue, searchTaskRequest, keywordId)); // 递归调用继续等待 - } else { - // 如果状态为其他状态,则继续处理下一个批次 - return executeBatchTask(batchQueue, searchTaskRequest, keywordId); - } - }) - .onErrorResume(e -> { - // 处理查询任务状态时的错误 - return Mono.just(ResponseResult.error("查询任务状态失败: " + e.getMessage())); - }); - } @ApiOperation(value = "查询任务状态") @GetMapping("/status") - public Mono<TaskStatusResponse> getTaskStatus(String taskId) { - return webClient.get() - .uri(baseUrl + "/api/v1/tasks/" + taskId) - .accept(MediaType.APPLICATION_JSON) - .retrieve() - .onStatus(HttpStatus::isError, response -> response.bodyToMono(TaskStatusResponse.class) - .flatMap(errorBody -> Mono.error(new RuntimeException(errorBody.getDetail())))) - .bodyToMono(TaskStatusResponse.class) - .onErrorResume(e -> { - // 处理错误,创建一个自定义的错误响应对象 - TaskStatusResponse errorResponse = new TaskStatusResponse(); - errorResponse.setStatus("ERROR"); - errorResponse.setMessage(e.getMessage()); - errorResponse.setDetail(e.getMessage()); - return Mono.just(errorResponse); - }); - } - - // 添加一个辅助方法来安全地将字符串转换为double - private double parseUsage(String usageStr) { - try { - if (usageStr != null) { - // 移除可能存在的百分号 - usageStr = usageStr.replace("%", "").trim(); - return Double.parseDouble(usageStr); - } - return 0.0; - } catch (NumberFormatException e) { - log.error("解析资源使用率失败: {}", e.getMessage()); - return 0.0; - } - } - - private List<List<UserDto>> splitUsersIntoBatches(List<UserDto> users, int batchSize,Integer keywordId,Boolean isFirst) { - - Keyword keyword = keywordService.getById(keywordId); - if (isFirst){ - keyword.setNum(1); - }else { - keyword.setNum(keyword.getNum()+1); - } - keywordService.updateById(keyword); - - List<List<UserDto>> batches = new ArrayList<>(); - for (int i = 0; i < users.size(); i += batchSize) { - batches.add(users.subList(i, Math.min(i + batchSize, users.size()))); - - } - for (int i = 0; i < batches.size(); i++){ - // 创建 KeywordTask 关联,task_id 设置为 null,表示任务尚未开始 - KeywordTask keywordTask = new KeywordTask(); - keywordTask.setKeyword_id(keywordId); - keywordTask.setTask_id(null); // 任务ID为空 - - keywordTask.setNum(keyword.getNum()); - keywordTaskService.save(keywordTask); // 保存 KeywordTask - } - - - return batches; - } - - private Mono<SearchTaskResponse> createSingleBatchTask(SearchTaskRequest batchRequest) { - // 记录请求第三方的基本信息(便于排查) - String thirdPartyUrl = baseUrl + "/api/v1/search"; - Integer keywordId = batchRequest.getKeyword_id(); - log.info("开始向第三方提交任务,keywordId: {}, URL: {}, 请求参数: {}", - keywordId, thirdPartyUrl, batchRequest.toString()); // 打印请求参数(建议用工具类转JSON) - return webClient.post() - .uri(baseUrl + "/api/v1/search") - .contentType(MediaType.APPLICATION_JSON) - .bodyValue(batchRequest) - .retrieve() - .onStatus(HttpStatus::is4xxClientError, response -> response.bodyToMono(String.class) - .flatMap(errorBody -> Mono.error(new RuntimeException(errorBody)))) - // 处理第三方返回的5xx服务器错误(如第三方服务异常) - .onStatus(HttpStatus::is5xxServerError, response -> - response.bodyToMono(String.class) - .flatMap(errorBody -> { - String errorMsg = String.format("第三方接口5xx错误,keywordId: %d, URL: %s, 状态码: %d, 错误详情: %s", - keywordId, thirdPartyUrl, response.statusCode().value(), errorBody); - log.error(errorMsg); - return Mono.error(new RuntimeException(errorMsg)); - }) - ) - .bodyToMono(new ParameterizedTypeReference<SearchTaskResponse>() {}) - .flatMap(taskResponse -> { - if (taskResponse != null && taskResponse.getTask_id() != null) { - - // 使用 Reactor 的方式更新数据库 - return Mono.fromRunnable(() -> { - - //更新关键词状态 - LambdaUpdateWrapper<Keyword> updateWrapper = new LambdaUpdateWrapper<>(); - updateWrapper.eq(Keyword::getKeyword_id, batchRequest.getKeyword_id()); - updateWrapper.set(Keyword::getStatus, "submitted"); - updateWrapper.set(Keyword::getTask_id, taskResponse.getTask_id()); - keywordService.update(updateWrapper); - //设置轮数 - Keyword keyword = keywordService.getById(batchRequest.getKeyword_id()); - // 更新关键词任务与任务ID的关联 - // 获取与关键词相关的任务,task_id 为 null,确保只取一个任务 - List<KeywordTask> keywordTasks = keywordTaskService.list(new LambdaQueryWrapper<KeywordTask>() - .eq(KeywordTask::getKeyword_id, keyword.getKeyword_id()) - .eq(KeywordTask::getNum, keyword.getNum()) - .isNull(KeywordTask::getTask_id)); - if (keywordTasks.size() > 0) { - KeywordTask keywordTask = keywordTasks.get(0); - keywordTask.setTask_id(taskResponse.getTask_id()); - keywordTask.setStatus("pending"); - keywordTaskService.updateById(keywordTask); - } - //将提问词列表的状态转为pending - for (String questionName : batchRequest.getQuestions()) { - questionService.update(new LambdaUpdateWrapper<Question>().eq(Question::getKeyword_id, keyword.getKeyword_id()).eq(Question::getQuestion,questionName).set(Question::getStatus, "pending")); - - } - //所有关键词都在采集中或者已完成或者错误设置订单进入采集状态 - List<Keyword> orderKeywords = keywordService.list(new LambdaQueryWrapper<Keyword>() - .eq(Keyword::getOrder_id, keyword.getOrder_id())); - if (!orderKeywords.isEmpty() && orderKeywords.stream().allMatch(k -> - "completed".equals(k.getStatus()) || "submitted".equals(k.getStatus()) - )) { - Orders orders = orderService.getById(keyword.getOrder_id()); - if (orders != null) { - orders.setStatus(2); - orderService.updateById(orders); - - } - } - }).subscribeOn(Schedulers.boundedElastic()) // 在弹性线程池执行 - .thenReturn(taskResponse); - } - return Mono.just(taskResponse); - }); + public Mono<TaskStatusResponse> getTaskStatus(@RequestParam(value = "taskId" )String taskId) { + return collectionService.getError(taskId); } @PostMapping("/cancel/{keywordId}") @ApiOperation(value = "取消任务") - public Mono<ResponseResult<TaskCancelResponse>> cancelTask(@PathVariable Integer keywordId) { - // 1. 从主队列移除任务 - List<SearchTaskRequest> removedMainQueueTasks = removeTasksFromQueueByKeywordId(keywordId); - int removedMainQueueCount = removedMainQueueTasks.size(); // 获取移除的任务数量 - - // 2. 从批次队列移除任务 (新增逻辑) - int removedBatchQueue = removeBatchTasksByKeywordId(keywordId); - - // 3. 查询所有与关键词相关的任务 - List<KeywordTask> tasks = keywordTaskService.list( - new LambdaQueryWrapper<KeywordTask>().eq(KeywordTask::getKeyword_id, keywordId) - ); - - // 4. 筛选出需要远程取消的任务 - List<KeywordTask> tasksToCancelRemotely = tasks.stream() - .filter(task -> task.getTask_id() != null && "pending".equalsIgnoreCase(task.getStatus())) - .collect(Collectors.toList()); - - return Flux.fromIterable(tasksToCancelRemotely) - .flatMap(task -> { - // 创建状态更新和远程取消的组合操作 - Mono<Void> updateStatus = updateTaskStatus(task.getTask_id(), "cancelled"); - Mono<ResponseResult<?>> cancelOp = cancelRemoteTask(task.getTask_id()) - .onErrorResume(e -> { - log.error("取消任务 {} 失败: {}", task.getTask_id(), e.getMessage()); - return Mono.just(ResponseResult.error("取消任务失败: " + e.getMessage())); - }); - - return Mono.zip(cancelOp, updateStatus) - .thenReturn(true); - }, 10) - .collectList() - .flatMap(canceledTasks -> { - return updateKeywordAndOrderStatus(keywordId) - .thenReturn(ResponseResult.success( - new TaskCancelResponse( - String.format("任务已取消: 主队列移除%d, 批次队列移除%d, 远程取消%d", - removedMainQueueCount , - removedBatchQueue, - tasksToCancelRemotely.size()) - ) - )); - }); + public Mono<ResponseResult<TaskCancelResponse>> cancelTask(@PathVariable Integer keywordId) { + return collectionService.getResponseResult(keywordId); } - // 新增方法:移除批次队列 - private int removeBatchTasksByKeywordId(Integer keywordId) { - Queue<List<UserDto>> batchQueue = batchQueues.remove(keywordId); - if (batchQueue != null) { - int count = batchQueue.size(); - batchQueue.clear(); - log.info("从批次队列中移除关键词 {} 的 {} 个批次任务", keywordId, count); - return count; - } - return 0; - } - // 辅助方法:获取待取消任务 - private List<KeywordTask> getTasksToCancel(Integer keywordId) { - return keywordTaskService.list( - new LambdaQueryWrapper<KeywordTask>() - .eq(KeywordTask::getKeyword_id, keywordId) - .isNotNull(KeywordTask::getTask_id) - .eq(KeywordTask::getStatus, "pending") - ); - } - - // 提取关键词和订单状态更新的逻辑为单独方法 - private Mono<Void> updateKeywordAndOrderStatus(Integer keywordId) { - return Mono.fromRunnable(() -> { - try { - // 查询关键词 - Keyword keyword = keywordService.getById(keywordId); - if (keyword == null) { - log.warn("未找到关键词,keywordId: {}", keywordId); - return; - } - - //把任务id为空的删除 - LambdaUpdateWrapper<KeywordTask> updateWrapper = new LambdaUpdateWrapper<>(); - updateWrapper.eq(KeywordTask::getKeyword_id, keywordId); - updateWrapper.isNull(KeywordTask::getTask_id); - keywordTaskService.remove(updateWrapper); - // 查询该关键词下的所有任务 - LambdaQueryWrapper<KeywordTask> keywordTaskWrapper = new LambdaQueryWrapper<>(); - keywordTaskWrapper.eq(KeywordTask::getKeyword_id, keywordId); - - List<KeywordTask> keywordTasks = keywordTaskService.list(keywordTaskWrapper); - - // 更新关键词状态 - keyword.setStatus("completed"); - keywordService.updateById(keyword); - - //更新提问词状态为取消 - questionService.update(new LambdaUpdateWrapper<Question>().eq(Question::getKeyword_id, keywordId).isNull(Question::getResponse).set(Question::getStatus, "cancelled")); - - // 更新订单状态 - String orderId = keyword.getOrder_id(); - if (orderId != null && !orderId.isEmpty()) { - // 查询订单下所有关键词 - LambdaQueryWrapper<Keyword> orderKeywordsWrapper = new LambdaQueryWrapper<>(); - orderKeywordsWrapper.eq(Keyword::getOrder_id, orderId); - List<Keyword> orderKeywords = keywordService.list(orderKeywordsWrapper); - - // 所有关键词均已完成,则更新订单状态为3 - if (!orderKeywords.isEmpty() && orderKeywords.stream().allMatch(k -> - "completed".equals(k.getStatus()) || "false".equals(k.getStatus()) - )) { - Orders orders = orderService.getById(orderId); - if (orders != null) { - orders.setStatus(3); - orderService.updateById(orders); - log.info("订单 {} 所有关键词已完成,更新状态为3", orderId); - } - } - if (!orderKeywords.isEmpty() && orderKeywords.stream().allMatch(k -> - !"completed".equals(k.getStatus()) || "false".equals(k.getStatus()) || "cancelled".equals(k.getStatus()) - )) { - Orders orders = orderService.getById(orderId); - if (orders != null) { - orders.setStatus(1); - orderService.updateById(orders); - log.info("订单 {} 所有关键词已完成或者取消,更新状态为1", orderId); - } - } - } - } catch (Exception e) { - log.error("更新关键词和订单状态失败: {}", e.getMessage(), e); - } - }); - } -private List<SearchTaskRequest> removeTasksFromQueueByKeywordId(Integer keywordId) { - List<SearchTaskRequest> removedTasks = new ArrayList<>(); - - Iterator<SearchTaskRequest> iterator = taskQueue.iterator(); - while (iterator.hasNext()) { - SearchTaskRequest task = iterator.next(); - if (task.getKeyword_id() != null && task.getKeyword_id().equals(keywordId)) { - removedTasks.add(task); - iterator.remove(); - } - } - - - log.info("从队列中移除了 {} 个与关键词ID {} 相关的任务", removedTasks.size(), keywordId); - return removedTasks; -} - -// 发送远程取消请求 - private Mono<ResponseResult<?>> cancelRemoteTask(String taskId) { - // 使用Collections.singletonMap或手动创建Map - Map<String, Object> requestBody = new HashMap<>(); - requestBody.put("status", "pending"); - - return webClient.post() - .uri(baseUrl + "/api/v1/tasks/" + taskId + "/cancel") - .contentType(MediaType.APPLICATION_JSON) - .bodyValue(requestBody) - .retrieve() - .onStatus(HttpStatus::isError, response -> response.bodyToMono(String.class) - .flatMap(errorBody -> Mono.error(new RuntimeException("取消失败: " + errorBody)))) - .bodyToMono(Void.class) - .thenReturn(ResponseResult.success("任务已取消")); - } - - // 更新单个任务状态 - private Mono<Void> updateTaskStatus(String taskId, String status) { - return Mono.fromRunnable(() -> { - LambdaUpdateWrapper<KeywordTask> updateWrapper = new LambdaUpdateWrapper<>(); - updateWrapper.eq(KeywordTask::getTask_id, taskId); - updateWrapper.set(KeywordTask::getStatus, status); - keywordTaskService.update(updateWrapper); - }).subscribeOn(Schedulers.boundedElastic()).then(); - } @ApiOperation(value = "获取任务结果") @GetMapping("/tasks/{taskId}") public Mono<TaskResultResponse> getTaskResult(@PathVariable String taskId) { - WebClient webClient2 = WebClient.builder() - .exchangeStrategies(ExchangeStrategies.builder() - .codecs(configurer -> configurer.defaultCodecs() - .maxInMemorySize(10 * 1024 * 1024)) // 10MB - .build()) - .build(); - return webClient2.get() - .uri(baseUrl + "/api/v1/tasks/" + taskId + "/result") - .accept(MediaType.APPLICATION_JSON) - .retrieve() - .onStatus(HttpStatus::is4xxClientError, response -> { - if (response.statusCode() == HttpStatus.NOT_FOUND) { - return response.bodyToMono(String.class) - .flatMap(errorBody -> Mono.error(new RuntimeException("任务不存在"))); - } else if (response.statusCode() == HttpStatus.BAD_REQUEST) { - return response.bodyToMono(String.class) - .flatMap(errorBody -> Mono.error(new RuntimeException("任务未完成,无法获取结果"))); - } - return response.createException().flatMap(Mono::error); - }) - .bodyToMono(TaskResultResponse.class) - .flatMap(responseResult -> { - TaskResultResponse result = responseResult; - - if (result != null && result.getResults() != null) { - return updateQuestionAndReference(result) - .thenReturn(responseResult); - } - return Mono.just(responseResult); - }) - .onErrorResume(e -> { - System.out.println("获取任务结果失败"); - log.error("获取任务结果失败: {}", e.getMessage(), e); - TaskResultResponse result = new TaskResultResponse(); - result.setDetail("获取任务结果失败: " + e.getMessage()); - return Mono.just(result); - }); - } - - /** - * 获取或创建平台(确保同一domain只创建一次) - * @param domain 平台域名 - * @return 已存在或新创建的Platform - */ - private Platform getOrCreatePlatform(String domain,String platformName) { - // 1. 先尝试查询已存在的平台 - - Platform platform = platformService.getPlatformByDomain(domain); - if (platform != null) { - return platform; - } - - // 2. 若不存在,尝试创建(处理并发场景) - try { - // 2.1 获取或创建“默认”类型(Type也需避免重复,建议Type表的type_name也加唯一约束) - Type defaultType = typeService.getOne(new LambdaQueryWrapper<Type>() - .eq(Type::getType_name, "默认")); - if (defaultType == null) { - defaultType = new Type(); - defaultType.setType_name("默认"); - typeService.save(defaultType); // 若Type可能重复,此处也需处理DuplicateKeyException - } - - // 2.2 构建新平台对象 - Platform newPlatform = new Platform(); - newPlatform.setDomain(domain); - if (platformName != null) { - newPlatform.setPlatform_name(platformName); - }else { - newPlatform.setPlatform_name(domain); - } - // 平台名称默认使用域名,可根据实际需求调整 - newPlatform.setType_id(defaultType.getType_id()); - newPlatform.setCreate_time(LocalDateTime.now()); // 补充创建时间 - - // 2.3 尝试保存,若因唯一约束冲突失败,则捕获异常 - platformService.save(newPlatform); - return newPlatform; // 保存成功,返回新创建的平台 - - } catch (DuplicateKeyException e) { - // 3. 若捕获到重复键异常,说明并发创建了,重新查询即可(此时数据库中已存在该平台) - log.warn("平台domain={}已存在,无需重复创建", domain, e); - return platformService.getPlatformByDomain(domain); // 重新查询,一定能获取到 - } catch (Exception e) { - // 处理其他异常(如数据库连接失败等) - log.error("创建平台失败,domain={}", domain, e); - throw new RuntimeException("创建平台失败", e); - } - } - //更新提问词和引用数据 -// private Mono<Void> updateQuestionAndReference(TaskResultResponse result) { -// return Mono.fromRunnable(() -> { -// try { -// //查看每个账号信息的status是否正常 -// -// // 1. 根据KeywordTask更新关键词状态 -// // 查询关键词ID -// LambdaQueryWrapper<KeywordTask> keywordTaskWrapper = new LambdaQueryWrapper<>(); -// keywordTaskWrapper.eq(KeywordTask::getTask_id, result.getTask_id()); -// KeywordTask keywordTask = keywordTaskService.getOne(keywordTaskWrapper); -// keywordTask.setStatus("completed"); -// keywordTaskService.updateById(keywordTask); -// Keyword keyword = keywordService.getById(keywordTask.getKeyword_id()); -// -// if (keyword == null) { -// System.out.println("未找到关联的关键词,task_id: " + result.getTask_id()); -// //报错 -// throw new Exception("未找到关联的关键词,task_id: " + result.getTask_id()); -// -// } -// LambdaQueryWrapper<KeywordTask> keywordTaskWrapper2 = new LambdaQueryWrapper<>(); -// keywordTaskWrapper2.eq(KeywordTask::getKeyword_id, keyword.getKeyword_id()); -// List<KeywordTask> keywordTasks = keywordTaskService.list(keywordTaskWrapper2); -// -// //如果全部为completed 或者错误、取消、任务不存在 关键词也为completed ,如果关联关系没有任务id,或者状态为running ,关键词为submitted, -// if (keywordTasks.stream().allMatch(task -> "completed".equals(task.getStatus()) || "false".equals(task.getStatus()) || "cancelled".equals(task.getStatus()) ||"canceled".equals(task.getStatus()) || "nonentity".equals(task.getStatus())) ) { -// keyword.setStatus("completed"); -// keywordService.updateById(keyword); -// -// } -// -// String orderId = keyword.getOrder_id(); -// if (orderId == null || orderId.isEmpty()) { -// System.out.println("关键词[" + keyword.getKeyword_id() + "]未关联订单,跳过订单状态更新"); -// return; -// } -// -// // 2.更新订单状态为待处理 查询该订单下的所有关键词,更新订单状态(有取消) -// LambdaQueryWrapper<Keyword> orderKeywordsWrapper = new LambdaQueryWrapper<>(); -// orderKeywordsWrapper.eq(Keyword::getOrder_id, orderId); -// List<Keyword> orderKeywords = keywordService.list(orderKeywordsWrapper); -// -// if (orderKeywords.isEmpty()) { -// System.out.println("订单[" + orderId + "]下无关键词,跳过状态更新"); -// return; -// } -// boolean allValid2 = orderKeywords.stream() -// .allMatch(k -> "completed".equals(k.getStatus()) || "false".equals(k.getStatus()) || "cancelled".equals(k.getStatus())); -// if (allValid2) { -// Orders orders = orderService.getById(orderId); -// if (orders != null) { -// orders.setStatus(1); // 假设Orders有Integer类型的status字段 -// orderService.updateById(orders); -// System.out.println("订单[" + orderId + "]所有关键词采集完成或者取消,已更新状态为1"); -// } else { -// System.out.println("未找到订单[" + orderId + "],无法更新状态"); -// } -// } -// // 3.更新订单状态为完成 检查所有关键词的状态是否均为 completed 或 false -// boolean allValid = orderKeywords.stream() -// .allMatch(k -> "completed".equals(k.getStatus()) || "false".equals(k.getStatus())); -// -// // 4. 若所有关键词状态均有效,更新订单状态为3 -// if (allValid) { -// Orders orders = orderService.getById(orderId); -// if (orders != null) { -// orders.setStatus(3); // 假设Orders有Integer类型的status字段 -// orderService.updateById(orders); -// System.out.println("订单[" + orderId + "]所有关键词状态符合条件,已更新状态为3"); -// } else { -// System.out.println("未找到订单[" + orderId + "],无法更新状态"); -// } -// } -// -// -// Orders orders = orderService.getById(keyword.getOrder_id()); -// -// // 2. 批量查询所有问题 -// LambdaQueryWrapper<Question> queryWrapper = new LambdaQueryWrapper<>(); -// queryWrapper.eq(Question::getKeyword_id, keyword.getKeyword_id()); -// List<Question> questions = questionService.list(queryWrapper); -// -// // 构建问题映射表,用于快速查找 -// Map<String, Question> questionMap = questions.stream() -// .collect(Collectors.toMap(Question::getQuestion, q -> q)); -// -// // 3. 收集所有需要更新的问题和引用 -// List<Question> questionsToUpdate = new ArrayList<>(); -// List<Reference> allReferences = new ArrayList<>(); -// List<Reference> resultList = new ArrayList<>(); -// -// // 遍历账号 -// for (UserResult userResult : result.getResults()) { -// //更新账号状态 -// if ( "failed".equals(userResult.getStatus())){ -// if (userResult.getError().contains("登录失败")){ -// LambdaUpdateWrapper<User> userWrapper = new LambdaUpdateWrapper<>(); -// userWrapper.eq(User::getUser_email, userResult.getUser_email()); -// userWrapper.set(User::getStatus, "无法登录"); -// userService.update(userWrapper); -// //更新所有提问词的状态 -// questionService.update(new LambdaUpdateWrapper<Question>().eq(Question::getKeyword_id, keyword.getKeyword_id()) -// .set(Question::getStatus, "failed") -// .set(Question::getError, "账户登录失败")); -// -// }else if (userResult.getError().contains("信息错误")){ -// LambdaUpdateWrapper<User> userWrapper = new LambdaUpdateWrapper<>(); -// userWrapper.eq(User::getUser_email, userResult.getUser_email()); -// userWrapper.set(User::getStatus, "信息错误"); -// userService.update(userWrapper); -// } -// } -// for (QuestionResult questionResult : userResult.getQuestions_results()) { -// try { -// Question question = questionMap.get(questionResult.getQuestion()); -// if (question != null) { -// -// -// //保存问题结果 -// QuestionResultList questionResultList = new QuestionResultList(); -// questionResultList.setKeyword_id(keyword.getKeyword_id()); -// questionResultList.setQuestion(questionResult.getQuestion()); -// questionResultList.setResponse(questionResult.getResponse()); -// questionResultList.setStatus(questionResult.getStatus()); -// questionResultList.setExtracted_count(questionResult.getExtracted_count()); -// questionResultList.setKeyword_task_id(result.getTask_id()); -// questionResultList.setError(questionResult.getError()); -// questionResultList.setNum(keyword.getNum()); -// if (questionResult.getTimestamp() != null) { -// DateTimeFormatter formatter = DateTimeFormatter -// .ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSSSS"); -// questionResultList.setTimestamp( -// LocalDateTime.parse(questionResult.getTimestamp(), formatter)); -// } -// // 保存问题结果列表(新增保存逻辑) -// questionResultService.save(questionResultList); -// // 查询当前轮次下该提问词的所有结果 -// List<QuestionResultList> allResults = questionResultService.list( -// new LambdaQueryWrapper<QuestionResultList>() -// .eq(QuestionResultList::getKeyword_id, keyword.getKeyword_id()) -// .eq(QuestionResultList::getQuestion, question.getQuestion()) -// .eq(QuestionResultList::getNum, keyword.getNum()) -// ); -// -// // 判断最终状态 -// String finalStatus = determineFinalStatus(allResults); -// if ("success".equals(finalStatus)){ -// question.setStatus("success"); -// question.setError(""); -// }else if ("no_results".equals(finalStatus)){ -// question.setStatus("success"); -// question.setError("采集结果无引用数据"); -// }else if ("busyness".equals(finalStatus)){ -// question.setStatus("failed"); -// question.setError("DeepSeek繁忙,请稍后尝试"); -// } -// -// // 更新问题对象 -// question.setResponse(questionResult.getResponse()); -// question.setExtracted_count(questionResult.getExtracted_count()); -//// question.setError(questionResult.getError()); -// question.setKeyword_id(keyword.getKeyword_id()); -// -// if (questionResult.getTimestamp() != null) { -// DateTimeFormatter formatter = DateTimeFormatter -// .ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSSSS"); -// question.setTimestamp( -// LocalDateTime.parse(questionResult.getTimestamp(), formatter)); -// } -// -// questionsToUpdate.add(question); -// // 初始化引用列表(避免null) -// List<Reference> references = new ArrayList<>(); -// List<TaskResultResponse.Reference> originalReferences = questionResult.getReferences(); -// if (originalReferences == null) { -// originalReferences = Collections.emptyList(); -// } -// -// // 遍历原始引用列表,转换为Reference对象 -// for (TaskResultResponse.Reference ref : originalReferences) { -// Reference reference = new Reference(); -// // 设置基本字段 -// reference.setQuestion_id(question.getQuestion_id()); -// reference.setTitle(ref.getTitle()); -// reference.setUrl(ref.getUrl()); -// reference.setDomain(ref.getDomain()); -// reference.setNum(keyword.getNum()); -// reference.setTask_id(result.getTask_id()); -// reference.setKeyword_id(keyword.getKeyword_id()); -// if (null!=ref.getPublish_time()) { -// reference.setCreate_time(ref.getPublish_time().atStartOfDay()); -// } -// -// // 关键:使用优化后的方法获取平台,避免重复创建 -// Platform platform = getOrCreatePlatform(ref.getDomain(),ref.getPlatform_name()); -// reference.setPlatform_id(platform.getPlatform_id()); -// reference.setType_id(platform.getType_id()); // 直接从平台获取类型ID,更可靠 -// // 添加到结果列表 -// references.add(reference); -// } -// // 添加到总引用列表 -// if (!references.isEmpty()) { -// allReferences.addAll(references); -// } -// -// //取数据库中当前关键词的当前轮次的当前问题id结果拿出来 -// List<Reference> dbList = referenceService.list(new LambdaQueryWrapper<Reference>().eq(Reference::getKeyword_id, keyword.getKeyword_id()) -// .eq(Reference::getNum, keyword.getNum()) -// .eq(Reference::getQuestion_id, question.getQuestion_id()) -// ); -// -// // 1. 合并两个列表 -// List<Reference> combinedList = new ArrayList<>(); -// combinedList.addAll(allReferences); -// combinedList.addAll(dbList); -// -// // 2. 创建复合键的Map,用于统计完全匹配的记录 -// Map<String, List<Reference>> compositeKeyMap = combinedList.stream() -// .collect(Collectors.groupingBy( -// ref -> ref.getTitle() + "|" + ref.getUrl() + "|" + ref.getDomain() -// )); -// -// // 3. 处理每组重复记录 -// compositeKeyMap.forEach((key, refGroup) -> { -// // 3.1 找出组内有ID的记录(优先从dbList中获取) -// Optional<Reference> existingRecord = refGroup.stream() -// .filter(ref -> ref.getReference_id() != null) -// .findFirst(); -// -// // 3.2 统计该组的重复次数(总数-1) -// int repetitionCount = refGroup.size() - 1; -// -// // 3.3 决定最终保留的记录 -// Reference recordToSave = new Reference(); -// if (existingRecord.isPresent()) { -// // 使用已有ID的记录并更新重复次数 -// recordToSave = existingRecord.get(); -// recordToSave.setRepetition_num( -// (recordToSave.getRepetition_num() == null ? 1 : recordToSave.getRepetition_num()) -// + repetitionCount -// ); -// } else { -// // 没有ID记录则取第一条并设置重复次数 -// recordToSave = refGroup.get(0); -// recordToSave.setRepetition_num(1+repetitionCount); -// } -// -// resultList.add(recordToSave); -// }); -// referenceService.saveOrUpdateBatch(resultList); -// } -// } catch (Exception e) { -// log.error(e.getMessage(), e); -// System.out.println("处理问题结果失败: " + e.getMessage()); -// } -// } -// -// -// } -// -// // 4. 批量更新问题 -// System.out.println(questionsToUpdate); -// if (!questionsToUpdate.isEmpty()) { -// questionService.updateBatchById(questionsToUpdate); -// System.out.println("成功批量更新 " + questionsToUpdate.size() + " 个问题"); -// } -// -// } catch (Exception e) { -// log.error("更新问题和引用数据失败: " ,e.getMessage(), e); -// throw new RuntimeException("更新问题和引用数据失败", e); -// } -// }); -// } -// // 根据所有批次的结果判断最终状态 -// private String determineFinalStatus(List<QuestionResultList> results) { -// if (results.isEmpty()) { -// return "no_results"; // 无结果 -// } -// -// // 统计关键指标 -// int totalCount = results.size(); -// int emptyResponseCount = 0; -// int systemBusyCount = 0; -// -// for (QuestionResultList result : results) { -// // 判断回答是否为空 -// if (result.getExtracted_count() == 0 ) { -// emptyResponseCount++; -// } -// -// // 判断是否为系统繁忙 -// if ("success".equals(result.getStatus()) && (result.getResponse().isEmpty()|| result.getResponse().contains("WebDriver连接中断") || result.getResponse().contains("响应超时"))) { -// systemBusyCount++; -// } -// -// } -// -// // 全返回系统繁忙 -// if (systemBusyCount == totalCount) { -// return "busyness"; -// } -// // 全返回信息为空 -// if (emptyResponseCount == totalCount) { -// return "no_results"; -// } -// -// -// // 系统繁忙比例超过阈值(可配置,这里设为70%) -//// double busyRate = (double) systemBusyCount / totalCount; -//// if (busyRate >= 0.7) { -//// return "系统繁忙,请稍后尝试"; -//// } -// -// // 其他情况返回成功 -// return "success"; -// } - - private Mono<Void> updateQuestionAndReference(TaskResultResponse result) { - return Mono.fromRunnable(() -> doUpdateQuestionAndReference(result)) - .onErrorResume(e -> { - log.error("处理任务结果失败", e); - return Mono.error(e); // 传播异常,触发事务回滚 - }).then(); - } - - // 核心业务逻辑,添加事务注解保证原子性 - @Transactional(rollbackFor = Exception.class) - public void doUpdateQuestionAndReference(TaskResultResponse result) { - try { - // 1. 查询关键词任务并更新状态 - LambdaQueryWrapper<KeywordTask> keywordTaskWrapper = new LambdaQueryWrapper<>(); - keywordTaskWrapper.eq(KeywordTask::getTask_id, result.getTask_id()); - KeywordTask keywordTask = keywordTaskService.getOne(keywordTaskWrapper); - if (keywordTask == null) { - throw new Exception("未找到关键词任务,task_id: " + result.getTask_id()); - } - keywordTask.setStatus("completed"); - keywordTaskService.updateById(keywordTask); - - // 2. 查询关键词信息 - Keyword keyword = keywordService.getById(keywordTask.getKeyword_id()); - if (keyword == null) { - throw new Exception("未找到关联的关键词,task_id: " + result.getTask_id()); - } - - // 3. 更新关键词状态(基于关联任务状态) - updateKeywordStatus(keyword); - - // 4. 更新订单状态(基于关键词状态) - updateOrderStatus(keyword); - - // 5. 预查询问题列表(一次查询,内存映射) - LambdaQueryWrapper<Question> questionWrapper = new LambdaQueryWrapper<>(); - questionWrapper.eq(Question::getKeyword_id, keyword.getKeyword_id()); - List<Question> questions = questionService.list(questionWrapper); - Map<String, Question> questionMap = questions.stream() - .collect(Collectors.toMap(Question::getQuestion, q -> q)); - - // 6. 收集批量保存的数据(避免循环内保存) - List<QuestionResultList> questionResultsToSave = new ArrayList<>(); // 批量保存问题结果 - List<Reference> allReferences = new ArrayList<>(); // 收集所有引用,后续统一处理 - - // 7. 遍历结果处理问题和引用 - for (UserResult userResult : result.getResults()) { - // 7.1 更新用户状态(失败处理) - handleUserStatus(userResult, keyword); - - // 7.2 处理问题结果 - for (QuestionResult questionResult : userResult.getQuestions_results()) { - Question question = questionMap.get(questionResult.getQuestion()); - if (question == null) { - log.warn("未找到问题记录: {}", questionResult.getQuestion()); - continue; - } - - // 7.2.1 构建问题结果并加入批量列表 - QuestionResultList questionResultList = buildQuestionResultList(questionResult, keyword, result); - questionResultsToSave.add(questionResultList); - - // 7.2.2 处理引用数据(仅收集,不立即保存) - List<Reference> references = buildReferences(questionResult, question, keyword, result); - allReferences.addAll(references); - } - } - - // 8. 批量保存问题结果(一次数据库交互) - if (!questionResultsToSave.isEmpty()) { - questionResultService.saveBatch(questionResultsToSave); - log.info("批量保存问题结果 {} 条", questionResultsToSave.size()); - } - - // 9. 批量处理引用数据(去重+更新重复次数+批量保存) - if (!allReferences.isEmpty()) { - handleReferencesInBatch(allReferences, keyword); - } - - // 10. 批量更新问题状态(基于最终结果) - updateQuestionsStatusInBatch(questions, keyword); - - } catch (Exception e) { - log.error("更新数据失败", e); - throw new RuntimeException("更新数据失败", e); // 触发事务回滚 - } - } - - // 更新关键词状态 - private void updateKeywordStatus(Keyword keyword) { - LambdaQueryWrapper<KeywordTask> taskWrapper = new LambdaQueryWrapper<>(); - taskWrapper.eq(KeywordTask::getKeyword_id, keyword.getKeyword_id()); - List<KeywordTask> keywordTasks = keywordTaskService.list(taskWrapper); - - boolean allCompletedOrFailed = keywordTasks.stream() - .allMatch(task -> "completed".equals(task.getStatus()) - || "false".equals(task.getStatus()) - || "cancelled".equals(task.getStatus()) - || "canceled".equals(task.getStatus()) - || "nonentity".equals(task.getStatus())); - - if (allCompletedOrFailed) { - keyword.setStatus("completed"); - keywordService.updateById(keyword); - } - } - - // 更新订单状态 - private void updateOrderStatus(Keyword keyword) { - String orderId = keyword.getOrder_id(); - if (orderId == null || orderId.isEmpty()) { - log.info("关键词[{}]未关联订单,跳过订单更新", keyword.getKeyword_id()); - return; - } - - LambdaQueryWrapper<Keyword> orderKeywordsWrapper = new LambdaQueryWrapper<>(); - orderKeywordsWrapper.eq(Keyword::getOrder_id, orderId); - List<Keyword> orderKeywords = keywordService.list(orderKeywordsWrapper); - if (orderKeywords.isEmpty()) { - log.info("订单[{}]无关键词,跳过状态更新", orderId); - return; - } - - boolean allValid2 = orderKeywords.stream() - .allMatch(k -> "completed".equals(k.getStatus()) - || "false".equals(k.getStatus()) - || "cancelled".equals(k.getStatus())); - if (allValid2) { - updateOrderStatus(orderId, 1, "所有关键词采集完成或取消"); - } - - boolean allValid = orderKeywords.stream() - .allMatch(k -> "completed".equals(k.getStatus()) || "false".equals(k.getStatus())); - if (allValid) { - updateOrderStatus(orderId, 3, "所有关键词状态符合条件"); - } - } - - // 封装订单状态更新 - private void updateOrderStatus(String orderId, Integer status, String logMsg) { - Orders orders = orderService.getById(orderId); - if (orders != null) { - orders.setStatus(status); - orderService.updateById(orders); - log.info("订单[{}]{},已更新状态为{}", orderId, logMsg, status); - } else { - log.warn("未找到订单[{}],无法更新状态", orderId); - } - } - - // 构建问题结果对象 - private QuestionResultList buildQuestionResultList(QuestionResult questionResult, Keyword keyword, TaskResultResponse result) { - QuestionResultList questionResultList = new QuestionResultList(); - questionResultList.setKeyword_id(keyword.getKeyword_id()); - questionResultList.setQuestion(questionResult.getQuestion()); - questionResultList.setResponse(questionResult.getResponse()); - questionResultList.setStatus(questionResult.getStatus()); - questionResultList.setExtracted_count(questionResult.getExtracted_count()); - questionResultList.setKeyword_task_id(result.getTask_id()); - questionResultList.setError(questionResult.getError()); - questionResultList.setNum(keyword.getNum()); - - if (questionResult.getTimestamp() != null) { - DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSSSS"); - questionResultList.setTimestamp(LocalDateTime.parse(questionResult.getTimestamp(), formatter)); - } - return questionResultList; - } - - // 构建引用数据列表 - private List<Reference> buildReferences(QuestionResult questionResult, Question question, Keyword keyword, TaskResultResponse result) { - List<Reference> references = new ArrayList<>(); - List<TaskResultResponse.Reference> originalReferences = questionResult.getReferences(); - if (originalReferences == null) { - return references; - } - - for (TaskResultResponse.Reference ref : originalReferences) { - // 过滤无效引用(标题、URL、域名不能为空) - if (ref.getTitle() == null || ref.getUrl() == null || ref.getDomain() == null) { - log.warn("引用数据字段缺失,跳过:title={}, url={}, domain={}", - ref.getTitle(), ref.getUrl(), ref.getDomain()); - continue; - } - - Reference reference = new Reference(); - reference.setQuestion_id(question.getQuestion_id()); - reference.setTitle(ref.getTitle()); - reference.setUrl(ref.getUrl()); - reference.setDomain(ref.getDomain()); - reference.setNum(keyword.getNum()); - reference.setTask_id(result.getTask_id()); - reference.setKeyword_id(keyword.getKeyword_id()); - - if (ref.getPublish_time() != null) { - reference.setCreate_time(ref.getPublish_time().atStartOfDay()); - } - - Platform platform = getOrCreatePlatform(ref.getDomain(), ref.getPlatform_name()); - // 校验平台信息非空 - if (platform == null || platform.getPlatform_id() == null) { - log.warn("平台信息无效,跳过引用:domain={}", ref.getDomain()); - continue; - } - reference.setPlatform_id(platform.getPlatform_id()); - reference.setType_id(platform.getType_id()); - references.add(reference); - } - return references; - } - - // 批量处理引用数据(去重+更新重复次数) - private void handleReferencesInBatch(List<Reference> allReferences, Keyword keyword) { - // 1. 过滤原始列表中的 null 元素 - List<Reference> validReferences = allReferences.stream() - .filter(Objects::nonNull) - .collect(Collectors.toList()); - - if (validReferences.isEmpty()) { - log.info("无有效引用数据,跳过批量保存"); - return; - } - - // 2. 查询数据库中已存在的引用并过滤 null - LambdaQueryWrapper<Reference> dbRefWrapper = new LambdaQueryWrapper<>(); - dbRefWrapper.eq(Reference::getKeyword_id, keyword.getKeyword_id()) - .eq(Reference::getNum, keyword.getNum()); - List<Reference> dbReferences = referenceService.list(dbRefWrapper); - List<Reference> validDbReferences = dbReferences.stream() - .filter(Objects::nonNull) - .collect(Collectors.toList()); - - // 3. 合并并去重 - Map<String, List<Reference>> compositeKeyMap = new HashMap<>(); - validReferences.forEach(ref -> addToCompositeMap(compositeKeyMap, ref)); - validDbReferences.forEach(ref -> addToCompositeMap(compositeKeyMap, ref)); - - // 4. 处理重复次数 - List<Reference> referencesToSave = new ArrayList<>(); - compositeKeyMap.forEach((key, refGroup) -> { - List<Reference> validRefGroup = refGroup.stream() - .filter(Objects::nonNull) - .collect(Collectors.toList()); - if (validRefGroup.isEmpty()) return; - - Optional<Reference> existingRef = validRefGroup.stream() - .filter(ref -> ref.getReference_id() != null) - .findFirst(); - - Reference finalRef = existingRef.orElse(validRefGroup.get(0)); - int repetitionNum = (finalRef.getRepetition_num() == null ? 1 : finalRef.getRepetition_num()) - + (validRefGroup.size() - 1); - finalRef.setRepetition_num(repetitionNum); - referencesToSave.add(finalRef); - }); - - // 5. 最终校验并保存 - List<Reference> finalSaveList = referencesToSave.stream() - .filter(Objects::nonNull) - .collect(Collectors.toList()); - - // 关键校验:列表非空且元素有效 - if (finalSaveList.isEmpty()) { - log.info("处理后无有效引用数据可保存"); - return; - } - if (finalSaveList.stream().anyMatch(ref -> !(ref instanceof Reference))) { - log.error("引用数据类型异常,无法保存"); - return; - } - - // 执行保存 - try { - referenceService.saveOrUpdateBatch(finalSaveList); - log.info("批量保存引用数据成功,数量:{}", finalSaveList.size()); - } catch (Exception e) { - log.error("批量保存引用数据失败", e); - throw new RuntimeException("保存引用数据失败", e); - } - } - - // 辅助方法:将引用添加到复合键Map - private void addToCompositeMap(Map<String, List<Reference>> map, Reference ref) { - // 再次校验引用的核心字段非空 - if (ref.getTitle() == null || ref.getUrl() == null || ref.getDomain() == null) { - log.warn("引用核心字段为空,跳过映射:{}", ref); - return; - } - String key = ref.getQuestion_id() + "|" + ref.getTitle() + "|" + ref.getUrl() + "|" + ref.getDomain(); - map.computeIfAbsent(key, k -> new ArrayList<>()).add(ref); - } - - // 批量更新问题状态 - private void updateQuestionsStatusInBatch(List<Question> questions, Keyword keyword) { - if (questions.isEmpty()) { - return; - } - - // 一次性查询所有问题结果(基于关键词+轮次) - LambdaQueryWrapper<QuestionResultList> resultWrapper = new LambdaQueryWrapper<>(); - resultWrapper.eq(QuestionResultList::getKeyword_id, keyword.getKeyword_id()) - .eq(QuestionResultList::getNum, keyword.getNum()); - List<QuestionResultList> allQuestionResults = questionResultService.list(resultWrapper); - - // 按问题分组,便于查询 - Map<String, List<QuestionResultList>> questionResultsMap = allQuestionResults.stream() - .collect(Collectors.groupingBy(QuestionResultList::getQuestion)); - - // 批量更新问题状态 - List<Question> questionsToUpdate = new ArrayList<>(); - questions.forEach(question -> { - List<QuestionResultList> results = questionResultsMap.getOrDefault(question.getQuestion(), Collections.emptyList()); - String finalStatus = determineFinalStatus(results); - - if ("success".equals(finalStatus)) { - question.setStatus("success"); - question.setError(""); - } else if ("no_results".equals(finalStatus)) { - question.setStatus("success"); - question.setError("采集结果无引用数据"); - } else if ("busyness".equals(finalStatus)) { - question.setStatus("failed"); - question.setError("DeepSeek繁忙,请稍后尝试"); - } - // 更新其他字段(响应、时间戳等) - results.stream().findFirst().ifPresent(result -> { - question.setResponse(result.getResponse()); - question.setExtracted_count(result.getExtracted_count()); - question.setTimestamp(result.getTimestamp()); - }); - questionsToUpdate.add(question); - }); - - if (!questionsToUpdate.isEmpty()) { - questionService.updateBatchById(questionsToUpdate); - log.info("批量更新问题状态 {} 条", questionsToUpdate.size()); - } - } - - // 处理用户状态异常 - private void handleUserStatus(UserResult userResult, Keyword keyword) { - if ("failed".equals(userResult.getStatus())) { - LambdaUpdateWrapper<User> userWrapper = new LambdaUpdateWrapper<>(); - userWrapper.eq(User::getUser_email, userResult.getUser_email()); - - if (userResult.getError().contains("登录失败")) { - userWrapper.set(User::getStatus, "无法登录"); - userService.update(userWrapper); - // 批量更新问题状态为失败 - questionService.update(new LambdaUpdateWrapper<Question>() - .eq(Question::getKeyword_id, keyword.getKeyword_id()) - .set(Question::getStatus, "failed") - .set(Question::getError, "账户登录失败")); - } else if (userResult.getError().contains("信息错误")) { - userWrapper.set(User::getStatus, "信息错误"); - userService.update(userWrapper); - } - } - } - - // 原方法:判断最终状态(复用) - private String determineFinalStatus(List<QuestionResultList> results) { - if (results.isEmpty()) { - return "no_results"; - } - - int totalCount = results.size(); - int emptyResponseCount = 0; - int systemBusyCount = 0; - - for (QuestionResultList result : results) { - if (result.getExtracted_count() == 0) { - emptyResponseCount++; - } - if ("success".equals(result.getStatus()) && - (result.getResponse() == null || result.getResponse().isEmpty() - || result.getResponse().contains("WebDriver连接中断") - || result.getResponse().contains("响应超时"))) { - systemBusyCount++; - } - } - - if (systemBusyCount == totalCount) { - return "busyness"; - } - if (emptyResponseCount == totalCount) { - return "no_results"; - } - return "success"; + return collectionService.getTaskResultResponseMono(taskId); } @GetMapping("/tasks/all") @ApiOperation(value = "获取所有任务列表") public Mono<TaskListResponse> getAllTasks() { - return webClient.get() - .uri(baseUrl + "/api/v1/tasks") - .accept(MediaType.APPLICATION_JSON) - .retrieve() - .bodyToMono(new ParameterizedTypeReference<TaskListResponse>() { - }) - .onErrorResume(e -> { - TaskListResponse response = new TaskListResponse(); - response.setDetail("获取任务列表失败: " + e.getMessage()); - return Mono.just(response); - }); + return collectionService.getTaskListResponseMono(); } @GetMapping("/health") @ApiOperation("健康检查") public Mono<HealthResponse> checkThirdPartyHealth() { - return webClient.get() - .uri(baseUrl + "/health") // 假设第三方健康检查接口路径为/health - .retrieve() - .bodyToMono(HealthResponse.class) - .onErrorResume(e -> Mono.just( - new HealthResponse("unhealthy", null, "", e.getMessage()))); + return collectionService.getHealthResponseMono(); } /** @@ -1383,12 +76,7 @@ @GetMapping("/server/resource") @ApiOperation(value = "查询服务器资源") public Mono<ServerResourceResponse> getServerResource() { - return webClient.get() - .uri(baseUrl + "/api/v1/system/resources") - .retrieve() - .bodyToMono(ServerResourceResponse.class) - .onErrorResume(e -> Mono.just( - new ServerResourceResponse( e.getMessage()))); + return collectionService.getServerResourceResponseMono(); } -- Gitblit v1.7.1