From faa95a5b183a42a6c3fcf1d6a41d81caa33da3bc Mon Sep 17 00:00:00 2001 From: guyue <1721849008@qq.com> Date: 星期三, 30 七月 2025 17:42:00 +0800 Subject: [PATCH] 修改定时器抢占 --- src/main/java/com/linghu/controller/CollectController.java | 1464 ++++++++++++++++++++++++++++++++-------------------------- 1 files changed, 811 insertions(+), 653 deletions(-) diff --git a/src/main/java/com/linghu/controller/CollectController.java b/src/main/java/com/linghu/controller/CollectController.java index 1ace567..0200878 100644 --- a/src/main/java/com/linghu/controller/CollectController.java +++ b/src/main/java/com/linghu/controller/CollectController.java @@ -20,6 +20,8 @@ import org.springframework.core.ParameterizedTypeReference; import org.springframework.dao.DuplicateKeyException; import org.springframework.http.*; +import org.springframework.transaction.annotation.Transactional; +import org.springframework.web.reactive.function.client.ExchangeStrategies; import org.springframework.web.reactive.function.client.WebClient; import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; @@ -70,13 +72,16 @@ private UserService userService; @Autowired private OrderService orderService; + @Autowired + private QuestionResultService questionResultService; // 替换为线程安全队列 private static final Queue<SearchTaskRequest> taskQueue = new ConcurrentLinkedQueue<>(); // 全局映射:关键词ID -> 批次队列 private static final ConcurrentMap<Integer, Queue<List<UserDto>>> batchQueues = new ConcurrentHashMap<>(); - private static boolean isProcessing = false; +// private static boolean isProcessing = false; + private static volatile boolean isProcessing = false; // 添加 volatile @PostMapping("/search") @ApiOperation(value = "开始采集") @@ -91,7 +96,6 @@ double memoryUsage = parseUsage(resourceResponse.getMemory_usage_percent()); if (cpuUsage >= 90.0 || memoryUsage >= 90.0) { - String errorMsg = String.format("服务器资源不足,请稍后再试"); @@ -124,24 +128,32 @@ SearchTaskRequest nextTaskRequest = taskQueue.poll(); if (nextTaskRequest != null) { - // 处理任务 + Integer keywordId = nextTaskRequest.getKeyword_id(); + log.info("开始处理任务队列,keywordId: {}", keywordId); + executeBatchTask(nextTaskRequest) .doFinally(signal -> { - // 完成后,继续处理下一个任务 isProcessing = false; if (!taskQueue.isEmpty()) { - processNextTaskInQueue(); // 继续处理队列中的下一个任务 + processNextTaskInQueue(); } }) - .subscribe(); + .subscribe( + result -> log.info("任务处理完成,keywordId: {}", keywordId), // 成功日志 + error -> { // 关键:添加错误处理 + log.error("任务队列处理异常,keywordId: {}", keywordId, error); + } + ); + } else { + isProcessing = false; // 无任务时重置状态 } } private Mono<ResponseResult<String>> executeBatchTask(SearchTaskRequest searchTaskRequest) { Integer keywordId = searchTaskRequest.getKeyword_id(); - // ... 原有逻辑 ... + // int maxConcurrentUsers = searchTaskRequest.getConfig() != null ? searchTaskRequest.getConfig().getMax_concurrent_users() : 3; - List<List<UserDto>> userBatches = splitUsersIntoBatches(searchTaskRequest.getUsers(), maxConcurrentUsers, keywordId); + List<List<UserDto>> userBatches = splitUsersIntoBatches(searchTaskRequest.getUsers(), maxConcurrentUsers, keywordId,searchTaskRequest.getIs_first()); // 创建批次队列并存入全局映射 @@ -151,32 +163,19 @@ return Mono.just(ResponseResult.success("第一个批次已开始")) .doOnTerminate(() -> { executeBatchTask(batchQueue, searchTaskRequest, keywordId) - .subscribe(); + .subscribe( + result -> log.info("批次任务启动成功,keywordId: {}", keywordId), + error -> { // 处理批次执行异常 + log.error("批次任务执行异常,keywordId: {}", keywordId, error); + // 可选:异常时清理资源 + batchQueues.remove(keywordId); + } + ); }); } -/* private Mono<ResponseResult<String>> executeBatchTask(SearchTaskRequest searchTaskRequest) { - log.info("开始处理任务:{}", searchTaskRequest); - log.info("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"); - Integer keywordId = searchTaskRequest.getKeyword_id(); - int maxConcurrentUsers = searchTaskRequest.getConfig() != null ? - searchTaskRequest.getConfig().getMax_concurrent_users() : 3; - List<List<UserDto>> userBatches = splitUsersIntoBatches(searchTaskRequest.getUsers(), maxConcurrentUsers, keywordId); - - Queue<List<UserDto>> batchQueue = new LinkedList<>(userBatches); // 用队列存储批次 - - return Mono.just(ResponseResult.success("第一个批次已开始")) - .doOnTerminate(() -> { - // 启动后台任务,继续处理批次 - executeBatchTask(batchQueue, searchTaskRequest, keywordId) - .subscribe(); // 使用subscribe()启动后台任务 - }); - }*/ - private Mono<ResponseResult<?>> executeBatchTask(Queue<List<UserDto>> batchQueue, SearchTaskRequest searchTaskRequest, Integer keywordId) { // 如果队列为空,说明所有批次已经完成 -// if (batchQueue.isEmpty()) { -// return Mono.just(ResponseResult.success("所有批次已完成")); -// } + if (batchQueue == null || batchQueue.isEmpty()) { // 清理资源 batchQueues.remove(keywordId); @@ -201,6 +200,10 @@ return Mono.just(ResponseResult.error("创建批次任务失败")); } }) + .onErrorResume(e -> { + log.error("调用第三方接口失败: {}", e.getMessage(), e); // 关键日志 + return Mono.error(new RuntimeException("调用第三方接口失败: " + e.getMessage())); + }) .doFinally(signal -> { // 任务完成时清理资源 if (batchQueue.isEmpty()) { @@ -209,26 +212,6 @@ }); } -// private Mono<Void> saveKeywordTasks(Integer keywordId, SearchTaskResponse taskResponse) { -// if (taskResponse == null || taskResponse.getTask_id() == null) { -// return Mono.error(new RuntimeException("任务响应无效或任务ID为空")); -// } -// -// KeywordTask keywordTask = new KeywordTask(); -// keywordTask.setKeyword_id(keywordId); -// keywordTask.setTask_id(taskResponse.getTask_id()); -// keywordTask.setStatus("pending"); -// -// // 将 MyBatis-Plus 的同步方法包装为 Mono<Void> -// return Mono.fromRunnable(() -> { -// boolean success = keywordTaskService.saveOrUpdate(keywordTask); -// if (!success) { -// throw new RuntimeException("保存关键词任务关联失败"); -// } -// }) -// .doFinally(signalType -> log.info("成功保存关键词任务关联: Task ID {}", taskResponse.getTask_id())) -// .then(); -// } private Mono<ResponseResult<?>> waitForTaskCompletion(String taskId, Queue<List<UserDto>> batchQueue, SearchTaskRequest searchTaskRequest, Integer keywordId) { // 查询任务状态 return getTaskStatus(taskId) @@ -239,7 +222,7 @@ return Mono.just(ResponseResult.success("任务已被取消")); } // 如果任务状态是"submitted"或"running",继续轮询 - if (!"completed".equalsIgnoreCase(statusResponse.getStatus()) && !"failed".equalsIgnoreCase(statusResponse.getStatus()) && !"cancelled".equalsIgnoreCase(statusResponse.getStatus()) ) { + if (!"completed".equalsIgnoreCase(statusResponse.getStatus()) && !"failed".equalsIgnoreCase(statusResponse.getStatus()) && !"cancelled".equalsIgnoreCase(statusResponse.getStatus()) && !("ERROR".equalsIgnoreCase(statusResponse.getStatus()) && statusResponse.getMessage().contains("Task not found")) ) { return Mono.delay(Duration.ofSeconds(5)) // 延迟 5 秒后再次查询 .flatMap(aLong -> waitForTaskCompletion(taskId, batchQueue, searchTaskRequest, keywordId)); // 递归调用继续等待 } else { @@ -272,8 +255,6 @@ }); } - - // 添加一个辅助方法来安全地将字符串转换为double private double parseUsage(String usageStr) { try { @@ -288,93 +269,15 @@ return 0.0; } } - /*@PostMapping("/search") - @ApiOperation(value = "开始采集") - public Mono<ResponseResult<?>> createSearchTask( - @RequestBody SearchTaskRequest searchTaskRequest, - HttpServletRequest request) throws JsonProcessingException { - // 首先检查服务器资源 - return getServerResource() - .flatMap(resourceResponse -> { - // 将字符串类型的使用率转换为double类型 - double cpuUsage = parseUsage(resourceResponse.getCpu_usage_percent()); - double memoryUsage = parseUsage(resourceResponse.getMemory_usage_percent()); - // 检查CPU和内存使用率 - if (cpuUsage >= 90.0 || memoryUsage >= 90.0) { - String errorMsg = String.format("服务器资源不足:CPU使用率 %.1f%%,内存使用率 %.1f%%", - resourceResponse.getCpu_usage_percent(), resourceResponse.getMemory_usage_percent()); - log.warn(errorMsg); - return Mono.just(ResponseResult.error(503, errorMsg)); - } - Integer keywordId = searchTaskRequest.getKeyword_id(); - - int maxConcurrentUsers = searchTaskRequest.getConfig() != null ? - searchTaskRequest.getConfig().getMax_concurrent_users() : 3; - List<List<UserDto>> userBatches = splitUsersIntoBatches(searchTaskRequest.getUsers(), maxConcurrentUsers,keywordId); - - return Flux.fromIterable(userBatches) - .flatMap(batch -> { - SearchTaskRequest batchRequest = new SearchTaskRequest(); - batchRequest.setUsers(batch); - batchRequest.setQuestions(searchTaskRequest.getQuestions()); - batchRequest.setConfig(searchTaskRequest.getConfig()); - batchRequest.setSave_to_database(searchTaskRequest.getSave_to_database()); - batchRequest.setWebhook_url(searchTaskRequest.getWebhook_url()); - batchRequest.setKeyword_id(keywordId); - - return createSingleBatchTask(batchRequest) - .delaySubscription(Duration.ofSeconds(2)); // 批次之间添加延迟 - }, 1) // 限制并发数为1,确保顺序执行 - .collectList() // 收集所有批次的响应 - .flatMap(responses -> - saveKeywordTasks(keywordId, responses) // 保存关联关系 - .thenReturn(responses) // 返回原始响应 - ) - .map(responses -> ResponseResult.success(responses)) // 使用ResponseResult包装结果 - .onErrorResume(e -> { - log.error("创建搜索任务失败: {}", e.getMessage(), e); - return Mono.just(ResponseResult.error("创建搜索任务失败: " + e.getMessage())); - }); - }) - .onErrorResume(e -> { - log.error("检查服务器资源失败: {}", e.getMessage(), e); - return Mono.just(ResponseResult.error("检查服务器资源失败: " + e.getMessage())); - }); - }*/ - -// private Mono<Void> saveKeywordTasks(Integer keywordId, List<SearchTaskResponse> taskResponses) { -// List<KeywordTask> keywordTasks = taskResponses.stream() -// .filter(response -> response.getTask_id() != null) -// .map(response -> { -// KeywordTask keywordTask = new KeywordTask(); -// keywordTask.setKeyword_id(keywordId); -// keywordTask.setTask_id(response.getTask_id()); -// keywordTask.setStatus("pending"); -// return keywordTask; -// }) -// .collect(Collectors.toList()); -// -// // 将 MyBatis-Plus 的同步方法包装为 Mono<Void> -// return Mono.fromRunnable(() -> { -// boolean success = keywordTaskService.saveOrUpdateBatch(keywordTasks); -// if (!success) { -//// throw new RuntimeException("保存关键词任务关联失败"); -// // 添加异常处理 -// Mono.error( new RuntimeException("保存关键词任务关联失败")); -// } -// }) -// .doFinally(signalType -> log.info("成功保存 {} 个关键词任务关联", keywordTasks.size())) -// .then(); -// } - - private List<List<UserDto>> splitUsersIntoBatches(List<UserDto> users, int batchSize,Integer keywordId) { + private List<List<UserDto>> splitUsersIntoBatches(List<UserDto> users, int batchSize,Integer keywordId,Boolean isFirst) { Keyword keyword = keywordService.getById(keywordId); - if (null==keyword.getNum()){ - keyword.setNum(0); + if (isFirst){ + keyword.setNum(1); + }else { + keyword.setNum(keyword.getNum()+1); } - keyword.setNum(keyword.getNum()+1); keywordService.updateById(keyword); List<List<UserDto>> batches = new ArrayList<>(); @@ -388,7 +291,7 @@ keywordTask.setKeyword_id(keywordId); keywordTask.setTask_id(null); // 任务ID为空 - keywordTask.setNum(keyword.getNum());// 任务初始状态为 pending + keywordTask.setNum(keyword.getNum()); keywordTaskService.save(keywordTask); // 保存 KeywordTask } @@ -397,6 +300,11 @@ } private Mono<SearchTaskResponse> createSingleBatchTask(SearchTaskRequest batchRequest) { + // 记录请求第三方的基本信息(便于排查) + String thirdPartyUrl = baseUrl + "/api/v1/search"; + Integer keywordId = batchRequest.getKeyword_id(); + log.info("开始向第三方提交任务,keywordId: {}, URL: {}, 请求参数: {}", + keywordId, thirdPartyUrl, batchRequest.toString()); // 打印请求参数(建议用工具类转JSON) return webClient.post() .uri(baseUrl + "/api/v1/search") .contentType(MediaType.APPLICATION_JSON) @@ -404,6 +312,16 @@ .retrieve() .onStatus(HttpStatus::is4xxClientError, response -> response.bodyToMono(String.class) .flatMap(errorBody -> Mono.error(new RuntimeException(errorBody)))) + // 处理第三方返回的5xx服务器错误(如第三方服务异常) + .onStatus(HttpStatus::is5xxServerError, response -> + response.bodyToMono(String.class) + .flatMap(errorBody -> { + String errorMsg = String.format("第三方接口5xx错误,keywordId: %d, URL: %s, 状态码: %d, 错误详情: %s", + keywordId, thirdPartyUrl, response.statusCode().value(), errorBody); + log.error(errorMsg); + return Mono.error(new RuntimeException(errorMsg)); + }) + ) .bodyToMono(new ParameterizedTypeReference<SearchTaskResponse>() {}) .flatMap(taskResponse -> { if (taskResponse != null && taskResponse.getTask_id() != null) { @@ -423,6 +341,7 @@ // 获取与关键词相关的任务,task_id 为 null,确保只取一个任务 List<KeywordTask> keywordTasks = keywordTaskService.list(new LambdaQueryWrapper<KeywordTask>() .eq(KeywordTask::getKeyword_id, keyword.getKeyword_id()) + .eq(KeywordTask::getNum, keyword.getNum()) .isNull(KeywordTask::getTask_id)); if (keywordTasks.size() > 0) { KeywordTask keywordTask = keywordTasks.get(0); @@ -430,16 +349,11 @@ keywordTask.setStatus("pending"); keywordTaskService.updateById(keywordTask); } -// KeywordTask taskToUpdate = keywordTaskService.getOne(new LambdaQueryWrapper<KeywordTask>() -// .eq(KeywordTask::getKeyword_id, keyword.getKeyword_id()) -// .eq(KeywordTask::getTask_id, null)); // 确保 task_id 为 null 的任务 + //将提问词列表的状态转为pending + for (String questionName : batchRequest.getQuestions()) { + questionService.update(new LambdaUpdateWrapper<Question>().eq(Question::getKeyword_id, keyword.getKeyword_id()).eq(Question::getQuestion,questionName).set(Question::getStatus, "pending")); - // 更新任务的 task_id 和状态 -// taskToUpdate.setTask_id(taskResponse.getTask_id()); -// taskToUpdate.setStatus("submitted"); -// keywordTaskService.updateById(taskToUpdate); - - + } //所有关键词都在采集中或者已完成或者错误设置订单进入采集状态 List<Keyword> orderKeywords = keywordService.list(new LambdaQueryWrapper<Keyword>() .eq(Keyword::getOrder_id, keyword.getOrder_id())); @@ -460,47 +374,6 @@ }); } - // 移除原来的waitForTaskCompletion方法,不再需要同步等待 -// @ApiOperation(value = "查询任务状态") -// @GetMapping("/status") -// public Mono<TaskStatusResponse> getTaskStatus(String taskId) { -// return webClient.get() -// .uri(baseUrl + "/api/v1/tasks/" + taskId) -// .accept(MediaType.APPLICATION_JSON) -// .retrieve() -// .onStatus(HttpStatus::isError, response -> response.bodyToMono(TaskStatusResponse.class) -// .flatMap(errorBody -> Mono.error(new RuntimeException(errorBody.getDetail())))) -// .bodyToMono(TaskStatusResponse.class) -// .flatMap(result -> { -// TaskStatusResponse taskStatusResponse = result; -// if (taskStatusResponse != null && taskStatusResponse.getStatus() != null) { -// List<Question> updateQuestions = taskStatusResponse.getQuestions_status().stream() -// .map(qs -> { -// Question question = new Question(); -// question.setQuestion_id(qs.getQuestion_id()); -// question.setStatus(qs.getStatus()); -// return question; -// }).collect(Collectors.toList()); -// -// // 包装成响应式操作 -// return Mono.fromCallable(() -> { -// questionService.updateBatchById(updateQuestions); -// return result; -// }); -// -// } -// return Mono.just(result); -// }) -// .onErrorResume(e -> { -// // 创建一个自定义的错误响应对象 -// TaskStatusResponse errorResponse = new TaskStatusResponse(); -// errorResponse.setStatus("ERROR"); -// errorResponse.setMessage(e.getMessage()); -// errorResponse.setDetail(e.getMessage()); -// -// return Mono.just(errorResponse); -// }); -// } @PostMapping("/cancel/{keywordId}") @ApiOperation(value = "取消任务") public Mono<ResponseResult<TaskCancelResponse>> cancelTask(@PathVariable Integer keywordId) { @@ -524,7 +397,7 @@ return Flux.fromIterable(tasksToCancelRemotely) .flatMap(task -> { // 创建状态更新和远程取消的组合操作 - Mono<Void> updateStatus = updateTaskStatus(task.getTask_id(), "canceled"); + Mono<Void> updateStatus = updateTaskStatus(task.getTask_id(), "cancelled"); Mono<ResponseResult<?>> cancelOp = cancelRemoteTask(task.getTask_id()) .onErrorResume(e -> { log.error("取消任务 {} 失败: {}", task.getTask_id(), e.getMessage()); @@ -568,54 +441,7 @@ .eq(KeywordTask::getStatus, "pending") ); } - /* @PostMapping("/cancel/{keywordId}") - @ApiOperation(value = "取消任务") - public Mono<ResponseResult<TaskCancelResponse>> cancelTask(@PathVariable Integer keywordId) { - // 1. 查询所有与关键词相关的任务 - List<KeywordTask> tasks = keywordTaskService.list( - new LambdaQueryWrapper<KeywordTask>().eq(KeywordTask::getKeyword_id, keywordId) - ); - // 2. 从队列中移除所有相关任务(使用锁保证线程安全) - List<SearchTaskRequest> removedQueueTasks = removeTasksFromQueueByKeywordId(keywordId); - - // 3. 筛选出需要远程取消的任务 - List<KeywordTask> tasksToCancelRemotely = tasks.stream() - .filter(task -> task.getTask_id() != null && "pending".equalsIgnoreCase(task.getStatus())) - .collect(Collectors.toList()); - - // 4. 对筛选出的任务发送远程取消请求(并行执行) - return Flux.fromIterable(tasksToCancelRemotely) - .flatMap(task -> { - // 创建状态更新和远程取消的组合操作 - Mono<Void> updateStatus = updateTaskStatus(task.getTask_id(), "canceled"); - Mono<ResponseResult<?>> cancelOp = cancelRemoteTask(task.getTask_id()) - .onErrorResume(e -> { - log.error("取消任务 {} 失败: {}", task.getTask_id(), e.getMessage()); - return Mono.just(ResponseResult.error("取消任务失败: " + e.getMessage())); - }); - - // 合并操作:无论远程取消是否成功,都更新状态 - return Mono.zip(cancelOp, updateStatus) - .thenReturn(true); - }, 10) // 设置10的并发度 - .collectList() - .flatMap(canceledTasks -> { - // 5. 更新关键词和订单状态 - return updateKeywordAndOrderStatus(keywordId) - .thenReturn(ResponseResult.success( - new TaskCancelResponse( - String.format("关键词任务已取消,队列中移除 %d 个任务,远程取消 %d 个任务", - removedQueueTasks.size(), - tasksToCancelRemotely.size()) - ) - )); - }) - .onErrorResume(e -> { - log.error("取消关键词任务失败: {}", e.getMessage()); - return Mono.just(ResponseResult.error(500, "取消关键词任务失败: " + e.getMessage())); - }); - }*/ // 提取关键词和订单状态更新的逻辑为单独方法 private Mono<Void> updateKeywordAndOrderStatus(Integer keywordId) { return Mono.fromRunnable(() -> { @@ -639,16 +465,13 @@ List<KeywordTask> keywordTasks = keywordTaskService.list(keywordTaskWrapper); // 更新关键词状态 -// if (keywordTasks.stream().allMatch(task -> -// "completed".equals(task.getStatus()) || "false".equals(task.getStatus()) || "canceled".equals(task.getStatus()) -// )) { - keyword.setStatus("canceled"); - keywordService.updateById(keyword); -// log.info("关键词 {} 所有任务已完成,更新状态为 completed", keywordId); -// } - //更新提问词状态为取消 + keyword.setStatus("completed"); + keywordService.updateById(keyword); - questionService.update(new LambdaUpdateWrapper<Question>().eq(Question::getKeyword_id, keywordId).set(Question::getStatus, "canceled")); + //更新提问词状态为取消 + questionService.update(new LambdaUpdateWrapper<Question>().eq(Question::getKeyword_id, keywordId).isNull(Question::getResponse).set(Question::getStatus, "cancelled").set(Question::getError, "任务已取消")); +// 将所有提问词设置eroor为已取消 +// questionService.update(new LambdaUpdateWrapper<Question>().eq(Question::getKeyword_id, keywordId).set(Question::getStatus, "failed").set(Question::getError, "任务已取消")); // 更新订单状态 String orderId = keyword.getOrder_id(); @@ -670,7 +493,7 @@ } } if (!orderKeywords.isEmpty() && orderKeywords.stream().allMatch(k -> - !"completed".equals(k.getStatus()) || "false".equals(k.getStatus()) || "canceled".equals(k.getStatus()) + !"completed".equals(k.getStatus()) || "false".equals(k.getStatus()) || "cancelled".equals(k.getStatus()) )) { Orders orders = orderService.getById(orderId); if (orders != null) { @@ -702,9 +525,7 @@ return removedTasks; } - - // 发送远程取消请求 -// 发送远程取消请求(使用Java 8兼容的Map创建方式) +// 发送远程取消请求 private Mono<ResponseResult<?>> cancelRemoteTask(String taskId) { // 使用Collections.singletonMap或手动创建Map Map<String, Object> requestBody = new HashMap<>(); @@ -733,7 +554,13 @@ @ApiOperation(value = "获取任务结果") @GetMapping("/tasks/{taskId}") public Mono<TaskResultResponse> getTaskResult(@PathVariable String taskId) { - return webClient.get() + WebClient webClient2 = WebClient.builder() + .exchangeStrategies(ExchangeStrategies.builder() + .codecs(configurer -> configurer.defaultCodecs() + .maxInMemorySize(10 * 1024 * 1024)) // 10MB + .build()) + .build(); + return webClient2.get() .uri(baseUrl + "/api/v1/tasks/" + taskId + "/result") .accept(MediaType.APPLICATION_JSON) .retrieve() @@ -759,84 +586,21 @@ }) .onErrorResume(e -> { System.out.println("获取任务结果失败"); + log.error("获取任务结果失败: {}", e.getMessage(), e); TaskResultResponse result = new TaskResultResponse(); result.setDetail("获取任务结果失败: " + e.getMessage()); return Mono.just(result); }); } -// private Mono<Void> updateQuestionAndReference(TaskResultResponse result) { -// return Mono.fromRunnable(() -> { -// // 1. 更新关键词状态 -// LambdaUpdateWrapper<Keyword> keywordUpdate = new LambdaUpdateWrapper<>(); -// keywordUpdate.eq(Keyword::getTask_id, result.getTask_id()) -// .set(Keyword::getStatus, "completed"); -// keywordService.update(keywordUpdate); -// -// // 查询关键词ID -// LambdaQueryWrapper<Keyword> keywordQuery = new LambdaQueryWrapper<>(); -// keywordQuery.eq(Keyword::getTask_id, result.getTask_id()); -// Keyword keyword = keywordService.getOne(keywordQuery); -// -// if (keyword == null) { -// System.out.println("未找到关联的关键词,task_id: " + result.getTask_id()); -// return; -// } -// -// // 2. 处理每个用户的问题结果 -// for (UserResult userResult : result.getResults()) { -// for (QuestionResult questionResult : userResult.getQuestions_results()) { -// // 2.1 查询问题ID -// LambdaQueryWrapper<Question> queryWrapper = new LambdaQueryWrapper<>(); -// queryWrapper.eq(Question::getQuestion, questionResult.getQuestion()) -// .eq(Question::getKeyword_id, keyword.getKeyword_id()); -// Question question = questionService.getOne(queryWrapper); -// -// if (question != null) { -// // 更新问题状态 -// LambdaUpdateWrapper<Question> updateWrapper = new LambdaUpdateWrapper<>(); -// updateWrapper.eq(Question::getQuestion_id, question.getQuestion_id()) -// .set(Question::getStatus, questionResult.getStatus()) -// .set(Question::getResponse, questionResult.getResponse()) -// .set(Question::getExtracted_count, questionResult.getExtracted_count()) -// .set(Question::getError, questionResult.getError()) -// .set(Question::getTimestamp, LocalDateTime.parse( -// questionResult.getTimestamp(), -// DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSSSS") -// )); -// questionService.update(updateWrapper); -// -// // 2.2 保存引用数据 -// List<Reference> references = questionResult.getReferences().stream() -// .map(ref -> { -// Reference reference = new Reference(); -// reference.setQuestion_id(question.getQuestion_id()); -// reference.setTitle(ref.getTitle()); -// reference.setUrl(ref.getUrl()); -// reference.setDomain(ref.getDomain()); -// reference.setCreate_time(LocalDateTime.now()); -// return reference; -// }) -// .collect(Collectors.toList()); -// -// if (!references.isEmpty()) { -// referenceService.saveBatch(references); -// } -// } else { -// System.out.println("未找到匹配的问题,question " + question.getQuestion()); -// -// } -// } -// } -// }); -// } /** * 获取或创建平台(确保同一domain只创建一次) * @param domain 平台域名 * @return 已存在或新创建的Platform */ - private Platform getOrCreatePlatform(String domain) { + private Platform getOrCreatePlatform(String domain,String platformName) { // 1. 先尝试查询已存在的平台 + Platform platform = platformService.getPlatformByDomain(domain); if (platform != null) { return platform; @@ -856,7 +620,12 @@ // 2.2 构建新平台对象 Platform newPlatform = new Platform(); newPlatform.setDomain(domain); - newPlatform.setPlatform_name(domain); // 平台名称默认使用域名,可根据实际需求调整 + if (platformName != null) { + newPlatform.setPlatform_name(platformName); + }else { + newPlatform.setPlatform_name(domain); + } + // 平台名称默认使用域名,可根据实际需求调整 newPlatform.setType_id(defaultType.getType_id()); newPlatform.setCreate_time(LocalDateTime.now()); // 补充创建时间 @@ -874,348 +643,741 @@ throw new RuntimeException("创建平台失败", e); } } - private Mono<Void> updateQuestionAndReference(TaskResultResponse result) { - return Mono.fromRunnable(() -> { - try { - //查看每个账号信息的status是否正常 - - // 1. 根据KeywordTask更新关键词状态 - // 查询关键词ID - LambdaQueryWrapper<KeywordTask> keywordTaskWrapper = new LambdaQueryWrapper<>(); - keywordTaskWrapper.eq(KeywordTask::getTask_id, result.getTask_id()); - KeywordTask keywordTask = keywordTaskService.getOne(keywordTaskWrapper); - keywordTask.setStatus("completed"); - keywordTaskService.updateById(keywordTask); - Keyword keyword = keywordService.getById(keywordTask.getKeyword_id()); - - if (keyword == null) { - System.out.println("未找到关联的关键词,task_id: " + result.getTask_id()); - //报错 - throw new Exception("未找到关联的关键词,task_id: " + result.getTask_id()); -// return; - } - LambdaQueryWrapper<KeywordTask> keywordTaskWrapper2 = new LambdaQueryWrapper<>(); - keywordTaskWrapper2.eq(KeywordTask::getKeyword_id, keyword.getKeyword_id()); - List<KeywordTask> keywordTasks = keywordTaskService.list(keywordTaskWrapper2); - - //如果全部为completed 关键词也为completed ,如果关联关系没有任务id,或者状态为running ,关键词为submitted, - if (keywordTasks.stream().allMatch(task -> "completed".equals(task.getStatus()) || "false".equals(task.getStatus()) || "canceled".equals(task.getStatus())) ) { - keyword.setStatus("completed"); - keywordService.updateById(keyword); - - } - //如果有一个task为failed设置关键词为false -// else if (keywordTasks.stream().anyMatch(task -> "failed".equals(task.getStatus()))) { -// keyword.setStatus("false"); + //更新提问词和引用数据 +// private Mono<Void> updateQuestionAndReference(TaskResultResponse result) { +// return Mono.fromRunnable(() -> { +// try { +// //查看每个账号信息的status是否正常 +// +// // 1. 根据KeywordTask更新关键词状态 +// // 查询关键词ID +// LambdaQueryWrapper<KeywordTask> keywordTaskWrapper = new LambdaQueryWrapper<>(); +// keywordTaskWrapper.eq(KeywordTask::getTask_id, result.getTask_id()); +// KeywordTask keywordTask = keywordTaskService.getOne(keywordTaskWrapper); +// keywordTask.setStatus("completed"); +// keywordTaskService.updateById(keywordTask); +// Keyword keyword = keywordService.getById(keywordTask.getKeyword_id()); +// +// if (keyword == null) { +// System.out.println("未找到关联的关键词,task_id: " + result.getTask_id()); +// //报错 +// throw new Exception("未找到关联的关键词,task_id: " + result.getTask_id()); +// +// } +// LambdaQueryWrapper<KeywordTask> keywordTaskWrapper2 = new LambdaQueryWrapper<>(); +// keywordTaskWrapper2.eq(KeywordTask::getKeyword_id, keyword.getKeyword_id()); +// List<KeywordTask> keywordTasks = keywordTaskService.list(keywordTaskWrapper2); +// +// //如果全部为completed 或者错误、取消、任务不存在 关键词也为completed ,如果关联关系没有任务id,或者状态为running ,关键词为submitted, +// if (keywordTasks.stream().allMatch(task -> "completed".equals(task.getStatus()) || "false".equals(task.getStatus()) || "cancelled".equals(task.getStatus()) ||"canceled".equals(task.getStatus()) || "nonentity".equals(task.getStatus())) ) { +// keyword.setStatus("completed"); // keywordService.updateById(keyword); +// // } - - - // -//// 定义状态优先级:canceled > false > completed -// String finalStatus = "completed"; // 默认状态为 completed -// -// for (KeywordTask task : keywordTasks) { -// String status = task.getStatus(); -//// if ("canceled".equals(status)) { -//// finalStatus = "canceled"; -//// break; // 遇到 canceled 直接跳出循环,因为优先级最高 -//// } else -// if ("false".equals(status)) { -// finalStatus = "false"; -// // 不跳出循环,继续检查是否存在 canceled +// String orderId = keyword.getOrder_id(); +// if (orderId == null || orderId.isEmpty()) { +// System.out.println("关键词[" + keyword.getKeyword_id() + "]未关联订单,跳过订单状态更新"); +// return; // } +// +// // 2.更新订单状态为待处理 查询该订单下的所有关键词,更新订单状态(有取消) +// LambdaQueryWrapper<Keyword> orderKeywordsWrapper = new LambdaQueryWrapper<>(); +// orderKeywordsWrapper.eq(Keyword::getOrder_id, orderId); +// List<Keyword> orderKeywords = keywordService.list(orderKeywordsWrapper); +// +// if (orderKeywords.isEmpty()) { +// System.out.println("订单[" + orderId + "]下无关键词,跳过状态更新"); +// return; +// } +// boolean allValid2 = orderKeywords.stream() +// .allMatch(k -> "completed".equals(k.getStatus()) || "false".equals(k.getStatus()) || "cancelled".equals(k.getStatus())); +// if (allValid2) { +// Orders orders = orderService.getById(orderId); +// if (orders != null) { +// orders.setStatus(1); // 假设Orders有Integer类型的status字段 +// orderService.updateById(orders); +// System.out.println("订单[" + orderId + "]所有关键词采集完成或者取消,已更新状态为1"); +// } else { +// System.out.println("未找到订单[" + orderId + "],无法更新状态"); +// } +// } +// // 3.更新订单状态为完成 检查所有关键词的状态是否均为 completed 或 false +// boolean allValid = orderKeywords.stream() +// .allMatch(k -> "completed".equals(k.getStatus()) || "false".equals(k.getStatus())); +// +// // 4. 若所有关键词状态均有效,更新订单状态为3 +// if (allValid) { +// Orders orders = orderService.getById(orderId); +// if (orders != null) { +// orders.setStatus(3); // 假设Orders有Integer类型的status字段 +// orderService.updateById(orders); +// System.out.println("订单[" + orderId + "]所有关键词状态符合条件,已更新状态为3"); +// } else { +// System.out.println("未找到订单[" + orderId + "],无法更新状态"); +// } +// } +// +// +// Orders orders = orderService.getById(keyword.getOrder_id()); +// +// // 2. 批量查询所有问题 +// LambdaQueryWrapper<Question> queryWrapper = new LambdaQueryWrapper<>(); +// queryWrapper.eq(Question::getKeyword_id, keyword.getKeyword_id()); +// List<Question> questions = questionService.list(queryWrapper); +// +// // 构建问题映射表,用于快速查找 +// Map<String, Question> questionMap = questions.stream() +// .collect(Collectors.toMap(Question::getQuestion, q -> q)); +// +// // 3. 收集所有需要更新的问题和引用 +// List<Question> questionsToUpdate = new ArrayList<>(); +// List<Reference> allReferences = new ArrayList<>(); +// List<Reference> resultList = new ArrayList<>(); +// +// // 遍历账号 +// for (UserResult userResult : result.getResults()) { +// //更新账号状态 +// if ( "failed".equals(userResult.getStatus())){ +// if (userResult.getError().contains("登录失败")){ +// LambdaUpdateWrapper<User> userWrapper = new LambdaUpdateWrapper<>(); +// userWrapper.eq(User::getUser_email, userResult.getUser_email()); +// userWrapper.set(User::getStatus, "无法登录"); +// userService.update(userWrapper); +// //更新所有提问词的状态 +// questionService.update(new LambdaUpdateWrapper<Question>().eq(Question::getKeyword_id, keyword.getKeyword_id()) +// .set(Question::getStatus, "failed") +// .set(Question::getError, "账户登录失败")); +// +// }else if (userResult.getError().contains("信息错误")){ +// LambdaUpdateWrapper<User> userWrapper = new LambdaUpdateWrapper<>(); +// userWrapper.eq(User::getUser_email, userResult.getUser_email()); +// userWrapper.set(User::getStatus, "信息错误"); +// userService.update(userWrapper); +// } +// } +// for (QuestionResult questionResult : userResult.getQuestions_results()) { +// try { +// Question question = questionMap.get(questionResult.getQuestion()); +// if (question != null) { +// +// +// //保存问题结果 +// QuestionResultList questionResultList = new QuestionResultList(); +// questionResultList.setKeyword_id(keyword.getKeyword_id()); +// questionResultList.setQuestion(questionResult.getQuestion()); +// questionResultList.setResponse(questionResult.getResponse()); +// questionResultList.setStatus(questionResult.getStatus()); +// questionResultList.setExtracted_count(questionResult.getExtracted_count()); +// questionResultList.setKeyword_task_id(result.getTask_id()); +// questionResultList.setError(questionResult.getError()); +// questionResultList.setNum(keyword.getNum()); +// if (questionResult.getTimestamp() != null) { +// DateTimeFormatter formatter = DateTimeFormatter +// .ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSSSS"); +// questionResultList.setTimestamp( +// LocalDateTime.parse(questionResult.getTimestamp(), formatter)); +// } +// // 保存问题结果列表(新增保存逻辑) +// questionResultService.save(questionResultList); +// // 查询当前轮次下该提问词的所有结果 +// List<QuestionResultList> allResults = questionResultService.list( +// new LambdaQueryWrapper<QuestionResultList>() +// .eq(QuestionResultList::getKeyword_id, keyword.getKeyword_id()) +// .eq(QuestionResultList::getQuestion, question.getQuestion()) +// .eq(QuestionResultList::getNum, keyword.getNum()) +// ); +// +// // 判断最终状态 +// String finalStatus = determineFinalStatus(allResults); +// if ("success".equals(finalStatus)){ +// question.setStatus("success"); +// question.setError(""); +// }else if ("no_results".equals(finalStatus)){ +// question.setStatus("success"); +// question.setError("采集结果无引用数据"); +// }else if ("busyness".equals(finalStatus)){ +// question.setStatus("failed"); +// question.setError("DeepSeek繁忙,请稍后尝试"); +// } +// +// // 更新问题对象 +// question.setResponse(questionResult.getResponse()); +// question.setExtracted_count(questionResult.getExtracted_count()); +//// question.setError(questionResult.getError()); +// question.setKeyword_id(keyword.getKeyword_id()); +// +// if (questionResult.getTimestamp() != null) { +// DateTimeFormatter formatter = DateTimeFormatter +// .ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSSSS"); +// question.setTimestamp( +// LocalDateTime.parse(questionResult.getTimestamp(), formatter)); +// } +// +// questionsToUpdate.add(question); +// // 初始化引用列表(避免null) +// List<Reference> references = new ArrayList<>(); +// List<TaskResultResponse.Reference> originalReferences = questionResult.getReferences(); +// if (originalReferences == null) { +// originalReferences = Collections.emptyList(); +// } +// +// // 遍历原始引用列表,转换为Reference对象 +// for (TaskResultResponse.Reference ref : originalReferences) { +// Reference reference = new Reference(); +// // 设置基本字段 +// reference.setQuestion_id(question.getQuestion_id()); +// reference.setTitle(ref.getTitle()); +// reference.setUrl(ref.getUrl()); +// reference.setDomain(ref.getDomain()); +// reference.setNum(keyword.getNum()); +// reference.setTask_id(result.getTask_id()); +// reference.setKeyword_id(keyword.getKeyword_id()); +// if (null!=ref.getPublish_time()) { +// reference.setCreate_time(ref.getPublish_time().atStartOfDay()); +// } +// +// // 关键:使用优化后的方法获取平台,避免重复创建 +// Platform platform = getOrCreatePlatform(ref.getDomain(),ref.getPlatform_name()); +// reference.setPlatform_id(platform.getPlatform_id()); +// reference.setType_id(platform.getType_id()); // 直接从平台获取类型ID,更可靠 +// // 添加到结果列表 +// references.add(reference); +// } +// // 添加到总引用列表 +// if (!references.isEmpty()) { +// allReferences.addAll(references); +// } +// +// //取数据库中当前关键词的当前轮次的当前问题id结果拿出来 +// List<Reference> dbList = referenceService.list(new LambdaQueryWrapper<Reference>().eq(Reference::getKeyword_id, keyword.getKeyword_id()) +// .eq(Reference::getNum, keyword.getNum()) +// .eq(Reference::getQuestion_id, question.getQuestion_id()) +// ); +// +// // 1. 合并两个列表 +// List<Reference> combinedList = new ArrayList<>(); +// combinedList.addAll(allReferences); +// combinedList.addAll(dbList); +// +// // 2. 创建复合键的Map,用于统计完全匹配的记录 +// Map<String, List<Reference>> compositeKeyMap = combinedList.stream() +// .collect(Collectors.groupingBy( +// ref -> ref.getTitle() + "|" + ref.getUrl() + "|" + ref.getDomain() +// )); +// +// // 3. 处理每组重复记录 +// compositeKeyMap.forEach((key, refGroup) -> { +// // 3.1 找出组内有ID的记录(优先从dbList中获取) +// Optional<Reference> existingRecord = refGroup.stream() +// .filter(ref -> ref.getReference_id() != null) +// .findFirst(); +// +// // 3.2 统计该组的重复次数(总数-1) +// int repetitionCount = refGroup.size() - 1; +// +// // 3.3 决定最终保留的记录 +// Reference recordToSave = new Reference(); +// if (existingRecord.isPresent()) { +// // 使用已有ID的记录并更新重复次数 +// recordToSave = existingRecord.get(); +// recordToSave.setRepetition_num( +// (recordToSave.getRepetition_num() == null ? 1 : recordToSave.getRepetition_num()) +// + repetitionCount +// ); +// } else { +// // 没有ID记录则取第一条并设置重复次数 +// recordToSave = refGroup.get(0); +// recordToSave.setRepetition_num(1+repetitionCount); +// } +// +// resultList.add(recordToSave); +// }); +// referenceService.saveOrUpdateBatch(resultList); +// } +// } catch (Exception e) { +// log.error(e.getMessage(), e); +// System.out.println("处理问题结果失败: " + e.getMessage()); +// } +// } +// +// // } -// 更新关键词状态 - - String orderId = keyword.getOrder_id(); - if (orderId == null || orderId.isEmpty()) { - System.out.println("关键词[" + keyword.getKeyword_id() + "]未关联订单,跳过订单状态更新"); - return; - } - - // 2. 查询该订单下的所有关键词 - LambdaQueryWrapper<Keyword> orderKeywordsWrapper = new LambdaQueryWrapper<>(); - orderKeywordsWrapper.eq(Keyword::getOrder_id, orderId); - List<Keyword> orderKeywords = keywordService.list(orderKeywordsWrapper); - - if (orderKeywords.isEmpty()) { - System.out.println("订单[" + orderId + "]下无关键词,跳过状态更新"); - return; - } - // 3. 检查所有关键词的状态是否均为 completed 或 false - boolean allValid = orderKeywords.stream() - .allMatch(k -> "completed".equals(k.getStatus()) || "false".equals(k.getStatus())); - - // 4. 若所有关键词状态均有效,更新订单状态为3 - if (allValid) { - Orders orders = orderService.getById(orderId); - if (orders != null) { - orders.setStatus(3); // 假设Orders有Integer类型的status字段 - orderService.updateById(orders); - System.out.println("订单[" + orderId + "]所有关键词状态符合条件,已更新状态为3"); - } else { - System.out.println("未找到订单[" + orderId + "],无法更新状态"); - } - } - boolean allValid2 = orderKeywords.stream() - .allMatch(k -> "completed".equals(k.getStatus()) || "false".equals(k.getStatus()) || "canceled".equals(k.getStatus())); - if (allValid) { - Orders orders = orderService.getById(orderId); - if (orders != null) { - orders.setStatus(1); // 假设Orders有Integer类型的status字段 - orderService.updateById(orders); - System.out.println("订单[" + orderId + "]所有关键词采集完成或者取消,已更新状态为1"); - } else { - System.out.println("未找到订单[" + orderId + "],无法更新状态"); - } - } - - Orders orders = orderService.getById(keyword.getOrder_id()); - - // 2. 批量查询所有问题 - LambdaQueryWrapper<Question> queryWrapper = new LambdaQueryWrapper<>(); - queryWrapper.eq(Question::getKeyword_id, keyword.getKeyword_id()); - List<Question> questions = questionService.list(queryWrapper); - - // 构建问题映射表,用于快速查找 - Map<String, Question> questionMap = questions.stream() - .collect(Collectors.toMap(Question::getQuestion, q -> q)); - - // 3. 收集所有需要更新的问题和引用 - List<Question> questionsToUpdate = new ArrayList<>(); - List<Reference> allReferences = new ArrayList<>(); - List<Reference> resultList = new ArrayList<>(); - // 遍历结果 - for (UserResult userResult : result.getResults()) { - for (QuestionResult questionResult : userResult.getQuestions_results()) { - try { - Question question = questionMap.get(questionResult.getQuestion()); - if (question != null) { - // 更新问题对象 - question.setStatus(questionResult.getStatus()); - question.setResponse(questionResult.getResponse()); - question.setExtracted_count(questionResult.getExtracted_count()); - question.setError(questionResult.getError()); - question.setKeyword_id(keyword.getKeyword_id()); - - // 解析时间戳 - if (questionResult.getTimestamp() != null) { - DateTimeFormatter formatter = DateTimeFormatter - .ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSSSS"); - question.setTimestamp( - LocalDateTime.parse(questionResult.getTimestamp(), formatter)); - } - //更新 -// questionService.updateById(question); - - questionsToUpdate.add(question); - - /* List<Reference> references = - Optional.ofNullable(questionResult.getReferences()) - .orElse(Collections.emptyList()) - .stream() - .map(ref -> { - Reference reference = new Reference(); - reference.setQuestion_id(question.getQuestion_id()); - reference.setTitle(ref.getTitle()); - reference.setUrl(ref.getUrl()); - reference.setDomain(ref.getDomain()); - reference.setNum(keyword.getNum()); - reference.setTask_id(result.getTask_id()); - reference.setKeyword_id(keyword.getKeyword_id()); - //域名和平台id映射 - reference.setCreate_time(LocalDateTime.now()); - Platform platform = platformService.getPlatformByDomain(reference.getDomain()); - if (platform == null) { - //平台为空 创建平台 类型为“默认” - Type type = typeService.getOne(new LambdaQueryWrapper<Type>().eq(Type::getType_name,"默认")); - if (type == null) { - Type newType = new Type(); - newType.setType_name("默认"); - typeService.save(newType); - type = newType; - } - Platform platform1 = new Platform(); - platform1.setDomain(reference.getDomain()); - platform1.setPlatform_name(reference.getDomain()); - platform1.setType_id(type.getType_id()); - platformService.save(platform1); - - reference.setType_id(type.getType_id()); - reference.setPlatform_id(platform1.getPlatform_id()); - - } - else { - reference.setPlatform_id(platform.getPlatform_id()); - Type type = typeService.getById(platform.getType_id()); - if (type != null){ - reference.setType_id(type.getType_id()); - } - } - return reference; - }) - .collect(Collectors.toList());*/ - // 初始化引用列表(避免null) - List<Reference> references = new ArrayList<>(); - List<TaskResultResponse.Reference> originalReferences = questionResult.getReferences(); - if (originalReferences == null) { - originalReferences = Collections.emptyList(); - } - - // 遍历原始引用列表,转换为Reference对象 - for (TaskResultResponse.Reference ref : originalReferences) { // 注意:需将“原引用类型”替换为实际类型(如QuestionResult中的引用类型) - Reference reference = new Reference(); - // 设置基本字段 - reference.setQuestion_id(question.getQuestion_id()); - reference.setTitle(ref.getTitle()); - reference.setUrl(ref.getUrl()); - reference.setDomain(ref.getDomain()); - reference.setNum(keyword.getNum()); - reference.setTask_id(result.getTask_id()); - reference.setKeyword_id(keyword.getKeyword_id()); - reference.setCreate_time(LocalDateTime.now()); - - /* // 处理平台和类型关联 - Platform platform = platformService.getPlatformByDomain(reference.getDomain()); - if (platform == null) { - // 平台不存在,创建新平台(类型默认为“默认”) - Type type = typeService.getOne(new LambdaQueryWrapper<Type>().eq(Type::getType_name, "默认")); - if (type == null) { - Type newType = new Type(); - newType.setType_name("默认"); - typeService.save(newType); - type = newType; - } - Platform platform1 = new Platform(); - platform1.setDomain(reference.getDomain()); - platform1.setPlatform_name(reference.getDomain()); - platform1.setType_id(type.getType_id()); - platform1.setCreate_time(LocalDateTime.now()); - platformService.save(platform1); - - // 关联新平台和类型 - reference.setType_id(type.getType_id()); - reference.setPlatform_id(platform1.getPlatform_id()); - } else { - // 平台已存在,直接关联 - reference.setPlatform_id(platform.getPlatform_id()); - Type type = typeService.getById(platform.getType_id()); - if (type != null) { - reference.setType_id(type.getType_id()); - } - } -*/ - // 关键:使用优化后的方法获取平台,避免重复创建 - Platform platform = getOrCreatePlatform(ref.getDomain()); - reference.setPlatform_id(platform.getPlatform_id()); - reference.setType_id(platform.getType_id()); // 直接从平台获取类型ID,更可靠 - // 添加到结果列表 - references.add(reference); - } - // 添加到总引用列表 - if (!references.isEmpty()) { - allReferences.addAll(references); - } - - //取数据库中当前关键词的当前轮次的当前问题id结果拿出来 - List<Reference> dbList = referenceService.list(new LambdaQueryWrapper<Reference>().eq(Reference::getKeyword_id, keyword.getKeyword_id()) - .eq(Reference::getNum, keyword.getNum()) - .eq(Reference::getQuestion_id, question.getQuestion_id()) - ); - - // 1. 合并两个列表 - List<Reference> combinedList = new ArrayList<>(); - combinedList.addAll(allReferences); - combinedList.addAll(dbList); - - // 2. 创建复合键的Map,用于统计完全匹配的记录 - Map<String, List<Reference>> compositeKeyMap = combinedList.stream() - .collect(Collectors.groupingBy( - ref -> ref.getTitle() + "|" + ref.getUrl() + "|" + ref.getDomain() - )); - - // 3. 处理每组重复记录 - - compositeKeyMap.forEach((key, refGroup) -> { - // 3.1 找出组内有ID的记录(优先从dbList中获取) - Optional<Reference> existingRecord = refGroup.stream() - .filter(ref -> ref.getReference_id() != null) - .findFirst(); - - // 3.2 统计该组的重复次数(总数-1) - int repetitionCount = refGroup.size() - 1; - - // 3.3 决定最终保留的记录 - Reference recordToSave = new Reference(); - if (existingRecord.isPresent()) { - // 使用已有ID的记录并更新重复次数 - recordToSave = existingRecord.get(); - recordToSave.setRepetition_num( - (recordToSave.getRepetition_num() == null ? 1 : recordToSave.getRepetition_num()) - + repetitionCount - ); - } else { - // 没有ID记录则取第一条并设置重复次数 - recordToSave = refGroup.get(0); - recordToSave.setRepetition_num(1+repetitionCount); - } - - resultList.add(recordToSave); - }); - referenceService.saveOrUpdateBatch(resultList); - } - } catch (Exception e) { - log.error(e.getMessage(), e); - System.out.println("处理问题结果失败: " + e.getMessage()); - } - } - //更新账号状态 - if ( "failed".equals(userResult.getStatus())){ - if (userResult.getError().contains("登录失败")){ - LambdaUpdateWrapper<User> userWrapper = new LambdaUpdateWrapper<>(); - userWrapper.eq(User::getUser_email, userResult.getUser_email()); - userWrapper.set(User::getStatus, "无法登录"); - userService.update(userWrapper); - - }else if (userResult.getError().contains("信息错误")){ - LambdaUpdateWrapper<User> userWrapper = new LambdaUpdateWrapper<>(); - userWrapper.eq(User::getUser_email, userResult.getUser_email()); - userWrapper.set(User::getStatus, "信息错误"); - userService.update(userWrapper); - } - } - - } - - // 4. 批量更新问题 - System.out.println(questionsToUpdate); - if (!questionsToUpdate.isEmpty()) { - questionService.updateBatchById(questionsToUpdate); - System.out.println("成功批量更新 " + questionsToUpdate.size() + " 个问题"); - } - - // 5. 批量插入引用,使用流式分批处理 -// if (!allReferences.isEmpty()) { -// int batchSize = 1000; -// IntStream.iterate(0, i -> i + batchSize) -// .limit((allReferences.size() + batchSize - 1) / batchSize) -// .forEach(i -> { -// List<Reference> batch = allReferences.subList( -// i, Math.min(i + batchSize, allReferences.size())); -// referenceService.saveBatch(batch); -// }); -// System.out.println("成功批量插入 " + allReferences.size() + " 条引用数据"); +// +// // 4. 批量更新问题 +// System.out.println(questionsToUpdate); +// if (!questionsToUpdate.isEmpty()) { +// questionService.updateBatchById(questionsToUpdate); +// System.out.println("成功批量更新 " + questionsToUpdate.size() + " 个问题"); // } +// +// } catch (Exception e) { +// log.error("更新问题和引用数据失败: " ,e.getMessage(), e); +// throw new RuntimeException("更新问题和引用数据失败", e); +// } +// }); +// } +// // 根据所有批次的结果判断最终状态 +// private String determineFinalStatus(List<QuestionResultList> results) { +// if (results.isEmpty()) { +// return "no_results"; // 无结果 +// } +// +// // 统计关键指标 +// int totalCount = results.size(); +// int emptyResponseCount = 0; +// int systemBusyCount = 0; +// +// for (QuestionResultList result : results) { +// // 判断回答是否为空 +// if (result.getExtracted_count() == 0 ) { +// emptyResponseCount++; +// } +// +// // 判断是否为系统繁忙 +// if ("success".equals(result.getStatus()) && (result.getResponse().isEmpty()|| result.getResponse().contains("WebDriver连接中断") || result.getResponse().contains("响应超时"))) { +// systemBusyCount++; +// } +// +// } +// +// // 全返回系统繁忙 +// if (systemBusyCount == totalCount) { +// return "busyness"; +// } +// // 全返回信息为空 +// if (emptyResponseCount == totalCount) { +// return "no_results"; +// } +// +// +// // 系统繁忙比例超过阈值(可配置,这里设为70%) +//// double busyRate = (double) systemBusyCount / totalCount; +//// if (busyRate >= 0.7) { +//// return "系统繁忙,请稍后尝试"; +//// } +// +// // 其他情况返回成功 +// return "success"; +// } + private Mono<Void> updateQuestionAndReference(TaskResultResponse result) { + return Mono.fromRunnable(() -> doUpdateQuestionAndReference(result)) + .onErrorResume(e -> { + log.error("处理任务结果失败", e); + return Mono.error(e); // 传播异常,触发事务回滚 + }).then(); + } - } catch (Exception e) { - log.error("更新问题和引用数据失败: " ,e.getMessage(), e); -// System.out.println("更新问题和引用数据失败: " + e.getMessage()); - throw new RuntimeException("更新问题和引用数据失败", e); + // 核心业务逻辑,添加事务注解保证原子性 + @Transactional(rollbackFor = Exception.class) + public void doUpdateQuestionAndReference(TaskResultResponse result) { + try { + // 1. 查询关键词任务并更新状态 + LambdaQueryWrapper<KeywordTask> keywordTaskWrapper = new LambdaQueryWrapper<>(); + keywordTaskWrapper.eq(KeywordTask::getTask_id, result.getTask_id()); + KeywordTask keywordTask = keywordTaskService.getOne(keywordTaskWrapper); + if (keywordTask == null) { + throw new Exception("未找到关键词任务,task_id: " + result.getTask_id()); } + keywordTask.setStatus("completed"); + keywordTaskService.updateById(keywordTask); + + // 2. 查询关键词信息 + Keyword keyword = keywordService.getById(keywordTask.getKeyword_id()); + if (keyword == null) { + throw new Exception("未找到关联的关键词,task_id: " + result.getTask_id()); + } + + // 3. 更新关键词状态(基于关联任务状态) + updateKeywordStatus(keyword); + + // 4. 更新订单状态(基于关键词状态) + updateOrderStatus(keyword); + + // 5. 预查询问题列表(一次查询,内存映射) + LambdaQueryWrapper<Question> questionWrapper = new LambdaQueryWrapper<>(); + questionWrapper.eq(Question::getKeyword_id, keyword.getKeyword_id()); + List<Question> questions = questionService.list(questionWrapper); + Map<String, Question> questionMap = questions.stream() + .collect(Collectors.toMap(Question::getQuestion, q -> q)); + + // 6. 收集批量保存的数据(避免循环内保存) + List<QuestionResultList> questionResultsToSave = new ArrayList<>(); // 批量保存问题结果 + List<Reference> allReferences = new ArrayList<>(); // 收集所有引用,后续统一处理 + + // 7. 遍历结果处理问题和引用 + for (UserResult userResult : result.getResults()) { + // 7.1 更新用户状态(失败处理) + handleUserStatus(userResult, keyword); + + // 7.2 处理问题结果 + for (QuestionResult questionResult : userResult.getQuestions_results()) { + Question question = questionMap.get(questionResult.getQuestion()); + if (question == null) { + log.warn("未找到问题记录: {}", questionResult.getQuestion()); + continue; + } + + // 7.2.1 构建问题结果并加入批量列表 + QuestionResultList questionResultList = buildQuestionResultList(questionResult, keyword, result); + questionResultsToSave.add(questionResultList); + + // 7.2.2 处理引用数据(仅收集,不立即保存) + List<Reference> references = buildReferences(questionResult, question, keyword, result); + allReferences.addAll(references); + } + } + + // 8. 批量保存问题结果(一次数据库交互) + if (!questionResultsToSave.isEmpty()) { + questionResultService.saveBatch(questionResultsToSave); + log.info("批量保存问题结果 {} 条", questionResultsToSave.size()); + } + + // 9. 批量处理引用数据(去重+更新重复次数+批量保存) + if (!allReferences.isEmpty()) { + handleReferencesInBatch(allReferences, keyword); + } + + // 10. 批量更新问题状态(基于最终结果) + updateQuestionsStatusInBatch(questions, keyword); + + } catch (Exception e) { + log.error("更新数据失败", e); + throw new RuntimeException("更新数据失败", e); // 触发事务回滚 + } + } + + // 更新关键词状态 + private void updateKeywordStatus(Keyword keyword) { + LambdaQueryWrapper<KeywordTask> taskWrapper = new LambdaQueryWrapper<>(); + taskWrapper.eq(KeywordTask::getKeyword_id, keyword.getKeyword_id()); + List<KeywordTask> keywordTasks = keywordTaskService.list(taskWrapper); + + boolean allCompletedOrFailed = keywordTasks.stream() + .allMatch(task -> "completed".equals(task.getStatus()) + || "false".equals(task.getStatus()) + || "cancelled".equals(task.getStatus()) + || "canceled".equals(task.getStatus()) + || "nonentity".equals(task.getStatus())); + + if (allCompletedOrFailed) { + keyword.setStatus("completed"); + keywordService.updateById(keyword); + } + } + + // 更新订单状态 + private void updateOrderStatus(Keyword keyword) { + String orderId = keyword.getOrder_id(); + if (orderId == null || orderId.isEmpty()) { + log.info("关键词[{}]未关联订单,跳过订单更新", keyword.getKeyword_id()); + return; + } + + LambdaQueryWrapper<Keyword> orderKeywordsWrapper = new LambdaQueryWrapper<>(); + orderKeywordsWrapper.eq(Keyword::getOrder_id, orderId); + List<Keyword> orderKeywords = keywordService.list(orderKeywordsWrapper); + if (orderKeywords.isEmpty()) { + log.info("订单[{}]无关键词,跳过状态更新", orderId); + return; + } + + boolean allValid2 = orderKeywords.stream() + .allMatch(k -> "completed".equals(k.getStatus()) + || "false".equals(k.getStatus()) + || "cancelled".equals(k.getStatus())); + if (allValid2) { + updateOrderStatus(orderId, 1, "所有关键词采集完成或取消"); + } + + boolean allValid = orderKeywords.stream() + .allMatch(k -> "completed".equals(k.getStatus()) || "false".equals(k.getStatus())); + if (allValid) { + updateOrderStatus(orderId, 3, "所有关键词状态符合条件"); + } + } + + // 封装订单状态更新 + private void updateOrderStatus(String orderId, Integer status, String logMsg) { + Orders orders = orderService.getById(orderId); + if (orders != null) { + orders.setStatus(status); + orderService.updateById(orders); + log.info("订单[{}]{},已更新状态为{}", orderId, logMsg, status); + } else { + log.warn("未找到订单[{}],无法更新状态", orderId); + } + } + + // 构建问题结果对象 + private QuestionResultList buildQuestionResultList(QuestionResult questionResult, Keyword keyword, TaskResultResponse result) { + QuestionResultList questionResultList = new QuestionResultList(); + questionResultList.setKeyword_id(keyword.getKeyword_id()); + questionResultList.setQuestion(questionResult.getQuestion()); + questionResultList.setResponse(questionResult.getResponse()); + questionResultList.setStatus(questionResult.getStatus()); + questionResultList.setExtracted_count(questionResult.getExtracted_count()); + questionResultList.setKeyword_task_id(result.getTask_id()); + questionResultList.setError(questionResult.getError()); + questionResultList.setNum(keyword.getNum()); + + if (questionResult.getTimestamp() != null) { + DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSSSS"); + questionResultList.setTimestamp(LocalDateTime.parse(questionResult.getTimestamp(), formatter)); + } + return questionResultList; + } + + // 构建引用数据列表 + private List<Reference> buildReferences(QuestionResult questionResult, Question question, Keyword keyword, TaskResultResponse result) { + List<Reference> references = new ArrayList<>(); + List<TaskResultResponse.Reference> originalReferences = questionResult.getReferences(); + if (originalReferences == null) { + return references; + } + + for (TaskResultResponse.Reference ref : originalReferences) { + // 过滤无效引用(标题、URL、域名不能为空) + if (ref.getTitle() == null || ref.getUrl() == null || ref.getDomain() == null) { + log.warn("引用数据字段缺失,跳过:title={}, url={}, domain={}", + ref.getTitle(), ref.getUrl(), ref.getDomain()); + continue; + } + + Reference reference = new Reference(); + reference.setQuestion_id(question.getQuestion_id()); + reference.setTitle(ref.getTitle()); + reference.setUrl(ref.getUrl()); + reference.setDomain(ref.getDomain()); + reference.setNum(keyword.getNum()); + reference.setTask_id(result.getTask_id()); + reference.setKeyword_id(keyword.getKeyword_id()); + + if (ref.getPublish_time() != null) { + reference.setCreate_time(ref.getPublish_time().atStartOfDay()); + } + + Platform platform = getOrCreatePlatform(ref.getDomain(), ref.getPlatform_name()); + // 校验平台信息非空 + if (platform == null || platform.getPlatform_id() == null) { + log.warn("平台信息无效,跳过引用:domain={}", ref.getDomain()); + continue; + } + reference.setPlatform_id(platform.getPlatform_id()); + reference.setType_id(platform.getType_id()); + references.add(reference); + } + return references; + } + + // 批量处理引用数据(去重+更新重复次数) + private void handleReferencesInBatch(List<Reference> allReferences, Keyword keyword) { + // 1. 过滤原始列表中的 null 元素 + List<Reference> validReferences = allReferences.stream() + .filter(Objects::nonNull) + .collect(Collectors.toList()); + + if (validReferences.isEmpty()) { + log.info("无有效引用数据,跳过批量保存"); + return; + } +// // 在合并到 compositeKeyMap 之前 +// validReferences = validReferences.stream() +// .collect(Collectors.toMap( +// ref -> ref.getQuestion_id() +// + "|" + ref.getPlatform_id() +// + "|" + ref.getType_id() +// + "|" + ref.getUrl() +// + "|" + ref.getDomain() +// + "|" + ref.getKeyword_id() +// + "|" + ref.getNum(), +// ref -> ref, +// (r1, r2) -> r1 // 碰到同 key 就保第一个 +// )) +// .values() +// .stream() +// .collect(Collectors.toList()); + + // 2. 查询数据库中已存在的引用并过滤 null + LambdaQueryWrapper<Reference> dbRefWrapper = new LambdaQueryWrapper<>(); + dbRefWrapper.eq(Reference::getKeyword_id, keyword.getKeyword_id()) + .eq(Reference::getNum, keyword.getNum()); + List<Reference> dbReferences = referenceService.list(dbRefWrapper); + List<Reference> validDbReferences = dbReferences.stream() + .filter(Objects::nonNull) + .collect(Collectors.toList()); + + // 3. 合并并去重 + Map<String, List<Reference>> compositeKeyMap = new HashMap<>(); + validReferences.forEach(ref -> addToCompositeMap(compositeKeyMap, ref)); + validDbReferences.forEach(ref -> addToCompositeMap(compositeKeyMap, ref)); + + // 4. 处理重复次数 + List<Reference> referencesToSave = new ArrayList<>(); + compositeKeyMap.forEach((key, refGroup) -> { + List<Reference> validRefGroup = refGroup.stream() + .filter(Objects::nonNull) + .collect(Collectors.toList()); + if (validRefGroup.isEmpty()) return; + + Optional<Reference> existingRef = validRefGroup.stream() + .filter(ref -> ref.getReference_id() != null) + .findFirst(); + + Reference finalRef = existingRef.orElse(validRefGroup.get(0)); +// 重复次数 = 老的 + (这一组里一共抓到多少-1) + int repetitionNum = (finalRef.getRepetition_num() == null ? 1 : finalRef.getRepetition_num()) + + (validRefGroup.size() - 1); + finalRef.setRepetition_num(repetitionNum); + referencesToSave.add(finalRef); }); + + // 5. 最终校验并保存 + List<Reference> finalSaveList = referencesToSave.stream() + .filter(Objects::nonNull) + .collect(Collectors.toList()); + + // 关键校验:列表非空且元素有效 + if (finalSaveList.isEmpty()) { + log.info("处理后无有效引用数据可保存"); + return; + } + if (finalSaveList.stream().anyMatch(ref -> !(ref instanceof Reference))) { + log.error("引用数据类型异常,无法保存"); + return; + } + + // 执行保存 + try { + referenceService.saveOrUpdateBatch(finalSaveList); + log.info("批量保存引用数据成功,数量:{}", finalSaveList.size()); + } catch (Exception e) { + log.error("批量保存引用数据失败", e); + throw new RuntimeException("保存引用数据失败", e); + } + } + + // 辅助方法:将引用添加到复合键Map + private void addToCompositeMap(Map<String, List<Reference>> map, Reference ref) { + // 再次校验引用的核心字段非空 + if (ref.getTitle() == null || ref.getUrl() == null || ref.getDomain() == null) { + log.warn("引用核心字段为空,跳过映射:{}", ref); + return; + } + String key = ref.getQuestion_id() + "|" + + ref.getPlatform_id() + "|" + + ref.getType_id() + "|" + + ref.getTitle() + "|" + + ref.getUrl() + "|" + + ref.getDomain() + "|" + + ref.getNum(); + map.computeIfAbsent(key, k -> new ArrayList<>()).add(ref); + } + + // 批量更新问题状态 + private void updateQuestionsStatusInBatch(List<Question> questions, Keyword keyword) { + if (questions.isEmpty()) { + return; + } + + // 一次性查询所有问题结果(基于关键词+轮次) + LambdaQueryWrapper<QuestionResultList> resultWrapper = new LambdaQueryWrapper<>(); + resultWrapper.eq(QuestionResultList::getKeyword_id, keyword.getKeyword_id()) + .eq(QuestionResultList::getNum, keyword.getNum()); + List<QuestionResultList> allQuestionResults = questionResultService.list(resultWrapper); + + // 按问题分组,便于查询 + Map<String, List<QuestionResultList>> questionResultsMap = allQuestionResults.stream() + .collect(Collectors.groupingBy(QuestionResultList::getQuestion)); + + // 批量更新问题状态 + List<Question> questionsToUpdate = new ArrayList<>(); + questions.forEach(question -> { + List<QuestionResultList> results = questionResultsMap.getOrDefault(question.getQuestion(), Collections.emptyList()); + String finalStatus = determineFinalStatus(results); + + if ("success".equals(finalStatus)) { + question.setStatus("success"); + question.setError(""); + } else if ("no_results".equals(finalStatus)) { + question.setStatus("success"); + question.setError("采集结果无引用数据"); + } else if ("busyness".equals(finalStatus)) { + question.setStatus("failed"); + question.setError("DeepSeek繁忙,请稍后尝试"); + } + // 更新其他字段(响应、时间戳等) + results.stream().findFirst().ifPresent(result -> { + question.setResponse(result.getResponse()); + question.setExtracted_count(result.getExtracted_count()); + question.setTimestamp(result.getTimestamp()); + }); + questionsToUpdate.add(question); + }); + + if (!questionsToUpdate.isEmpty()) { + questionService.updateBatchById(questionsToUpdate); + log.info("批量更新问题状态 {} 条", questionsToUpdate.size()); + } + } + + // 处理用户状态异常 + private void handleUserStatus(UserResult userResult, Keyword keyword) { + if ("failed".equals(userResult.getStatus())) { + LambdaUpdateWrapper<User> userWrapper = new LambdaUpdateWrapper<>(); + userWrapper.eq(User::getUser_email, userResult.getUser_email()); + + if (userResult.getError().contains("登录失败")) { + userWrapper.set(User::getStatus, "无法登录"); + userService.update(userWrapper); + // 批量更新问题状态为失败 + questionService.update(new LambdaUpdateWrapper<Question>() + .eq(Question::getKeyword_id, keyword.getKeyword_id()) + .set(Question::getStatus, "failed") + .set(Question::getError, "账户登录失败")); + } else if (userResult.getError().contains("信息错误")) { + userWrapper.set(User::getStatus, "信息错误"); + userService.update(userWrapper); + // 批量更新问题状态为失败 + questionService.update(new LambdaUpdateWrapper<Question>() + .eq(Question::getKeyword_id, keyword.getKeyword_id()) + .set(Question::getStatus, "failed") + .set(Question::getError, "账户信息错误")); + } + } + } + + // 原方法:判断最终状态(复用) + private String determineFinalStatus(List<QuestionResultList> results) { + if (results.isEmpty()) { + return "no_results"; + } + + int totalCount = results.size(); + int emptyResponseCount = 0; + int systemBusyCount = 0; + + for (QuestionResultList result : results) { + if (result.getExtracted_count() == 0) { + emptyResponseCount++; + } + if ("success".equals(result.getStatus()) && + (result.getResponse() == null || result.getResponse().isEmpty() + || result.getResponse().contains("WebDriver连接中断") + || result.getResponse().contains("响应超时"))) { + systemBusyCount++; + } + } + + if (systemBusyCount == totalCount) { + return "busyness"; + } + if (emptyResponseCount == totalCount) { + return "no_results"; + } + return "success"; } @GetMapping("/tasks/all") @@ -1231,8 +1393,6 @@ TaskListResponse response = new TaskListResponse(); response.setDetail("获取任务列表失败: " + e.getMessage()); return Mono.just(response); - - // return Mono.just(ResponseResult.error("获取任务列表失败: " + e.getMessage())); }); } @@ -1260,9 +1420,7 @@ .onErrorResume(e -> Mono.just( new ServerResourceResponse( e.getMessage()))); } - /** - * 传入orderid查所有关键词id以及关键词下面的所有任务id,轮询所有任务状态,如果状态为completed,则循环调用获取结果接口,处理结果 - */ + } -- Gitblit v1.7.1