From a9287c6b562da327587e2a4bac92df14eb7e2b01 Mon Sep 17 00:00:00 2001 From: guyue <1721849008@qq.com> Date: 星期六, 26 七月 2025 19:16:14 +0800 Subject: [PATCH] 增加获取结果缓冲区的上限 --- src/main/java/com/linghu/controller/CollectController.java | 1030 +++++++++++++++++++++++++++++++++----------------------- 1 files changed, 603 insertions(+), 427 deletions(-) diff --git a/src/main/java/com/linghu/controller/CollectController.java b/src/main/java/com/linghu/controller/CollectController.java index da004b9..23cf78b 100644 --- a/src/main/java/com/linghu/controller/CollectController.java +++ b/src/main/java/com/linghu/controller/CollectController.java @@ -4,6 +4,8 @@ import java.time.LocalDateTime; import java.time.format.DateTimeFormatter; import java.util.*; +import java.util.concurrent.*; +import java.util.concurrent.locks.ReentrantLock; import java.util.stream.Collectors; import javax.servlet.http.HttpServletRequest; @@ -16,7 +18,9 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; import org.springframework.core.ParameterizedTypeReference; +import org.springframework.dao.DuplicateKeyException; import org.springframework.http.*; +import org.springframework.web.reactive.function.client.ExchangeStrategies; import org.springframework.web.reactive.function.client.WebClient; import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; @@ -35,10 +39,6 @@ import com.linghu.model.dto.TaskResultResponse.QuestionResult; import com.linghu.model.dto.TaskResultResponse.UserResult; import reactor.core.scheduler.Schedulers; - -import java.util.stream.Collectors; -import java.util.stream.IntStream; -import java.util.stream.Stream; @RestController @RequestMapping("/collect") @@ -71,129 +71,189 @@ private UserService userService; @Autowired private OrderService orderService; + @Autowired + private QuestionResultService questionResultService; + // 替换为线程安全队列 + private static final Queue<SearchTaskRequest> taskQueue = new ConcurrentLinkedQueue<>(); + // 全局映射:关键词ID -> 批次队列 + private static final ConcurrentMap<Integer, Queue<List<UserDto>>> batchQueues = new ConcurrentHashMap<>(); - /* @PostMapping("/search") +// private static boolean isProcessing = false; + private static volatile boolean isProcessing = false; // 添加 volatile + + @PostMapping("/search") @ApiOperation(value = "开始采集") - public Mono<SearchTaskResponse> createSearchTask( + public Mono<ResponseResult<?>> createSearchTask( @RequestBody SearchTaskRequest searchTaskRequest, HttpServletRequest request) throws JsonProcessingException { - return webClient.post() - .uri(baseUrl + "/api/v1/search") - .contentType(MediaType.APPLICATION_JSON) - .bodyValue(searchTaskRequest) - .retrieve() - .onStatus(HttpStatus::is4xxClientError, response -> response.bodyToMono(String.class) - .flatMap(errorBody -> Mono.error(new RuntimeException(errorBody)))) - .bodyToMono(new ParameterizedTypeReference<SearchTaskResponse>() { - }) - .flatMap(responseResult -> { - // 提取任务ID - SearchTaskResponse taskResponse = responseResult; - if (taskResponse != null && taskResponse.getTask_id() != null) { - // 保存任务ID到关键词 - LambdaUpdateWrapper<Keyword> updateWrapper = new LambdaUpdateWrapper<>(); - updateWrapper.eq(Keyword::getKeyword_id, searchTaskRequest.getKeyword_id()); - updateWrapper.set(Keyword::getStatus,"Submitted"); - updateWrapper.set(Keyword::getTask_id, taskResponse.getTask_id()); - keywordService.update(updateWrapper); + // 首先检查服务器资源 + return getServerResource() + .flatMap(resourceResponse -> { + double cpuUsage = parseUsage(resourceResponse.getCpu_usage_percent()); + double memoryUsage = parseUsage(resourceResponse.getMemory_usage_percent()); - // 可选:更新响应中的其他信息 + if (cpuUsage >= 90.0 || memoryUsage >= 90.0) { + String errorMsg = String.format("服务器资源不足,请稍后再试"); + + log.warn(errorMsg); + return Mono.just(ResponseResult.error(503, errorMsg)); } - return Mono.just(taskResponse); + + // 将新的任务请求加入队列 + taskQueue.add(searchTaskRequest); + + // 如果当前没有任务在处理中,则启动任务队列的处理 + if (!isProcessing) { + processNextTaskInQueue(); + } + + // 返回响应,通知用户任务已开始 + return Mono.just(ResponseResult.success("任务已加入队列,正在处理...")); }) .onErrorResume(e -> { - // return Mono.just(ResponseResult.error("调用失败: " + e.getMessage())); - SearchTaskResponse task = new SearchTaskResponse(); - task.setMessage("调用失败: " + e.getMessage()); - return Mono.just(task); + log.error("检查服务器资源失败: {}", e.getMessage(), e); + return Mono.just(ResponseResult.error("检查服务器资源失败: " + e.getMessage())); }); - }*/ + } -// public SearchTaskController(WebClient.Builder webClientBuilder, KeywordService keywordService) { -// this.webClient = webClientBuilder.build(); -// this.keywordService = keywordService; -// } + private void processNextTaskInQueue() { + // 设置为正在处理 + isProcessing = true; - /* @PostMapping("/search") - @ApiOperation(value = "开始采集") - public Mono<SearchTaskResponse> createSearchTask( - @RequestBody SearchTaskRequest searchTaskRequest, - HttpServletRequest request) throws JsonProcessingException { + // 从队列中取出下一个任务 + SearchTaskRequest nextTaskRequest = taskQueue.poll(); + if (nextTaskRequest != null) { + Integer keywordId = nextTaskRequest.getKeyword_id(); + log.info("开始处理任务队列,keywordId: {}", keywordId); + + executeBatchTask(nextTaskRequest) + .doFinally(signal -> { + isProcessing = false; + if (!taskQueue.isEmpty()) { + processNextTaskInQueue(); + } + }) + .subscribe( + result -> log.info("任务处理完成,keywordId: {}", keywordId), // 成功日志 + error -> { // 关键:添加错误处理 + log.error("任务队列处理异常,keywordId: {}", keywordId, error); + } + ); + } else { + isProcessing = false; // 无任务时重置状态 + } + } + private Mono<ResponseResult<String>> executeBatchTask(SearchTaskRequest searchTaskRequest) { + Integer keywordId = searchTaskRequest.getKeyword_id(); + // int maxConcurrentUsers = searchTaskRequest.getConfig() != null ? searchTaskRequest.getConfig().getMax_concurrent_users() : 3; - List<List<UserDto>> userBatches = splitUsersIntoBatches(searchTaskRequest.getUsers(), maxConcurrentUsers); + List<List<UserDto>> userBatches = splitUsersIntoBatches(searchTaskRequest.getUsers(), maxConcurrentUsers, keywordId,searchTaskRequest.getIs_first()); - return processBatchesSequentially(userBatches, searchTaskRequest) - .onErrorResume(e -> { - SearchTaskResponse task = new SearchTaskResponse(); - task.setMessage("调用失败: " + e.getMessage()); - return Mono.just(task); + + // 创建批次队列并存入全局映射 + Queue<List<UserDto>> batchQueue = new ConcurrentLinkedQueue<>(userBatches); + batchQueues.put(keywordId, batchQueue); // 存储到全局映射 + + return Mono.just(ResponseResult.success("第一个批次已开始")) + .doOnTerminate(() -> { + executeBatchTask(batchQueue, searchTaskRequest, keywordId) + .subscribe( + result -> log.info("批次任务启动成功,keywordId: {}", keywordId), + error -> { // 处理批次执行异常 + log.error("批次任务执行异常,keywordId: {}", keywordId, error); + // 可选:异常时清理资源 + batchQueues.remove(keywordId); + } + ); }); } + private Mono<ResponseResult<?>> executeBatchTask(Queue<List<UserDto>> batchQueue, SearchTaskRequest searchTaskRequest, Integer keywordId) { + // 如果队列为空,说明所有批次已经完成 - private List<List<UserDto>> splitUsersIntoBatches(List<UserDto> users, int batchSize) { - List<List<UserDto>> batches = new ArrayList<>(); - for (int i = 0; i < users.size(); i += batchSize) { - batches.add(users.subList(i, Math.min(i + batchSize, users.size()))); + if (batchQueue == null || batchQueue.isEmpty()) { + // 清理资源 + batchQueues.remove(keywordId); + return Mono.just(ResponseResult.success("所有批次已完成")); } - return batches; - } - private Mono<SearchTaskResponse> processBatchesSequentially(List<List<UserDto>> userBatches, SearchTaskRequest originalRequest) { - Mono<SearchTaskResponse> resultMono = Mono.empty(); - for (List<UserDto> batch : userBatches) { - SearchTaskRequest batchRequest = new SearchTaskRequest(); - batchRequest.setUsers(batch); - batchRequest.setQuestions(originalRequest.getQuestions()); - batchRequest.setConfig(originalRequest.getConfig()); - batchRequest.setSave_to_database(originalRequest.getSave_to_database()); - batchRequest.setWebhook_url(originalRequest.getWebhook_url()); - batchRequest.setKeyword_id(originalRequest.getKeyword_id()); + List<UserDto> currentBatch = batchQueue.poll(); // 从队列中获取当前批次 + SearchTaskRequest batchRequest = new SearchTaskRequest(); + batchRequest.setUsers(currentBatch); + batchRequest.setQuestions(searchTaskRequest.getQuestions()); + batchRequest.setConfig(searchTaskRequest.getConfig()); + batchRequest.setSave_to_database(searchTaskRequest.getSave_to_database()); + batchRequest.setWebhook_url(searchTaskRequest.getWebhook_url()); + batchRequest.setKeyword_id(keywordId); - resultMono = resultMono.then(createSingleBatchTask(batchRequest)); - } - return resultMono; - } - - private Mono<SearchTaskResponse> createSingleBatchTask(SearchTaskRequest batchRequest) { - return webClient.post() - .uri(baseUrl + "/api/v1/search") - .contentType(MediaType.APPLICATION_JSON) - .bodyValue(batchRequest) - .retrieve() - .onStatus(HttpStatus::is4xxClientError, response -> response.bodyToMono(String.class) - .flatMap(errorBody -> Mono.error(new RuntimeException(errorBody)))) - .bodyToMono(new ParameterizedTypeReference<SearchTaskResponse>() { - }) - .flatMap(responseResult -> { - SearchTaskResponse taskResponse = responseResult; + return createSingleBatchTask(batchRequest) + .flatMap(taskResponse -> { if (taskResponse != null && taskResponse.getTask_id() != null) { - LambdaUpdateWrapper<Keyword> updateWrapper = new LambdaUpdateWrapper<>(); - updateWrapper.eq(Keyword::getKeyword_id, batchRequest.getKeyword_id()); - updateWrapper.set(Keyword::getStatus, "Submitted"); - updateWrapper.set(Keyword::getTask_id, taskResponse.getTask_id()); - keywordService.update(updateWrapper); + // 直接等待任务完成,不再保存任务关联信息 + return waitForTaskCompletion(taskResponse.getTask_id(), batchQueue, searchTaskRequest, keywordId); + } else { + return Mono.just(ResponseResult.error("创建批次任务失败")); } - return waitForTaskCompletion(taskResponse.getTask_id()) - .then(Mono.just(taskResponse)); + }) + .onErrorResume(e -> { + log.error("调用第三方接口失败: {}", e.getMessage(), e); // 关键日志 + return Mono.error(new RuntimeException("调用第三方接口失败: " + e.getMessage())); + }) + .doFinally(signal -> { + // 任务完成时清理资源 + if (batchQueue.isEmpty()) { + batchQueues.remove(keywordId); + } }); } - private Mono<Void> waitForTaskCompletion(String taskId) { - return Flux.interval(Duration.ofSeconds(5)) // 每5秒执行一次 - .flatMap(tick -> webClient.get() - .uri(baseUrl + "/api/v1/tasks/" + taskId) - .retrieve() - .bodyToMono(TaskStatusResponse.class) - ) - .filter(response -> "completed".equals(response.getStatus())) - .next() // 找到第一个完成的响应后结束流 - .then(); // 转换为Mono<Void> - }*/ + private Mono<ResponseResult<?>> waitForTaskCompletion(String taskId, Queue<List<UserDto>> batchQueue, SearchTaskRequest searchTaskRequest, Integer keywordId) { + // 查询任务状态 + return getTaskStatus(taskId) + .flatMap(statusResponse -> { + // 检查任务是否被取消 + if ("cancelled".equalsIgnoreCase(statusResponse.getStatus())) { + batchQueues.remove(keywordId); // 清理资源 + return Mono.just(ResponseResult.success("任务已被取消")); + } + // 如果任务状态是"submitted"或"running",继续轮询 + if (!"completed".equalsIgnoreCase(statusResponse.getStatus()) && !"failed".equalsIgnoreCase(statusResponse.getStatus()) && !"cancelled".equalsIgnoreCase(statusResponse.getStatus()) && !("ERROR".equalsIgnoreCase(statusResponse.getStatus()) && statusResponse.getMessage().contains("Task not found")) ) { + return Mono.delay(Duration.ofSeconds(5)) // 延迟 5 秒后再次查询 + .flatMap(aLong -> waitForTaskCompletion(taskId, batchQueue, searchTaskRequest, keywordId)); // 递归调用继续等待 + } else { + // 如果状态为其他状态,则继续处理下一个批次 + return executeBatchTask(batchQueue, searchTaskRequest, keywordId); + } + }) + .onErrorResume(e -> { + // 处理查询任务状态时的错误 + return Mono.just(ResponseResult.error("查询任务状态失败: " + e.getMessage())); + }); + } + @ApiOperation(value = "查询任务状态") + @GetMapping("/status") + public Mono<TaskStatusResponse> getTaskStatus(String taskId) { + return webClient.get() + .uri(baseUrl + "/api/v1/tasks/" + taskId) + .accept(MediaType.APPLICATION_JSON) + .retrieve() + .onStatus(HttpStatus::isError, response -> response.bodyToMono(TaskStatusResponse.class) + .flatMap(errorBody -> Mono.error(new RuntimeException(errorBody.getDetail())))) + .bodyToMono(TaskStatusResponse.class) + .onErrorResume(e -> { + // 处理错误,创建一个自定义的错误响应对象 + TaskStatusResponse errorResponse = new TaskStatusResponse(); + errorResponse.setStatus("ERROR"); + errorResponse.setMessage(e.getMessage()); + errorResponse.setDetail(e.getMessage()); + return Mono.just(errorResponse); + }); + } + // 添加一个辅助方法来安全地将字符串转换为double private double parseUsage(String usageStr) { try { @@ -208,103 +268,42 @@ return 0.0; } } - @PostMapping("/search") - @ApiOperation(value = "开始采集") - public Mono<ResponseResult<?>> createSearchTask( - @RequestBody SearchTaskRequest searchTaskRequest, - HttpServletRequest request) throws JsonProcessingException { - // 首先检查服务器资源 - return getServerResource() - .flatMap(resourceResponse -> { - // 将字符串类型的使用率转换为double类型 - double cpuUsage = parseUsage(resourceResponse.getCpu_usage_percent()); - double memoryUsage = parseUsage(resourceResponse.getMemory_usage_percent()); - // 检查CPU和内存使用率 - if (cpuUsage >= 90.0 || memoryUsage >= 90.0) { - String errorMsg = String.format("服务器资源不足:CPU使用率 %.1f%%,内存使用率 %.1f%%", - resourceResponse.getCpu_usage_percent(), resourceResponse.getMemory_usage_percent()); - log.warn(errorMsg); - return Mono.just(ResponseResult.error(503, errorMsg)); - } - Integer keywordId = searchTaskRequest.getKeyword_id(); - - int maxConcurrentUsers = searchTaskRequest.getConfig() != null ? - searchTaskRequest.getConfig().getMax_concurrent_users() : 3; - List<List<UserDto>> userBatches = splitUsersIntoBatches(searchTaskRequest.getUsers(), maxConcurrentUsers,keywordId); - - return Flux.fromIterable(userBatches) - .flatMap(batch -> { - SearchTaskRequest batchRequest = new SearchTaskRequest(); - batchRequest.setUsers(batch); - batchRequest.setQuestions(searchTaskRequest.getQuestions()); - batchRequest.setConfig(searchTaskRequest.getConfig()); - batchRequest.setSave_to_database(searchTaskRequest.getSave_to_database()); - batchRequest.setWebhook_url(searchTaskRequest.getWebhook_url()); - batchRequest.setKeyword_id(keywordId); - - return createSingleBatchTask(batchRequest) - .delaySubscription(Duration.ofSeconds(2)); // 批次之间添加延迟 - }, 1) // 限制并发数为1,确保顺序执行 - .collectList() // 收集所有批次的响应 - .flatMap(responses -> - saveKeywordTasks(keywordId, responses) // 保存关联关系 - .thenReturn(responses) // 返回原始响应 - ) - .map(responses -> ResponseResult.success(responses)) // 使用ResponseResult包装结果 - .onErrorResume(e -> { - log.error("创建搜索任务失败: {}", e.getMessage(), e); - return Mono.just(ResponseResult.error("创建搜索任务失败: " + e.getMessage())); - }); - }) - .onErrorResume(e -> { - log.error("检查服务器资源失败: {}", e.getMessage(), e); - return Mono.just(ResponseResult.error("检查服务器资源失败: " + e.getMessage())); - }); - } - - private Mono<Void> saveKeywordTasks(Integer keywordId, List<SearchTaskResponse> taskResponses) { - List<KeywordTask> keywordTasks = taskResponses.stream() - .filter(response -> response.getTask_id() != null) - .map(response -> { - KeywordTask keywordTask = new KeywordTask(); - keywordTask.setKeyword_id(keywordId); - keywordTask.setTask_id(response.getTask_id()); - keywordTask.setStatus("pending"); - return keywordTask; - }) - .collect(Collectors.toList()); - - // 将 MyBatis-Plus 的同步方法包装为 Mono<Void> - return Mono.fromRunnable(() -> { - boolean success = keywordTaskService.saveOrUpdateBatch(keywordTasks); - if (!success) { -// throw new RuntimeException("保存关键词任务关联失败"); - // 添加异常处理 - Mono.error( new RuntimeException("保存关键词任务关联失败")); - } - }) - .doFinally(signalType -> log.info("成功保存 {} 个关键词任务关联", keywordTasks.size())) - .then(); - } - - private List<List<UserDto>> splitUsersIntoBatches(List<UserDto> users, int batchSize,Integer keywordId) { + private List<List<UserDto>> splitUsersIntoBatches(List<UserDto> users, int batchSize,Integer keywordId,Boolean isFirst) { Keyword keyword = keywordService.getById(keywordId); - if (null==keyword.getNum()){ - keyword.setNum(0); + if (isFirst){ + keyword.setNum(1); + }else { + keyword.setNum(keyword.getNum()+1); } - keyword.setNum(keyword.getNum()+1); keywordService.updateById(keyword); List<List<UserDto>> batches = new ArrayList<>(); for (int i = 0; i < users.size(); i += batchSize) { batches.add(users.subList(i, Math.min(i + batchSize, users.size()))); + } + for (int i = 0; i < batches.size(); i++){ + // 创建 KeywordTask 关联,task_id 设置为 null,表示任务尚未开始 + KeywordTask keywordTask = new KeywordTask(); + keywordTask.setKeyword_id(keywordId); + keywordTask.setTask_id(null); // 任务ID为空 + + keywordTask.setNum(keyword.getNum()); + keywordTaskService.save(keywordTask); // 保存 KeywordTask + } + + return batches; } private Mono<SearchTaskResponse> createSingleBatchTask(SearchTaskRequest batchRequest) { + // 记录请求第三方的基本信息(便于排查) + String thirdPartyUrl = baseUrl + "/api/v1/search"; + Integer keywordId = batchRequest.getKeyword_id(); + log.info("开始向第三方提交任务,keywordId: {}, URL: {}, 请求参数: {}", + keywordId, thirdPartyUrl, batchRequest.toString()); // 打印请求参数(建议用工具类转JSON) return webClient.post() .uri(baseUrl + "/api/v1/search") .contentType(MediaType.APPLICATION_JSON) @@ -312,12 +311,24 @@ .retrieve() .onStatus(HttpStatus::is4xxClientError, response -> response.bodyToMono(String.class) .flatMap(errorBody -> Mono.error(new RuntimeException(errorBody)))) + // 处理第三方返回的5xx服务器错误(如第三方服务异常) + .onStatus(HttpStatus::is5xxServerError, response -> + response.bodyToMono(String.class) + .flatMap(errorBody -> { + String errorMsg = String.format("第三方接口5xx错误,keywordId: %d, URL: %s, 状态码: %d, 错误详情: %s", + keywordId, thirdPartyUrl, response.statusCode().value(), errorBody); + log.error(errorMsg); + return Mono.error(new RuntimeException(errorMsg)); + }) + ) .bodyToMono(new ParameterizedTypeReference<SearchTaskResponse>() {}) .flatMap(taskResponse -> { if (taskResponse != null && taskResponse.getTask_id() != null) { // 使用 Reactor 的方式更新数据库 return Mono.fromRunnable(() -> { + + //更新关键词状态 LambdaUpdateWrapper<Keyword> updateWrapper = new LambdaUpdateWrapper<>(); updateWrapper.eq(Keyword::getKeyword_id, batchRequest.getKeyword_id()); updateWrapper.set(Keyword::getStatus, "submitted"); @@ -325,15 +336,36 @@ keywordService.update(updateWrapper); //设置轮数 Keyword keyword = keywordService.getById(batchRequest.getKeyword_id()); + // 更新关键词任务与任务ID的关联 + // 获取与关键词相关的任务,task_id 为 null,确保只取一个任务 + List<KeywordTask> keywordTasks = keywordTaskService.list(new LambdaQueryWrapper<KeywordTask>() + .eq(KeywordTask::getKeyword_id, keyword.getKeyword_id()) + .eq(KeywordTask::getNum, keyword.getNum()) + .isNull(KeywordTask::getTask_id)); + if (keywordTasks.size() > 0) { + KeywordTask keywordTask = keywordTasks.get(0); + keywordTask.setTask_id(taskResponse.getTask_id()); + keywordTask.setStatus("pending"); + keywordTaskService.updateById(keywordTask); + } + //将提问词列表的状态转为pending + for (String questionName : batchRequest.getQuestions()) { + questionService.update(new LambdaUpdateWrapper<Question>().eq(Question::getKeyword_id, keyword.getKeyword_id()).eq(Question::getQuestion,questionName).set(Question::getStatus, "pending")); - //设置订单进入采集状态 - LambdaUpdateWrapper<Orders> updateOrderWrapper = new LambdaUpdateWrapper<>(); - updateOrderWrapper.eq(Orders::getOrder_id, keyword.getOrder_id()) // 确保字段名正确 - .set(Orders::getStatus, 2); // 直接设置状态值 + } + //所有关键词都在采集中或者已完成或者错误设置订单进入采集状态 + List<Keyword> orderKeywords = keywordService.list(new LambdaQueryWrapper<Keyword>() + .eq(Keyword::getOrder_id, keyword.getOrder_id())); + if (!orderKeywords.isEmpty() && orderKeywords.stream().allMatch(k -> + "completed".equals(k.getStatus()) || "submitted".equals(k.getStatus()) + )) { + Orders orders = orderService.getById(keyword.getOrder_id()); + if (orders != null) { + orders.setStatus(2); + orderService.updateById(orders); - boolean success = orderService.update(updateOrderWrapper); - log.info("订单状态更新结果: {}", success ? "成功" : "失败"); - + } + } }).subscribeOn(Schedulers.boundedElastic()) // 在弹性线程池执行 .thenReturn(taskResponse); } @@ -341,99 +373,191 @@ }); } - // 移除原来的waitForTaskCompletion方法,不再需要同步等待 - @ApiOperation(value = "查询任务状态") - @GetMapping("/status") - public Mono<TaskStatusResponse> getTaskStatus(String taskId) { - return webClient.get() - .uri(baseUrl + "/api/v1/tasks/" + taskId) - .accept(MediaType.APPLICATION_JSON) - .retrieve() - .onStatus(HttpStatus::isError, response -> response.bodyToMono(TaskStatusResponse.class) - .flatMap(errorBody -> Mono.error(new RuntimeException(errorBody.getDetail())))) - .bodyToMono(TaskStatusResponse.class) - .flatMap(result -> { - TaskStatusResponse taskStatusResponse = result; - if (taskStatusResponse != null && taskStatusResponse.getStatus() != null) { - List<Question> updateQuestions = taskStatusResponse.getQuestions_status().stream() - .map(qs -> { - Question question = new Question(); - question.setQuestion_id(qs.getQuestion_id()); - question.setStatus(qs.getStatus()); - return question; - }).collect(Collectors.toList()); + @PostMapping("/cancel/{keywordId}") + @ApiOperation(value = "取消任务") + public Mono<ResponseResult<TaskCancelResponse>> cancelTask(@PathVariable Integer keywordId) { + // 1. 从主队列移除任务 + List<SearchTaskRequest> removedMainQueueTasks = removeTasksFromQueueByKeywordId(keywordId); + int removedMainQueueCount = removedMainQueueTasks.size(); // 获取移除的任务数量 - // 包装成响应式操作 - return Mono.fromCallable(() -> { - questionService.updateBatchById(updateQuestions); - return result; - }); + // 2. 从批次队列移除任务 (新增逻辑) + int removedBatchQueue = removeBatchTasksByKeywordId(keywordId); - } - return Mono.just(result); - }) - .onErrorResume(e -> { - // 创建一个自定义的错误响应对象 - TaskStatusResponse errorResponse = new TaskStatusResponse(); - errorResponse.setStatus("ERROR"); - errorResponse.setMessage(e.getMessage()); - errorResponse.setDetail(e.getMessage()); + // 3. 查询所有与关键词相关的任务 + List<KeywordTask> tasks = keywordTaskService.list( + new LambdaQueryWrapper<KeywordTask>().eq(KeywordTask::getKeyword_id, keywordId) + ); - return Mono.just(errorResponse); + // 4. 筛选出需要远程取消的任务 + List<KeywordTask> tasksToCancelRemotely = tasks.stream() + .filter(task -> task.getTask_id() != null && "pending".equalsIgnoreCase(task.getStatus())) + .collect(Collectors.toList()); + + return Flux.fromIterable(tasksToCancelRemotely) + .flatMap(task -> { + // 创建状态更新和远程取消的组合操作 + Mono<Void> updateStatus = updateTaskStatus(task.getTask_id(), "cancelled"); + Mono<ResponseResult<?>> cancelOp = cancelRemoteTask(task.getTask_id()) + .onErrorResume(e -> { + log.error("取消任务 {} 失败: {}", task.getTask_id(), e.getMessage()); + return Mono.just(ResponseResult.error("取消任务失败: " + e.getMessage())); + }); + + return Mono.zip(cancelOp, updateStatus) + .thenReturn(true); + }, 10) + .collectList() + .flatMap(canceledTasks -> { + return updateKeywordAndOrderStatus(keywordId) + .thenReturn(ResponseResult.success( + new TaskCancelResponse( + String.format("任务已取消: 主队列移除%d, 批次队列移除%d, 远程取消%d", + removedMainQueueCount , + removedBatchQueue, + tasksToCancelRemotely.size()) + ) + )); }); } - @PostMapping("/cancel/{taskId}") - @ApiOperation(value = "取消任务") - public Mono<ResponseResult<TaskCancelResponse>> cancelTask(@PathVariable String taskId) { + // 新增方法:移除批次队列 + private int removeBatchTasksByKeywordId(Integer keywordId) { + Queue<List<UserDto>> batchQueue = batchQueues.remove(keywordId); + if (batchQueue != null) { + int count = batchQueue.size(); + batchQueue.clear(); + log.info("从批次队列中移除关键词 {} 的 {} 个批次任务", keywordId, count); + return count; + } + return 0; + } + // 辅助方法:获取待取消任务 + private List<KeywordTask> getTasksToCancel(Integer keywordId) { + return keywordTaskService.list( + new LambdaQueryWrapper<KeywordTask>() + .eq(KeywordTask::getKeyword_id, keywordId) + .isNotNull(KeywordTask::getTask_id) + .eq(KeywordTask::getStatus, "pending") + ); + } + + // 提取关键词和订单状态更新的逻辑为单独方法 + private Mono<Void> updateKeywordAndOrderStatus(Integer keywordId) { + return Mono.fromRunnable(() -> { + try { + // 查询关键词 + Keyword keyword = keywordService.getById(keywordId); + if (keyword == null) { + log.warn("未找到关键词,keywordId: {}", keywordId); + return; + } + + //把任务id为空的删除 + LambdaUpdateWrapper<KeywordTask> updateWrapper = new LambdaUpdateWrapper<>(); + updateWrapper.eq(KeywordTask::getKeyword_id, keywordId); + updateWrapper.isNull(KeywordTask::getTask_id); + keywordTaskService.remove(updateWrapper); + // 查询该关键词下的所有任务 + LambdaQueryWrapper<KeywordTask> keywordTaskWrapper = new LambdaQueryWrapper<>(); + keywordTaskWrapper.eq(KeywordTask::getKeyword_id, keywordId); + + List<KeywordTask> keywordTasks = keywordTaskService.list(keywordTaskWrapper); + + // 更新关键词状态 + keyword.setStatus("completed"); + keywordService.updateById(keyword); + + //更新提问词状态为取消 + questionService.update(new LambdaUpdateWrapper<Question>().eq(Question::getKeyword_id, keywordId).isNull(Question::getResponse).set(Question::getStatus, "cancelled")); + + // 更新订单状态 + String orderId = keyword.getOrder_id(); + if (orderId != null && !orderId.isEmpty()) { + // 查询订单下所有关键词 + LambdaQueryWrapper<Keyword> orderKeywordsWrapper = new LambdaQueryWrapper<>(); + orderKeywordsWrapper.eq(Keyword::getOrder_id, orderId); + List<Keyword> orderKeywords = keywordService.list(orderKeywordsWrapper); + + // 所有关键词均已完成,则更新订单状态为3 + if (!orderKeywords.isEmpty() && orderKeywords.stream().allMatch(k -> + "completed".equals(k.getStatus()) || "false".equals(k.getStatus()) + )) { + Orders orders = orderService.getById(orderId); + if (orders != null) { + orders.setStatus(3); + orderService.updateById(orders); + log.info("订单 {} 所有关键词已完成,更新状态为3", orderId); + } + } + if (!orderKeywords.isEmpty() && orderKeywords.stream().allMatch(k -> + !"completed".equals(k.getStatus()) || "false".equals(k.getStatus()) || "cancelled".equals(k.getStatus()) + )) { + Orders orders = orderService.getById(orderId); + if (orders != null) { + orders.setStatus(1); + orderService.updateById(orders); + log.info("订单 {} 所有关键词已完成或者取消,更新状态为1", orderId); + } + } + } + } catch (Exception e) { + log.error("更新关键词和订单状态失败: {}", e.getMessage(), e); + } + }); + } +private List<SearchTaskRequest> removeTasksFromQueueByKeywordId(Integer keywordId) { + List<SearchTaskRequest> removedTasks = new ArrayList<>(); + + Iterator<SearchTaskRequest> iterator = taskQueue.iterator(); + while (iterator.hasNext()) { + SearchTaskRequest task = iterator.next(); + if (task.getKeyword_id() != null && task.getKeyword_id().equals(keywordId)) { + removedTasks.add(task); + iterator.remove(); + } + } + + + log.info("从队列中移除了 {} 个与关键词ID {} 相关的任务", removedTasks.size(), keywordId); + return removedTasks; +} + +// 发送远程取消请求 + private Mono<ResponseResult<?>> cancelRemoteTask(String taskId) { + // 使用Collections.singletonMap或手动创建Map + Map<String, Object> requestBody = new HashMap<>(); + requestBody.put("status", "pending"); + return webClient.post() .uri(baseUrl + "/api/v1/tasks/" + taskId + "/cancel") .contentType(MediaType.APPLICATION_JSON) - .bodyValue(Collections.emptyMap()) // 添加空请求体 + .bodyValue(requestBody) .retrieve() - .onStatus(HttpStatus::isError, response -> response.bodyToMono(TaskCancelResponse.class) - .flatMap(errorBody -> Mono.error(new RuntimeException(errorBody.getDetail())))) - .bodyToMono(TaskCancelResponse.class) - .flatMap(cancelResponse -> { - // 更新关键词状态 - Mono<Void> updateKeyword = Mono.fromRunnable(() -> { - LambdaUpdateWrapper<Keyword> updateWrapper = new LambdaUpdateWrapper<>(); - updateWrapper.eq(Keyword::getTask_id, taskId); - updateWrapper.set(Keyword::getStatus, "canceled"); // 统一使用"canceled" - keywordService.update(updateWrapper); - }) - .subscribeOn(Schedulers.boundedElastic()) - .then(); - - // 更新关键词任务状态 - Mono<Void> updateKeywordTask = Mono.fromRunnable(() -> { - LambdaUpdateWrapper<KeywordTask> updateWrapper = new LambdaUpdateWrapper<>(); - updateWrapper.eq(KeywordTask::getTask_id, taskId); - updateWrapper.set(KeywordTask::getStatus, "canceled"); // 统一使用"canceled" - keywordTaskService.update(updateWrapper); - }) - .subscribeOn(Schedulers.boundedElastic()) - .then(); - - // 并行执行两个更新操作,并在完成后返回cancelResponse - return Mono.when(updateKeyword, updateKeywordTask) - .thenReturn(cancelResponse); - }) - .map(data -> ResponseResult.success(data)) - .onErrorResume(e -> { - if (e.getMessage().contains("任务不存在")) { - return Mono.just(ResponseResult.error(200, e.getMessage())); - } else if (e.getMessage().contains("无法取消")) { - return Mono.just(ResponseResult.error(200, e.getMessage())); - } - return Mono.just(ResponseResult.error(500, e.getMessage())); - }); + .onStatus(HttpStatus::isError, response -> response.bodyToMono(String.class) + .flatMap(errorBody -> Mono.error(new RuntimeException("取消失败: " + errorBody)))) + .bodyToMono(Void.class) + .thenReturn(ResponseResult.success("任务已取消")); } + // 更新单个任务状态 + private Mono<Void> updateTaskStatus(String taskId, String status) { + return Mono.fromRunnable(() -> { + LambdaUpdateWrapper<KeywordTask> updateWrapper = new LambdaUpdateWrapper<>(); + updateWrapper.eq(KeywordTask::getTask_id, taskId); + updateWrapper.set(KeywordTask::getStatus, status); + keywordTaskService.update(updateWrapper); + }).subscribeOn(Schedulers.boundedElastic()).then(); + } @ApiOperation(value = "获取任务结果") @GetMapping("/tasks/{taskId}") public Mono<TaskResultResponse> getTaskResult(@PathVariable String taskId) { - return webClient.get() + WebClient webClient2 = WebClient.builder() + .exchangeStrategies(ExchangeStrategies.builder() + .codecs(configurer -> configurer.defaultCodecs() + .maxInMemorySize(10 * 1024 * 1024)) // 10MB + .build()) + .build(); + return webClient2.get() .uri(baseUrl + "/api/v1/tasks/" + taskId + "/result") .accept(MediaType.APPLICATION_JSON) .retrieve() @@ -450,6 +574,7 @@ .bodyToMono(TaskResultResponse.class) .flatMap(responseResult -> { TaskResultResponse result = responseResult; + if (result != null && result.getResults() != null) { return updateQuestionAndReference(result) .thenReturn(responseResult); @@ -458,81 +583,64 @@ }) .onErrorResume(e -> { System.out.println("获取任务结果失败"); - if (e.getMessage().contains("登陆失败")){ - - } + log.error("获取任务结果失败: {}", e.getMessage(), e); TaskResultResponse result = new TaskResultResponse(); result.setDetail("获取任务结果失败: " + e.getMessage()); return Mono.just(result); }); } -// private Mono<Void> updateQuestionAndReference(TaskResultResponse result) { -// return Mono.fromRunnable(() -> { -// // 1. 更新关键词状态 -// LambdaUpdateWrapper<Keyword> keywordUpdate = new LambdaUpdateWrapper<>(); -// keywordUpdate.eq(Keyword::getTask_id, result.getTask_id()) -// .set(Keyword::getStatus, "completed"); -// keywordService.update(keywordUpdate); -// -// // 查询关键词ID -// LambdaQueryWrapper<Keyword> keywordQuery = new LambdaQueryWrapper<>(); -// keywordQuery.eq(Keyword::getTask_id, result.getTask_id()); -// Keyword keyword = keywordService.getOne(keywordQuery); -// -// if (keyword == null) { -// System.out.println("未找到关联的关键词,task_id: " + result.getTask_id()); -// return; -// } -// -// // 2. 处理每个用户的问题结果 -// for (UserResult userResult : result.getResults()) { -// for (QuestionResult questionResult : userResult.getQuestions_results()) { -// // 2.1 查询问题ID -// LambdaQueryWrapper<Question> queryWrapper = new LambdaQueryWrapper<>(); -// queryWrapper.eq(Question::getQuestion, questionResult.getQuestion()) -// .eq(Question::getKeyword_id, keyword.getKeyword_id()); -// Question question = questionService.getOne(queryWrapper); -// -// if (question != null) { -// // 更新问题状态 -// LambdaUpdateWrapper<Question> updateWrapper = new LambdaUpdateWrapper<>(); -// updateWrapper.eq(Question::getQuestion_id, question.getQuestion_id()) -// .set(Question::getStatus, questionResult.getStatus()) -// .set(Question::getResponse, questionResult.getResponse()) -// .set(Question::getExtracted_count, questionResult.getExtracted_count()) -// .set(Question::getError, questionResult.getError()) -// .set(Question::getTimestamp, LocalDateTime.parse( -// questionResult.getTimestamp(), -// DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSSSS") -// )); -// questionService.update(updateWrapper); -// -// // 2.2 保存引用数据 -// List<Reference> references = questionResult.getReferences().stream() -// .map(ref -> { -// Reference reference = new Reference(); -// reference.setQuestion_id(question.getQuestion_id()); -// reference.setTitle(ref.getTitle()); -// reference.setUrl(ref.getUrl()); -// reference.setDomain(ref.getDomain()); -// reference.setCreate_time(LocalDateTime.now()); -// return reference; -// }) -// .collect(Collectors.toList()); -// -// if (!references.isEmpty()) { -// referenceService.saveBatch(references); -// } -// } else { -// System.out.println("未找到匹配的问题,question " + question.getQuestion()); -// -// } -// } -// } -// }); -// } + /** + * 获取或创建平台(确保同一domain只创建一次) + * @param domain 平台域名 + * @return 已存在或新创建的Platform + */ + private Platform getOrCreatePlatform(String domain,String platformName) { + // 1. 先尝试查询已存在的平台 + Platform platform = platformService.getPlatformByDomain(domain); + if (platform != null) { + return platform; + } + + // 2. 若不存在,尝试创建(处理并发场景) + try { + // 2.1 获取或创建“默认”类型(Type也需避免重复,建议Type表的type_name也加唯一约束) + Type defaultType = typeService.getOne(new LambdaQueryWrapper<Type>() + .eq(Type::getType_name, "默认")); + if (defaultType == null) { + defaultType = new Type(); + defaultType.setType_name("默认"); + typeService.save(defaultType); // 若Type可能重复,此处也需处理DuplicateKeyException + } + + // 2.2 构建新平台对象 + Platform newPlatform = new Platform(); + newPlatform.setDomain(domain); + if (platformName != null) { + newPlatform.setPlatform_name(platformName); + }else { + newPlatform.setPlatform_name(domain); + } + // 平台名称默认使用域名,可根据实际需求调整 + newPlatform.setType_id(defaultType.getType_id()); + newPlatform.setCreate_time(LocalDateTime.now()); // 补充创建时间 + + // 2.3 尝试保存,若因唯一约束冲突失败,则捕获异常 + platformService.save(newPlatform); + return newPlatform; // 保存成功,返回新创建的平台 + + } catch (DuplicateKeyException e) { + // 3. 若捕获到重复键异常,说明并发创建了,重新查询即可(此时数据库中已存在该平台) + log.warn("平台domain={}已存在,无需重复创建", domain, e); + return platformService.getPlatformByDomain(domain); // 重新查询,一定能获取到 + } catch (Exception e) { + // 处理其他异常(如数据库连接失败等) + log.error("创建平台失败,domain={}", domain, e); + throw new RuntimeException("创建平台失败", e); + } + } + //更新提问词和引用数据 private Mono<Void> updateQuestionAndReference(TaskResultResponse result) { return Mono.fromRunnable(() -> { try { @@ -543,36 +651,28 @@ LambdaQueryWrapper<KeywordTask> keywordTaskWrapper = new LambdaQueryWrapper<>(); keywordTaskWrapper.eq(KeywordTask::getTask_id, result.getTask_id()); KeywordTask keywordTask = keywordTaskService.getOne(keywordTaskWrapper); - + keywordTask.setStatus("completed"); + keywordTaskService.updateById(keywordTask); Keyword keyword = keywordService.getById(keywordTask.getKeyword_id()); if (keyword == null) { System.out.println("未找到关联的关键词,task_id: " + result.getTask_id()); //报错 throw new Exception("未找到关联的关键词,task_id: " + result.getTask_id()); -// return; + } LambdaQueryWrapper<KeywordTask> keywordTaskWrapper2 = new LambdaQueryWrapper<>(); keywordTaskWrapper2.eq(KeywordTask::getKeyword_id, keyword.getKeyword_id()); List<KeywordTask> keywordTasks = keywordTaskService.list(keywordTaskWrapper2); -// 定义状态优先级:canceled > false > completed - String finalStatus = "completed"; // 默认状态为 completed - - for (KeywordTask task : keywordTasks) { - String status = task.getStatus(); - if ("canceled".equals(status)) { - finalStatus = "canceled"; - break; // 遇到 canceled 直接跳出循环,因为优先级最高 - } else if ("false".equals(status)) { - finalStatus = "false"; - // 不跳出循环,继续检查是否存在 canceled - } - } -// 更新关键词状态 - if (!finalStatus.equals(keyword.getStatus())) { - keyword.setStatus(finalStatus); + //如果全部为completed 或者错误、取消、任务不存在 关键词也为completed ,如果关联关系没有任务id,或者状态为running ,关键词为submitted, + if (keywordTasks.stream().allMatch(task -> "completed".equals(task.getStatus()) || "false".equals(task.getStatus()) || "cancelled".equals(task.getStatus()) ||"canceled".equals(task.getStatus()) || "nonentity".equals(task.getStatus())) ) { + keyword.setStatus("completed"); keywordService.updateById(keyword); + + } + +// 更新关键词状态 String orderId = keyword.getOrder_id(); if (orderId == null || orderId.isEmpty()) { System.out.println("关键词[" + keyword.getKeyword_id() + "]未关联订单,跳过订单状态更新"); @@ -588,7 +688,18 @@ System.out.println("订单[" + orderId + "]下无关键词,跳过状态更新"); return; } - + boolean allValid2 = orderKeywords.stream() + .allMatch(k -> "completed".equals(k.getStatus()) || "false".equals(k.getStatus()) || "cancelled".equals(k.getStatus())); + if (allValid2) { + Orders orders = orderService.getById(orderId); + if (orders != null) { + orders.setStatus(1); // 假设Orders有Integer类型的status字段 + orderService.updateById(orders); + System.out.println("订单[" + orderId + "]所有关键词采集完成或者取消,已更新状态为1"); + } else { + System.out.println("未找到订单[" + orderId + "],无法更新状态"); + } + } // 3. 检查所有关键词的状态是否均为 completed 或 false boolean allValid = orderKeywords.stream() .allMatch(k -> "completed".equals(k.getStatus()) || "false".equals(k.getStatus())); @@ -604,7 +715,8 @@ System.out.println("未找到订单[" + orderId + "],无法更新状态"); } } - } + + Orders orders = orderService.getById(keyword.getOrder_id()); // 2. 批量查询所有问题 @@ -620,77 +732,116 @@ List<Question> questionsToUpdate = new ArrayList<>(); List<Reference> allReferences = new ArrayList<>(); List<Reference> resultList = new ArrayList<>(); + // 遍历结果 for (UserResult userResult : result.getResults()) { + //更新账号状态 + if ( "failed".equals(userResult.getStatus())){ + if (userResult.getError().contains("登录失败")){ + LambdaUpdateWrapper<User> userWrapper = new LambdaUpdateWrapper<>(); + userWrapper.eq(User::getUser_email, userResult.getUser_email()); + userWrapper.set(User::getStatus, "无法登录"); + userService.update(userWrapper); + //更新所有提问词的状态 + questionService.update(new LambdaUpdateWrapper<Question>().eq(Question::getKeyword_id, keyword.getKeyword_id()) + .set(Question::getStatus, "failed") + .set(Question::getError, "账户登录失败")); + + }else if (userResult.getError().contains("信息错误")){ + LambdaUpdateWrapper<User> userWrapper = new LambdaUpdateWrapper<>(); + userWrapper.eq(User::getUser_email, userResult.getUser_email()); + userWrapper.set(User::getStatus, "信息错误"); + userService.update(userWrapper); + } + } for (QuestionResult questionResult : userResult.getQuestions_results()) { try { Question question = questionMap.get(questionResult.getQuestion()); if (question != null) { + + + //保存问题结果 + QuestionResultList questionResultList = new QuestionResultList(); + questionResultList.setKeyword_id(keyword.getKeyword_id()); + questionResultList.setQuestion(questionResult.getQuestion()); + questionResultList.setResponse(questionResult.getResponse()); + questionResultList.setStatus(questionResult.getStatus()); + questionResultList.setExtracted_count(questionResult.getExtracted_count()); + questionResultList.setKeyword_task_id(result.getTask_id()); + questionResultList.setError(questionResult.getError()); + questionResultList.setNum(keyword.getNum()); + if (questionResult.getTimestamp() != null) { + DateTimeFormatter formatter = DateTimeFormatter + .ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSSSS"); + questionResultList.setTimestamp( + LocalDateTime.parse(questionResult.getTimestamp(), formatter)); + } + // 保存问题结果列表(新增保存逻辑) + questionResultService.save(questionResultList); + // 查询当前轮次下该提问词的所有结果 + List<QuestionResultList> allResults = questionResultService.list( + new LambdaQueryWrapper<QuestionResultList>() + .eq(QuestionResultList::getKeyword_id, keyword.getKeyword_id()) + .eq(QuestionResultList::getQuestion, question.getQuestion()) + .eq(QuestionResultList::getNum, keyword.getNum()) + ); + + // 判断最终状态 + String finalStatus = determineFinalStatus(allResults); + if ("success".equals(finalStatus)){ + question.setStatus("success"); + question.setError(""); + }else if ("no_results".equals(finalStatus)){ + question.setStatus("success"); + question.setError("采集结果无引用数据"); + }else if ("busyness".equals(finalStatus)){ + question.setStatus("failed"); + question.setError("DeepSeek繁忙,请稍后尝试"); + } + // 更新问题对象 - question.setStatus(questionResult.getStatus()); question.setResponse(questionResult.getResponse()); question.setExtracted_count(questionResult.getExtracted_count()); - question.setError(questionResult.getError()); +// question.setError(questionResult.getError()); question.setKeyword_id(keyword.getKeyword_id()); - // 解析时间戳 if (questionResult.getTimestamp() != null) { DateTimeFormatter formatter = DateTimeFormatter .ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSSSS"); question.setTimestamp( LocalDateTime.parse(questionResult.getTimestamp(), formatter)); } - //更新 -// questionService.updateById(question); questionsToUpdate.add(question); + // 初始化引用列表(避免null) + List<Reference> references = new ArrayList<>(); + List<TaskResultResponse.Reference> originalReferences = questionResult.getReferences(); + if (originalReferences == null) { + originalReferences = Collections.emptyList(); + } - List<Reference> references = - Optional.ofNullable(questionResult.getReferences()) - .orElse(Collections.emptyList()) - .stream() - .map(ref -> { - Reference reference = new Reference(); - reference.setQuestion_id(question.getQuestion_id()); - reference.setTitle(ref.getTitle()); - reference.setUrl(ref.getUrl()); - reference.setDomain(ref.getDomain()); - reference.setNum(keyword.getNum()); - reference.setTask_id(result.getTask_id()); - reference.setKeyword_id(keyword.getKeyword_id()); - //域名和平台id映射 - reference.setCreate_time(LocalDateTime.now()); - Platform platform = platformService.getPlatformByDomain(reference.getDomain()); - if (platform == null) { - //平台为空 创建平台 类型为“默认” - Type type = typeService.getOne(new LambdaQueryWrapper<Type>().eq(Type::getType_name,"默认")); - if (type == null) { - Type newType = new Type(); - newType.setType_name("默认"); - typeService.save(newType); - type = newType; - } - Platform platform1 = new Platform(); - platform1.setDomain(reference.getDomain()); - platform1.setPlatform_name(reference.getDomain()); - platform1.setType_id(type.getType_id()); - platformService.save(platform1); + // 遍历原始引用列表,转换为Reference对象 + for (TaskResultResponse.Reference ref : originalReferences) { + Reference reference = new Reference(); + // 设置基本字段 + reference.setQuestion_id(question.getQuestion_id()); + reference.setTitle(ref.getTitle()); + reference.setUrl(ref.getUrl()); + reference.setDomain(ref.getDomain()); + reference.setNum(keyword.getNum()); + reference.setTask_id(result.getTask_id()); + reference.setKeyword_id(keyword.getKeyword_id()); + if (null!=ref.getPublish_time()) { + reference.setCreate_time(ref.getPublish_time().atStartOfDay()); + } - reference.setType_id(type.getType_id()); - reference.setPlatform_id(platform1.getPlatform_id()); - - } - else { - reference.setPlatform_id(platform.getPlatform_id()); - Type type = typeService.getById(platform.getType_id()); - if (type != null){ - reference.setType_id(type.getType_id()); - } - } - return reference; - }) - .collect(Collectors.toList()); - + // 关键:使用优化后的方法获取平台,避免重复创建 + Platform platform = getOrCreatePlatform(ref.getDomain(),ref.getPlatform_name()); + reference.setPlatform_id(platform.getPlatform_id()); + reference.setType_id(platform.getType_id()); // 直接从平台获取类型ID,更可靠 + // 添加到结果列表 + references.add(reference); + } // 添加到总引用列表 if (!references.isEmpty()) { allReferences.addAll(references); @@ -725,18 +876,18 @@ int repetitionCount = refGroup.size() - 1; // 3.3 决定最终保留的记录 - Reference recordToSave; + Reference recordToSave = new Reference(); if (existingRecord.isPresent()) { // 使用已有ID的记录并更新重复次数 recordToSave = existingRecord.get(); recordToSave.setRepetition_num( - (recordToSave.getRepetition_num() == null ? 0 : recordToSave.getRepetition_num()) + (recordToSave.getRepetition_num() == null ? 1 : recordToSave.getRepetition_num()) + repetitionCount ); } else { // 没有ID记录则取第一条并设置重复次数 recordToSave = refGroup.get(0); - recordToSave.setRepetition_num(repetitionCount); + recordToSave.setRepetition_num(1+repetitionCount); } resultList.add(recordToSave); @@ -748,6 +899,8 @@ System.out.println("处理问题结果失败: " + e.getMessage()); } } + + } // 4. 批量更新问题 @@ -757,28 +910,55 @@ System.out.println("成功批量更新 " + questionsToUpdate.size() + " 个问题"); } - // 5. 批量插入引用,使用流式分批处理 -// if (!allReferences.isEmpty()) { -// int batchSize = 1000; -// IntStream.iterate(0, i -> i + batchSize) -// .limit((allReferences.size() + batchSize - 1) / batchSize) -// .forEach(i -> { -// List<Reference> batch = allReferences.subList( -// i, Math.min(i + batchSize, allReferences.size())); -// referenceService.saveBatch(batch); -// }); -// System.out.println("成功批量插入 " + allReferences.size() + " 条引用数据"); -// } - - } catch (Exception e) { log.error("更新问题和引用数据失败: " ,e.getMessage(), e); -// System.out.println("更新问题和引用数据失败: " + e.getMessage()); throw new RuntimeException("更新问题和引用数据失败", e); } }); } + // 根据所有批次的结果判断最终状态 + private String determineFinalStatus(List<QuestionResultList> results) { + if (results.isEmpty()) { + return "no_results"; // 无结果 + } + // 统计关键指标 + int totalCount = results.size(); + int emptyResponseCount = 0; + int systemBusyCount = 0; + + for (QuestionResultList result : results) { + // 判断回答是否为空 + if (result.getExtracted_count() == 0 ) { + emptyResponseCount++; + } + + // 判断是否为系统繁忙 + if ("success".equals(result.getStatus()) && (result.getResponse().isEmpty()|| result.getResponse().contains("WebDriver连接中断") || result.getResponse().contains("响应超时"))) { + systemBusyCount++; + } + + } + + // 全返回系统繁忙 + if (systemBusyCount == totalCount) { + return "busyness"; + } + // 全返回信息为空 + if (emptyResponseCount == totalCount) { + return "no_results"; + } + + + // 系统繁忙比例超过阈值(可配置,这里设为70%) +// double busyRate = (double) systemBusyCount / totalCount; +// if (busyRate >= 0.7) { +// return "系统繁忙,请稍后尝试"; +// } + + // 其他情况返回成功 + return "success"; + } @GetMapping("/tasks/all") @ApiOperation(value = "获取所有任务列表") public Mono<TaskListResponse> getAllTasks() { @@ -792,8 +972,6 @@ TaskListResponse response = new TaskListResponse(); response.setDetail("获取任务列表失败: " + e.getMessage()); return Mono.just(response); - - // return Mono.just(ResponseResult.error("获取任务列表失败: " + e.getMessage())); }); } @@ -821,9 +999,7 @@ .onErrorResume(e -> Mono.just( new ServerResourceResponse( e.getMessage()))); } - /** - * 传入orderid查所有关键词id以及关键词下面的所有任务id,轮询所有任务状态,如果状态为completed,则循环调用获取结果接口,处理结果 - */ + } -- Gitblit v1.7.1