From 18b282e5902d286f8c93dbeee19f727698c59e5c Mon Sep 17 00:00:00 2001 From: guyue <1721849008@qq.com> Date: 星期日, 13 七月 2025 04:25:29 +0800 Subject: [PATCH] 在分批次时创建空联系,轮询状态修改, --- src/main/java/com/linghu/controller/CollectController.java | 383 +++++++++++++++++++++++++++++++++++++++++++++++------- 1 files changed, 331 insertions(+), 52 deletions(-) diff --git a/src/main/java/com/linghu/controller/CollectController.java b/src/main/java/com/linghu/controller/CollectController.java index cfeab53..8882145 100644 --- a/src/main/java/com/linghu/controller/CollectController.java +++ b/src/main/java/com/linghu/controller/CollectController.java @@ -4,6 +4,9 @@ import java.time.LocalDateTime; import java.time.format.DateTimeFormatter; import java.util.*; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.locks.ReentrantLock; import java.util.stream.Collectors; import javax.servlet.http.HttpServletRequest; @@ -16,6 +19,7 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; import org.springframework.core.ParameterizedTypeReference; +import org.springframework.dao.DuplicateKeyException; import org.springframework.http.*; import org.springframework.web.reactive.function.client.WebClient; @@ -27,6 +31,7 @@ import io.jsonwebtoken.lang.Collections; import io.swagger.annotations.Api; import io.swagger.annotations.ApiOperation; +import reactor.core.publisher.Flux; import reactor.core.publisher.Mono; import org.springframework.web.bind.annotation.* ; @@ -66,8 +71,12 @@ private UserService userService; @Autowired private OrderService orderService; - private static final Queue<SearchTaskRequest> taskQueue = new LinkedList<>(); + // 1. 使用线程安全的队列实现 + private final BlockingQueue<SearchTaskRequest> taskQueue = new LinkedBlockingQueue<>(); + // 添加队列访问锁 + private final ReentrantLock queueLock = new ReentrantLock(); private static boolean isProcessing = false; + @PostMapping("/search") @ApiOperation(value = "开始采集") public Mono<ResponseResult<?>> createSearchTask( @@ -90,7 +99,13 @@ } // 将新的任务请求加入队列 - taskQueue.add(searchTaskRequest); +// taskQueue.add(searchTaskRequest); + queueLock.lock(); + try { + taskQueue.add(searchTaskRequest); + } finally { + queueLock.unlock(); + } // 如果当前没有任务在处理中,则启动任务队列的处理 if (!isProcessing) { @@ -162,42 +177,48 @@ return createSingleBatchTask(batchRequest) .flatMap(taskResponse -> { +// if (taskResponse != null && taskResponse.getTask_id() != null) { +// // 保存任务关联到数据库 +// return saveKeywordTasks(keywordId, taskResponse) +// .then(waitForTaskCompletion(taskResponse.getTask_id(), batchQueue, searchTaskRequest, keywordId)); +// } else { +// return Mono.just(ResponseResult.error("创建批次任务失败")); +// } if (taskResponse != null && taskResponse.getTask_id() != null) { - // 保存任务关联到数据库 - return saveKeywordTasks(keywordId, taskResponse) - .then(waitForTaskCompletion(taskResponse.getTask_id(), batchQueue, searchTaskRequest, keywordId)); + // 直接等待任务完成,不再保存任务关联信息 + return waitForTaskCompletion(taskResponse.getTask_id(), batchQueue, searchTaskRequest, keywordId); } else { return Mono.just(ResponseResult.error("创建批次任务失败")); } }); } - private Mono<Void> saveKeywordTasks(Integer keywordId, SearchTaskResponse taskResponse) { - if (taskResponse == null || taskResponse.getTask_id() == null) { - return Mono.error(new RuntimeException("任务响应无效或任务ID为空")); - } - - KeywordTask keywordTask = new KeywordTask(); - keywordTask.setKeyword_id(keywordId); - keywordTask.setTask_id(taskResponse.getTask_id()); - keywordTask.setStatus("pending"); - - // 将 MyBatis-Plus 的同步方法包装为 Mono<Void> - return Mono.fromRunnable(() -> { - boolean success = keywordTaskService.saveOrUpdate(keywordTask); - if (!success) { - throw new RuntimeException("保存关键词任务关联失败"); - } - }) - .doFinally(signalType -> log.info("成功保存关键词任务关联: Task ID {}", taskResponse.getTask_id())) - .then(); - } +// private Mono<Void> saveKeywordTasks(Integer keywordId, SearchTaskResponse taskResponse) { +// if (taskResponse == null || taskResponse.getTask_id() == null) { +// return Mono.error(new RuntimeException("任务响应无效或任务ID为空")); +// } +// +// KeywordTask keywordTask = new KeywordTask(); +// keywordTask.setKeyword_id(keywordId); +// keywordTask.setTask_id(taskResponse.getTask_id()); +// keywordTask.setStatus("pending"); +// +// // 将 MyBatis-Plus 的同步方法包装为 Mono<Void> +// return Mono.fromRunnable(() -> { +// boolean success = keywordTaskService.saveOrUpdate(keywordTask); +// if (!success) { +// throw new RuntimeException("保存关键词任务关联失败"); +// } +// }) +// .doFinally(signalType -> log.info("成功保存关键词任务关联: Task ID {}", taskResponse.getTask_id())) +// .then(); +// } private Mono<ResponseResult<?>> waitForTaskCompletion(String taskId, Queue<List<UserDto>> batchQueue, SearchTaskRequest searchTaskRequest, Integer keywordId) { // 查询任务状态 return getTaskStatus(taskId) .flatMap(statusResponse -> { // 如果任务状态是"submitted"或"running",继续轮询 - if ("submitted".equalsIgnoreCase(statusResponse.getStatus()) || "running".equalsIgnoreCase(statusResponse.getStatus())) { + if (!"completed".equalsIgnoreCase(statusResponse.getStatus()) && !"failed".equalsIgnoreCase(statusResponse.getStatus()) && !"cancelled".equalsIgnoreCase(statusResponse.getStatus()) ) { return Mono.delay(Duration.ofSeconds(5)) // 延迟 5 秒后再次查询 .flatMap(aLong -> waitForTaskCompletion(taskId, batchQueue, searchTaskRequest, keywordId)); // 递归调用继续等待 } else { @@ -338,7 +359,19 @@ List<List<UserDto>> batches = new ArrayList<>(); for (int i = 0; i < users.size(); i += batchSize) { batches.add(users.subList(i, Math.min(i + batchSize, users.size()))); + } + for (int i = 0; i < batches.size(); i++){ + // 创建 KeywordTask 关联,task_id 设置为 null,表示任务尚未开始 + KeywordTask keywordTask = new KeywordTask(); + keywordTask.setKeyword_id(keywordId); + keywordTask.setTask_id(null); // 任务ID为空 + + keywordTask.setNum(keyword.getNum());// 任务初始状态为 pending + keywordTaskService.save(keywordTask); // 保存 KeywordTask + } + + return batches; } @@ -356,6 +389,8 @@ // 使用 Reactor 的方式更新数据库 return Mono.fromRunnable(() -> { + + //更新关键词状态 LambdaUpdateWrapper<Keyword> updateWrapper = new LambdaUpdateWrapper<>(); updateWrapper.eq(Keyword::getKeyword_id, batchRequest.getKeyword_id()); updateWrapper.set(Keyword::getStatus, "submitted"); @@ -363,6 +398,26 @@ keywordService.update(updateWrapper); //设置轮数 Keyword keyword = keywordService.getById(batchRequest.getKeyword_id()); + // 更新关键词任务与任务ID的关联 + // 获取与关键词相关的任务,task_id 为 null,确保只取一个任务 + List<KeywordTask> keywordTasks = keywordTaskService.list(new LambdaQueryWrapper<KeywordTask>() + .eq(KeywordTask::getKeyword_id, keyword.getKeyword_id()) + .isNull(KeywordTask::getTask_id)); + if (keywordTasks.size() > 0) { + KeywordTask keywordTask = keywordTasks.get(0); + keywordTask.setTask_id(taskResponse.getTask_id()); + keywordTask.setStatus("pending"); + keywordTaskService.updateById(keywordTask); + } +// KeywordTask taskToUpdate = keywordTaskService.getOne(new LambdaQueryWrapper<KeywordTask>() +// .eq(KeywordTask::getKeyword_id, keyword.getKeyword_id()) +// .eq(KeywordTask::getTask_id, null)); // 确保 task_id 为 null 的任务 + + // 更新任务的 task_id 和状态 +// taskToUpdate.setTask_id(taskResponse.getTask_id()); +// taskToUpdate.setStatus("submitted"); +// keywordTaskService.updateById(taskToUpdate); + //设置订单进入采集状态 LambdaUpdateWrapper<Orders> updateOrderWrapper = new LambdaUpdateWrapper<>(); @@ -420,8 +475,170 @@ // return Mono.just(errorResponse); // }); // } + @PostMapping("/cancel/{keywordId}") + @ApiOperation(value = "取消任务") + public Mono<ResponseResult<TaskCancelResponse>> cancelTask(@PathVariable Integer keywordId) { + // 1. 查询所有与关键词相关的任务 + List<KeywordTask> tasks = keywordTaskService.list( + new LambdaQueryWrapper<KeywordTask>().eq(KeywordTask::getKeyword_id, keywordId) + ); - @PostMapping("/cancel/{taskId}") + // 2. 从队列中移除所有相关任务(使用锁保证线程安全) + List<SearchTaskRequest> removedQueueTasks = removeTasksFromQueueByKeywordId(keywordId); + + // 3. 筛选出需要远程取消的任务 + List<KeywordTask> tasksToCancelRemotely = tasks.stream() + .filter(task -> task.getTask_id() != null && "pending".equalsIgnoreCase(task.getStatus())) + .collect(Collectors.toList()); + + // 4. 对筛选出的任务发送远程取消请求(并行执行) + return Flux.fromIterable(tasksToCancelRemotely) + .flatMap(task -> { + // 创建状态更新和远程取消的组合操作 + Mono<Void> updateStatus = updateTaskStatus(task.getTask_id(), "canceled"); + Mono<ResponseResult<?>> cancelOp = cancelRemoteTask(task.getTask_id()) + .onErrorResume(e -> { + log.error("取消任务 {} 失败: {}", task.getTask_id(), e.getMessage()); + return Mono.just(ResponseResult.error("取消任务失败: " + e.getMessage())); + }); + + // 合并操作:无论远程取消是否成功,都更新状态 + return Mono.zip(cancelOp, updateStatus) + .thenReturn(true); + }, 10) // 设置10的并发度 + .collectList() + .thenReturn(ResponseResult.success( + new TaskCancelResponse( + String.format("关键词任务已取消,队列中移除 %d 个任务,远程取消 %d 个任务", + removedQueueTasks.size(), + tasksToCancelRemotely.size()) + ) + )) + .onErrorResume(e -> { + log.error("取消关键词任务失败: {}", e.getMessage()); + return Mono.just(ResponseResult.error(500, "取消关键词任务失败: " + e.getMessage())); + }); + } + +// @PostMapping("/cancel/{keywordId}") +// @ApiOperation(value = "取消任务") +// public Mono<ResponseResult<TaskCancelResponse>> cancelTask(@PathVariable Integer keywordId) { +// // 1. 查询所有与关键词相关的任务 +// List<KeywordTask> tasks = keywordTaskService.list( +// new LambdaQueryWrapper<KeywordTask>().eq(KeywordTask::getKeyword_id, keywordId) +// ); +// +// // 2. 从队列中移除所有相关任务 +// List<SearchTaskRequest> removedQueueTasks = removeTasksFromQueueByKeywordId(keywordId); +// +// // 3. 筛选出需要远程取消的任务(任务ID不为空且状态为pending) +// List<KeywordTask> tasksToCancelRemotely = tasks.stream() +// .filter(task -> task.getTask_id() != null && "pending".equalsIgnoreCase(task.getStatus())) +// .collect(Collectors.toList()); +// // 检查是否有任务与关键词相关 +//// if (tasks.isEmpty()) { +//// return Mono.just(ResponseResult.error("没有找到相关任务")); +//// } +// +// // 4. 对筛选出的任务发送远程取消请求 +// List<Mono<ResponseResult<?>>> cancelRequests = tasksToCancelRemotely.stream() +// .map(task -> cancelRemoteTask(task.getTask_id()) +// .doOnSuccess(response -> { +// // 更新任务状态为canceled +// updateTaskStatus(task.getTask_id(), "canceled").subscribe(); +// }) +// .onErrorResume(e -> { +// log.error("取消任务 {} 失败: {}", task.getTask_id(), e.getMessage()); +// // 即使取消失败,也尝试更新状态 +// updateTaskStatus(task.getTask_id(), "canceled").subscribe(); +// return Mono.just(ResponseResult.error("取消任务失败: " + e.getMessage())); +// })) +// .collect(Collectors.toList()); +// +// // 5. 并行执行所有取消请求 +// return Flux.fromIterable(cancelRequests) +// .concatMap(request -> request) // 顺序执行,而非并行 +// .collectList() +// .thenReturn(ResponseResult.success( +// new TaskCancelResponse( +// String.format("关键词任务已取消,队列中移除 %d 个任务,远程取消 %d 个任务", +// removedQueueTasks.size(), +// tasksToCancelRemotely.size()) +// ) +// )) +// .onErrorResume(e -> { +// log.error("取消关键词任务失败: {}", e.getMessage()); +// return Mono.just(ResponseResult.error(500, "取消关键词任务失败: " + e.getMessage())); +// }); +// } +// 线程安全的队列移除方法 +private List<SearchTaskRequest> removeTasksFromQueueByKeywordId(Integer keywordId) { + List<SearchTaskRequest> removedTasks = new ArrayList<>(); + + // 使用锁保证队列操作的原子性 + queueLock.lock(); + try { + Iterator<SearchTaskRequest> iterator = taskQueue.iterator(); + while (iterator.hasNext()) { + SearchTaskRequest task = iterator.next(); + if (task.getKeyword_id() != null && task.getKeyword_id().equals(keywordId)) { + removedTasks.add(task); + iterator.remove(); + } + } + } finally { + queueLock.unlock(); + } + + log.info("从队列中移除了 {} 个与关键词ID {} 相关的任务", removedTasks.size(), keywordId); + return removedTasks; +} + + // 从队列中移除所有关键词ID匹配的任务 +// private List<SearchTaskRequest> removeTasksFromQueueByKeywordId(Integer keywordId) { +// List<SearchTaskRequest> removedTasks = new ArrayList<>(); +// +// // 使用迭代器安全地移除元素 +// Iterator<SearchTaskRequest> iterator = taskQueue.iterator(); +// while (iterator.hasNext()) { +// SearchTaskRequest task = iterator.next(); +// if (task.getKeyword_id() != null && task.getKeyword_id().equals(keywordId)) { +// removedTasks.add(task); +// iterator.remove(); +// } +// } +// +// log.info("从队列中移除了 {} 个与关键词ID {} 相关的任务", removedTasks.size(), keywordId); +// return removedTasks; +// } + // 发送远程取消请求 +// 发送远程取消请求(使用Java 8兼容的Map创建方式) + private Mono<ResponseResult<?>> cancelRemoteTask(String taskId) { + // 使用Collections.singletonMap或手动创建Map + Map<String, Object> requestBody = new HashMap<>(); + requestBody.put("status", "pending"); + + return webClient.post() + .uri(baseUrl + "/api/v1/tasks/" + taskId + "/cancel") + .contentType(MediaType.APPLICATION_JSON) + .bodyValue(requestBody) + .retrieve() + .onStatus(HttpStatus::isError, response -> response.bodyToMono(String.class) + .flatMap(errorBody -> Mono.error(new RuntimeException("取消失败: " + errorBody)))) + .bodyToMono(Void.class) + .thenReturn(ResponseResult.success("任务已取消")); + } + + // 更新单个任务状态 + private Mono<Void> updateTaskStatus(String taskId, String status) { + return Mono.fromRunnable(() -> { + LambdaUpdateWrapper<KeywordTask> updateWrapper = new LambdaUpdateWrapper<>(); + updateWrapper.eq(KeywordTask::getTask_id, taskId); + updateWrapper.set(KeywordTask::getStatus, status); + keywordTaskService.update(updateWrapper); + }).subscribeOn(Schedulers.boundedElastic()).then(); + } + /* @PostMapping("/cancel/{taskId}") @ApiOperation(value = "取消任务") public Mono<ResponseResult<TaskCancelResponse>> cancelTask(@PathVariable String taskId) { return webClient.post() @@ -466,7 +683,7 @@ } return Mono.just(ResponseResult.error(500, e.getMessage())); }); - } + }*/ @ApiOperation(value = "获取任务结果") @GetMapping("/tasks/{taskId}") @@ -488,6 +705,7 @@ .bodyToMono(TaskResultResponse.class) .flatMap(responseResult -> { TaskResultResponse result = responseResult; + if (result != null && result.getResults() != null) { return updateQuestionAndReference(result) .thenReturn(responseResult); @@ -567,7 +785,50 @@ // } // }); // } + /** + * 获取或创建平台(确保同一domain只创建一次) + * @param domain 平台域名 + * @return 已存在或新创建的Platform + */ + private Platform getOrCreatePlatform(String domain) { + // 1. 先尝试查询已存在的平台 + Platform platform = platformService.getPlatformByDomain(domain); + if (platform != null) { + return platform; + } + // 2. 若不存在,尝试创建(处理并发场景) + try { + // 2.1 获取或创建“默认”类型(Type也需避免重复,建议Type表的type_name也加唯一约束) + Type defaultType = typeService.getOne(new LambdaQueryWrapper<Type>() + .eq(Type::getType_name, "默认")); + if (defaultType == null) { + defaultType = new Type(); + defaultType.setType_name("默认"); + typeService.save(defaultType); // 若Type可能重复,此处也需处理DuplicateKeyException + } + + // 2.2 构建新平台对象 + Platform newPlatform = new Platform(); + newPlatform.setDomain(domain); + newPlatform.setPlatform_name(domain); // 平台名称默认使用域名,可根据实际需求调整 + newPlatform.setType_id(defaultType.getType_id()); + newPlatform.setCreate_time(LocalDateTime.now()); // 补充创建时间 + + // 2.3 尝试保存,若因唯一约束冲突失败,则捕获异常 + platformService.save(newPlatform); + return newPlatform; // 保存成功,返回新创建的平台 + + } catch (DuplicateKeyException e) { + // 3. 若捕获到重复键异常,说明并发创建了,重新查询即可(此时数据库中已存在该平台) + log.warn("平台domain={}已存在,无需重复创建", domain, e); + return platformService.getPlatformByDomain(domain); // 重新查询,一定能获取到 + } catch (Exception e) { + // 处理其他异常(如数据库连接失败等) + log.error("创建平台失败,domain={}", domain, e); + throw new RuntimeException("创建平台失败", e); + } + } private Mono<Void> updateQuestionAndReference(TaskResultResponse result) { return Mono.fromRunnable(() -> { try { @@ -578,7 +839,8 @@ LambdaQueryWrapper<KeywordTask> keywordTaskWrapper = new LambdaQueryWrapper<>(); keywordTaskWrapper.eq(KeywordTask::getTask_id, result.getTask_id()); KeywordTask keywordTask = keywordTaskService.getOne(keywordTaskWrapper); - +// keywordTask.setStatus("completed"); +// keywordTaskService.updateById(keywordTask); Keyword keyword = keywordService.getById(keywordTask.getKeyword_id()); if (keyword == null) { @@ -591,23 +853,37 @@ keywordTaskWrapper2.eq(KeywordTask::getKeyword_id, keyword.getKeyword_id()); List<KeywordTask> keywordTasks = keywordTaskService.list(keywordTaskWrapper2); -// 定义状态优先级:canceled > false > completed - String finalStatus = "completed"; // 默认状态为 completed - - for (KeywordTask task : keywordTasks) { - String status = task.getStatus(); - if ("canceled".equals(status)) { - finalStatus = "canceled"; - break; // 遇到 canceled 直接跳出循环,因为优先级最高 - } else if ("false".equals(status)) { - finalStatus = "false"; - // 不跳出循环,继续检查是否存在 canceled - } - } -// 更新关键词状态 - if (!finalStatus.equals(keyword.getStatus())) { - keyword.setStatus(finalStatus); + //如果全部为completed 关键词也为completed ,如果关联关系没有任务id,或者状态为running ,关键词为submitted, + if (keywordTasks.stream().allMatch(task -> "completed".equals(task.getStatus())) ) { + keyword.setStatus("completed"); keywordService.updateById(keyword); + + } + //如果有一个task为failed设置关键词为false + else if (keywordTasks.stream().anyMatch(task -> "failed".equals(task.getStatus()))) { + keyword.setStatus("false"); + keywordService.updateById(keyword); + } + + + +// +//// 定义状态优先级:canceled > false > completed +// String finalStatus = "completed"; // 默认状态为 completed +// +// for (KeywordTask task : keywordTasks) { +// String status = task.getStatus(); +//// if ("canceled".equals(status)) { +//// finalStatus = "canceled"; +//// break; // 遇到 canceled 直接跳出循环,因为优先级最高 +//// } else +// if ("false".equals(status)) { +// finalStatus = "false"; +// // 不跳出循环,继续检查是否存在 canceled +// } +// } +// 更新关键词状态 + String orderId = keyword.getOrder_id(); if (orderId == null || orderId.isEmpty()) { System.out.println("关键词[" + keyword.getKeyword_id() + "]未关联订单,跳过订单状态更新"); @@ -623,10 +899,9 @@ System.out.println("订单[" + orderId + "]下无关键词,跳过状态更新"); return; } - // 3. 检查所有关键词的状态是否均为 completed 或 false boolean allValid = orderKeywords.stream() - .allMatch(k -> "completed".equals(k.getStatus()) || "false".equals(k.getStatus())); + .allMatch(k -> "completed".equals(k.getStatus()) || "false".equals(k.getStatus()) || "canceled".equals(k.getStatus())); // 4. 若所有关键词状态均有效,更新订单状态为3 if (allValid) { @@ -639,7 +914,7 @@ System.out.println("未找到订单[" + orderId + "],无法更新状态"); } } - } + Orders orders = orderService.getById(keyword.getOrder_id()); // 2. 批量查询所有问题 @@ -725,14 +1000,14 @@ return reference; }) .collect(Collectors.toList());*/ -// 初始化引用列表(避免null) + // 初始化引用列表(避免null) List<Reference> references = new ArrayList<>(); List<TaskResultResponse.Reference> originalReferences = questionResult.getReferences(); if (originalReferences == null) { originalReferences = Collections.emptyList(); } -// 遍历原始引用列表,转换为Reference对象 + // 遍历原始引用列表,转换为Reference对象 for (TaskResultResponse.Reference ref : originalReferences) { // 注意:需将“原引用类型”替换为实际类型(如QuestionResult中的引用类型) Reference reference = new Reference(); // 设置基本字段 @@ -745,7 +1020,7 @@ reference.setKeyword_id(keyword.getKeyword_id()); reference.setCreate_time(LocalDateTime.now()); - // 处理平台和类型关联 + /* // 处理平台和类型关联 Platform platform = platformService.getPlatformByDomain(reference.getDomain()); if (platform == null) { // 平台不存在,创建新平台(类型默认为“默认”) @@ -774,7 +1049,11 @@ reference.setType_id(type.getType_id()); } } - +*/ + // 关键:使用优化后的方法获取平台,避免重复创建 + Platform platform = getOrCreatePlatform(ref.getDomain()); + reference.setPlatform_id(platform.getPlatform_id()); + reference.setType_id(platform.getType_id()); // 直接从平台获取类型ID,更可靠 // 添加到结果列表 references.add(reference); } -- Gitblit v1.7.1