package com.linghu.controller; import java.time.LocalDateTime; import java.time.format.DateTimeFormatter; import java.util.ArrayList; import java.util.Date; import java.util.List; import java.util.Map; import java.util.stream.Collectors; import javax.servlet.http.HttpServletRequest; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; import org.springframework.core.ParameterizedTypeReference; import org.springframework.http.*; import org.springframework.web.client.RestTemplate; import org.springframework.web.reactive.function.client.WebClient; import org.springframework.http.client.HttpComponentsClientHttpRequestFactory; import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper; import com.linghu.model.common.ResponseResult; import com.linghu.model.dto.HealthResponse; import com.linghu.model.dto.SearchTaskRequest; import com.linghu.model.entity.Keyword; import com.linghu.model.entity.Question; import com.linghu.model.entity.User; import com.linghu.service.KeywordService; import com.linghu.service.QuestionService; import com.linghu.service.ReferenceService; import com.linghu.utils.JwtUtils; import com.linghu.model.dto.SearchTaskResponse; import com.linghu.model.dto.TaskStatusResponse; import com.linghu.model.dto.TaskCancelResponse; import com.linghu.model.dto.TaskListResponse; import io.jsonwebtoken.lang.Collections; import io.swagger.annotations.Api; import io.swagger.annotations.ApiOperation; import reactor.core.publisher.Mono; import org.springframework.web.bind.annotation.*; import org.springframework.http.HttpStatus; import com.linghu.model.dto.TaskResultResponse; import com.linghu.model.dto.TaskResultResponse.QuestionResult; import com.linghu.model.dto.TaskResultResponse.UserResult; import com.linghu.model.entity.Reference; import java.util.stream.Collectors; @RestController @RequestMapping("/collect") @Api(value = "采集接口", tags = "采集管理") public class CollectController { @Autowired private ReferenceService referenceService; @Value("${linghu.url}") private String baseUrl; @Autowired private WebClient webClient; @Autowired private JwtUtils jwtUtils; @Autowired private KeywordService keywordService; @Autowired private QuestionService questionService; @PostMapping("/search") @ApiOperation(value = "开始采集") public Mono> createSearchTask( @RequestBody SearchTaskRequest searchTaskRequest, HttpServletRequest request) { String token = request.getHeader("Authorization"); User user = jwtUtils.parseToken(token); List users = new ArrayList<>(); users.add(user); searchTaskRequest.setUsers(users); return webClient.post() .uri(baseUrl + "/search") .contentType(MediaType.APPLICATION_JSON) .bodyValue(searchTaskRequest) .retrieve() .bodyToMono(new ParameterizedTypeReference>() { }) .flatMap(responseResult -> { // 提取任务ID SearchTaskResponse taskResponse = responseResult.getData(); if (taskResponse != null && taskResponse.getTask_id() != null) { // 保存任务ID到关键词 LambdaUpdateWrapper updateWrapper = new LambdaUpdateWrapper<>(); updateWrapper.eq(Keyword::getKeyword_id, searchTaskRequest.getKeyword_id()); updateWrapper.set(Keyword::getTask_id, taskResponse.getTask_id()); keywordService.update(updateWrapper); // 可选:更新响应中的其他信息 // taskResponse.setMessage("任务已提交并保存,ID: " + taskResponse.getTaskId()); } return Mono.just(responseResult); }) .onErrorResume(e -> { return Mono.just(ResponseResult.error("调用失败: " + e.getMessage())); }); } @ApiOperation(value = "查询任务状态") @GetMapping("/status") public Mono getTaskStatus(String taskId) { return webClient.get() .uri(baseUrl + "/tasks/" + taskId) .accept(MediaType.APPLICATION_JSON) .retrieve() .onStatus(HttpStatus::is4xxClientError, response -> response.bodyToMono(String.class) .flatMap(errorBody -> Mono.error(new RuntimeException("任务不存在: " + errorBody)))) .bodyToMono(TaskStatusResponse.class) .flatMap(result -> { TaskStatusResponse taskStatusResponse = result; if (taskStatusResponse != null && taskStatusResponse.getStatus() != null) { List updateQuestions = taskStatusResponse.getQuestions_status().stream() .map(qs -> { Question question = new Question(); question.setQuestion_id(qs.getQuestion_id()); question.setStatus(qs.getStatus()); return question; }).collect(Collectors.toList()); questionService.updateBatchById(updateQuestions); } return Mono.just(result); }); } @PostMapping("/cancel/{taskId}") @ApiOperation(value = "取消任务") public Mono> cancelTask(@PathVariable String taskId) { return webClient.post() .uri(baseUrl + "/tasks/" + taskId + "/cancel") .contentType(MediaType.APPLICATION_JSON) .bodyValue(Collections.emptyMap()) // 添加空请求体 .retrieve() .onStatus(HttpStatus::isError, response -> { if (response.statusCode() == HttpStatus.NOT_FOUND) { return response.bodyToMono(String.class) .flatMap(errorBody -> Mono.error(new RuntimeException("任务不存在"))); } else if (response.statusCode() == HttpStatus.BAD_REQUEST) { return response.bodyToMono(String.class) .flatMap(errorBody -> Mono.error(new RuntimeException("任务已经完成,无法取消"))); } return response.createException().flatMap(Mono::error); }) .bodyToMono(TaskCancelResponse.class) .map(data -> ResponseResult.success(data)) .onErrorResume(e -> { if (e.getMessage().contains("任务不存在")) { return Mono.just(ResponseResult.error(404, "任务不存在")); } else if (e.getMessage().contains("无法取消")) { return Mono.just(ResponseResult.error(400, "任务已完成,无法取消")); } return Mono.just(ResponseResult.error(500, "取消任务失败: " + e.getMessage())); }); } @ApiOperation(value = "获取任务结果") @GetMapping("/tasks/{taskId}") public Mono> getTaskResult(@PathVariable String taskId) { return webClient.get() .uri(baseUrl + "/tasks/" + taskId + "/result") .accept(MediaType.APPLICATION_JSON) .retrieve() .onStatus(HttpStatus::is4xxClientError, response -> { if (response.statusCode() == HttpStatus.NOT_FOUND) { return response.bodyToMono(String.class) .flatMap(errorBody -> Mono.error(new RuntimeException("任务不存在"))); } else if (response.statusCode() == HttpStatus.BAD_REQUEST) { return response.bodyToMono(String.class) .flatMap(errorBody -> Mono.error(new RuntimeException("任务未完成,无法获取结果"))); } return response.createException().flatMap(Mono::error); }) .bodyToMono(new ParameterizedTypeReference>() { }) .flatMap(responseResult -> { TaskResultResponse result = responseResult.getData(); if (result != null && result.getResults() != null) { return updateQuestionAndReference(result) .thenReturn(responseResult); } return Mono.just(responseResult); }) .onErrorResume(e -> { System.out.println("获取任务结果失败"); return Mono.just(ResponseResult.error(e.getMessage())); }); } /* * private Mono updateQuestionAndReference(TaskResultResponse result) { * return Mono.fromRunnable(() -> { * // 1. 更新关键词状态 * LambdaUpdateWrapper keywordUpdate = new LambdaUpdateWrapper<>(); * keywordUpdate.eq(Keyword::getTask_id, result.getTask_id()) * .set(Keyword::getStatus, "completed"); * keywordService.update(keywordUpdate); * * // 查询关键词ID * LambdaQueryWrapper keywordQuery = new LambdaQueryWrapper<>(); * keywordQuery.eq(Keyword::getTask_id, result.getTask_id()); * Keyword keyword = keywordService.getOne(keywordQuery); * * if (keyword == null) { * System.out.println("未找到关联的关键词,task_id: " + result.getTask_id()); * return; * } * * // 2. 处理每个用户的问题结果 * for (UserResult userResult : result.getResults()) { * for (QuestionResult questionResult : userResult.getQuestions_results()) { * // 2.1 查询问题ID * LambdaQueryWrapper queryWrapper = new LambdaQueryWrapper<>(); * queryWrapper.eq(Question::getQuestion, questionResult.getQuestion()) * .eq(Question::getKeyword_id, keyword.getKeyword_id()); * Question question = questionService.getOne(queryWrapper); * * if (question != null) { * // 更新问题状态 * LambdaUpdateWrapper updateWrapper = new LambdaUpdateWrapper<>(); * updateWrapper.eq(Question::getQuestion_id, question.getQuestion_id()) * .set(Question::getStatus, questionResult.getStatus()) * .set(Question::getResponse, questionResult.getResponse()) * .set(Question::getExtracted_count, questionResult.getExtracted_count()) * .set(Question::getError, questionResult.getError()) * .set(Question::getTimestamp, LocalDateTime.parse( * questionResult.getTimestamp(), * DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSSSS") * )); * questionService.update(updateWrapper); * * // 2.2 保存引用数据 * List references = questionResult.getReferences().stream() * .map(ref -> { * Reference reference = new Reference(); * reference.setQuestion_id(question.getQuestion_id()); * reference.setTitle(ref.getTitle()); * reference.setUrl(ref.getUrl()); * reference.setDomain(ref.getDomain()); * reference.setCreate_time(new Date()); * return reference; * }) * .collect(Collectors.toList()); * * if (!references.isEmpty()) { * referenceService.saveBatch(references); * } * } else { * System.out.println("未找到匹配的问题,question " + question.getQuestion()); * * } * } * } * }); * } */ private Mono updateQuestionAndReference(TaskResultResponse result) { return Mono.fromRunnable(() -> { try { // 1. 更新关键词状态 LambdaUpdateWrapper keywordUpdate = new LambdaUpdateWrapper<>(); keywordUpdate.eq(Keyword::getTask_id, result.getTask_id()) .set(Keyword::getStatus, "completed"); keywordService.update(keywordUpdate); // 查询关键词ID LambdaQueryWrapper keywordQuery = new LambdaQueryWrapper<>(); keywordQuery.eq(Keyword::getTask_id, result.getTask_id()); Keyword keyword = keywordService.getOne(keywordQuery); if (keyword == null) { // log.error("未找到关联的关键词,task_id: {}", result.getTask_id()); System.out.println("未找到关联的关键词,task_id: " + result.getTask_id()); return; } // 2. 批量查询所有问题(假设Question有task_id和keyword_id字段) LambdaQueryWrapper queryWrapper = new LambdaQueryWrapper<>(); queryWrapper.eq(Question::getKeyword_id, keyword.getKeyword_id()); List questions = questionService.list(queryWrapper); // 构建问题映射表,用于快速查找 Map questionMap = questions.stream() .collect(Collectors.toMap(Question::getQuestion, q -> q)); // 3. 收集所有需要更新的问题和引用 List questionsToUpdate = new ArrayList<>(); List allReferences = new ArrayList<>(); // 遍历结果,只收集数据不执行数据库操作 for (UserResult userResult : result.getResults()) { for (QuestionResult questionResult : userResult.getQuestions_results()) { try { Question question = questionMap.get(questionResult.getQuestion()); if (question != null) { // 更新问题对象 question.setStatus(questionResult.getStatus()); question.setResponse(questionResult.getResponse()); question.setExtracted_count(questionResult.getExtracted_count()); question.setError(questionResult.getError()); // 解析时间戳 if (questionResult.getTimestamp() != null) { DateTimeFormatter formatter = DateTimeFormatter .ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSSSS"); question.setTimestamp( LocalDateTime.parse(questionResult.getTimestamp(), formatter)); } questionsToUpdate.add(question); // 收集引用数据 List references = questionResult.getReferences().stream() .map(ref -> { Reference reference = new Reference(); reference.setQuestion_id(question.getQuestion_id()); reference.setTitle(ref.getTitle()); reference.setUrl(ref.getUrl()); reference.setDomain(ref.getDomain()); reference.setCreate_time(LocalDateTime.now()); return reference; }) .collect(Collectors.toList()); allReferences.addAll(references); } else { // log.warn("未找到匹配的问题,question: {}, keyword_id: {}", // questionResult.getQuestion(), keyword.getKeyword_id()); } } catch (Exception e) { // log.error("处理问题结果失败,question: {}, error: {}", // questionResult.getQuestion(), e.getMessage(), e); } } } // 4. 批量更新问题 if (!questionsToUpdate.isEmpty()) { questionService.updateBatchById(questionsToUpdate); // log.info("成功批量更新 {} 个问题", questionsToUpdate.size()); } // 5. 批量插入引用 if (!allReferences.isEmpty()) { // 分批处理,每批1000条记录,避免内存溢出 int batchSize = 1000; for (int i = 0; i < allReferences.size(); i += batchSize) { List batch = allReferences.subList( i, Math.min(i + batchSize, allReferences.size())); referenceService.saveBatch(batch); } // log.info("成功批量插入 {} 条引用数据", allReferences.size()); } } catch (Exception e) { // log.error("更新问题和引用数据失败,task_id: {}, error: {}", // result.getTask_id(), e.getMessage(), e); throw new RuntimeException("更新问题和引用数据失败", e); } }); } @GetMapping("/tasks/all") @ApiOperation(value = "获取所有任务列表") public Mono> getAllTasks() { return webClient.get() .uri(baseUrl + "/tasks") .accept(MediaType.APPLICATION_JSON) .retrieve() .bodyToMono(new ParameterizedTypeReference>() { }) .onErrorResume(e -> { return Mono.just(ResponseResult.error("获取任务列表失败: " + e.getMessage())); }); } @GetMapping("/health") public Mono checkThirdPartyHealth() { return webClient.get() .uri(baseUrl + "/health") // 假设第三方健康检查接口路径为/health .retrieve() .bodyToMono(HealthResponse.class) .onErrorResume(e -> Mono.just( new HealthResponse("unhealthy", null, "", e.getMessage()))); } }