| | |
| | | import java.time.LocalDateTime; |
| | | import java.time.format.DateTimeFormatter; |
| | | import java.util.ArrayList; |
| | | import java.util.Date; |
| | | import java.util.List; |
| | | import java.util.Map; |
| | | import java.util.Optional; |
| | | import java.util.stream.Collectors; |
| | | |
| | | import javax.servlet.http.HttpServletRequest; |
| | | |
| | | import com.fasterxml.jackson.core.JsonProcessingException; |
| | | import com.fasterxml.jackson.databind.JsonNode; |
| | | import com.fasterxml.jackson.databind.ObjectMapper; |
| | | import com.linghu.model.dto.*; |
| | | import org.springframework.beans.BeanUtils; |
| | | import org.springframework.beans.factory.annotation.Autowired; |
| | | import org.springframework.beans.factory.annotation.Value; |
| | | import org.springframework.core.ParameterizedTypeReference; |
| | |
| | | import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; |
| | | import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper; |
| | | import com.linghu.model.common.ResponseResult; |
| | | import com.linghu.model.dto.HealthResponse; |
| | | import com.linghu.model.dto.SearchTaskRequest; |
| | | import com.linghu.model.entity.Keyword; |
| | | import com.linghu.model.entity.Question; |
| | | import com.linghu.model.entity.User; |
| | |
| | | import com.linghu.service.QuestionService; |
| | | import com.linghu.service.ReferenceService; |
| | | import com.linghu.utils.JwtUtils; |
| | | import com.linghu.model.dto.SearchTaskResponse; |
| | | import com.linghu.model.dto.TaskStatusResponse; |
| | | import com.linghu.model.dto.TaskCancelResponse; |
| | | import com.linghu.model.dto.TaskListResponse; |
| | | |
| | | import io.jsonwebtoken.lang.Collections; |
| | | import io.swagger.annotations.Api; |
| | | import io.swagger.annotations.ApiOperation; |
| | | import reactor.core.publisher.Mono; |
| | | |
| | | import org.springframework.web.bind.annotation.*; |
| | | import org.springframework.web.bind.annotation.* ; |
| | | import org.springframework.http.HttpStatus; |
| | | import com.linghu.model.dto.TaskResultResponse; |
| | | import com.linghu.model.dto.TaskResultResponse.QuestionResult; |
| | | import com.linghu.model.dto.TaskResultResponse.UserResult; |
| | | import com.linghu.model.entity.Reference; |
| | | import java.util.stream.Collectors; |
| | | import java.util.stream.IntStream; |
| | | |
| | | @RestController |
| | | @RequestMapping("/collect") |
| | |
| | | |
| | | @PostMapping("/search") |
| | | @ApiOperation(value = "开始采集") |
| | | public Mono<ResponseResult<SearchTaskResponse>> createSearchTask( |
| | | public Mono<SearchTaskResponse> createSearchTask( |
| | | @RequestBody SearchTaskRequest searchTaskRequest, |
| | | HttpServletRequest request) { |
| | | HttpServletRequest request) throws JsonProcessingException { |
| | | String token = request.getHeader("Authorization"); |
| | | User user = jwtUtils.parseToken(token); |
| | | List<User> users = new ArrayList<>(); |
| | | users.add(user); |
| | | // 复制到UserDto |
| | | UserDto userDto = new UserDto(); |
| | | userDto.setName(user.getUser_name()); |
| | | userDto.setEmail(user.getUser_email()); |
| | | userDto.setPassword(user.getPassword()); |
| | | |
| | | // List<User> users = new ArrayList<>(); |
| | | // users.add(user); |
| | | List<UserDto> users = new ArrayList<>(); |
| | | users.add(userDto); |
| | | searchTaskRequest.setUsers(users); |
| | | // json格式 |
| | | ObjectMapper objectMapper = new ObjectMapper(); |
| | | System.out.println(objectMapper.writeValueAsString(searchTaskRequest)); |
| | | |
| | | return webClient.post() |
| | | .uri(baseUrl + "/search") |
| | | .uri(baseUrl + "/api/v1/search") |
| | | .contentType(MediaType.APPLICATION_JSON) |
| | | .bodyValue(searchTaskRequest) |
| | | .retrieve() |
| | | .bodyToMono(new ParameterizedTypeReference<ResponseResult<SearchTaskResponse>>() { |
| | | .onStatus(HttpStatus::is4xxClientError, response -> response.bodyToMono(String.class) |
| | | .flatMap(errorBody -> Mono.error(new RuntimeException(errorBody)))) |
| | | .bodyToMono(new ParameterizedTypeReference<SearchTaskResponse>() { |
| | | }) |
| | | .flatMap(responseResult -> { |
| | | // 提取任务ID |
| | | SearchTaskResponse taskResponse = responseResult.getData(); |
| | | SearchTaskResponse taskResponse = responseResult; |
| | | if (taskResponse != null && taskResponse.getTask_id() != null) { |
| | | // 保存任务ID到关键词 |
| | | LambdaUpdateWrapper<Keyword> updateWrapper = new LambdaUpdateWrapper<>(); |
| | |
| | | // 可选:更新响应中的其他信息 |
| | | // taskResponse.setMessage("任务已提交并保存,ID: " + taskResponse.getTaskId()); |
| | | } |
| | | return Mono.just(responseResult); |
| | | return Mono.just(taskResponse); |
| | | }) |
| | | .onErrorResume(e -> { |
| | | return Mono.just(ResponseResult.error("调用失败: " + e.getMessage())); |
| | | // return Mono.just(ResponseResult.error("调用失败: " + e.getMessage())); |
| | | SearchTaskResponse task = new SearchTaskResponse(); |
| | | task.setMessage("调用失败: " + e.getMessage()); |
| | | return Mono.just(task); |
| | | }); |
| | | } |
| | | |
| | |
| | | @GetMapping("/status") |
| | | public Mono<TaskStatusResponse> getTaskStatus(String taskId) { |
| | | return webClient.get() |
| | | .uri(baseUrl + "/tasks/" + taskId) |
| | | .uri(baseUrl + "/api/v1/tasks/" + taskId) |
| | | .accept(MediaType.APPLICATION_JSON) |
| | | .retrieve() |
| | | .onStatus(HttpStatus::is4xxClientError, response -> response.bodyToMono(String.class) |
| | | .flatMap(errorBody -> Mono.error(new RuntimeException("任务不存在: " + errorBody)))) |
| | | .onStatus(HttpStatus::is4xxClientError, response -> response.bodyToMono(TaskStatusResponse.class) |
| | | .flatMap(errorBody -> Mono.error(new RuntimeException(errorBody.getDetail())))) |
| | | .bodyToMono(TaskStatusResponse.class) |
| | | .flatMap(result -> { |
| | | TaskStatusResponse taskStatusResponse = result; |
| | |
| | | questionService.updateBatchById(updateQuestions); |
| | | } |
| | | return Mono.just(result); |
| | | }) |
| | | .onErrorResume(e -> { |
| | | // 创建一个自定义的错误响应对象 |
| | | TaskStatusResponse errorResponse = new TaskStatusResponse(); |
| | | errorResponse.setStatus("ERROR"); |
| | | errorResponse.setMessage(e.getMessage()); |
| | | errorResponse.setDetail(e.getMessage()); |
| | | |
| | | return Mono.just(errorResponse); |
| | | }); |
| | | } |
| | | |
| | |
| | | @ApiOperation(value = "取消任务") |
| | | public Mono<ResponseResult<TaskCancelResponse>> cancelTask(@PathVariable String taskId) { |
| | | return webClient.post() |
| | | .uri(baseUrl + "/tasks/" + taskId + "/cancel") |
| | | .uri(baseUrl + "/api/v1/tasks/" + taskId + "/cancel") |
| | | .contentType(MediaType.APPLICATION_JSON) |
| | | .bodyValue(Collections.emptyMap()) // 添加空请求体 |
| | | .retrieve() |
| | | .onStatus(HttpStatus::isError, response -> { |
| | | if (response.statusCode() == HttpStatus.NOT_FOUND) { |
| | | return response.bodyToMono(String.class) |
| | | .flatMap(errorBody -> Mono.error(new RuntimeException("任务不存在"))); |
| | | } else if (response.statusCode() == HttpStatus.BAD_REQUEST) { |
| | | return response.bodyToMono(String.class) |
| | | .flatMap(errorBody -> Mono.error(new RuntimeException("任务已经完成,无法取消"))); |
| | | } |
| | | return response.createException().flatMap(Mono::error); |
| | | }) |
| | | .onStatus(HttpStatus::isError, response -> response.bodyToMono(TaskCancelResponse.class) |
| | | .flatMap(errorBody -> Mono.error(new RuntimeException(errorBody.getDetail())))) |
| | | .bodyToMono(TaskCancelResponse.class) |
| | | .map(data -> ResponseResult.success(data)) |
| | | .onErrorResume(e -> { |
| | |
| | | |
| | | @ApiOperation(value = "获取任务结果") |
| | | @GetMapping("/tasks/{taskId}") |
| | | public Mono<ResponseResult<TaskResultResponse>> getTaskResult(@PathVariable String taskId) { |
| | | public Mono<TaskResultResponse> getTaskResult(@PathVariable String taskId) { |
| | | return webClient.get() |
| | | .uri(baseUrl + "/tasks/" + taskId + "/result") |
| | | .uri(baseUrl + "/api/v1/tasks/" + taskId + "/result") |
| | | .accept(MediaType.APPLICATION_JSON) |
| | | .retrieve() |
| | | .onStatus(HttpStatus::is4xxClientError, response -> { |
| | |
| | | } |
| | | return response.createException().flatMap(Mono::error); |
| | | }) |
| | | .bodyToMono(new ParameterizedTypeReference<ResponseResult<TaskResultResponse>>() { |
| | | }) |
| | | .bodyToMono(TaskResultResponse.class) |
| | | .flatMap(responseResult -> { |
| | | TaskResultResponse result = responseResult.getData(); |
| | | TaskResultResponse result = responseResult; |
| | | if (result != null && result.getResults() != null) { |
| | | return updateQuestionAndReference(result) |
| | | .thenReturn(responseResult); |
| | |
| | | }) |
| | | .onErrorResume(e -> { |
| | | System.out.println("获取任务结果失败"); |
| | | return Mono.just(ResponseResult.error(e.getMessage())); |
| | | TaskResultResponse result = new TaskResultResponse(); |
| | | result.setDetail("获取任务结果失败: " + e.getMessage()); |
| | | return Mono.just(result); |
| | | }); |
| | | } |
| | | |
| | | /* |
| | | * private Mono<Void> updateQuestionAndReference(TaskResultResponse result) { |
| | | * return Mono.fromRunnable(() -> { |
| | | * // 1. 更新关键词状态 |
| | | * LambdaUpdateWrapper<Keyword> keywordUpdate = new LambdaUpdateWrapper<>(); |
| | | * keywordUpdate.eq(Keyword::getTask_id, result.getTask_id()) |
| | | * .set(Keyword::getStatus, "completed"); |
| | | * keywordService.update(keywordUpdate); |
| | | * |
| | | * // 查询关键词ID |
| | | * LambdaQueryWrapper<Keyword> keywordQuery = new LambdaQueryWrapper<>(); |
| | | * keywordQuery.eq(Keyword::getTask_id, result.getTask_id()); |
| | | * Keyword keyword = keywordService.getOne(keywordQuery); |
| | | * |
| | | * if (keyword == null) { |
| | | * System.out.println("未找到关联的关键词,task_id: " + result.getTask_id()); |
| | | * return; |
| | | * } |
| | | * |
| | | * // 2. 处理每个用户的问题结果 |
| | | * for (UserResult userResult : result.getResults()) { |
| | | * for (QuestionResult questionResult : userResult.getQuestions_results()) { |
| | | * // 2.1 查询问题ID |
| | | * LambdaQueryWrapper<Question> queryWrapper = new LambdaQueryWrapper<>(); |
| | | * queryWrapper.eq(Question::getQuestion, questionResult.getQuestion()) |
| | | * .eq(Question::getKeyword_id, keyword.getKeyword_id()); |
| | | * Question question = questionService.getOne(queryWrapper); |
| | | * |
| | | * if (question != null) { |
| | | * // 更新问题状态 |
| | | * LambdaUpdateWrapper<Question> updateWrapper = new LambdaUpdateWrapper<>(); |
| | | * updateWrapper.eq(Question::getQuestion_id, question.getQuestion_id()) |
| | | * .set(Question::getStatus, questionResult.getStatus()) |
| | | * .set(Question::getResponse, questionResult.getResponse()) |
| | | * .set(Question::getExtracted_count, questionResult.getExtracted_count()) |
| | | * .set(Question::getError, questionResult.getError()) |
| | | * .set(Question::getTimestamp, LocalDateTime.parse( |
| | | * questionResult.getTimestamp(), |
| | | * DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSSSS") |
| | | * )); |
| | | * questionService.update(updateWrapper); |
| | | * |
| | | * // 2.2 保存引用数据 |
| | | * List<Reference> references = questionResult.getReferences().stream() |
| | | * .map(ref -> { |
| | | * Reference reference = new Reference(); |
| | | * reference.setQuestion_id(question.getQuestion_id()); |
| | | * reference.setTitle(ref.getTitle()); |
| | | * reference.setUrl(ref.getUrl()); |
| | | * reference.setDomain(ref.getDomain()); |
| | | * reference.setCreate_time(new Date()); |
| | | * return reference; |
| | | * }) |
| | | * .collect(Collectors.toList()); |
| | | * |
| | | * if (!references.isEmpty()) { |
| | | * referenceService.saveBatch(references); |
| | | * } |
| | | * } else { |
| | | * System.out.println("未找到匹配的问题,question " + question.getQuestion()); |
| | | * |
| | | * } |
| | | * } |
| | | * } |
| | | * }); |
| | | * } |
| | | */ |
| | | // private Mono<Void> updateQuestionAndReference(TaskResultResponse result) { |
| | | // return Mono.fromRunnable(() -> { |
| | | // // 1. 更新关键词状态 |
| | | // LambdaUpdateWrapper<Keyword> keywordUpdate = new LambdaUpdateWrapper<>(); |
| | | // keywordUpdate.eq(Keyword::getTask_id, result.getTask_id()) |
| | | // .set(Keyword::getStatus, "completed"); |
| | | // keywordService.update(keywordUpdate); |
| | | // |
| | | // // 查询关键词ID |
| | | // LambdaQueryWrapper<Keyword> keywordQuery = new LambdaQueryWrapper<>(); |
| | | // keywordQuery.eq(Keyword::getTask_id, result.getTask_id()); |
| | | // Keyword keyword = keywordService.getOne(keywordQuery); |
| | | // |
| | | // if (keyword == null) { |
| | | // System.out.println("未找到关联的关键词,task_id: " + result.getTask_id()); |
| | | // return; |
| | | // } |
| | | // |
| | | // // 2. 处理每个用户的问题结果 |
| | | // for (UserResult userResult : result.getResults()) { |
| | | // for (QuestionResult questionResult : userResult.getQuestions_results()) { |
| | | // // 2.1 查询问题ID |
| | | // LambdaQueryWrapper<Question> queryWrapper = new LambdaQueryWrapper<>(); |
| | | // queryWrapper.eq(Question::getQuestion, questionResult.getQuestion()) |
| | | // .eq(Question::getKeyword_id, keyword.getKeyword_id()); |
| | | // Question question = questionService.getOne(queryWrapper); |
| | | // |
| | | // if (question != null) { |
| | | // // 更新问题状态 |
| | | // LambdaUpdateWrapper<Question> updateWrapper = new LambdaUpdateWrapper<>(); |
| | | // updateWrapper.eq(Question::getQuestion_id, question.getQuestion_id()) |
| | | // .set(Question::getStatus, questionResult.getStatus()) |
| | | // .set(Question::getResponse, questionResult.getResponse()) |
| | | // .set(Question::getExtracted_count, questionResult.getExtracted_count()) |
| | | // .set(Question::getError, questionResult.getError()) |
| | | // .set(Question::getTimestamp, LocalDateTime.parse( |
| | | // questionResult.getTimestamp(), |
| | | // DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSSSS") |
| | | // )); |
| | | // questionService.update(updateWrapper); |
| | | // |
| | | // // 2.2 保存引用数据 |
| | | // List<Reference> references = questionResult.getReferences().stream() |
| | | // .map(ref -> { |
| | | // Reference reference = new Reference(); |
| | | // reference.setQuestion_id(question.getQuestion_id()); |
| | | // reference.setTitle(ref.getTitle()); |
| | | // reference.setUrl(ref.getUrl()); |
| | | // reference.setDomain(ref.getDomain()); |
| | | // reference.setCreate_time(LocalDateTime.now()); |
| | | // return reference; |
| | | // }) |
| | | // .collect(Collectors.toList()); |
| | | // |
| | | // if (!references.isEmpty()) { |
| | | // referenceService.saveBatch(references); |
| | | // } |
| | | // } else { |
| | | // System.out.println("未找到匹配的问题,question " + question.getQuestion()); |
| | | // |
| | | // } |
| | | // } |
| | | // } |
| | | // }); |
| | | // } |
| | | |
| | | private Mono<Void> updateQuestionAndReference(TaskResultResponse result) { |
| | | return Mono.fromRunnable(() -> { |
| | |
| | | Keyword keyword = keywordService.getOne(keywordQuery); |
| | | |
| | | if (keyword == null) { |
| | | // log.error("未找到关联的关键词,task_id: {}", result.getTask_id()); |
| | | System.out.println("未找到关联的关键词,task_id: " + result.getTask_id()); |
| | | return; |
| | | } |
| | | |
| | | // 2. 批量查询所有问题(假设Question有task_id和keyword_id字段) |
| | | // 2. 批量查询所有问题 |
| | | LambdaQueryWrapper<Question> queryWrapper = new LambdaQueryWrapper<>(); |
| | | queryWrapper.eq(Question::getKeyword_id, keyword.getKeyword_id()); |
| | | List<Question> questions = questionService.list(queryWrapper); |
| | |
| | | List<Question> questionsToUpdate = new ArrayList<>(); |
| | | List<Reference> allReferences = new ArrayList<>(); |
| | | |
| | | // 遍历结果,只收集数据不执行数据库操作 |
| | | // 遍历结果 |
| | | for (UserResult userResult : result.getResults()) { |
| | | for (QuestionResult questionResult : userResult.getQuestions_results()) { |
| | | try { |
| | |
| | | question.setTimestamp( |
| | | LocalDateTime.parse(questionResult.getTimestamp(), formatter)); |
| | | } |
| | | //更新 |
| | | // questionService.updateById(question); |
| | | |
| | | questionsToUpdate.add(question); |
| | | |
| | | // 收集引用数据 |
| | | List<Reference> references = questionResult.getReferences().stream() |
| | | .map(ref -> { |
| | | Reference reference = new Reference(); |
| | | reference.setQuestion_id(question.getQuestion_id()); |
| | | reference.setTitle(ref.getTitle()); |
| | | reference.setUrl(ref.getUrl()); |
| | | reference.setDomain(ref.getDomain()); |
| | | reference.setCreate_time(LocalDateTime.now()); |
| | | return reference; |
| | | }) |
| | | .collect(Collectors.toList()); |
| | | // 收集引用数据,处理空集合情况 |
| | | List<Reference> references = |
| | | Optional.ofNullable(questionResult.getReferences()) |
| | | .orElse(Collections.emptyList()) |
| | | .stream() |
| | | .map(ref -> { |
| | | Reference reference = new Reference(); |
| | | reference.setQuestion_id(question.getQuestion_id()); |
| | | reference.setTitle(ref.getTitle()); |
| | | reference.setUrl(ref.getUrl()); |
| | | reference.setDomain(ref.getDomain()); |
| | | reference.setCreate_time(LocalDateTime.now()); |
| | | return reference; |
| | | }) |
| | | .collect(Collectors.toList()); |
| | | |
| | | allReferences.addAll(references); |
| | | } else { |
| | | // log.warn("未找到匹配的问题,question: {}, keyword_id: {}", |
| | | // questionResult.getQuestion(), keyword.getKeyword_id()); |
| | | // 添加到总引用列表 |
| | | if (!references.isEmpty()) { |
| | | allReferences.addAll(references); |
| | | } |
| | | } |
| | | } catch (Exception e) { |
| | | // log.error("处理问题结果失败,question: {}, error: {}", |
| | | // questionResult.getQuestion(), e.getMessage(), e); |
| | | System.out.println("处理问题结果失败: " + e.getMessage()); |
| | | } |
| | | } |
| | | } |
| | | |
| | | // 4. 批量更新问题 |
| | | System.out.println(questionsToUpdate); |
| | | if (!questionsToUpdate.isEmpty()) { |
| | | questionService.updateBatchById(questionsToUpdate); |
| | | // log.info("成功批量更新 {} 个问题", questionsToUpdate.size()); |
| | | System.out.println("成功批量更新 " + questionsToUpdate.size() + " 个问题"); |
| | | } |
| | | |
| | | // 5. 批量插入引用 |
| | | // 5. 批量插入引用,使用流式分批处理 |
| | | if (!allReferences.isEmpty()) { |
| | | // 分批处理,每批1000条记录,避免内存溢出 |
| | | int batchSize = 1000; |
| | | for (int i = 0; i < allReferences.size(); i += batchSize) { |
| | | List<Reference> batch = allReferences.subList( |
| | | i, Math.min(i + batchSize, allReferences.size())); |
| | | referenceService.saveBatch(batch); |
| | | } |
| | | // log.info("成功批量插入 {} 条引用数据", allReferences.size()); |
| | | IntStream.iterate(0, i -> i + batchSize) |
| | | .limit((allReferences.size() + batchSize - 1) / batchSize) |
| | | .forEach(i -> { |
| | | List<Reference> batch = allReferences.subList( |
| | | i, Math.min(i + batchSize, allReferences.size())); |
| | | referenceService.saveBatch(batch); |
| | | }); |
| | | System.out.println("成功批量插入 " + allReferences.size() + " 条引用数据"); |
| | | } |
| | | |
| | | } catch (Exception e) { |
| | | // log.error("更新问题和引用数据失败,task_id: {}, error: {}", |
| | | // result.getTask_id(), e.getMessage(), e); |
| | | System.out.println("更新问题和引用数据失败: " + e.getMessage()); |
| | | throw new RuntimeException("更新问题和引用数据失败", e); |
| | | } |
| | | }); |
| | |
| | | |
// Handler mapped to GET "/tasks/all"; its Swagger summary string reads
// "get the list of all tasks".
//
// NOTE(review): this span looks like a flattened diff. The method signature,
// the .uri(...) call, the .bodyToMono(...) type reference and the first
// statement of the error handler each appear in two variants: one built
// around ResponseResult<TaskListResponse>, one using TaskListResponse
// directly. Presumably the second row of each pair is the current code -
// TODO confirm against the repository before compiling.
| | | @GetMapping("/tasks/all") |
| | | @ApiOperation(value = "获取所有任务列表") |
| | | public Mono<ResponseResult<TaskListResponse>> getAllTasks() { |
| | | public Mono<TaskListResponse> getAllTasks() { |
// Issues a GET through webClient to a URL built from baseUrl, asking for JSON.
| | | return webClient.get() |
| | | .uri(baseUrl + "/tasks") |
| | | .uri(baseUrl + "/api/v1/tasks") |
| | | .accept(MediaType.APPLICATION_JSON) |
| | | .retrieve() |
// The response body is decoded into the type named by the type reference.
| | | .bodyToMono(new ParameterizedTypeReference<ResponseResult<TaskListResponse>>() { |
| | | .bodyToMono(new ParameterizedTypeReference<TaskListResponse>() { |
| | | }) |
// Errors are not propagated to the caller: the handler emits a fallback value
// carrying e.getMessage() (ResponseResult.error(...) in one variant, a
// TaskListResponse with its detail field set in the other).
| | | .onErrorResume(e -> { |
| | | return Mono.just(ResponseResult.error("获取任务列表失败: " + e.getMessage())); |
| | | TaskListResponse response = new TaskListResponse(); |
| | | response.setDetail("获取任务列表失败: " + e.getMessage()); |
| | | return Mono.just(response); |
| | | |
| | | // return Mono.just(ResponseResult.error("获取任务列表失败: " + e.getMessage())); |
| | | }); |
| | | } |
| | | |