ansj segment

This commit is contained in:
robin
2023-10-26 15:05:01 +08:00
parent d0895d0d43
commit 2258421541
11 changed files with 396479 additions and 75 deletions

View File

@ -82,6 +82,13 @@
<groupId>ai.chat2db</groupId>
<artifactId>chat2db-spi</artifactId>
</dependency>
<dependency>
<groupId>org.ansj</groupId>
<artifactId>ansj_seg</artifactId>
</dependency>
</dependencies>
</project>

View File

@ -1,5 +1,65 @@
package ai.chat2db.server.web.api.controller.ai;
import ai.chat2db.server.domain.api.enums.AiSqlSourceEnum;
import ai.chat2db.server.domain.api.model.Config;
import ai.chat2db.server.domain.api.model.DataSource;
import ai.chat2db.server.domain.api.param.ShowCreateTableParam;
import ai.chat2db.server.domain.api.param.TableQueryParam;
import ai.chat2db.server.domain.api.service.ConfigService;
import ai.chat2db.server.domain.api.service.DataSourceService;
import ai.chat2db.server.domain.api.service.TableService;
import ai.chat2db.server.tools.base.enums.WhiteListTypeEnum;
import ai.chat2db.server.tools.base.wrapper.result.DataResult;
import ai.chat2db.server.tools.common.exception.ParamBusinessException;
import ai.chat2db.server.tools.common.util.EasyEnumUtils;
import ai.chat2db.server.web.api.aspect.ConnectionInfoAspect;
import ai.chat2db.server.web.api.controller.ai.azure.client.AzureOpenAIClient;
import ai.chat2db.server.web.api.controller.ai.azure.listener.AzureOpenAIEventSourceListener;
import ai.chat2db.server.web.api.controller.ai.azure.model.AzureChatMessage;
import ai.chat2db.server.web.api.controller.ai.azure.model.AzureChatRole;
import ai.chat2db.server.web.api.controller.ai.chat2db.client.Chat2dbAIClient;
import ai.chat2db.server.web.api.controller.ai.claude.client.ClaudeAIClient;
import ai.chat2db.server.web.api.controller.ai.claude.listener.ClaudeAIEventSourceListener;
import ai.chat2db.server.web.api.controller.ai.claude.model.ClaudeChatCompletionsOptions;
import ai.chat2db.server.web.api.controller.ai.claude.model.ClaudeChatMessage;
import ai.chat2db.server.web.api.controller.ai.config.LocalCache;
import ai.chat2db.server.web.api.controller.ai.converter.ChatConverter;
import ai.chat2db.server.web.api.controller.ai.enums.PromptType;
import ai.chat2db.server.web.api.controller.ai.fastchat.client.FastChatAIClient;
import ai.chat2db.server.web.api.controller.ai.fastchat.embeddings.FastChatEmbeddingResponse;
import ai.chat2db.server.web.api.controller.ai.fastchat.listener.FastChatAIEventSourceListener;
import ai.chat2db.server.web.api.controller.ai.fastchat.model.FastChatMessage;
import ai.chat2db.server.web.api.controller.ai.fastchat.model.FastChatRole;
import ai.chat2db.server.web.api.controller.ai.openai.client.OpenAIClient;
import ai.chat2db.server.web.api.controller.ai.openai.listener.OpenAIEventSourceListener;
import ai.chat2db.server.web.api.controller.ai.request.ChatQueryRequest;
import ai.chat2db.server.web.api.controller.ai.request.ChatRequest;
import ai.chat2db.server.web.api.controller.ai.rest.client.RestAIClient;
import ai.chat2db.server.web.api.controller.ai.rest.listener.RestAIEventSourceListener;
import ai.chat2db.server.web.api.http.GatewayClientService;
import ai.chat2db.server.web.api.http.model.EsTableSchema;
import ai.chat2db.server.web.api.http.model.TableSchema;
import ai.chat2db.server.web.api.http.request.EsTableSchemaRequest;
import ai.chat2db.server.web.api.http.request.TableSchemaRequest;
import ai.chat2db.server.web.api.http.request.WhiteListRequest;
import ai.chat2db.server.web.api.http.response.EsTableSchemaResponse;
import ai.chat2db.server.web.api.http.response.TableSchemaResponse;
import ai.chat2db.server.web.api.util.ApplicationContextUtil;
import ai.chat2db.server.web.api.util.SegmentUtils;
import cn.hutool.core.util.StrUtil;
import cn.hutool.json.JSONUtil;
import com.alibaba.fastjson2.JSON;
import com.google.common.collect.Lists;
import com.unfbx.chatgpt.entity.chat.Message;
import jakarta.annotation.Resource;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.servlet.mvc.method.annotation.SseEmitter;
import java.io.IOException;
import java.math.BigDecimal;
import java.time.Duration;
@ -10,76 +70,6 @@ import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;
import ai.chat2db.server.domain.api.enums.AiSqlSourceEnum;
import ai.chat2db.server.domain.api.model.Config;
import ai.chat2db.server.domain.api.model.DataSource;
import ai.chat2db.server.domain.api.param.ShowCreateTableParam;
import ai.chat2db.server.domain.api.param.TableQueryParam;
import ai.chat2db.server.domain.api.service.ConfigService;
import ai.chat2db.server.domain.api.service.DataSourceService;
import ai.chat2db.server.domain.api.service.TableService;
import ai.chat2db.server.tools.base.wrapper.result.DataResult;
import ai.chat2db.server.tools.common.exception.ParamBusinessException;
import ai.chat2db.server.tools.common.util.EasyEnumUtils;
import ai.chat2db.server.web.api.aspect.ConnectionInfoAspect;
import ai.chat2db.server.web.api.controller.ai.azure.client.AzureOpenAIClient;
import ai.chat2db.server.web.api.controller.ai.azure.model.AzureChatMessage;
import ai.chat2db.server.web.api.controller.ai.azure.model.AzureChatRole;
import ai.chat2db.server.web.api.controller.ai.chat2db.client.Chat2dbAIClient;
import ai.chat2db.server.web.api.controller.ai.claude.client.ClaudeAIClient;
import ai.chat2db.server.web.api.controller.ai.claude.model.ClaudeChatCompletionsOptions;
import ai.chat2db.server.web.api.controller.ai.claude.model.ClaudeChatMessage;
import ai.chat2db.server.web.api.controller.ai.config.LocalCache;
import ai.chat2db.server.web.api.controller.ai.converter.ChatConverter;
import ai.chat2db.server.web.api.controller.ai.enums.PromptType;
import ai.chat2db.server.web.api.controller.ai.azure.listener.AzureOpenAIEventSourceListener;
import ai.chat2db.server.web.api.controller.ai.claude.listener.ClaudeAIEventSourceListener;
import ai.chat2db.server.web.api.controller.ai.fastchat.client.FastChatAIClient;
import ai.chat2db.server.web.api.controller.ai.fastchat.embeddings.FastChatEmbeddingResponse;
import ai.chat2db.server.web.api.controller.ai.fastchat.listener.FastChatAIEventSourceListener;
import ai.chat2db.server.web.api.controller.ai.fastchat.model.FastChatMessage;
import ai.chat2db.server.web.api.controller.ai.fastchat.model.FastChatRole;
import ai.chat2db.server.web.api.controller.ai.openai.listener.OpenAIEventSourceListener;
import ai.chat2db.server.web.api.controller.ai.rest.listener.RestAIEventSourceListener;
import ai.chat2db.server.web.api.controller.ai.request.ChatQueryRequest;
import ai.chat2db.server.web.api.controller.ai.request.ChatRequest;
import ai.chat2db.server.web.api.controller.ai.rest.client.RestAIClient;
import ai.chat2db.server.web.api.http.GatewayClientService;
import ai.chat2db.server.web.api.http.model.EsTableSchema;
import ai.chat2db.server.web.api.http.model.TableSchema;
import ai.chat2db.server.web.api.http.request.EsTableSchemaRequest;
import ai.chat2db.server.web.api.http.request.TableSchemaRequest;
import ai.chat2db.server.web.api.http.response.EsTableSchemaResponse;
import ai.chat2db.server.web.api.http.response.TableSchemaResponse;
import ai.chat2db.server.web.api.util.ApplicationContextUtil;
import ai.chat2db.server.web.api.controller.ai.openai.client.OpenAIClient;
import ai.chat2db.spi.model.TableColumn;
import cn.hutool.core.util.StrUtil;
import cn.hutool.json.JSONUtil;
import com.alibaba.fastjson2.JSON;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.unfbx.chatgpt.OpenAiApi;
import com.unfbx.chatgpt.entity.chat.Message;
import com.unfbx.chatgpt.entity.embeddings.Embedding;
import com.unfbx.chatgpt.entity.embeddings.EmbeddingResponse;
import io.reactivex.Single;
import jakarta.annotation.Resource;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.web.bind.annotation.CrossOrigin;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestHeader;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.servlet.mvc.method.annotation.SseEmitter;
/**
* 描述:
*
@ -501,7 +491,13 @@ public class ChatController {
TableQueryParam queryParam = chatConverter.chat2tableQuery(queryRequest);
properties = buildTableColumn(queryParam, queryRequest.getTableNames());
} else {
properties = querySchemaByEs(queryRequest);
String apiKey = getApiKey();
if (StringUtils.isNotBlank(apiKey)) {
boolean res = gatewayClientService.checkInWhite(new WhiteListRequest(apiKey, WhiteListTypeEnum.VECTOR.getCode())).getData();
if (res) {
properties = queryDatabaseSchema(queryRequest);
}
}
}
String prompt = queryRequest.getMessage();
String promptType = StringUtils.isBlank(queryRequest.getPromptType()) ? PromptType.NL_2_SQL.getCode()
@ -524,6 +520,26 @@ public class ChatController {
return schemaProperty;
}
/**
 * Look up the Chat2DB AI api key from the stored configuration.
 *
 * @return the stored key, or {@code null} when the configured AI SQL source is not
 *         Chat2DB AI, or when no non-blank key is stored
 */
private String getApiKey() {
    ConfigService service = ApplicationContextUtil.getBean(ConfigService.class);

    // only sync for chat2db ai
    Config sourceConfig = service.find(RestAIClient.AI_SQL_SOURCE).getData();
    boolean chat2dbSelected = sourceConfig != null
        && AiSqlSourceEnum.CHAT2DBAI.getCode().equals(sourceConfig.getContent());
    if (!chat2dbSelected) {
        return null;
    }

    Config apiKeyConfig = service.find(Chat2dbAIClient.CHAT2DB_OPENAI_KEY).getData();
    if (apiKeyConfig == null) {
        return null;
    }
    String apiKey = apiKeyConfig.getContent();
    return StringUtils.isBlank(apiKey) ? null : apiKey;
}
/**
* query database type
*
@ -550,7 +566,9 @@ public class ChatController {
*/
public String queryDatabaseSchema(ChatQueryRequest queryRequest) {
// request embedding
FastChatEmbeddingResponse response = distributeAIEmbedding(queryRequest.getMessage());
String input = SegmentUtils.baseAnalysis(queryRequest.getMessage());
log.info("search message:{}", input);
FastChatEmbeddingResponse response = distributeAIEmbedding(input);
List<List<BigDecimal>> contentVector = new ArrayList<>();
contentVector.add(response.getData().get(0).getEmbedding());
@ -574,7 +592,9 @@ public class ChatController {
schemas.add(data.getTableSchema());
}
}
return JSON.toJSONString(schemas);
String res = JSON.toJSONString(schemas);
log.info("search vector result:{}", res);
return res;
} catch (Exception exception) {
log.error("query table error, do nothing");
return "";

View File

@ -77,7 +77,7 @@ public class RdbDdlController extends EmbeddingController {
singleThreadExecutor.submit(() -> {
try {
Chat2DBContext.putContext(connectInfo);
syncTableEs(request);
syncTableVector(request);
} catch (Exception e) {
log.error("sync table vector error", e);
} finally {

View File

@ -68,7 +68,7 @@ public class TableController extends EmbeddingController {
singleThreadExecutor.submit(() -> {
try {
Chat2DBContext.putContext(connectInfo);
syncTableEs(request);
syncTableVector(request);
} catch (Exception e) {
log.error("sync table vector error", e);
} finally {

View File

@ -0,0 +1,94 @@
package ai.chat2db.server.web.api.util;
import lombok.extern.slf4j.Slf4j;
import org.ansj.domain.Result;
import org.ansj.domain.Term;
import org.ansj.splitWord.analysis.BaseAnalysis;
import org.ansj.splitWord.analysis.NlpAnalysis;
import org.ansj.splitWord.analysis.ToAnalysis;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Static helpers that run text through the ansj segmenters and reduce the
 * result to a space-separated list of selected terms.
 *
 * <p>All public methods tolerate {@code null} input and return an empty string for it.
 */
@Slf4j
public class SegmentUtils {

    /** Matches a complete {@code <script>...</script>} element, including its content. */
    private static final Pattern SCRIPT_PATTERN =
        Pattern.compile("<script[^>]*?>[\\s\\S]*?<\\/script>", Pattern.CASE_INSENSITIVE);

    /** Matches a complete {@code <style>...</style>} element, including its content. */
    private static final Pattern STYLE_PATTERN =
        Pattern.compile("<style[^>]*?>[\\s\\S]*?<\\/style>", Pattern.CASE_INSENSITIVE);

    /** Matches any single remaining tag such as {@code <p>} or {@code </div>}. */
    private static final Pattern HTML_TAG_PATTERN =
        Pattern.compile("<[^>]+>", Pattern.CASE_INSENSITIVE);

    /**
     * Segment the content with {@link BaseAnalysis} after stripping HTML tags and
     * removing newlines, spaces and tabs.
     *
     * @param content text to segment; may be {@code null}
     * @return the selected terms, each followed by a single space; empty string for {@code null}
     */
    public static String baseAnalysis(String content) {
        if (content == null) {
            return "";
        }
        Result result = BaseAnalysis.parse(stripTagsAndWhitespace(content));
        log.info("base analysis result:{}", result);
        return convertResToString(result);
    }

    /**
     * Segment the content with {@link ToAnalysis}. Unlike the other two entry points
     * the content is passed to the segmenter unchanged (no tag or whitespace removal).
     *
     * @param content text to segment; may be {@code null}
     * @return the selected terms, each followed by a single space; empty string for {@code null}
     */
    public static String toAnalysis(String content) {
        if (content == null) {
            return "";
        }
        Result result = ToAnalysis.parse(content);
        log.info("to analysis result:{}", result);
        return convertResToString(result);
    }

    /**
     * Segment the content with {@link NlpAnalysis} after stripping HTML tags and
     * removing newlines, spaces and tabs.
     *
     * @param content text to segment; may be {@code null}
     * @return the selected terms, each followed by a single space; empty string for {@code null}
     */
    public static String nlpAnalysis(String content) {
        if (content == null) {
            return "";
        }
        Result result = NlpAnalysis.parse(stripTagsAndWhitespace(content));
        log.info("nlp analysis result:{}", result);
        return convertResToString(result);
    }

    /**
     * Shared pre-processing for {@link #baseAnalysis} and {@link #nlpAnalysis}:
     * drop HTML markup, then remove every newline, space and tab character.
     *
     * @param content non-null raw text
     * @return the text without tags and without those whitespace characters
     */
    private static String stripTagsAndWhitespace(String content) {
        return delHTMLTag(content).replace("\n", "").replace(" ", "").replace("\t", "");
    }

    /**
     * Keep only the terms whose nature string is {@code nt}, {@code nr} or {@code n}
     * and join their names, appending one space after each kept name.
     *
     * @param result segmentation result
     * @return the kept names, each followed by a space (so a non-empty result ends with a space)
     */
    private static String convertResToString(Result result) {
        List<Term> terms = result.getTerms();
        StringBuilder sb = new StringBuilder();
        for (Term term : terms) {
            String nature = term.getNatureStr();
            // NOTE(review): presumably organization name / person name / noun in the
            // ansj tag set -- confirm. Constant-first equals so a term without a
            // nature string is skipped instead of throwing.
            if ("nt".equals(nature) || "nr".equals(nature) || "n".equals(nature)) {
                sb.append(term.getName()).append(" ");
            }
        }
        return sb.toString();
    }

    /**
     * Remove HTML markup from a string: first whole script elements, then whole
     * style elements, then every remaining tag; finally trim the result.
     *
     * @param htmlStr text that may contain HTML; may be {@code null}
     * @return the trimmed text without tags; empty string for {@code null}
     */
    public static String delHTMLTag(String htmlStr) {
        if (htmlStr == null) {
            return "";
        }
        // Order matters: script/style elements must go together with their content
        // before the generic tag pattern would strip only their opening/closing tags.
        String text = SCRIPT_PATTERN.matcher(htmlStr).replaceAll("");
        text = STYLE_PATTERN.matcher(text).replaceAll("");
        text = HTML_TAG_PATTERN.matcher(text).replaceAll("");
        return text.trim();
    }
}