Mirror of https://github.com/lmstudio-ai/lms.git (synced 2025-09-18 19:06:03 +08:00)
lms chat (#227)
* first working chat example: "lms ask sup" or "lms chat"
* print streaming response instead of waiting until the end
* visual polish
* removed ask alias, chat only; --prompt or -p to provide an initial prompt (quits after the response); model selection, i.e. "lms chat phi-4"
* use existing Chat class for context management, use existing optionalPositional() for the model arg
* prettier changes, added types, added quit instructions
* allow combining -p with stdin for processing piped content with a prompt
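Example invocations, sketched from the flags and positional argument added in this commit (the model name "phi-4" is only the illustration used above):

    lms chat                                        # interactive chat with the currently loaded model
    lms chat phi-4                                  # chat with a specific model
    lms chat -p "sup"                               # one-shot: print the response to stdout and quit
    cat notes.txt | lms chat -p "Summarize this"    # combine -p with piped stdin content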
@@ -1,5 +1,6 @@
 import { run, subcommands } from "cmd-ts";
 import { bootstrap } from "./subcommands/bootstrap.js";
+import { chat } from "./subcommands/chat.js";
 import { clone } from "./subcommands/clone.js";
 import { create } from "./subcommands/create.js";
 import { dev } from "./subcommands/dev.js";
@@ -25,6 +26,7 @@ if (process.argv.length === 2) {
 const cli = subcommands({
   name: "lms",
   cmds: {
+    chat,
     status,
     server,
     ls,
169  src/subcommands/chat.ts  Normal file
@@ -0,0 +1,169 @@
import { Chat, type LLM } from "@lmstudio/sdk";
import { command, option, optional, string } from "cmd-ts";
import * as readline from "readline";
import { createClient, createClientArgs } from "../createClient.js";
import { createLogger, logLevelArgs } from "../logLevel.js";
import { optionalPositional } from "../optionalPositional.js";

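// Collect all piped stdin into a single string (only used when stdin is not a TTY).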
async function readStdin(): Promise<string> {
  return new Promise(resolve => {
    let input = "";
    process.stdin.setEncoding("utf-8");

    process.stdin.on("data", chunk => {
      input += chunk;
    });

    process.stdin.on("end", () => {
      resolve(input.trim());
    });
  });
}

export const chat = command({
  name: "chat",
  description: "Open an interactive chat with the currently loaded model.",
  args: {
    ...logLevelArgs,
    ...createClientArgs,
    model: optionalPositional({
      displayName: "model",
      description: "Model name to use",
      type: string,
      default: "",
    }),
    prompt: option({
      type: optional(string),
      long: "prompt",
      short: "p",
      description: "Print response to stdout and quit",
    }),
    systemPrompt: option({
      type: optional(string),
      long: "system-prompt",
      short: "s",
      description: "Custom system prompt to use for the chat",
    }),
  },
  async handler(args) {
    const logger = createLogger(args);
    const client = await createClient(logger, args);

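    // Build the initial prompt from --prompt and/or piped stdin content.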
    let initialPrompt = "";
    if (args.prompt) {
      initialPrompt = args.prompt;
      if (!process.stdin.isTTY) {
        const stdinContent = await readStdin();
        initialPrompt = `${initialPrompt}\n\n${stdinContent}`;
      }
    } else if (!process.stdin.isTTY) {
      initialPrompt = await readStdin();
    }

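    // Resolve the model: a named model if one was given, otherwise the currently loaded default.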
    let model: LLM;
    if (args.model) {
      try {
        model = await client.llm.model(args.model);
      } catch (e) {
        logger.error(`Model "${args.model}" not found, check available models with:`);
        logger.error(" lms ls");
        process.exit(1);
      }
    } else {
      try {
        model = await client.llm.model();
      } catch (e) {
        logger.error("No loaded default model found, load one first:");
        logger.error(" lms load");
        process.exit(1);
      }
    }
    if (!initialPrompt) {
      logger.info(`Chatting with ${model.identifier}. Type 'exit', 'quit' or Ctrl+C to quit`);
    }

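    // Start a fresh conversation seeded with the custom or default system prompt.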
    const chat = Chat.empty();
    chat.append(
      "system",
      args.systemPrompt ??
        "You are a technical AI assistant. Answer questions clearly, concisely and to-the-point.",
    );

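    // One-shot mode: stream a single response for the initial prompt, then exit.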
    if (initialPrompt) {
      chat.append("user", initialPrompt);
      try {
        const prediction = model.respond(chat);
        let lastFragment = "";
        for await (const fragment of prediction) {
          process.stdout.write(fragment.content);
          lastFragment = fragment.content;
        }
        const result = await prediction.result();
        chat.append("assistant", result.content);

        if (!lastFragment.endsWith("\n")) {
          // Newline before new shell prompt if not already there
          process.stdout.write("\n");
        }
        process.exit(0);
      } catch (err) {
        logger.error("Error during chat:", err);
        process.exit(1);
      }
    }

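    // Interactive mode: a readline-based REPL where each turn is appended to the shared chat context.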
    if (process.stdin.isTTY) {
      const rl = readline.createInterface({
        input: process.stdin,
        output: process.stdout,
        prompt: "› ",
      });

      process.stdout.write("\n");
      rl.prompt();

      rl.on("line", async (line: string) => {
        const input = line.trim();
        if (input === "exit" || input === "quit") {
          rl.close();
          return;
        }

        // Skip empty input
        if (!input) {
          rl.prompt();
          return;
        }

        try {
          chat.append("user", input);
          process.stdout.write("\n● ");
          const prediction = model.respond(chat);

          // Temporarily pause the readline interface
          rl.pause();

          for await (const fragment of prediction) {
            process.stdout.write(fragment.content);
          }
          const result = await prediction.result();
          chat.append("assistant", result.content);

          // Resume readline and write a new prompt
          process.stdout.write("\n\n");
          rl.resume();
          rl.prompt();
        } catch (err) {
          logger.error("Error during chat:", err);
          rl.prompt();
        }
      });

      rl.on("close", () => {
        process.exit(0);
      });
    } else {
      process.exit(0);
    }
  },
});