LLM 서빙 API 사용
Was this helpful?
Was this helpful?
Was this helpful?
import requests
import re
import json
# --- Connection settings ----------------------------------------------------
# The token and address below are placeholder strings; replace them with the
# real values for your GenOS deployment before running.
serving_id = 15
bearer_token = 'GenOS에서 발급한 인증키'
genos_url = 'GenOS 주소'

# Base URL of the serving behind the GenOS gateway. The serving ID defined
# above is interpolated into the path.
url = f"{genos_url}/api/gateway/rep/serving/{serving_id}"
# Every request authenticates with the bearer token.
headers = dict(Authorization=f"Bearer {bearer_token}")

# --- List the models exposed by the serving ----------------------------------
endpoint = f"{url}/v1/models"
res = requests.get(endpoint, headers=headers)
print(res.json())

# Sample output of the call above. It is kept as a bare dict literal, which is
# a no-op expression statement, so the script stays valid Python.
{
    'object': 'list',
    'data': [
        {
            'id': '/model/snapshots/9eb2daaa8597bf192a8b0e73f848f3a102794df5',
            'object': 'model',
            'created': 1737017356,
            'owned_by': 'vllm',
            'root': '/model/snapshots/9eb2daaa8597bf192a8b0e73f848f3a102794df5',
            'parent': None,
            'max_model_len': 2048,
            'permission': [
                {
                    'id': 'modelperm-8afda8e84a154576a911c5004602d0c9',
                    'object': 'model_permission',
                    'created': 1737017356,
                    'allow_create_engine': False,
                    'allow_sampling': True,
                    'allow_logprobs': True,
                    'allow_search_indices': False,
                    'allow_view': True,
                    'allow_fine_tuning': False,
                    'organization': '*',
                    'group': None,
                    'is_blocking': False
                }
            ]
        }
    ]
}

# --- Request body for a (non-streaming) chat completion ----------------------
# 'model' is the model id as it appears in the sample /v1/models output above.
body = {
    'model': '/model/snapshots/9eb2daaa8597bf192a8b0e73f848f3a102794df5',
    'messages': [
        {'role': 'system', 'content': '당신은 유용한 어시스턴트입니다.'},
        {'role': 'user', 'content': '안녕하세요, 잘 지내고 있나요?'}
    ]
}
# --- Non-streaming chat completion -------------------------------------------
# Posts the current `body` to the chat-completions endpoint and prints the
# whole JSON response at once.
endpoint = f"{url}/v1/chat/completions"
res = requests.post(endpoint, headers=headers, json=body)
print(res.json())

# Sample output of the call above. It is kept as a bare dict literal, which is
# a no-op expression statement, so the script stays valid Python.
{
    'id': 'chat-23c86687e76242b38ce378920edf36ca',
    'object': 'chat.completion',
    'created': 1737017673,
    'model': '/model/snapshots/9eb2daaa8597bf192a8b0e73f848f3a102794df5',
    'choices': [
        {
            'index': 0,
            'message': {
                'role': 'assistant',
                'content': '안녕하세요! 저는 잘 지내고 있습니다. 무엇이든 물어보세요!',
                'tool_calls': []
            },
            'logprobs': None,
            'finish_reason': 'stop',
            'stop_reason': None
        }
    ],
    'usage': {
        'prompt_tokens': 70,
        'total_tokens': 94,
        'completion_tokens': 24
    },
    'prompt_logprobs': None
}

# --- Request body for a streaming chat completion ----------------------------
# Same model and messages as the non-streaming request, plus 'stream': True.
body = {
    'model': '/model/snapshots/9eb2daaa8597bf192a8b0e73f848f3a102794df5',
    'messages': [
        {'role': 'system', 'content': '당신은 유용한 어시스턴트입니다.'},
        {'role': 'user', 'content': '안녕하세요, 잘 지내고 있나요?'}
    ],
    'stream': True
}
# --- Streaming chat completion -----------------------------------------------
# stream=True lets the response body be consumed line by line as it arrives.
endpoint = f"{url}/v1/chat/completions"
response = requests.post(endpoint, headers=headers, json=body, stream=True)

# Iterate the streaming response created just above, not an earlier one.
for line in response.iter_lines():
    if not line:
        # iter_lines() can yield empty lines; there is nothing to parse.
        continue
    # Each payload line carries a 'data: ' prefix in front of the JSON chunk.
    data = line.decode('utf-8').removeprefix('data: ')
    if data == '[DONE]':
        # Sentinel that marks the end of the stream.
        break
    json_data = json.loads(data)
    choices = json_data.get('choices') or []
    if not choices:
        # A chunk without choices has no text to print.
        continue
    # The incremental text lives in choices[0].delta.content. Treat a missing
    # or None value as an empty string so the literal "None" is never printed.
    message = choices[0].get('delta', {}).get('content') or ''
    print(message, end='', flush=True)

# Sample streamed output of the loop above:
# 안녕하세요! 저는 잘 지내고 있습니다. 무엇이든 물어보세요!

# Sample of a single parsed chunk (`json_data`). It is kept as a bare dict
# literal, which is a no-op expression statement.
{
    'id': 'chat-5a11ac760ced4c549b02b41ffac3b62b',
    'object': 'chat.completion.chunk',
    'created': 1737018387,
    'model': '/model/snapshots/9eb2daaa8597bf192a8b0e73f848f3a102794df5',
    'choices': [
        {
            'index': 0,
            'delta': {'content': ' 지내고'},
            'logprobs': None,
            'finish_reason': 'stop',
            'stop_reason': None
        }
    ]
}

# Attach a user id header. It is set after the requests above, so it only
# affects requests sent afterwards. The value is a string because requests
# rejects header values that are not str or bytes.
headers['x-genos-user-id'] = '653'