LLM 서빙 API 사용
Last updated

Was this helpful?
import requests
import re
import json
# import base64
# --- Serving endpoint configuration ---
serving_id = 43
bearer_token = '인증키입력'   # NOTE: replace with a real auth key before running
genos_url = 'GenOS 주소'      # NOTE: replace with the actual GenOS base URL
endpoint = f"{genos_url}/api/gateway/rep/serving/{serving_id}"
headers = dict(Authorization=f"Bearer {bearer_token}")

# Discover the model name served by this endpoint (OpenAI-compatible /v1/models).
response = requests.get(endpoint + '/v1/models', headers=headers, timeout=30)
model = response.json()['data'][0]['id']
print(model)

# Chat-completion request body shared by the streaming and non-streaming calls.
payload = {
    'model': model,
    'messages': [
        {'role': 'system', 'content': '당신은 유용한 어시스턴트입니다.'},
        {'role': 'user', 'content': '안녕하세요, 잘 지내고 있나요?'}
    ]
}

# Non-streaming completion: the full answer arrives in a single JSON response.
response = requests.post(endpoint + '/v1/chat/completions', headers=headers, json=payload, timeout=30)
print(response.json()['choices'][0]['message']['content'])

# Streaming completion: the response is consumed incrementally below.
# timeout=30 added so a stalled connection cannot hang the script forever
# (it bounds connect/read waits, not total stream duration).
response = requests.post(endpoint + '/v1/chat/completions', headers=headers, json=payload, stream=True, timeout=30)
# Consume the Server-Sent-Events stream: each non-empty line is
# "data: <json chunk>" and the stream ends with "data: [DONE]".
for line in response.iter_lines():
    if not line:
        continue
    data = line.decode('utf-8').removeprefix('data: ')
    if data == '[DONE]':
        break
    json_data = json.loads(data)
    choices = json_data.get('choices', [])
    if not choices:
        # Some chunks (e.g. usage-only trailers) carry no choices; skip them
        # instead of raising IndexError on choices[0].
        continue
    # The delta of a finish chunk may contain an explicit "content": null,
    # so guard with `or ''` in addition to the .get() default.
    message = choices[0].get('delta', {}).get('content', '') or ''
    print(message, end='', flush=True)

# Example streamed chunk (for reference — this is server output, not code):
# {
#     'id': 'chat-5a11ac760ced4c549b02b41ffac3b62b',
#     'object': 'chat.completion.chunk',
#     'created': 1737018387,
#     'model': '/model/snapshots/9eb2daaa8597bf192a8b0e73f848f3a102794df5',
#     'choices': [
#         {
#             'index': 0,
#             'delta': {'content': ' 지내고'},
#             'logprobs': None,
#             'finish_reason': 'stop',
#             'stop_reason': None
#         }
#     ]
# }

# Identify the end user to the gateway on subsequent requests.
# HTTP header values must be strings — passing an int makes `requests`
# raise InvalidHeader, so the id is sent as a string.
headers['x-genos-user-id'] = '653'