File tree: 4 files changed (+5 −6 lines)

doc/source/getting_started

@@ -27,7 +27,7 @@ Starting the Supervisor
 On the server where you want to run the Xinference supervisor, run the following command:

 .. code-block:: bash
-
+
     xinference-supervisor -H "${supervisor_host}"

 Replace ${supervisor_host} with the actual host of your supervisor server.
@@ -38,8 +38,7 @@ Starting the Workers
 On each of the other servers where you want to run Xinference workers, run the following command:

 .. code-block:: bash
-
+
     xinference-worker -e "http://${supervisor_host}:9997"

 Once Xinference is running, an endpoint will be accessible for model management via CLI or Xinference client.
-
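The last context line above mentions the endpoint that becomes available for model management. As a minimal sketch of what that looks like, assuming the `Client` class from Xinference's Python client (the host, model name, and prompt below are illustrative, not part of this diff):

    from xinference.client import Client

    # Point the client at the supervisor endpoint started above;
    # replace localhost with your actual supervisor host.
    client = Client("http://localhost:9997")

    # Launch a model by name and chat with it (names are illustrative).
    model_uid = client.launch_model(model_name="vicuna-v1.3")
    model = client.get_model(model_uid)
    print(model.chat("What is the largest animal?"))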
@@ -43,8 +43,8 @@ def _install():
     from .pytorch.core import PytorchChatModel, PytorchModel
     from .pytorch.falcon import FalconPytorchChatModel, FalconPytorchModel
     from .pytorch.llama_2 import LlamaPytorchChatModel, LlamaPytorchModel
-    from .pytorch.vicuna import VicunaPytorchChatModel
     from .pytorch.rwkv import RWKVPilePytorchModel
+    from .pytorch.vicuna import VicunaPytorchChatModel
     from .vllm.core import VLLMChatModel, VLLMModel

     # register llm classes.
@@ -39,7 +39,7 @@ def __init__(

     def _load_model(self, kwargs: dict):
         try:
-            from transformers import RwkvForCausalLM, AutoTokenizer
+            from transformers import AutoTokenizer, RwkvForCausalLM
         except ImportError:
             error_message = "Failed to import module 'transformers'"
             installation_guide = [
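The change above only alphabetizes the names in the try block, but the surrounding lines show the guarded-import idiom this codebase uses: heavy optional dependencies are imported inside `_load_model` at call time, so a missing package surfaces as an actionable message instead of failing at module import. A minimal, self-contained sketch of the pattern (the function name and message wording are illustrative, not the project's exact code):

    def load_rwkv(model_path: str):
        try:
            from transformers import AutoTokenizer, RwkvForCausalLM
        except ImportError as e:
            # Turn the bare ImportError into an actionable hint.
            raise ImportError(
                "Failed to import module 'transformers'; "
                "install it with `pip install transformers`."
            ) from e
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        model = RwkvForCausalLM.from_pretrained(model_path)
        return model, tokenizer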
@@ -75,4 +75,3 @@ def match(
         if "generate" not in llm_family.model_ability:
             return False
         return True
-
@@ -587,6 +587,7 @@ def generate_stream_chatglm(

         yield completion_chunk, completion_usage

+
 @torch.inference_mode()
 def generate_stream_rwkv(
     model,
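This last hunk only inserts a blank line so that the customary two blank lines (PEP 8) separate the top-level `generate_stream_rwkv` from the preceding function. For context, `@torch.inference_mode()` disables autograd for everything executed inside the decorated function, which is slightly cheaper than `torch.no_grad()` for pure inference work. A minimal runnable sketch (the stand-in model and function name are illustrative):

    import torch

    @torch.inference_mode()
    def greedy_step(model: torch.nn.Module, x: torch.Tensor) -> torch.Tensor:
        # Everything here runs with autograd disabled; the results are
        # "inference tensors" that cannot be used in a backward pass.
        logits = model(x)
        return logits.argmax(dim=-1)

    model = torch.nn.Linear(4, 10)   # stand-in model
    print(greedy_step(model, torch.randn(2, 4)))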