[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"tool-mesh-llm":3,"related-mesh-llm":36},{"id":4,"name":5,"slug":6,"logo_url":7,"tagline":8,"capability_type":9,"pricing_type":10,"platforms":11,"view_count":15,"click_count":16,"featured":17,"is_trending":17,"editor_rating":18,"heat_score":16,"editor_summary":18,"status":19,"created_at":20,"categories":21,"tags":22,"description":23,"website_url":24,"github_url":25,"demo_url":18,"pricing_detail":18,"models":26,"screenshots":27,"features":28,"install_command":18,"compatible_clients":35,"editor_rating_detail":18,"seo_title":18,"seo_description":18,"updated_at":18},"0f23122f-3796-49a9-948a-4a2fa098d924","Mesh LLM","mesh-llm","https:\u002F\u002Fgithub.com\u002Ffavicon.ico","将多台机器的闲置GPU组成分布式推理网格，跑超大模型不再需要单卡80G","tool","open_source",[12,13,14],"cli","mac","linux",3,0,false,null,"published","2026-04-06T01:06:03",[],[],"Mesh LLM 是一个开源的分布式 LLM 推理工具，能将多台机器上的闲置 GPU 算力整合为统一的推理资源池，暴露为 OpenAI 兼容 API（localhost:9337）。密集模型自动使用流水线并行分层到各节点，MoE 模型使用专家分片实现零跨节点推理流量。支持零传输 GGUF 加载（模型加载从 111 秒降至 5 秒）、Speculative Decoding（代码生成吞吐量提升 38%）、多模态请求（视觉+音频）。提供 Web 控制台查看拓扑和 VRAM 状态。支持公共网格自动发现或创建私有命名网格进行团队协作。基于 Rust + TypeScript 构建，使用修改版 llama.cpp 作为底层推理引擎。","https:\u002F\u002Fdocs.anarchai.org\u002F","https:\u002F\u002Fgithub.com\u002Fmichaelneale\u002Fmesh-llm",[],[],[29,30,31,32,33,34],"多机GPU池化","OpenAI兼容API","流水线并行","专家分片MoE","零传输GGUF加载","Web控制台",[],[]]