Everything appended to the end of each API call makes the request so broad that very few providers will accept it. You are basically asking the API to do everything, everywhere.
On OpenRouter, for example, this routes requests to needlessly expensive endpoints. Please stop. All of the following is needlessly appended:
generation.","parameters":{"type":"object","properties":{"request":{"type":"object","properties":{"lyrics":{"anyOf":[{"maxLength":1000,"type":"string"},{"type":"null"}],"description":"The song lyrics, if relevant. Each part should begin with a structure tag ([verse], [chorus], [bridge], [intro-short], [intro-medium], [outro-short], [outro-medium], [outro-long], [inst-short], [inst-medium], [inst-long], [silence]) and a newline, and end with two newlines. These segments should not contain lyrics: ([intro], [intro-short], [intro-medium], [inst], [inst-short], [inst-medium], [outro], [outro-short], [outro-medium])"},"genre":{"type":"string","enum":["pop","rhythm_and_blues","dance","jazz","folk","rock","chinese_modern","eastern_traditional","eastern_opera","metal","reggae","automatic"],"description":"The musical genre."},"music_type":{"type":"string","enum":["instrumental","vocals","mixed"],"description":"Whether to make instrumental music, only vocals, or a fully mixed song. If requesting vocals or mixed, lyrics MUST be provided."},"duration":{"type":"integer","minimum":1,"description":"The desired duration, in seconds, of the video, sound effect, or other media. Does not affect music length."},"end_image":{"type":"string","description":"The final frame image for image-to-video, as a URL or base64 string. If you want a video to loop, give the same start and end image."},"fps":{"type":"integer","minimum":1,"maximum":24,"description":"The number of frames per second."},"i2i_type":{"type":"string","enum":["canny","face","edit"],"description":"Whether to do edit-based i2i (edit), edge-based i2i (canny), or face/person-based i2i (face). 
Edit preserves the most of the structure and style of the original image; canny preserves the image structure, but not its style; face preserves neither, but keeps the visual identity/face of a person in the source image."},"mask_image":{"type":"string","description":"The mask image, as a URL or base64 string."},"preserve_transparency":{"type":"boolean","description":"If given, the transparency of the non-mask image(s) will be preserved. Useful if your input image has clear parts and you want to keep them clear."},"init_images":{"type":"array","items":{"type":"string"},"description":"Optional. Reference images. A list of base64 image string(s) and/or URL(s). If not provided, text-to-image will be done on the text prompt. Because of this, it is recommended to use text-to-image first and select the preferred appearance."},"octree_resolution":{"type":"integer","minimum":17,"maximum":512,"description":"The octree resolution to generate for 3D models. Lower resolutions will generate faster, but be less detailed."},"texture_size":{"type":"integer","minimum":16,"maximum":2048,"description":"The pixel width of the generated texture for 3D models. Lower resolutions will generate faster, but be less detailed."},"target_face_number":{"type":"integer","minimum":10,"maximum":100000,"description":"The number of faces to decimate the mesh to after generation. If not given, the mesh will not be simplified. 
Setting this, especially to very low values, can make a request take noticeably longer."},"output_type":{"type":"string","enum":["glb","stl","obj","ply","vrm"],"description":"What file type to output."},"speed":{"type":"string","enum":["turbo","normal"],"description":"Enabling turbo will make generations much faster but with slightly worse output."},"do_texture":{"type":"boolean","description":"Whether to generate a texture as well."},"do_autorig":{"type":"boolean","description":"Whether to generate a rig as well."},"autorig_type":{"type":"string","enum":["biped","quadruped","unknown","none"],"description":"The type of rigging to use. Bipedal is common and will create much better rigs for humans and other bipeds. The quadruped option is best for non-anthropomorphic animals. If you are not sure which to pick, use unknown."},"transcript":{"type":"string","description":"The text to read out."},"voice":{"type":"string","enum":["bright_female_20s","gentle_female_30s","whispery_female_40s","formal_female_30s","professional_female_30s","resonant_male_40s","light_male_20s","animated_male_20s","calm_female_20s"],"description":"Select a voice to use."},"seed":{"type":"integer","description":"Use this only if reproducible results are important."},"prompt":{"type":"string","description":"It should be descriptive of exactly what you want, and fairly detailed."},"negative_prompt":{"type":"string","description":"What you do not want in the result."},"aspect_ratio":{"type":"string","enum":["w1h1","w1h2","w2h1","w16h9","w21h9","w2h3","w3h2","w3h4","w4h3","w4h5","w5h4","w9h16","w9h21"],"description":"The desired aspect ratio."},"num_inference_steps":{"type":"integer","minimum":2,"maximum":100,"description":"Larger values may produce better outputs at the cost of longer generation times. 
Values over 50 are usually overkill."},"remove_background":{"type":"boolean","description":"Whether to remove the background of any image(s) before/after this operation."},"init_image":{"type":"string","description":"Base64 image string, or a URL to an image."},"strength":{"type":"number","minimum":0.001,"maximum":1,"description":"The strength of any image-to-image transformation."},"generation_type":{"type":"string","enum":["text_to_speech","text_to_image","text_to_video","image_to_image","image_to_video","text_to_sfx","text_to_music","text_to_3d","image_to_3d"],"description":"The type of media to generate."}}},"return_raw":{"type":"boolean","description":"If true, the raw media files will be returned also, in base64 format visible to the user only."}},"required":["request"]}}}],"tool_choice":"auto"}
Everything appended to the end of each API call makes the request so broad that very few providers will accept it. You are basically asking the API to do everything, everywhere.
On OpenRouter, for example, this routes requests to needlessly expensive endpoints. Please stop. All of the following is needlessly appended:
generation.","parameters":{"type":"object","properties":{"request":{"type":"object","properties":{"lyrics":{"anyOf":[{"maxLength":1000,"type":"string"},{"type":"null"}],"description":"The song lyrics, if relevant. Each part should begin with a structure tag ([verse], [chorus], [bridge], [intro-short], [intro-medium], [outro-short], [outro-medium], [outro-long], [inst-short], [inst-medium], [inst-long], [silence]) and a newline, and end with two newlines. These segments should not contain lyrics: ([intro], [intro-short], [intro-medium], [inst], [inst-short], [inst-medium], [outro], [outro-short], [outro-medium])"},"genre":{"type":"string","enum":["pop","rhythm_and_blues","dance","jazz","folk","rock","chinese_modern","eastern_traditional","eastern_opera","metal","reggae","automatic"],"description":"The musical genre."},"music_type":{"type":"string","enum":["instrumental","vocals","mixed"],"description":"Whether to make instrumental music, only vocals, or a fully mixed song. If requesting vocals or mixed, lyrics MUST be provided."},"duration":{"type":"integer","minimum":1,"description":"The desired duration, in seconds, of the video, sound effect, or other media. Does not affect music length."},"end_image":{"type":"string","description":"The final frame image for image-to-video, as a URL or base64 string. If you want a video to loop, give the same start and end image."},"fps":{"type":"integer","minimum":1,"maximum":24,"description":"The number of frames per second."},"i2i_type":{"type":"string","enum":["canny","face","edit"],"description":"Whether to do edit-based i2i (edit), edge-based i2i (canny), or face/person-based i2i (face). 
Edit preserves the most of the structure and style of the original image; canny preserves the image structure, but not its style; face preserves neither, but keeps the visual identity/face of a person in the source image."},"mask_image":{"type":"string","description":"The mask image, as a URL or base64 string."},"preserve_transparency":{"type":"boolean","description":"If given, the transparency of the non-mask image(s) will be preserved. Useful if your input image has clear parts and you want to keep them clear."},"init_images":{"type":"array","items":{"type":"string"},"description":"Optional. Reference images. A list of base64 image string(s) and/or URL(s). If not provided, text-to-image will be done on the text prompt. Because of this, it is recommended to use text-to-image first and select the preferred appearance."},"octree_resolution":{"type":"integer","minimum":17,"maximum":512,"description":"The octree resolution to generate for 3D models. Lower resolutions will generate faster, but be less detailed."},"texture_size":{"type":"integer","minimum":16,"maximum":2048,"description":"The pixel width of the generated texture for 3D models. Lower resolutions will generate faster, but be less detailed."},"target_face_number":{"type":"integer","minimum":10,"maximum":100000,"description":"The number of faces to decimate the mesh to after generation. If not given, the mesh will not be simplified. 
Setting this, especially to very low values, can make a request take noticeably longer."},"output_type":{"type":"string","enum":["glb","stl","obj","ply","vrm"],"description":"What file type to output."},"speed":{"type":"string","enum":["turbo","normal"],"description":"Enabling turbo will make generations much faster but with slightly worse output."},"do_texture":{"type":"boolean","description":"Whether to generate a texture as well."},"do_autorig":{"type":"boolean","description":"Whether to generate a rig as well."},"autorig_type":{"type":"string","enum":["biped","quadruped","unknown","none"],"description":"The type of rigging to use. Bipedal is common and will create much better rigs for humans and other bipeds. The quadruped option is best for non-anthropomorphic animals. If you are not sure which to pick, use unknown."},"transcript":{"type":"string","description":"The text to read out."},"voice":{"type":"string","enum":["bright_female_20s","gentle_female_30s","whispery_female_40s","formal_female_30s","professional_female_30s","resonant_male_40s","light_male_20s","animated_male_20s","calm_female_20s"],"description":"Select a voice to use."},"seed":{"type":"integer","description":"Use this only if reproducible results are important."},"prompt":{"type":"string","description":"It should be descriptive of exactly what you want, and fairly detailed."},"negative_prompt":{"type":"string","description":"What you do not want in the result."},"aspect_ratio":{"type":"string","enum":["w1h1","w1h2","w2h1","w16h9","w21h9","w2h3","w3h2","w3h4","w4h3","w4h5","w5h4","w9h16","w9h21"],"description":"The desired aspect ratio."},"num_inference_steps":{"type":"integer","minimum":2,"maximum":100,"description":"Larger values may produce better outputs at the cost of longer generation times. 
Values over 50 are usually overkill."},"remove_background":{"type":"boolean","description":"Whether to remove the background of any image(s) before/after this operation."},"init_image":{"type":"string","description":"Base64 image string, or a URL to an image."},"strength":{"type":"number","minimum":0.001,"maximum":1,"description":"The strength of any image-to-image transformation."},"generation_type":{"type":"string","enum":["text_to_speech","text_to_image","text_to_video","image_to_image","image_to_video","text_to_sfx","text_to_music","text_to_3d","image_to_3d"],"description":"The type of media to generate."}}},"return_raw":{"type":"boolean","description":"If true, the raw media files will be returned also, in base64 format visible to the user only."}},"required":["request"]}}}],"tool_choice":"auto"}