DEV Community

SupermanSpace
SupermanSpace

Posted on

Unity + OpenAI Vision and Voice

Image

Hey Unity devs! 🌟

Let us explore how to integrate OpenAI with Unity today, using two powerful scripts from this amazing GitHub repository. Big props to the creative minds behind it! 🙌

TextToSpeech Script 🗣️:

Have you ever wanted to convert text to speech without any effort? The TextToSpeech script can do just that! It uses OpenAI's magic to transform your text into a masterwork of audio. Here's a little sample of what it can achieve:

using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using UnityEngine.Networking;
using System.Text;
using System.IO;

/// <summary>
/// Sends <c>inputText</c> to the OpenAI text-to-speech endpoint when the component
/// starts and stores the returned audio bytes as an MP3 file.
/// </summary>
public class TextToSpeech : MonoBehaviour
{
    // Serialized so the values (the API key in particular) can be set in the
    // Inspector instead of being hard-coded and committed with the source.
    [SerializeField] private string apiKey = "YOUR_API_KEY";
    [SerializeField] private string baseUrl = "https://api.openai.com/v1/audio/speech";
    [SerializeField] private string model = "tts-1";
    [SerializeField] private string voice = "alloy";
    [SerializeField] private string inputText = "Hello World, This is a test to see the TTS of OpenAI!";
    [SerializeField] private string audioFileName = "speech.mp3";

    /// <summary>
    /// Request body for the speech endpoint. JsonUtility only serializes fields of
    /// [Serializable] types; an anonymous object would serialize to "{}".
    /// </summary>
    [System.Serializable]
    private class SpeechRequest
    {
        public string model;
        public string voice;
        public string input;
    }

    /// <summary>Kicks off the speech request as soon as the component starts.</summary>
    private void Start()
    {
        StartCoroutine(GenerateSpeech());
    }

    /// <summary>
    /// Posts the text-to-speech request and, on success, writes the response bytes to
    /// <c>audioFileName</c> inside <see cref="Application.persistentDataPath"/>.
    /// Failures (HTTP or file system) are reported through <see cref="Debug.LogError(object)"/>.
    /// </summary>
    private IEnumerator GenerateSpeech()
    {
        var payload = new SpeechRequest
        {
            model = model,
            voice = voice,
            input = inputText
        };

        string jsonPayload = JsonUtility.ToJson(payload);

        using (UnityWebRequest www = new UnityWebRequest(baseUrl, UnityWebRequest.kHttpVerbPOST))
        {
            www.uploadHandler = new UploadHandlerRaw(Encoding.UTF8.GetBytes(jsonPayload));
            www.downloadHandler = new DownloadHandlerBuffer();
            www.SetRequestHeader("Authorization", "Bearer " + apiKey);
            www.SetRequestHeader("Content-Type", "application/json");

            yield return www.SendWebRequest();

            if (www.result != UnityWebRequest.Result.Success)
            {
                // The response body usually carries the API's own error description,
                // which is far more useful than the bare transport error.
                Debug.LogError("Failed to generate speech: " + www.error + "\n" + www.downloadHandler.text);
                yield break;
            }

            // A bare relative path depends on the process working directory, which is
            // not writable on many player platforms; persistentDataPath always is.
            // An absolute audioFileName is still honoured by Path.Combine.
            string outputPath = Path.Combine(Application.persistentDataPath, audioFileName);

            try
            {
                File.WriteAllBytes(outputPath, www.downloadHandler.data);
                Debug.Log("Audio file saved as: " + outputPath);
            }
            catch (IOException e)
            {
                Debug.LogError("Failed to save audio file '" + outputPath + "': " + e.Message);
            }
            catch (System.UnauthorizedAccessException e)
            {
                Debug.LogError("Failed to save audio file '" + outputPath + "': " + e.Message);
            }
        }
    }
}
Enter fullscreen mode Exit fullscreen mode

Just enter your text, hit play, and voilà! Your text is now a melody. 🎉

Image

OpenAI Vision Script 📸:

Let us move on to discuss the AI Vision script, a script that lets you use OpenAI's GPT-4 Vision model to intelligently query images. Notice how simple it is:

using System.Collections;
using System.Collections.Generic;
using System.IO;
using UnityEngine;
using UnityEngine.Networking;

/// <summary>
/// Sends a text question together with one or more image URLs to the OpenAI
/// chat-completions endpoint and logs the raw JSON response.
/// </summary>
public class AIVision : MonoBehaviour
{
    [SerializeField] private string openAIUrl = "https://api.openai.com/v1/chat/completions";
    [SerializeField] private string apiKey = "YOUR_API_KEY";

    // NOTE(review): "gpt-4-vision-preview" may have been retired by OpenAI; it is kept
    // as the default for compatibility but can now be changed in the Inspector.
    [SerializeField] private string model = "gpt-4-vision-preview";
    [SerializeField] private int maxTokens = 300;

    public string[] imageUrls;
    public string queryMessage = "What are in these images? Is there any difference between them?";

    /// <summary>Sends the query automatically when image URLs were assigned up front.</summary>
    void Start()
    {
        // imageUrls is null when the component is added from code and never assigned.
        if (imageUrls != null && imageUrls.Length > 0)
        {
            StartCoroutine(PostImageQueryRequest(imageUrls));
        }
    }

    /// <summary>UI hook: sends the query for the currently assigned <c>imageUrls</c>.</summary>
    public void OnClickSend()
    {
        StartCoroutine(PostImageQueryRequest(imageUrls));
    }

    /// <summary>
    /// Posts <c>queryMessage</c> plus the given image URLs and logs either the response
    /// body or the error (including the server's error payload).
    /// </summary>
    /// <param name="urls">Image URLs to attach to the question.</param>
    IEnumerator PostImageQueryRequest(string[] urls)
    {
        if (urls == null || urls.Length == 0)
        {
            Debug.LogWarning("No image URLs supplied; request not sent.");
            yield break;
        }

        string json = BuildRequestJson(urls);

        // Built directly rather than via UnityWebRequest.Post(url, "POST"), which would
        // first create a form upload containing the literal text "POST".
        using (UnityWebRequest webRequest = new UnityWebRequest(openAIUrl, UnityWebRequest.kHttpVerbPOST))
        {
            byte[] jsonToSend = new System.Text.UTF8Encoding().GetBytes(json);
            webRequest.uploadHandler = new UploadHandlerRaw(jsonToSend);
            webRequest.uploadHandler.contentType = "application/json";
            webRequest.downloadHandler = new DownloadHandlerBuffer();
            webRequest.SetRequestHeader("Content-Type", "application/json");
            webRequest.SetRequestHeader("Authorization", "Bearer " + apiKey);

            yield return webRequest.SendWebRequest();

            if (webRequest.result != UnityWebRequest.Result.Success)
            {
                Debug.LogError("Error: " + webRequest.error + "\n" + webRequest.downloadHandler.text);
            }
            else
            {
                Debug.Log("Response: " + webRequest.downloadHandler.text);
            }
        }
    }

    /// <summary>
    /// Builds the chat-completions request body: a single user message whose content
    /// is the text question followed by one <c>image_url</c> part per URL.
    /// </summary>
    /// <remarks>
    /// The JSON is written by hand because JsonUtility cannot serialize anonymous
    /// objects (it yields "{}") and has no way to omit unused fields, which the
    /// mixed text/image content parts would require.
    /// </remarks>
    private string BuildRequestJson(string[] urls)
    {
        var json = new System.Text.StringBuilder();

        json.Append("{\"model\":");
        AppendJsonString(json, model);
        json.Append(",\"max_tokens\":");
        json.Append(maxTokens.ToString(System.Globalization.CultureInfo.InvariantCulture));
        json.Append(",\"messages\":[{\"role\":\"user\",\"content\":[");

        json.Append("{\"type\":\"text\",\"text\":");
        AppendJsonString(json, queryMessage);
        json.Append('}');

        foreach (string url in urls)
        {
            json.Append(",{\"type\":\"image_url\",\"image_url\":{\"url\":");
            AppendJsonString(json, url);
            json.Append("}}");
        }

        json.Append("]}]}");
        return json.ToString();
    }

    /// <summary>Appends <paramref name="value"/> as a quoted, escaped JSON string (null becomes "").</summary>
    private static void AppendJsonString(System.Text.StringBuilder target, string value)
    {
        target.Append('"');

        foreach (char c in value ?? string.Empty)
        {
            switch (c)
            {
                case '"': target.Append("\\\""); break;
                case '\\': target.Append("\\\\"); break;
                case '\n': target.Append("\\n"); break;
                case '\r': target.Append("\\r"); break;
                case '\t': target.Append("\\t"); break;
                default:
                    if (c < ' ')
                    {
                        // Remaining control characters must be written as \uXXXX in JSON.
                        target.Append("\\u").Append(((int)c).ToString("x4", System.Globalization.CultureInfo.InvariantCulture));
                    }
                    else
                    {
                        target.Append(c);
                    }
                    break;
            }
        }

        target.Append('"');
    }

    /// <summary>
    /// Reads image URLs from a text file (one per line, blank lines ignored) and sends
    /// the query for them.
    /// </summary>
    /// <param name="filePath">Path of the text file containing the URLs.</param>
    public void LoadImagesFromFile(string filePath)
    {
        if (!File.Exists(filePath))
        {
            Debug.LogError("File not found: " + filePath);
            return;
        }

        try
        {
            var urls = new List<string>();

            foreach (string line in File.ReadAllLines(filePath))
            {
                string trimmed = line.Trim();
                if (trimmed.Length > 0)
                {
                    urls.Add(trimmed);
                }
            }

            StartCoroutine(PostImageQueryRequest(urls.ToArray()));
        }
        catch (IOException e)
        {
            Debug.LogError("Error reading the file: " + e.Message);
        }
        catch (System.UnauthorizedAccessException e)
        {
            Debug.LogError("Error reading the file: " + e.Message);
        }
    }
}
Enter fullscreen mode Exit fullscreen mode

With just a snippet of code, you're equipped to query images and receive insightful responses. How cool is that? 😎

Image

How to Get Started:

  1. API Key: Grab your OpenAI API key.
  2. Integration: Copy these scripts into your Unity project.
  3. Customisation: Tweak parameters to fit your needs.
  4. Run: Hit play and enjoy!

Feel free to explore the full capabilities, and don't forget to give a shoutout to the creators of this repository!

Top comments (0)