forked from aazam-gh/clarifai
-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
123 lines (92 loc) · 4.94 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import time
import streamlit as st
from dotenv import load_dotenv
import os
from clarifai.client.model import Model
import json
import base64
from moviepy.editor import ImageSequenceClip
from moviepy.editor import AudioFileClip
from moviepy.editor import vfx
# Load variables from a .env file (if any) into the process environment, then
# look up the Clarifai personal access token. The token is not referenced
# again in the visible code; presumably the Clarifai client reads the same
# environment variable on its own -- confirm before removing.
load_dotenv()
clarifai_pat = os.environ.get('CLARIFAI_PAT')
def generate_prompts(user_prompt):
    """Ask GPT-4 Turbo (via Clarifai) for the image prompts and narration text.

    Args:
        user_prompt: Free-text description of the video the user wants.

    Returns:
        The parsed JSON reply (the system prompt asks for the keys
        ``image_prompts`` and ``tts_prompt``), or ``False`` when the reply
        could not be parsed as JSON.
    """
    system_prompt = (
        "You are an experienced social media growth and content maker. "
        "Your job is to make a short form video of 30 secs long based on the user's prompt. "
        "To do this you will generate 8 detailed prompts for feeding into DALLE to generate "
        "the images for the video and 1 tts prompt explaining the entire video with the help "
        "of the generated images. Return only the prompts as JSON response. "
        "The JSON should be in a format of image_prompts:[prompt:text] and tts_prompt:text"
    )
    inference_params = dict(temperature=0.9, top_p=1, max_tokens=2048, system_prompt=system_prompt)

    # Model Predict
    model_prediction = Model(
        "https://clarifai.com/openai/chat-completion/models/gpt-4-turbo"
    ).predict_by_bytes(user_prompt.encode(), input_type="text", inference_params=inference_params)
    raw_reply = model_prediction.outputs[0].data.text.raw

    # The model often wraps its answer in a Markdown fence (```json ... ```).
    # Strip the fence first and only then drop a *leading* "json" tag, so the
    # word "json" appearing inside the payload is never touched.
    cleaned = raw_reply.strip().strip('`').strip()
    if cleaned[:4].lower() == 'json':
        cleaned = cleaned[4:]

    return validate_json(cleaned)
def validate_json(json_str):
    """Parse *json_str* and return the decoded value, or ``False`` on failure.

    Args:
        json_str: Text expected to contain a JSON document.

    Returns:
        The decoded Python object when parsing succeeds, otherwise ``False``.
        Note that falsy JSON values (``false``, ``0``, ``null``, ``""``) are
        indistinguishable from a failure for callers that only test
        truthiness.
    """
    try:
        # Parse once and return the result directly.
        return json.loads(json_str)
    except (json.JSONDecodeError, TypeError):
        # TypeError covers non-string input such as None.
        return False
def generate_image(prompt, index):
    """Render *prompt* with DALL-E 3 (via Clarifai) and save it to a PNG file.

    Args:
        prompt: Text description of the image to generate.
        index: Number used to build the zero-padded output file name.

    Returns:
        The name of the file that was written (``image_NNN.png``).
    """
    dalle = Model("https://clarifai.com/openai/dall-e/models/dall-e-3")
    response = dalle.predict_by_bytes(
        prompt.encode(),
        input_type="text",
        inference_params={"quality": "standard", "size": '1024x1792'},
    )

    # The payload of the `base64` field is written to disk unchanged.
    image_payload = response.outputs[0].data.image.base64
    target = f'image_{index:03d}.png'
    with open(target, 'wb') as out:
        out.write(image_payload)
    return target
def create_video(image_filenames, fps, audio_path="audio.mp3", output_path="output.mp4"):
    """Assemble still images and a narration track into an H.264 video file.

    Args:
        image_filenames: Ordered list of image files, one per frame.
        fps: Frame rate of the image sequence before the slow-down is applied.
        audio_path: Audio file to attach as the soundtrack.
        output_path: Destination of the rendered video.
    """
    # Create a video clip from images
    clip = ImageSequenceClip(image_filenames, fps=fps)
    audio = AudioFileClip(audio_path)
    try:
        # speedx with a factor of 0.5 plays the sequence at half speed.
        clip = clip.fx(vfx.speedx, 0.5)
        clip = clip.set_audio(audio)
        clip.write_videofile(output_path, codec="libx264")
    finally:
        # Release the audio reader so the caller can delete the audio file
        # right after this function returns.
        audio.close()
        clip.close()
def understand_file(uploaded_file):
    """Ask GPT-4 Vision (via Clarifai) to draft video prompts from an image.

    Args:
        uploaded_file: Raw bytes of the uploaded image.

    Returns:
        The raw text of the model's reply. The reply is also printed to
        stdout, as before.
    """
    system_prompt = (
        "You are an experienced social media growth and content maker. "
        "Your job is to make a short form video of 30 secs long based on the user's image. "
        "To do this you will generate 8 detailed prompts for feeding into DALLE to generate "
        "the images for the video and 1 tts prompt explaining the entire video with the help "
        "of the generated images. Return only the prompts as JSON response. "
        "The JSON should be in a format of image_prompts:[prompt:text] and tts_prompt:text"
    )

    # The image has to travel as a base64 *string*; the raw bytes were
    # previously passed by mistake while this value went unused.
    base64image = base64.b64encode(uploaded_file).decode('utf-8')

    # NOTE(review): max_tokens=100 looks too small for eight prompts plus a
    # narration script -- confirm the intended limit.
    inference_params = dict(temperature=0.2, max_tokens=100, image_base64=base64image)
    model_prediction = Model(
        "https://clarifai.com/openai/chat-completion/models/gpt-4-vision"
    ).predict_by_bytes(system_prompt.encode(), input_type="text", inference_params=inference_params)

    reply = model_prediction.outputs[0].data.text.raw
    print(reply)
    return reply
def generate_tts(prompt):
    """Synthesize *prompt* as speech (via Clarifai) and save it to ``audio.mp3``.

    Args:
        prompt: Narration text to convert to speech.

    Returns:
        The audio payload that was written to disk.
    """
    tts_model = Model("https://clarifai.com/openai/tts/models/openai-tts-1-hd")
    response = tts_model.predict_by_bytes(
        prompt.encode(),
        input_type="text",
        inference_params={"voice": "alloy", "speed": 1.0},
    )

    # The payload of the `base64` field is written to disk unchanged.
    audio_payload = response.outputs[0].data.audio.base64
    with open("audio.mp3", "wb") as out:
        out.write(audio_payload)
    return audio_payload
def main(user_prompt):
    """Run the prompt -> narration -> images -> video pipeline and show the result.

    Args:
        user_prompt: Free-text description of the video the user wants.

    Raises:
        ValueError: If the language model did not return a JSON object.
    """
    prompts = generate_prompts(user_prompt)
    if not isinstance(prompts, dict):
        # generate_prompts returns False when the reply is not valid JSON;
        # fail with a readable message instead of an opaque TypeError.
        raise ValueError("The model did not return valid JSON prompts; please try again.")

    image_filenames = []
    try:
        generate_tts(prompts["tts_prompt"])
        for index, item in enumerate(prompts["image_prompts"]):
            # Accept both {"prompt": "..."} entries and plain strings.
            text = item["prompt"] if isinstance(item, dict) else item
            image_filenames.append(generate_image(text, index))
        create_video(image_filenames, fps=0.5)
    finally:
        # Remove the intermediate files even when a step above failed.
        for file_name in image_filenames + ["audio.mp3"]:
            if os.path.exists(file_name):
                os.remove(file_name)

    with open('output.mp4', 'rb') as video_file:
        video_bytes = video_file.read()
    st.video(video_bytes)
# ---- Streamlit page -------------------------------------------------------
st.title("AI Reels Generator")
rad = st.sidebar.radio("Navigation", ["Home", "About Us"])

if rad == "Home":
    user_prompt = st.text_input("Enter the description of the video you want", " ")

    # Generate video button
    if st.button("Generate Video from prompt"):
        if not user_prompt.strip():
            # The input defaults to a single space, so check for blank text
            # before spending model calls on an empty prompt.
            st.warning("Please enter a description of the video first.")
        else:
            try:
                main(user_prompt)
                with open("output.mp4", "rb") as file:
                    st.download_button(
                        label="Download Video",
                        data=file,
                        file_name="output.mp4",
                        mime="video/mp4",
                    )
            except Exception as e:
                st.error(f"Error: {e}")

    # NOTE(review): the uploader is assumed to belong to the "Home" page --
    # confirm against the original layout.
    uploaded_file = st.file_uploader("Upload a file to use it as the video script")
    if st.button("Generate Video from file") and uploaded_file is not None:
        bytes_data = uploaded_file.read()
        try:
            vid = understand_file(bytes_data)
        except Exception as e:
            st.error(f"Error: {e}")
        else:
            # Show the model's reply (when one is returned) rather than
            # dumping the raw upload bytes onto the page.
            if vid:
                st.text(vid)