| import databases |
| import orm |
| import asyncio, os |
| import uuid, random |
| from pydub import AudioSegment |
| from .DescriptAPI import Speak |
|
|
| |
| from .CharacterAPI import CharacterAITTS |
| from .Vercel import AsyncImageGenerator |
| from .Video3d import VideoGenerator |
| import aiohttp |
| from typing import List |
| from pydantic import BaseModel |
| import tempfile |
| import json |
| from .Modal import ModalImageGenerator |
|
|
# Database connection URL, read from the SUPABASE environment variable.
# Falls back to the placeholder "RANDOM_STRING" when the variable is unset.
SUPABASE = os.getenv("SUPABASE", "RANDOM_STRING")
database_url = SUPABASE

# Shared database handle and the registry that the ORM models in this module
# are registered with (each model sets ``registry = models``).
database = databases.Database(database_url)
models = orm.ModelRegistry(database=database)
|
|
|
|
class WordAlignment(BaseModel):
    """Timing information for a single word of a transcript."""

    text: str
    alignedWord: str
    start: float
    end: float
    hasFailedAlignment: bool

    @classmethod
    def from_old_format(cls, data: dict, offset: float = 0.0):
        """Build a ``WordAlignment`` from a legacy alignment record.

        Args:
            data: Mapping holding the keys ``word``, ``alignedWord``,
                ``startTime``, ``endTime`` and ``hasFailedAlignment``.
            offset: Amount added to both ``startTime`` and ``endTime``.

        Returns:
            A new instance of ``cls`` populated from ``data``.

        Raises:
            KeyError: If ``data`` is missing one of the expected keys.
        """
        # Keys are read in the same order as the model fields are declared.
        converted = {
            "text": data["word"],
            "alignedWord": data["alignedWord"],
            "start": data["startTime"] + offset,
            "end": data["endTime"] + offset,
            "hasFailedAlignment": data["hasFailedAlignment"],
        }
        return cls(**converted)
|
|
|
|
def transform_alignment_data(data: List[dict], offset: float = 0.0) -> List[dict]:
    """Convert legacy alignment records into plain ``WordAlignment`` dicts.

    Args:
        data: Legacy alignment records, one per word.
        offset: Amount added to every word's start and end time.

    Returns:
        One ``model_dump()`` dictionary per input record, in input order.
    """
    converted: List[dict] = []
    for record in data:
        word = WordAlignment.from_old_format(record, offset)
        converted.append(word.model_dump())
    return converted
|
|
|
|
class Project(orm.Model):
    """A video project and the render manifest assembled from its scenes."""

    tablename = "projects"
    # Default timeline position; generate_json advances ``self.start`` as it
    # lays out each scene's visuals one after another.
    start = 0
    registry = models
    fields = {
        "id": orm.Integer(primary_key=True),
        "name": orm.String(max_length=10_000),
        "aspect_ratio": orm.Float(allow_null=True, default=0),
        "transcript": orm.JSON(allow_null=True, default=[]),
        "duration": orm.Integer(allow_null=True, default=0),
        "assets": orm.JSON(allow_null=True, default=[]),
        "links": orm.JSON(allow_null=True, default=[]),
        "constants": orm.JSON(allow_null=True, default={}),
    }

    async def get_all_scenes(self):
        """Return every scene that belongs to this project, ordered by id."""
        return await Scene.objects.filter(project=self).order_by("id").all()

    async def generate_json(self):
        """Rebuild the project's links, assets and constants from its scenes.

        For each scene this registers the narration file, fetches the scene
        transcript, and emits one audio asset plus one image or video asset
        per entry in ``scene.images``. The result is stored on the instance,
        persisted through ``self.update`` and returned.

        Returns:
            A dict with the keys ``links``, ``assets`` and ``constants``.

        Raises:
            IndexError: If a scene has an empty transcript or no images.
        """
        project_scenes: List[Scene] = await self.get_all_scenes()

        # Everything below is rebuilt from scratch, so the accumulators must
        # be reset as well. Otherwise the persisted duration grows on every
        # call (or fails on a NULL duration) and a reused instance would
        # continue the timeline from where the previous call stopped.
        self.links = []
        self.assets = []
        self.duration = 0
        self.start = type(self).start

        image_assets = []
        video_assets = []
        audio_assets = []
        text_stream = []

        # Time values are multiplied by 30 for "props" and "constants";
        # presumably a 30 fps frame rate (confirm against the renderer).
        frame_rate = 30

        self.links.append(
            {
                "file_name": "sfx_1.mp3",
                "link": "https://dm0qx8t0i9gc9.cloudfront.net/previews/audio/BsTwCwBHBjzwub4i4/click-match_My50GP4u_NWM.mp3?type=preview&origin=AUDIOBLOCKS&timestamp_ms=1715843203035&publicKey=kUhrS9sKVrQMTvByQMAGMM0jwRbJ4s31HTPVkfDGmwGhYqzmWJHsjIw5fZCkI7ba&organizationId=105711&apiVersion=2.0&stockItemId=28820&resolution=&endUserId=414d29f16694d76c58e7998200a8dcf6f28dc165&projectId=f734c6d7-e39d-4c1d-8f41-417f94cd37ce&searchId=adb77624-5919-41ee-84c6-58e7af098a6d&searchPageId=9124f65b-3e21-47ac-af6b-81387328b7b5",
            }
        )

        for scene in project_scenes:
            _, file_name = os.path.split(scene.narration_path)
            self.duration += scene.narration_duration or 0
            self.links.append({"file_name": file_name, "link": scene.narration_link})

            temp = await scene.generate_scene_transcript(offset=self.start)
            await asyncio.sleep(1)

            # The last transcript word marks where the narration ends; it is
            # used for timing only and left out of the text stream.
            end_word = temp[-1]
            audio_assets.append(
                {
                    "type": "audio",
                    "name": file_name,
                    "start": self.start,
                    "end": end_word["start"],
                    "props": {
                        "startFrom": 0,
                        # NOTE(review): end_word["start"] already includes the
                        # timeline offset while startFrom is clip-relative;
                        # confirm the renderer expects an absolute value here.
                        "endAt": end_word["start"] * frame_rate,
                    },
                }
            )
            text_stream.extend(temp[:-1])

            # The first entry decides whether the whole scene is treated as
            # video clips or still images (raises IndexError when empty).
            is_video = scene.images[0].split(".")[-1] == "mp4"

            for media_link in scene.images:
                clip_end = self.start + scene.image_duration
                if is_video:
                    file_name = str(uuid.uuid4()) + ".mp4"
                    self.links.append({"file_name": file_name, "link": media_link})
                    video_assets.append(
                        {
                            "type": "video",
                            "name": file_name,
                            "start": self.start,
                            "loop": "true",
                            "end": clip_end,
                            "props": {
                                "volume": 0,
                                "startFrom": 1 * frame_rate,
                                "endAt": 2.9 * frame_rate,
                                "playbackRate": 0.7,
                                "style": {
                                    "transform": "translate(-50%, -50%)",
                                    "position": "absolute",
                                    "top": "50%",
                                    "left": "50%",
                                    "width": 1920,
                                    "height": 1080,
                                    "objectFit": "cover",
                                },
                            },
                        }
                    )
                else:
                    file_name = str(uuid.uuid4()) + ".png"
                    self.links.append({"file_name": file_name, "link": media_link})
                    image_assets.append(
                        {
                            "type": "image",
                            "name": file_name,
                            "start": self.start,
                            "end": clip_end,
                        }
                    )
                # Each visual starts where the previous one ended.
                self.start = clip_end

        self.assets.append({"type": "audio", "sequence": audio_assets})
        self.assets.append({"type": "image", "sequence": image_assets})
        self.assets.append({"type": "video", "sequence": video_assets})
        self.constants = {
            "duration": self.duration * frame_rate,
            "height": 1920,
            "width": 1080,
        }
        self.assets.append({"type": "text", "sequence": text_stream})

        # Persist the rebuilt state using the instance's current attributes.
        await self.update(**self.__dict__)
        return {"links": self.links, "assets": self.assets, "constants": self.constants}
|
|
|
|
class Scene(orm.Model):
    """One narrated scene of a project together with its generated media."""

    # Text-to-speech client, created once and shared by all instances.
    tts = CharacterAITTS()
    voice = ""

    tablename = "scenes"
    registry = models
    fields = {
        "id": orm.Integer(primary_key=True),
        "voice": orm.String(max_length=100, allow_null=True, default=""),
        "project": orm.ForeignKey(Project),
        "images": orm.JSON(default=None),
        "narration": orm.String(max_length=10_000, allow_null=True, default=""),
        "image_prompts": orm.JSON(default=None),
        "narration_duration": orm.Float(allow_null=True, default=0),
        "image_duration": orm.Float(allow_null=True, default=0),
        "narration_path": orm.String(
            max_length=100,
            allow_null=True,
            default="",
        ),
        "narration_link": orm.String(max_length=10_000, allow_null=True, default=""),
    }

    def _narration_text(self):
        """Return the narration with the trailing sentinel word appended.

        Both speech generation and transcript alignment use this same text.
        ``Project.generate_json`` takes the start of the last transcript word
        as the end of the narration and drops that word from the text stream.
        """
        return self.narration + " master"

    async def generate_scene_transcript(self, offset):
        """Return the word-level transcript of the narration.

        Args:
            offset: Amount added to every word's start and end time.

        Returns:
            A list of ``WordAlignment`` dictionaries.
        """
        links = [self.narration_link]
        transcript = await self.tts._make_transcript(
            links=links, text=self._narration_text()
        )
        return transform_alignment_data(data=transcript, offset=offset)

    async def generate_scene_data(
        self, reference_image_url: str = None, ip_adapter_weight: float = 0.4
    ):
        """Generate narration and images concurrently, then derive durations.

        Args:
            reference_image_url: Forwarded unchanged to ``generate_images``.
            ip_adapter_weight: Forwarded unchanged to ``generate_images``.
        """
        await asyncio.gather(
            self.narrate(), self.generate_images(reference_image_url, ip_adapter_weight)
        )
        self.calculate_durations()

    async def narrate(self):
        """Generate the narration audio and record its link and local path.

        Raises:
            RuntimeError: If narration could not be generated after retries.
        """
        result = await self.retry_narration_generation()
        if result is None:
            # Fail with a clear message instead of the opaque unpacking
            # TypeError that a None result would otherwise trigger.
            raise RuntimeError("Narration generation failed after 3 attempts.")
        link, path = result
        self.narration_path = path
        self.narration_link = link

    async def retry_narration_generation(self):
        """Call the TTS service up to three times, pausing 1s after a failure.

        Returns:
            Whatever ``self.tts.say`` returns on the first successful attempt
            (unpacked by ``narrate`` as ``(link, path)``), or ``None`` when
            all three attempts fail.
        """
        max_attempts = 3
        for _ in range(max_attempts):
            try:
                return await self.tts.say(
                    text=self._narration_text(), speaker=self.voice
                )
            except Exception as e:
                print(f"Failed to generate narration: {e}")
                await asyncio.sleep(1)

        print("Failed to generate narration after 3 attempts.")
        return None

    def calculate_durations(self):
        """Derive narration and per-image durations from the narration file.

        The narration length is truncated to whole seconds and divided evenly
        between the image prompts. With no prompts the image duration is 0.
        """
        file_format = self.narration_path.split(".")[-1]
        audio_file = AudioSegment.from_file(self.narration_path, format=file_format)
        # len() of the loaded segment is divided by 1000 to get seconds.
        self.narration_duration = int(len(audio_file) / 1000)

        prompt_count = len(self.image_prompts or [])
        # Guard against an empty or missing prompt list, which would
        # otherwise raise ZeroDivisionError / TypeError.
        self.image_duration = (
            self.narration_duration / prompt_count if prompt_count else 0
        )

    async def generate_images(
        self,
        reference_image_url: str = "https://image.lexica.art/full_webp/d6ddd5c5-060c-4aba-b9d0-cf0e02dc65bd",
        ip_adapter_weight: float = 0.4,
    ):
        """Generate one image per prompt and store the URLs in ``self.images``.

        Failures are printed and skipped, so ``self.images`` may end up
        shorter than ``self.image_prompts``.

        Args:
            reference_image_url: Passed to the image generator with each prompt.
            ip_adapter_weight: Passed to the image generator with each prompt.
        """
        self.images = []
        async with aiohttp.ClientSession() as session:
            image_generator = ModalImageGenerator(session)
            # ``image_prompts`` defaults to None; treat that as no prompts.
            for prompt in self.image_prompts or []:
                try:
                    image_url = await image_generator.generate_image(
                        prompt, reference_image_url, ip_adapter_weight
                    )
                    self.images.append(image_url)
                except Exception as e:
                    # Best effort: report the failure and move on.
                    print(f"Failed to generate image for prompt '{prompt}': {str(e)}")
                # Short pause between consecutive requests.
                await asyncio.sleep(1)
|
|
|
|
class Transition(orm.Model):
    """ORM model for the ``transitions`` table: a named file reference."""

    registry = models
    tablename = "transitions"
    fields = {
        "id": orm.Integer(primary_key=True),
        "name": orm.String(max_length=100),
        "file_path": orm.String(max_length=100),
    }
|
|
|
|
class BackgroundMusic(orm.Model):
    """ORM model for the ``background_music`` table: a named file reference."""

    registry = models
    tablename = "background_music"
    fields = {
        "id": orm.Integer(primary_key=True),
        "name": orm.String(max_length=100),
        "file_path": orm.String(max_length=100),
    }
|
|
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
|
|
|
|
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
|
|
|
|
| |
| |
| |
|
|
| |
| |
| |
| |
|
|