diff --git a/apps/cf/lib/llms/statements_creator.ex b/apps/cf/lib/llms/statements_creator.ex index 5005b198..f6337546 100644 --- a/apps/cf/lib/llms/statements_creator.ex +++ b/apps/cf/lib/llms/statements_creator.ex @@ -30,29 +30,38 @@ defmodule CF.LLMs.StatementsCreator do Create statements from a video that has captions using LLMs """ def process_video!(video_id) do - DB.Schema.Video - |> join(:inner, [v], vc in DB.Schema.VideoCaption, on: v.id == vc.video_id) - |> where([v, vc], v.id == ^video_id) - |> order_by([v, vc], desc: vc.inserted_at) - |> limit(1) - |> select([v, vc], {v, vc}) - |> DB.Repo.one() - |> case do + video = DB.Repo.get(DB.Schema.Video, video_id) + video_caption = fetch_or_download_captions(video) + + if video_caption != nil do + video_caption.parsed + |> chunk_captions() + |> Enum.map(fn captions -> + video + |> get_llm_suggested_statements(captions) + |> filter_known_statements(video) + |> create_statements_from_inputs(video) + |> broadcast_statements(video) + + Process.sleep(500) + end) + end + end + + defp fetch_or_download_captions(video) do + case DB.Schema.VideoCaption + |> where([vc], vc.video_id == ^video.id) + |> order_by(desc: :inserted_at) + |> limit(1) + |> DB.Repo.one() do nil -> - raise "Video or captions not found" - - {video, video_caption} -> - video_caption.parsed - |> chunk_captions() - |> Enum.map(fn captions -> - video - |> get_llm_suggested_statements(captions) - |> filter_known_statements(video) - |> create_statements_from_inputs(video) - |> broadcast_statements(video) - - Process.sleep(500) - end) + case CF.Videos.download_captions(video) do + {:ok, video_caption} -> video_caption + _ -> nil + end + + video_caption -> + video_caption end end diff --git a/apps/cf/lib/llms/templates/statements_extractor_user_prompt.eex b/apps/cf/lib/llms/templates/statements_extractor_user_prompt.eex index 466c9848..4efc3c69 100644 --- a/apps/cf/lib/llms/templates/statements_extractor_user_prompt.eex +++ b/apps/cf/lib/llms/templates/statements_extractor_user_prompt.eex @@ -4,8 +4,8 @@ "title": "<%= video.id %>" }, "captions": <%= captions |> Enum.map(fn caption -> %{ - "start": floor(caption["start"]), - "text": String.trim(caption["text"]) + start: floor(caption["start"]), + text: String.trim(caption["text"]) } end) |> Jason.encode! %> } ``` diff --git a/apps/cf/lib/videos/videos.ex b/apps/cf/lib/videos/videos.ex index 05b26302..e975a42f 100644 --- a/apps/cf/lib/videos/videos.ex +++ b/apps/cf/lib/videos/videos.ex @@ -166,10 +166,7 @@ defmodule CF.Videos do @doc """ Download and store captions for a video. - Returns captions if success or {:error, reason} if something bad happend. - - Usage: - iex> download_captions(video) + Returns captions if success or {:error, reason} if something bad happened. """ def download_captions(video = %Video{}) do # Try to fetch new captions @@ -181,6 +178,19 @@ defmodule CF.Videos do captions_base |> VideoCaption.changeset(Map.merge(captions, %{video_id: video.id})) |> Repo.insert_or_update() + |> case do + # The Atoms become strings when saving/loading from the DB, let's make things consistent + {:error, changeset} -> + {:error, changeset} + + {:ok, _video_caption} -> + video + |> get_existing_captions() + |> case do + nil -> {:error, :not_found} + existing -> {:ok, existing} + end + end # If no Youtube caption found, insert a dummy entry in DB to prevent retrying for 30 days {:error, :not_found} -> @@ -194,16 +204,13 @@ defmodule CF.Videos do end {:error, :not_found} - - result -> - result end end defp get_existing_captions(video) do VideoCaption |> where([vc], vc.video_id == ^video.id) - |> order_by(desc: :inserted_at) + |> order_by(desc: :updated_at) |> limit(1) |> Repo.one() end diff --git a/apps/cf_jobs/lib/jobs/download_captions.ex b/apps/cf_jobs/lib/jobs/download_captions.ex index d955ef33..c5d0214d 100644 --- a/apps/cf_jobs/lib/jobs/download_captions.ex +++ b/apps/cf_jobs/lib/jobs/download_captions.ex @@ -5,13 +5,10 @@ defmodule CF.Jobs.DownloadCaptions do import Ecto.Query alias DB.Repo - alias DB.Schema.UserAction alias DB.Schema.Video alias DB.Schema.VideoCaption alias DB.Schema.UsersActionsReport - alias CF.Jobs.ReportManager - @name :download_captions @analyser_id UsersActionsReport.analyser_id(@name) @@ -55,7 +52,7 @@ defmodule CF.Jobs.DownloadCaptions do on: captions.video_id == v.id, where: is_nil(captions.id) or - captions.inserted_at < ^DateTime.add(DateTime.utc_now(), -30 * 24 * 60 * 60, :second), + captions.updated_at < ^DateTime.add(DateTime.utc_now(), -30 * 24 * 60 * 60, :second), group_by: v.id, order_by: [desc: v.inserted_at] )