Skip to content

Commit

Permalink
Merge pull request #478 from CaptainFact/staging
Browse files Browse the repository at this point in the history
Release
  • Loading branch information
Betree authored Sep 16, 2024
2 parents ff39336 + 7b6e999 commit 94c5fab
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 36 deletions.
53 changes: 31 additions & 22 deletions apps/cf/lib/llms/statements_creator.ex
Original file line number Diff line number Diff line change
Expand Up @@ -30,29 +30,38 @@ defmodule CF.LLMs.StatementsCreator do
Create statements from a video that has captions using LLMs
"""
def process_video!(video_id) do
DB.Schema.Video
|> join(:inner, [v], vc in DB.Schema.VideoCaption, on: v.id == vc.video_id)
|> where([v, vc], v.id == ^video_id)
|> order_by([v, vc], desc: vc.inserted_at)
|> limit(1)
|> select([v, vc], {v, vc})
|> DB.Repo.one()
|> case do
video = DB.Repo.get(DB.Schema.Video, video_id)
video_caption = fetch_or_download_captions(video)

if video_caption != nil do
video_caption.parsed
|> chunk_captions()
|> Enum.map(fn captions ->
video
|> get_llm_suggested_statements(captions)
|> filter_known_statements(video)
|> create_statements_from_inputs(video)
|> broadcast_statements(video)

Process.sleep(500)
end)
end
end

defp fetch_or_download_captions(video) do
case DB.Schema.VideoCaption
|> where([vc], vc.video_id == ^video.id)
|> order_by(desc: :inserted_at)
|> limit(1)
|> DB.Repo.one() do
nil ->
raise "Video or captions not found"

{video, video_caption} ->
video_caption.parsed
|> chunk_captions()
|> Enum.map(fn captions ->
video
|> get_llm_suggested_statements(captions)
|> filter_known_statements(video)
|> create_statements_from_inputs(video)
|> broadcast_statements(video)

Process.sleep(500)
end)
case CF.Videos.download_captions(video) do
{:ok, video_caption} -> video_caption
_ -> nil
end

video_caption ->
video_caption
end
end

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
"title": "<%= video.id %>"
},
"captions": <%= captions |> Enum.map(fn caption -> %{
"start": floor(caption["start"]),
"text": String.trim(caption["text"])
start: floor(caption["start"]),
text: String.trim(caption["text"])
} end) |> Jason.encode! %>
}
```
23 changes: 15 additions & 8 deletions apps/cf/lib/videos/videos.ex
Original file line number Diff line number Diff line change
Expand Up @@ -166,10 +166,7 @@ defmodule CF.Videos do

@doc """
Download and store captions for a video.
Returns captions if success or {:error, reason} if something bad happend.
Usage:
iex> download_captions(video)
Returns captions if success or {:error, reason} if something bad happened.
"""
def download_captions(video = %Video{}) do
# Try to fetch new captions
Expand All @@ -181,6 +178,19 @@ defmodule CF.Videos do
captions_base
|> VideoCaption.changeset(Map.merge(captions, %{video_id: video.id}))
|> Repo.insert_or_update()
|> case do
# The Atoms become strings when saving/loading from the DB, let's make things consistent
{:error, changeset} ->
{:error, changeset}

{:ok, _video_caption} ->
video
|> get_existing_captions()
|> case do
nil -> {:error, :not_found}
existing -> {:ok, existing}
end
end

# If no Youtube caption found, insert a dummy entry in DB to prevent retrying for 30 days
{:error, :not_found} ->
Expand All @@ -194,16 +204,13 @@ defmodule CF.Videos do
end

{:error, :not_found}

result ->
result
end
end

defp get_existing_captions(video) do
VideoCaption
|> where([vc], vc.video_id == ^video.id)
|> order_by(desc: :inserted_at)
|> order_by(desc: :updated_at)
|> limit(1)
|> Repo.one()
end
Expand Down
5 changes: 1 addition & 4 deletions apps/cf_jobs/lib/jobs/download_captions.ex
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,10 @@ defmodule CF.Jobs.DownloadCaptions do
import Ecto.Query

alias DB.Repo
alias DB.Schema.UserAction
alias DB.Schema.Video
alias DB.Schema.VideoCaption
alias DB.Schema.UsersActionsReport

alias CF.Jobs.ReportManager

@name :download_captions
@analyser_id UsersActionsReport.analyser_id(@name)

Expand Down Expand Up @@ -55,7 +52,7 @@ defmodule CF.Jobs.DownloadCaptions do
on: captions.video_id == v.id,
where:
is_nil(captions.id) or
captions.inserted_at < ^DateTime.add(DateTime.utc_now(), -30 * 24 * 60 * 60, :second),
captions.updated_at < ^DateTime.add(DateTime.utc_now(), -30 * 24 * 60 * 60, :second),
group_by: v.id,
order_by: [desc: v.inserted_at]
)
Expand Down

0 comments on commit 94c5fab

Please sign in to comment.