diff --git a/poetry.lock b/poetry.lock index 6918f4c..5ecb985 100644 --- a/poetry.lock +++ b/poetry.lock @@ -341,6 +341,21 @@ files = [ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +[[package]] +name = "dataclasses-json" +version = "0.6.7" +description = "Easily serialize dataclasses to and from JSON." +optional = false +python-versions = "<4.0,>=3.7" +files = [ + {file = "dataclasses_json-0.6.7-py3-none-any.whl", hash = "sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a"}, + {file = "dataclasses_json-0.6.7.tar.gz", hash = "sha256:b6b3e528266ea45b9535223bc53ca645f5208833c29229e847b3f26a1cc55fc0"}, +] + +[package.dependencies] +marshmallow = ">=3.18.0,<4.0.0" +typing-inspect = ">=0.4.0,<1" + [[package]] name = "distro" version = "1.9.0" @@ -742,8 +757,8 @@ langchain-core = ">=0.2.35,<0.3.0" langchain-text-splitters = ">=0.2.0,<0.3.0" langsmith = ">=0.1.17,<0.2.0" numpy = [ - {version = ">=1.26.0,<2.0.0", markers = "python_version >= \"3.12\""}, {version = ">=1,<2", markers = "python_version < \"3.12\""}, + {version = ">=1.26.0,<2.0.0", markers = "python_version >= \"3.12\""}, ] pydantic = ">=1,<3" PyYAML = ">=5.3" @@ -751,6 +766,32 @@ requests = ">=2,<3" SQLAlchemy = ">=1.4,<3" tenacity = ">=8.1.0,<8.4.0 || >8.4.0,<9.0.0" +[[package]] +name = "langchain-community" +version = "0.2.15" +description = "Community contributed LangChain integrations." +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "langchain_community-0.2.15-py3-none-any.whl", hash = "sha256:edcf8e9829559822a044a193b66cbea40600b8b9ce9b435fa0ae96f69377be46"}, + {file = "langchain_community-0.2.15.tar.gz", hash = "sha256:5bd5a6d055b07ee228eb6a9f0ca59d8124a1d450e4c82a6a62cec3add3cb73a9"}, +] + +[package.dependencies] +aiohttp = ">=3.8.3,<4.0.0" +dataclasses-json = ">=0.5.7,<0.7" +langchain = ">=0.2.15,<0.3.0" +langchain-core = ">=0.2.37,<0.3.0" +langsmith = ">=0.1.0,<0.2.0" +numpy = [ + {version = ">=1,<2", markers = "python_version < \"3.12\""}, + {version = ">=1.26.0,<2.0.0", markers = "python_version >= \"3.12\""}, +] +PyYAML = ">=5.3" +requests = ">=2,<3" +SQLAlchemy = ">=1.4,<3" +tenacity = ">=8.1.0,<8.4.0 || >8.4.0,<9.0.0" + [[package]] name = "langchain-core" version = "0.2.37" @@ -767,8 +808,8 @@ jsonpatch = ">=1.33,<2.0" langsmith = ">=0.1.75,<0.2.0" packaging = ">=23.2,<25" pydantic = [ - {version = ">=2.7.4,<3.0.0", markers = "python_full_version >= \"3.12.4\""}, {version = ">=1,<3", markers = "python_full_version < \"3.12.4\""}, + {version = ">=2.7.4,<3.0.0", markers = "python_full_version >= \"3.12.4\""}, ] PyYAML = ">=5.3" tenacity = ">=8.1.0,<8.4.0 || >8.4.0,<9.0.0" @@ -819,8 +860,8 @@ files = [ httpx = ">=0.23.0,<1" orjson = ">=3.9.14,<4.0.0" pydantic = [ - {version = ">=2.7.4,<3.0.0", markers = "python_full_version >= \"3.12.4\""}, {version = ">=1,<3", markers = "python_full_version < \"3.12.4\""}, + {version = ">=2.7.4,<3.0.0", markers = "python_full_version >= \"3.12.4\""}, ] requests = ">=2,<3" @@ -1047,6 +1088,25 @@ files = [ {file = "MarkupSafe-2.1.5.tar.gz", hash = "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b"}, ] +[[package]] +name = "marshmallow" +version = "3.22.0" +description = "A lightweight library for converting complex datatypes to and from native Python datatypes." +optional = false +python-versions = ">=3.8" +files = [ + {file = "marshmallow-3.22.0-py3-none-any.whl", hash = "sha256:71a2dce49ef901c3f97ed296ae5051135fd3febd2bf43afe0ae9a82143a494d9"}, + {file = "marshmallow-3.22.0.tar.gz", hash = "sha256:4972f529104a220bb8637d595aa4c9762afbe7f7a77d82dc58c1615d70c5823e"}, +] + +[package.dependencies] +packaging = ">=17.0" + +[package.extras] +dev = ["marshmallow[tests]", "pre-commit (>=3.5,<4.0)", "tox"] +docs = ["alabaster (==1.0.0)", "autodocsumm (==0.2.13)", "sphinx (==8.0.2)", "sphinx-issues (==4.1.0)", "sphinx-version-warning (==1.1.2)"] +tests = ["pytest", "pytz", "simplejson"] + [[package]] name = "multidict" version = "6.0.5" @@ -1146,6 +1206,17 @@ files = [ {file = "multidict-6.0.5.tar.gz", hash = "sha256:f7e301075edaf50500f0b341543c41194d8df3ae5caf4702f2095f3ca73dd8da"}, ] +[[package]] +name = "mypy-extensions" +version = "1.0.0" +description = "Type system extensions for programs checked with the mypy type checker." +optional = false +python-versions = ">=3.5" +files = [ + {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, + {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, +] + [[package]] name = "numpy" version = "1.26.4" @@ -1307,8 +1378,8 @@ files = [ annotated-types = ">=0.4.0" pydantic-core = "2.20.1" typing-extensions = [ - {version = ">=4.12.2", markers = "python_version >= \"3.13\""}, {version = ">=4.6.1", markers = "python_version < \"3.13\""}, + {version = ">=4.12.2", markers = "python_version >= \"3.13\""}, ] [package.extras] @@ -1818,6 +1889,21 @@ files = [ {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, ] +[[package]] +name = "typing-inspect" +version = "0.9.0" +description = "Runtime inspection utilities for typing module." +optional = false +python-versions = "*" +files = [ + {file = "typing_inspect-0.9.0-py3-none-any.whl", hash = "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f"}, + {file = "typing_inspect-0.9.0.tar.gz", hash = "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78"}, +] + +[package.dependencies] +mypy-extensions = ">=0.3.0" +typing-extensions = ">=3.7.4" + [[package]] name = "urllib3" version = "2.2.2" @@ -1952,4 +2038,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "d5bdf9415cce9ae61d6a690f5dd7e912dc05bdb8221125ffc424634c7b114411" +content-hash = "c2bf9cc9f26dcf150a840ec6529c4e2211a2faed5ad0321ab51c6cae6a5365da" diff --git a/pyproject.toml b/pyproject.toml index 14d0957..7de6af5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,7 @@ langchain = "^0.2.15" html5lib = "^1.1" pydantic = "^2.5.0" langchain-openai = "^0.1.23" +langchain-community = "^0.2.15" [build-system] diff --git a/sg_law_cookies/main.py b/sg_law_cookies/main.py index b99f04e..3adbccd 100644 --- a/sg_law_cookies/main.py +++ b/sg_law_cookies/main.py @@ -3,13 +3,13 @@ import requests from bs4 import BeautifulSoup from jinja2 import Environment, PackageLoader -from langchain.chat_models import ChatOpenAI from langchain.prompts import ( SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate, AIMessagePromptTemplate, ) +from langchain_openai import ChatOpenAI from pydantic import BaseModel, AnyHttpUrl system_template = """As an AI expert in legal affairs, your task is to provide concise, yet comprehensive @@ -25,7 +25,7 @@ 2. Outline the main legal aspects, implications, and precedents highlighted in the article. 3. End the summary with a succinct conclusion or takeaway. -Aim for summaries to be no more than five sentences, but ensure they efficiently deliver the key legal insights, +The summaries should not be longer than 100 words, but ensure they efficiently deliver the key legal insights, making them beneficial for quick comprehension. The end goal is to help the lawyers understand the crux of the articles without having to read them in their entirety.""" @@ -142,8 +142,8 @@ def get_summary(article: ScrapedArticle) -> NewsArticle: messages = article_summary_prompt.format_prompt( article=article_content ).to_messages() - chat = ChatOpenAI(model_name="gpt-4o-mini") - summary_response = chat(messages) + chat = ChatOpenAI(model="gpt-4o-mini") + summary_response = chat.invoke(messages) return NewsArticle( category=article.category, title=article.title, @@ -193,9 +193,9 @@ def get_summaries(articles: list[ScrapedArticle]): day_messages = day_messages + day_summary_prompt.format_messages() - chat = ChatOpenAI(model_name="gpt-4", temperature=0.25) + chat = ChatOpenAI(model="gpt-4o", temperature=0.25) - day_summary = chat(day_messages) + day_summary = chat.invoke(day_messages) return summaries, day_summary.content.splitlines()