Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(llmobs): submit langchain similarity search spans #9971

Merged
merged 64 commits into from
Aug 27, 2024
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
64 commits
Select commit Hold shift + click to select a range
5a52bc7
Add llmobs set tags to tracer span
yahya-mouman Jul 18, 2024
66e14b2
Add parsing for similarity search tracing in LLMObs
yahya-mouman Jul 18, 2024
f124905
add import
yahya-mouman Jul 18, 2024
2e7361e
remove argument names
yahya-mouman Jul 28, 2024
9d73ce3
distinguish is_workflow case
yahya-mouman Jul 28, 2024
b60f91e
Add review note
yahya-mouman Jul 28, 2024
bd909a1
Add similarity search test
yahya-mouman Jul 28, 2024
8d4f767
Add pinecone fixture
yahya-mouman Jul 28, 2024
bed9371
refactor code using predefined api key
yahya-mouman Jul 28, 2024
d3ca383
Add langchain-pinecone to fixtures
yahya-mouman Jul 28, 2024
c5e6d8e
Use langchain-pinecone
yahya-mouman Jul 28, 2024
c890eb5
Merge branch 'main' into yahya/add-langchain-similarity-search-support
yahya-mouman Jul 29, 2024
6ff7689
added release notes
yahya-mouman Jul 29, 2024
290e369
Merge remote-tracking branch 'origin/yahya/add-langchain-similarity-s…
yahya-mouman Jul 29, 2024
10e6fe4
Update ddtrace/llmobs/_integrations/langchain.py
yahya-mouman Jul 29, 2024
50786bb
fix issues
yahya-mouman Jul 29, 2024
c8964a6
Merge remote-tracking branch 'origin/yahya/add-langchain-similarity-s…
yahya-mouman Jul 29, 2024
0e7dac0
remove fake api key
yahya-mouman Jul 29, 2024
e78c7fd
format
yahya-mouman Jul 29, 2024
8cdeb9c
Update ddtrace/llmobs/_integrations/langchain.py
yahya-mouman Jul 31, 2024
0f6dbc8
Update ddtrace/llmobs/_integrations/langchain.py
yahya-mouman Jul 31, 2024
efe50a4
Update ddtrace/llmobs/_integrations/langchain.py
yahya-mouman Aug 1, 2024
7997447
Update releasenotes/notes/feat-llmobs-submit-langchain-similarity-sea…
yahya-mouman Aug 1, 2024
61ffd20
address comments
yahya-mouman Aug 1, 2024
9ce8e2c
add test in community test suite
yahya-mouman Aug 1, 2024
b574d69
Merge branch 'refs/heads/main' into yahya/add-langchain-similarity-se…
yahya-mouman Aug 1, 2024
2effb9c
fix indentation and lint
yahya-mouman Aug 1, 2024
c1afca0
add pinecone client
yahya-mouman Aug 1, 2024
0b20def
add retrieval to supported operations
yahya-mouman Aug 1, 2024
d256c32
Use pinecone client for langchain < 0.1
yahya-mouman Aug 1, 2024
6e2407e
refactor
yahya-mouman Aug 1, 2024
6e38858
black
yahya-mouman Aug 1, 2024
5a1162a
format spell
yahya-mouman Aug 2, 2024
3027ba5
add support for retrieval expected span event
yahya-mouman Aug 8, 2024
ae21a4d
remove pinecone fixture
yahya-mouman Aug 8, 2024
b72142b
add pinecone version check
yahya-mouman Aug 8, 2024
7de55ce
Update assertions
yahya-mouman Aug 8, 2024
9d4ff9c
format
yahya-mouman Aug 8, 2024
681d2db
initialize id with empty string if not provided
yahya-mouman Aug 9, 2024
4e5ec20
Mock output documents to avoid writing long text in code
yahya-mouman Aug 9, 2024
5e04400
Remove model provider on retrieval spans
yahya-mouman Aug 9, 2024
8283ed1
refactor code
yahya-mouman Aug 9, 2024
fb55291
format code
yahya-mouman Aug 9, 2024
9a670a7
Merge branch 'refs/heads/main' into yahya/add-langchain-similarity-se…
yahya-mouman Aug 20, 2024
aafa540
Resolve conflict
yahya-mouman Aug 20, 2024
3b66854
submit to llmobs
yahya-mouman Aug 20, 2024
56492fb
Reformat span output
yahya-mouman Aug 21, 2024
fc02550
Update base test
yahya-mouman Aug 21, 2024
bba5848
Added cassette for community test
yahya-mouman Aug 21, 2024
ead542a
Fix typo
yahya-mouman Aug 21, 2024
f63c780
Fix typo
yahya-mouman Aug 21, 2024
b2b3cf5
Fix typo and add id and name
yahya-mouman Aug 21, 2024
38dd6f5
remove break
yahya-mouman Aug 21, 2024
fc10611
refactor tests
yahya-mouman Aug 21, 2024
21d6981
rufff
yahya-mouman Aug 21, 2024
ca96d2f
patch langchain_community similarity search
yahya-mouman Aug 22, 2024
21266dd
black
yahya-mouman Aug 22, 2024
7bffb9b
Discard redundant langchain_community patch
yahya-mouman Aug 22, 2024
2337bc2
Merge branch 'main' into yahya/add-langchain-similarity-search-support
yahya-mouman Aug 22, 2024
bc307e9
add get len safe embedding
yahya-mouman Aug 22, 2024
039aeb8
Merge remote-tracking branch 'origin/yahya/add-langchain-similarity-s…
yahya-mouman Aug 22, 2024
5a6613e
add get len safe embedding
yahya-mouman Aug 26, 2024
172f038
Address PR comments
yahya-mouman Aug 27, 2024
600a8e1
Remove try catches and replace them with default values
yahya-mouman Aug 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions ddtrace/contrib/langchain/patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -936,6 +936,14 @@ def traced_similarity_search(langchain, pin, func, instance, args, kwargs):
integration.metric(span, "incr", "request.error", 1)
raise
finally:
if integration.is_pc_sampled_llmobs(span):
yahya-mouman marked this conversation as resolved.
Show resolved Hide resolved
integration.llmobs_set_tags(
"retrieval",
span,
query,
documents,
error=bool(span.error),
)
span.finish()
integration.metric(span, "dist", "request.duration", span.duration_ns)
if integration.is_pc_sampled_log(span):
Expand Down
54 changes: 52 additions & 2 deletions ddtrace/llmobs/_integrations/langchain.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,17 @@
from ddtrace.llmobs._constants import METRICS
from ddtrace.llmobs._constants import MODEL_NAME
from ddtrace.llmobs._constants import MODEL_PROVIDER
from ddtrace.llmobs._constants import OUTPUT_DOCUMENTS
from ddtrace.llmobs._constants import OUTPUT_MESSAGES
from ddtrace.llmobs._constants import OUTPUT_VALUE
from ddtrace.llmobs._constants import SPAN_KIND

from ..utils import Document
from .base import BaseLLMIntegration


log = get_logger(__name__)


API_KEY = "langchain.request.api_key"
MODEL = "langchain.request.model"
PROVIDER = "langchain.request.provider"
Expand All @@ -48,7 +49,7 @@ class LangChainIntegration(BaseLLMIntegration):

def llmobs_set_tags(
self,
operation: str, # oneof "llm","chat","chain"
operation: str, # oneof "llm","chat","chain","retrieval"
span: Span,
inputs: Any,
response: Any = None,
Expand Down Expand Up @@ -79,6 +80,8 @@ def llmobs_set_tags(
self._llmobs_set_meta_tags_from_chat_model(span, inputs, response, error, is_workflow=is_workflow)
elif operation == "chain":
self._llmobs_set_meta_tags_from_chain(span, inputs, response, error)
elif operation == "retrieval":
self._llmobs_set_meta_tags_from_similarity_search(span, inputs, response, error, is_workflow=is_workflow)
span.set_tag_str(METRICS, json.dumps({}))

def _llmobs_set_metadata(self, span: Span, model_provider: Optional[str] = None) -> None:
Expand Down Expand Up @@ -194,6 +197,53 @@ def _llmobs_set_meta_tags_from_chain(
except TypeError:
log.warning("Failed to serialize chain output data to JSON")

def _llmobs_set_meta_tags_from_similarity_search(
    self,
    span: Span,
    input_query: str,
    output_documents: Union[List[Any], None],
    error: bool = False,
    is_workflow: bool = False,
) -> None:
    """Set the LLMObs tags describing a vectorstore similarity search on ``span``.

    The span kind is ``"workflow"`` when ``is_workflow`` is true and
    ``"retrieval"`` otherwise. The query is stored under ``INPUT_VALUE``. The
    output is stored under ``OUTPUT_VALUE`` for workflows and under
    ``OUTPUT_DOCUMENTS`` for retrieval spans.

    This runs while the traced call is being finalized, so it must never
    raise: every serialization or extraction failure is logged and skipped.

    :param span: span to tag.
    :param input_query: query passed to the similarity search; ignored if ``None``.
    :param output_documents: documents returned by the search, or ``None``.
    :param error: whether the traced call failed; the output tag is then set to ``""``.
    :param is_workflow: whether to tag the span as a workflow instead of a retrieval.
    """
    span.set_tag_str(SPAN_KIND, "workflow" if is_workflow else "retrieval")
    span.set_tag_str(MODEL_NAME, span.get_tag(MODEL) or "")
    span.set_tag_str(MODEL_PROVIDER, span.get_tag(PROVIDER) or "")

    output_tag_key = OUTPUT_VALUE if is_workflow else OUTPUT_DOCUMENTS

    if input_query is not None:
        try:
            # Format once and reuse the result for both the str and non-str case.
            formatted_inputs = self.format_io(input_query)
            if not isinstance(formatted_inputs, str):
                formatted_inputs = json.dumps(formatted_inputs)
            # The query is a plain string, not a chat message list, so it is
            # recorded as an input value for both span kinds.
            span.set_tag_str(INPUT_VALUE, formatted_inputs)
        except TypeError:
            log.warning("Failed to serialize similarity input query to JSON")

    if error:
        span.set_tag_str(output_tag_key, "")
        return
    if not isinstance(output_documents, list):
        return

    if is_workflow:
        try:
            span.set_tag_str(output_tag_key, json.dumps(output_documents))
        except TypeError:
            log.warning("Failed to serialize similarity output documents to JSON")
        return

    documents = []
    skipped = 0
    for d in output_documents:
        text = getattr(d, "page_content", None)
        if text is None:
            # Without text there is nothing meaningful to report for this document.
            skipped += 1
            continue
        # ``id`` and ``metadata["source"]`` are optional: add them per document
        # so that one incomplete document does not strip them from the others.
        fields = {"text": text}
        doc_id = getattr(d, "id", None)
        if doc_id is not None:
            fields["id"] = doc_id
        metadata = getattr(d, "metadata", None)
        if isinstance(metadata, dict) and metadata.get("source") is not None:
            fields["name"] = metadata["source"]
        documents.append(Document(**fields))
    if skipped:
        log.warning("Failed to extract document information from similarity output")

    try:
        span.set_tag_str(output_tag_key, json.dumps(self.format_io(documents)))
    except TypeError:
        log.warning("Failed to serialize similarity output documents to JSON")

def _set_base_span_tags( # type: ignore[override]
self,
span: Span,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
features:
- |
LLM Observability: The LangChain integration now submits vectorstore similarity search spans to LLM Observability as retrieval spans.
yahya-mouman marked this conversation as resolved.
Show resolved Hide resolved
16 changes: 16 additions & 0 deletions tests/contrib/langchain/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,3 +147,19 @@ def langchain_anthropic(ddtrace_config_langchain, mock_logs, mock_metrics, langc
yield langchain_anthropic
except ImportError:
yield


@pytest.fixture
def langchain_pinecone(ddtrace_config_langchain, mock_logs, mock_metrics, langchain):
    """Yield the ``langchain_pinecone`` module, or ``None`` when it is not installed.

    ``PINECONE_API_KEY`` and ``PINECONE_ENV`` are overridden for the lifetime of
    the fixture, falling back to placeholder values when they are unset.
    """
    pinecone_env = {
        "PINECONE_API_KEY": os.getenv("PINECONE_API_KEY", "<not-a-real-key>"),
        "PINECONE_ENV": os.getenv("PINECONE_ENV", "<not-a-real-env>"),
    }
    with override_env(pinecone_env):
        try:
            import langchain_pinecone as pinecone_module
        except ImportError:
            # The package is optional; tests are expected to skip on ``None``.
            pinecone_module = None
        yield pinecone_module
43 changes: 43 additions & 0 deletions tests/contrib/langchain/test_langchain_llmobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,14 @@ def _invoke_chain(cls, chain, prompt, mock_tracer, cassette_name, batch=False):
LLMObs.disable()
return mock_tracer.pop_traces()[0]

@classmethod
def _similarity_search(cls, vector_db, prompt, k, mock_tracer, cassette_name):
    """Run a similarity search with LLMObs enabled and return the resulting span.

    The search is replayed from ``cassette_name``. The return value is the first
    span of the first trace popped from ``mock_tracer`` (a single span, not a list).
    """
    LLMObs.enable(ml_app=cls.ml_app, integrations_enabled=False, _tracer=mock_tracer)
    try:
        with get_request_vcr(subdirectory_name=cls.cassette_subdirectory_name).use_cassette(cassette_name):
            vector_db.similarity_search(prompt, k)
    finally:
        # Always disable LLMObs so a failing search cannot leak the enabled
        # state into subsequent tests.
        LLMObs.disable()
    return mock_tracer.pop_traces()[0][0]


@pytest.mark.skipif(not PATCH_LANGCHAIN_V0, reason="These tests are for langchain < 0.1.0")
class TestLLMObsLangchain(BaseTestLLMObsLangchain):
Expand Down Expand Up @@ -324,6 +332,41 @@ def test_llmobs_chain_schema_io(self, langchain, mock_llmobs_span_writer, mock_t
)
_assert_expected_llmobs_llm_span(trace[1], mock_llmobs_span_writer, mock_io=True)

@pytest.mark.skipif(sys.version_info < (3, 10, 0), reason="Requires unnecessary cassette file for Python 3.9")
def test_llmobs_similarity_search(self, langchain, langchain_pinecone, mock_llmobs_span_writer, mock_tracer):
    """A Pinecone similarity search submits exactly one retrieval span event to LLMObs."""
    if langchain_pinecone is None:
        pytest.skip("langchain_pinecone not installed which is required for this test.")
    embedding = langchain.embeddings.OpenAIEmbeddings(model="text-embedding-ada-002")
    index_name = "langchain-retrieval"
    namespace = "langchain-retrieval"
    vectorstore = langchain_pinecone.PineconeVectorStore(
        index_name=index_name,
        embedding=embedding,
        namespace=namespace,
    )
    # ``_similarity_search`` already returns a single span (``pop_traces()[0][0]``),
    # so no list unwrapping is needed here.
    span = self._similarity_search(
        vectorstore, "Who was Alan Turing?", 1, mock_tracer, "openai_pinecone_similarity_search.yaml"
    )
    assert mock_llmobs_span_writer.enqueue.call_count == 1
    mock_llmobs_span_writer.enqueue.assert_called_with(
        _expected_llmobs_non_llm_span_event(
            span,
            # The integration tags non-workflow similarity searches as "retrieval".
            span_kind="retrieval",
            input_value="Who was Alan Turing?",
            # NOTE(review): the integration emits documents with "id"/"text"/"name"
            # keys; confirm this expected payload (which uses "title") against the
            # recorded cassette and the expected-event helper.
            output_value=[
                {
                    "id": 13,
                    "title": "Alan Turing",
                    "text": "A brilliant mathematician and cryptographer Alan was to become the founder of "
                    "modern-day computer science and artificial intelli...",
                }
            ],
            tags={"ml_app": ""},
            integration="langchain",
        )
    )

yahya-mouman marked this conversation as resolved.
Show resolved Hide resolved

@flaky(1735812000, reason="Community cassette tests are flaky")
@pytest.mark.skipif(PATCH_LANGCHAIN_V0, reason="These tests are for langchain >= 0.1.0")
Expand Down
Loading