Skip to content

Commit

Permalink
Base moodle's file_exist method on the file content instead of headers
Browse files Browse the repository at this point in the history
In moodle we can't make a API call to check if a file exists and we have
to rely on the URL where file will be downloaded.

When a problem occurs we get a 200 response with  a JSON document
with the reason of the error.

We don't want to always download the document as it will actually
download the full PDF file for success cases, the most common case.

Until now we been relying on the headers of a HEAD HTTP request,
interpreting JSON responses as the file being missing.

We have found at least one school that doesn't include the content-type
header on the response so he are switch the approach to inspect the
first bytes of the response and check if we are getting a PDF back from
Moodle.
  • Loading branch information
marcospri committed Nov 5, 2024
1 parent 0765b53 commit 5ecbd43
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 13 deletions.
15 changes: 8 additions & 7 deletions lms/services/moodle.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,14 +137,15 @@ def list_files(self, course_id: int):
def file_exists(self, file_id) -> bool:
"""Check if the file exists in the course."""
# Moodle file IDs are URLs, but they need the token to be accessible
response = self._http.request("HEAD", f"{file_id}&token={self.token}")
response = self._http.request(
"GET", f"{file_id}&token={self.token}", headers={"Range": "bytes=0-5"}
)
# Moodle API doesn't use status codes, we can't rely on that.
# We don't want to download the full file so we'll do a HEAD request and assume:
# - JSON response, it's an error response
# - Anything else, it's the file we are after

LOG.info("Headers from Moodle file check %s", response.headers)
return not response.headers["content-type"].startswith("application/json")
# We don't want to download the full file so we'll do a GET request for the first bytes
# and check if the files is indeed a PDF.
# Most error response end up being JSON but not all of them are, some are HTML.
LOG.info("Content from Moodle file check %s", response.text)
return "%PDF" in response.text

def page(self, course_id, page_id) -> dict | None:
url = self._api_url(Function.GET_PAGES)
Expand Down
14 changes: 8 additions & 6 deletions tests/unit/lms/services/moodle_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,18 +150,20 @@ def test_list_files(self, svc, http_service, contents):
]

@pytest.mark.parametrize(
"header,expected",
"response,expected",
[
("application/json", False),
("application/pdf", True),
("some other content", False),
("%PDF-14", True),
],
)
def test_file_exists(self, svc, http_service, header, expected):
http_service.request.return_value = Mock(headers={"content-type": header})
def test_file_exists(self, svc, http_service, response, expected):
http_service.request.return_value = Mock(text=response)

assert svc.file_exists("URL") == expected

http_service.request.assert_called_once_with("HEAD", "URL&token=sentinel.token")
http_service.request.assert_called_once_with(
"GET", "URL&token=sentinel.token", headers={"Range": "bytes=0-5"}
)

def test_list_pages(self, svc, http_service, contents):
http_service.post.return_value.json.return_value = contents
Expand Down

0 comments on commit 5ecbd43

Please sign in to comment.