Skip to content

Commit

Permalink
attachextract: add options to handle timeout and idle connections
Browse files Browse the repository at this point in the history
Signed-off-by: Robert Stepanek <[email protected]>
  • Loading branch information
rsto committed Apr 17, 2023
1 parent 206184a commit 72ae24b
Show file tree
Hide file tree
Showing 5 changed files with 124 additions and 20 deletions.
52 changes: 52 additions & 0 deletions cassandane/Cassandane/Cyrus/SearchFuzzy.pm
Original file line number Diff line number Diff line change
Expand Up @@ -2169,4 +2169,56 @@ sub test_squatter_attachextract_nolock_slow
$self->assert($released_timestamp lt $extractor_timestamp lt $reacquired_timestamp);
}

sub test_squatter_attachextract_timeout_slow
:min_version_3_9 :needs_search_xapian :SearchAttachmentExtractor :NoCheckSyslog
{
my ($self) = @_;
my $instance = $self->{instance};
my $imap = $self->{store}->get_client();

my $tracedir = tempdir (DIR => $instance->{basedir} . "/tmp");

# SearchAttachmentExtractor magic configures Cyrus to
# wait at most 3 seconds for a response from extractor

$self->start_echo_extractor(
tracedir => $tracedir,
response_delay_seconds => 4,
);

xlog $self, "Make message with attachment";
$self->make_message("msg2",
mime_type => "multipart/related",
mime_boundary => "123456789abcdef",
body => ""
."\r\n--123456789abcdef\r\n"
."Content-Type: text/plain\r\n"
."\r\n"
."bodyterm"
."\r\n--123456789abcdef\r\n"
."Content-Type: application/pdf\r\n"
."\r\n"
."attachterm"
."\r\n--123456789abcdef--\r\n");

xlog $self, "Clear syslog";
$self->{instance}->getsyslog();

xlog $self, "Run squatter (allowing partials)";
$self->{instance}->run_command({cyrus => 1}, 'squatter', '-v', '-p');

xlog "Assert text body is indexed";
my $uids = $imap->search('fuzzy', 'body', 'bodyterm');
$self->assert_deep_equals([1], $uids);

xlog "Assert attachement isnot indexed";
$uids = $imap->search('fuzzy', 'xattachmentbody', 'attachterm');
$self->assert_deep_equals([], $uids);

xlog "Assert extractor got only called once";
my @tracefiles = glob($tracedir."/*");
$self->assert_num_equals(1, scalar @tracefiles);
$self->assert_matches(qr/req1_GET_/, $tracefiles[0]);
}

1;
8 changes: 5 additions & 3 deletions cassandane/Cassandane/Cyrus/TestCase.pm
Original file line number Diff line number Diff line change
Expand Up @@ -369,12 +369,14 @@ magic(JMAPExtensions => sub {
});
magic(SearchAttachmentExtractor => sub {
my $port = Cassandane::PortManager::alloc("localhost");
shift->config_set('search_attachment_extractor_url' =>
my $self = shift;
$self->config_set('search_attachment_extractor_url' =>
"http://localhost:$port/extractor");
$self->config_set('search_attachment_extractor_request_timeout' => '3s');
$self->config_set('search_attachment_extractor_idle_timeout' => '3s');
});
magic(SearchLanguage => sub {
my $self = shift;
$self->config_set('search_index_language' => 'yes');
shift->config_set('search_index_language' => 'yes');
});
magic(SieveUTF8Fileinto => sub {
shift->config_set('sieve_utf8fileinto' => 'yes');
Expand Down
20 changes: 20 additions & 0 deletions changes/next/squatter_nolock_attachment_text_extraction
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
Description:

Updates squatter to not lock a mailbox while extracting text
from attachments.


Config changes:

search_attachment_extractor_request_timeout
search_attachment_extractor_idle_timeout


Upgrade instructions:

None.


GitHub issue:

None.
51 changes: 34 additions & 17 deletions imap/attachextract.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ struct extractor_ctx {

static char *attachextract_cachedir = NULL;
static int attachextract_cacheonly = 0;
static unsigned attachextract_idle_timeout = 5 * 60;
static unsigned attachextract_request_timeout = 5 * 60;

static struct extractor_ctx *global_extractor = NULL;

Expand All @@ -91,23 +93,20 @@ static void extractor_disconnect(struct extractor_ctx *ext)
}

static struct prot_waitevent *
extractor_timeout(struct protstream *s __attribute__((unused)),
struct prot_waitevent *ev __attribute__((unused)),
void *rock)
extractor_idle_timeout_cb(struct protstream *s __attribute__((unused)),
struct prot_waitevent *ev __attribute__((unused)),
void *rock)
{
struct extractor_ctx *ext = rock;

syslog(LOG_DEBUG, "extractor_timeout(%p)", ext);
syslog(LOG_DEBUG, "extractor_idle_timeout(%p)", ext);

/* too long since we last used the extractor - disconnect */
extractor_disconnect(ext);

return NULL;
}


#define IDLE_TIMEOUT (5 * 60) /* 5 min */

static int login(struct backend *s __attribute__((unused)),
const char *userid __attribute__((unused)),
sasl_callback_t *cb __attribute__((unused)),
Expand Down Expand Up @@ -142,7 +141,9 @@ static int extractor_connect(struct extractor_ctx *ext)
be = ext->be;
if (be && be->sock != -1) {
// extend the timeout
if (be->timeout) be->timeout->mark = now + IDLE_TIMEOUT;
if (be->timeout) {
be->timeout->mark = now + attachextract_idle_timeout;
}
return 0;
}

Expand All @@ -157,12 +158,14 @@ static int extractor_connect(struct extractor_ctx *ext)
return IMAP_IOERROR;
}

// set request timeout
prot_settimeout(be->in, attachextract_request_timeout);

if (ext->clientin) {
/* add a default timeout */
/* set idle timeout */
be->clientin = ext->clientin;
be->timeout = prot_addwaitevent(ext->clientin,
now + IDLE_TIMEOUT,
extractor_timeout, ext);
now + attachextract_idle_timeout, extractor_idle_timeout_cb, ext);
}

return 0;
Expand Down Expand Up @@ -281,6 +284,8 @@ EXPORTED int attachextract_extract(const struct attachextract_record *axrec,
hostlen = strcspn(ext->hostname, "/");
guidstr = message_guid_encode(&axrec->guid);

prot_settimeout(be->in, attachextract_idle_timeout);

/* try to fetch previously extracted text */
unsigned statuscode = 0;
prot_printf(be->out,
Expand All @@ -294,7 +299,7 @@ EXPORTED int attachextract_extract(const struct attachextract_record *axrec,
"\r\n",
ext->path, guidstr, HTTP_VERSION,
(int) hostlen, be->hostname, CYRUS_VERSION,
IDLE_TIMEOUT, config_search_maxsize);
attachextract_idle_timeout, config_search_maxsize);
prot_flush(be->out);

/* Read GET response */
Expand Down Expand Up @@ -356,7 +361,7 @@ EXPORTED int attachextract_extract(const struct attachextract_record *axrec,
"X-Truncate-Length: " SIZE_T_FMT "\r\n"
"\r\n",
ext->path, guidstr, HTTP_VERSION,
(int) hostlen, be->hostname, CYRUS_VERSION, IDLE_TIMEOUT,
(int) hostlen, be->hostname, CYRUS_VERSION, attachextract_idle_timeout,
buf_cstring(&ctypehdr), buf_len(data), config_search_maxsize);
prot_putbuf(be->out, data);
prot_flush(be->out);
Expand All @@ -382,13 +387,14 @@ EXPORTED int attachextract_extract(const struct attachextract_record *axrec,
goto gotdata;
}

if (statuscode >= 400 && statuscode <= 499) {
if ((statuscode >= 400 && statuscode <= 499) || statuscode == 599) {
/* indexer can't extract this for some reason, never try again */
goto done;
}

/* any other status code is an error */
syslog(LOG_ERR, "extract GOT STATUSCODE %d with timeout %d: %s", statuscode, IDLE_TIMEOUT, errstr);
syslog(LOG_ERR, "extract GOT STATUSCODE %d with timeout %d: %s",
statuscode, attachextract_request_timeout, errstr);
}

// dropped out of the loop? Then we failed!
Expand Down Expand Up @@ -446,12 +452,23 @@ EXPORTED int attachextract_extract(const struct attachextract_record *axrec,

EXPORTED void attachextract_init(struct protstream *clientin)
{
syslog(LOG_DEBUG, "extractor_init(%p)", clientin);

/* Read config */
attachextract_idle_timeout =
config_getduration(IMAPOPT_SEARCH_ATTACHMENT_EXTRACTOR_IDLE_TIMEOUT, 's');

attachextract_request_timeout =
config_getduration(IMAPOPT_SEARCH_ATTACHMENT_EXTRACTOR_REQUEST_TIMEOUT, 's');

if (attachextract_idle_timeout < attachextract_request_timeout)
attachextract_idle_timeout = attachextract_request_timeout;

const char *exturl =
config_getstring(IMAPOPT_SEARCH_ATTACHMENT_EXTRACTOR_URL);
if (!exturl) return;

syslog(LOG_DEBUG, "extractor_init(%p)", clientin);

/* Initialize extractor URL */
char scheme[6], server[100], path[256], *p;
unsigned https, port;

Expand Down
13 changes: 13 additions & 0 deletions lib/imapoptions
Original file line number Diff line number Diff line change
Expand Up @@ -2356,7 +2356,20 @@ If all partitions are over that limit, this feature is not used anymore.
to the extractor.
Xapian only.
*/
{ "search_attachment_extractor_request_timeout", "5m", DURATION, "3.9.0" }
/* Defines the duration after which to cancel non-responding
requests to the search attachment extractor service.

If no unit is specified, seconds is assumed. */
*/
{ "search_attachment_extractor_idle_timeout", "5m", DURATION, "3.9.0" }
/* Defines the duration after which to close unused connections to
the search attachment extractor service. If the idle timeout is
less than search_attachment_extractor_request_timeout, then it
is ignored and request timeout used instead.

If no unit is specified, seconds is assumed. */
*/
{ "search_index_language", 0, SWITCH, "3.3.1" }
/*
If enabled, then messages bodies are stemmed by detected language
Expand Down

0 comments on commit 72ae24b

Please sign in to comment.