Skip to content

Commit

Permalink
feat: parse nanosecond from log (#1026)
Browse files Browse the repository at this point in the history
* feat: parse nanosecond in strptime

* benchmark

* fix timezone

* optimize code

* unittest for timezone

* fix
  • Loading branch information
Abingcbc authored and yyuuttaaoo committed Aug 20, 2023
1 parent 6cd261d commit 091241e
Show file tree
Hide file tree
Showing 30 changed files with 731 additions and 325 deletions.
68 changes: 57 additions & 11 deletions core/common/strptime.c → core/common/Strptime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,10 @@

#include <ctype.h>
#include <string.h>
#include <time.h>
#include <stdlib.h>
#include "Strptime.h"

namespace logtail {
/*
* We do not implement alternate representations. However, we always
* check whether a given modifier is allowed for a certain conversion.
Expand All @@ -45,9 +46,7 @@
}

static char gmt[] = {"GMT"};
#ifdef TM_ZONE
static char utc[] = {"UTC"};
#endif

/* RFC-822/RFC-2822 */
static const char* const nast[5] = {"EST", "CST", "MST", "PST", "\0\0\0"};
Expand All @@ -73,27 +72,44 @@ static const char* abmon[12] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul",
static const char* am_pm[2] = {"AM", "PM"};

static const unsigned char* conv_num(const unsigned char*, int*, unsigned int, unsigned int);
static const unsigned char* conv_nanosecond(const unsigned char*, long*, int*);
static const unsigned char* find_string(const unsigned char*, int*, const char* const*, const char* const*, int);


const char* strptime(const char* buf, const char* fmt, struct tm* tm) {
// Parse time string into two part: second and nanosecond
const char* strptime_ns(const char* buf, const char* fmt, struct tm* tm, long* nanosecond, int* nanosecondLength) {
// Replenish %s support.
if (0 == strcmp("%s", fmt)) {
char* cp;
long long n;
n = strtoll(buf, &cp, 10);
// Assert the length of second timestamp to be less than 10.
size_t bufLength = strlen(buf);
size_t secondTimestampLength = bufLength >= 10 ? 10 : bufLength;
for (size_t i = 0; i < bufLength - secondTimestampLength; ++i) {
n /= 10;
}
time_t t;
if ((long long)(t = n) != n)
if (n == 0 || (long long)(t = n) != n)
return NULL;
#ifdef _MSC_VER
if (localtime_s(tm, &t) != 0)
return NULL;
return cp;
#else
if (NULL == localtime_r(&t, tm))
return NULL;
#endif

*nanosecond = 0;
*nanosecondLength = 0;
conv_nanosecond((const unsigned char*)(buf + secondTimestampLength), nanosecond, nanosecondLength);
return ((const char*)cp);
}

unsigned char c;
const unsigned char *bp, *ep;
int alt_format, i, split_year = 0, neg = 0, offs;
const char* new_fmt;
*nanosecond = 0;

bp = (const unsigned char*)buf;

Expand Down Expand Up @@ -175,7 +191,7 @@ const char* strptime(const char* buf, const char* fmt, struct tm* tm) {
case 'x': /* The date, using the locale's format. */
new_fmt = "%m/%d/%y";
recurse:
bp = (const unsigned char*)strptime((const char*)bp, new_fmt, tm);
bp = (const unsigned char*)strptime_ns((const char*)bp, new_fmt, tm, nanosecond, nanosecondLength);
LEGAL_ALT(ALT_E);
continue;

Expand Down Expand Up @@ -213,6 +229,11 @@ const char* strptime(const char* buf, const char* fmt, struct tm* tm) {
LEGAL_ALT(ALT_O);
continue;

case 'f': /* Nanosecond */
bp = conv_nanosecond(bp, nanosecond, nanosecondLength);
LEGAL_ALT(ALT_O);
continue;

case 'k': /* The hour (24-hour clock representation). */
LEGAL_ALT(0);
/* FALLTHROUGH */
Expand Down Expand Up @@ -331,7 +352,7 @@ const char* strptime(const char* buf, const char* fmt, struct tm* tm) {
continue;

case 'Z':
if (strncmp((const char*)bp, gmt, 3) == 0) {
if (strncasecmp((const char*)bp, gmt, 3) == 0 || strncasecmp((const char *)bp, utc, 3) == 0) {
tm->tm_isdst = 0;
#ifdef TM_GMTOFF
tm->TM_GMTOFF = 0;
Expand Down Expand Up @@ -500,7 +521,6 @@ const char* strptime(const char* buf, const char* fmt, struct tm* tm) {
return NULL;
}
}

return ((const char*)bp);
}

Expand Down Expand Up @@ -530,6 +550,30 @@ static const unsigned char* conv_num(const unsigned char* buf, int* dest, unsign
return buf;
}

static const unsigned char* conv_nanosecond(const unsigned char* buf, long* dest, int* nanosecondLength) {
unsigned int result = 0;
unsigned char ch;
int digitNum = 0;
const unsigned char* start = buf;

ch = *buf;
if (ch < '0' || ch > '9')
return NULL;

do {
result *= 10;
result += ch - '0';
++digitNum;
ch = *++buf;
} while (ch >= '0' && ch <= '9');
for (int i = 0; i < 9 - digitNum; i++) {
result *= 10;
}
*dest = result;
*nanosecondLength = buf - start;
return buf;
}

static int strncasecmp(const char* s1, const char* s2, size_t n) {
if (n == 0)
return 0;
Expand Down Expand Up @@ -562,4 +606,6 @@ find_string(const unsigned char* bp, int* tgt, const char* const* n1, const char

/* Nothing matched */
return NULL;
}
}

} // namespace logtail
23 changes: 23 additions & 0 deletions core/common/Strptime.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
* Copyright 2023 iLogtail Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <time.h>

namespace logtail {

const char* strptime_ns(const char* buf, const char* fmt, struct tm* tm, long* nanosecond, int* nanosecondLength);

} // namespace logtail
92 changes: 57 additions & 35 deletions core/common/TimeUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#endif
#include "logger/Logger.h"
#include "common/LogtailCommonFlags.h"
#include "common/Strptime.h"

namespace logtail {

Expand Down Expand Up @@ -96,22 +97,37 @@ int DeduceYear(const struct tm* tm, const struct tm* currentTm) {
return currentTm->tm_year;
}

const char* Strptime(const char* buf, const char* fmt, struct tm* tm, int32_t specifiedYear /* = -1 */) {
if (specifiedYear < 0)
return strptime(buf, fmt, tm);

/*
Parse time (local timezone) from log
return the position of the parsing ends. If parsing fails, return NULL.
*/
const char* Strptime(const char* buf, const char* fmt, LogtailTime* ts, int& nanosecondLength, int32_t specifiedYear /* = -1 */) {
struct tm tm_ = {0};
struct tm* tm = &tm_;
const int32_t MIN_YEAR = std::numeric_limits<decltype(tm->tm_year)>::min();
auto bakYear = tm->tm_year;
tm->tm_year = MIN_YEAR;

auto ret = strptime_ns(buf, fmt, tm, &ts->tv_nsec, &nanosecondLength);
if (0 == strcmp("%f", fmt)) {
return ret;
}

if (specifiedYear < 0) {
ts->tv_sec = mktime(tm);
return ret;
}

// Do not specify: already got year information.
auto ret = strptime(buf, fmt, tm);
if (tm->tm_year != MIN_YEAR)
if (tm->tm_year != MIN_YEAR) {
ts->tv_sec = mktime(tm);
return ret;
}

// Mode 1.
if (specifiedYear > 0) {
tm->tm_year = specifiedYear - 1900;
ts->tv_sec = mktime(tm);
return ret;
}

Expand All @@ -131,6 +147,7 @@ const char* Strptime(const char* buf, const char* fmt, struct tm* tm, int32_t sp
auto deduction = DeduceYear(tm, &currentTm);
if (deduction != -1)
tm->tm_year = deduction;
ts->tv_sec = mktime(tm);
return ret;
}

Expand Down Expand Up @@ -258,17 +275,42 @@ void UpdateTimeDelta(time_t serverTime) {
APSARA_LOG_INFO(sLogger, ("update time delta by server time", serverTime)("delta", sTimeDelta));
}

// DEPRECATED: only for the compability of PreciseTimestamp
uint64_t GetPreciseTimestampFromLogtailTime(LogtailTime logTime,
const PreciseTimestampConfig& preciseTimestampConfig) {
uint64_t preciseTimestamp = logTime.tv_sec;
TimeStampUnit timeUnit = preciseTimestampConfig.unit;

if (TimeStampUnit::MILLISECOND == timeUnit) {
return preciseTimestamp * 1000 + logTime.tv_nsec / 1000000;
} else if (TimeStampUnit::MICROSECOND == timeUnit) {
return preciseTimestamp * 1000000 + logTime.tv_nsec / 1000;
} else if (TimeStampUnit::NANOSECOND == timeUnit) {
return preciseTimestamp * 1000000000 + logTime.tv_nsec;
}
return preciseTimestamp;
}

void SetLogTime(sls_logs::Log* log, time_t second, long nanosecond) {
log->set_time(second);
if (BOOL_FLAG(enable_timestamp_nanosecond)) {
log->set_time_ns(nanosecond);
}
}

LogtailTime GetCurrentLogtailTime() {
LogtailTime ts;
auto now = std::chrono::system_clock::now().time_since_epoch();
ts.tv_sec = std::chrono::duration_cast<std::chrono::seconds>(now).count();
ts.tv_nsec = std::chrono::duration_cast<std::chrono::nanoseconds>(now).count() % 1000000000;
return ts;
}

// Parse ms/us/ns suffix from preciseTimeSuffix, joining with the input second timestamp.
// Will return value the precise timestamp.
uint64_t GetPreciseTimestamp(uint64_t secondTimestamp,
const char* preciseTimeSuffix,
const PreciseTimestampConfig& preciseTimestampConfig,
int32_t tzOffsetSecond) {
uint64_t adjustSecondTimestamp = secondTimestamp - tzOffsetSecond;
if (!preciseTimestampConfig.enabled) {
return adjustSecondTimestamp;
}

const PreciseTimestampConfig& preciseTimestampConfig) {
bool endFlag = false;
TimeStampUnit timeUnit = preciseTimestampConfig.unit;

Expand Down Expand Up @@ -310,35 +352,15 @@ uint64_t GetPreciseTimestamp(uint64_t secondTimestamp,
preciseTimeDigit = preciseTimeDigit * 10;
endFlag = true;
}
adjustSecondTimestamp *= 10;
secondTimestamp *= 10;
}

return adjustSecondTimestamp + preciseTimeDigit;
}

int64_t GetNanoSecondsFromPreciseTimestamp(uint64_t preciseTimestamp, TimeStampUnit unit) {
switch (unit) {
case TimeStampUnit::NANOSECOND:
return preciseTimestamp % 1000000000;
case TimeStampUnit::MICROSECOND:
return preciseTimestamp * 1000 % 1000000000;
case TimeStampUnit::MILLISECOND:
return preciseTimestamp * 1000000 % 1000000000;
}
return 0;
}

void SetLogTime(sls_logs::Log* log, time_t second, long nanosecond) {
log->set_time(second);
if (BOOL_FLAG(enable_timestamp_nanosecond)) {
log->set_time_ns(nanosecond);
}
return secondTimestamp + preciseTimeDigit;
}

uint64_t GetCurrentTimeInNanoSeconds() {
return std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::system_clock::now().time_since_epoch())
.count();
}


} // namespace logtail
22 changes: 11 additions & 11 deletions core/common/TimeUtil.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <ctime>
#include <thread>
#include "log_pb/sls_logs.pb.h"
#include "common/Strptime.h"

// Time and timestamp utility.
namespace logtail {
Expand All @@ -33,6 +34,8 @@ struct PreciseTimestampConfig {
TimeStampUnit unit = TimeStampUnit::MILLISECOND;
};

typedef timespec LogtailTime;

// Convert @tm to string accroding to @format. TODO: Merge ConvertToTimeStamp and GetTimeStamp.
std::string ConvertToTimeStamp(const time_t& tm, const std::string& format = "%Y%m%d%H%M%S");
std::string GetTimeStamp(time_t tm, const std::string& format = "%Y%m%d%H%M%S");
Expand Down Expand Up @@ -65,25 +68,22 @@ inline void sleep(uint64_t s) {
// 2. You can set to 0 to ask Strptime to make a deduction according to current date.
// This mode only suits for real-time logs which lack of year information, for
// example, syslog following RFC3164 does not generate year information.
const char* Strptime(const char* buf, const char* fmt, struct tm* tm, int32_t specifiedYear = -1);
const char* Strptime(const char* buf, const char* fmt, LogtailTime* ts, int& nanosecondLength, int32_t specifiedYear = -1);

int32_t GetSystemBootTime();

// For feature enable_log_time_auto_adjust.
time_t GetTimeDelta();
void UpdateTimeDelta(time_t serverTime);

uint64_t GetPreciseTimestamp(uint64_t secondTimestamp,
const char* preciseTimeSuffix,
const PreciseTimestampConfig& preciseTimestampConfig,
int32_t tzOffsetSecond);

long GetNanoSecondsFromPreciseTimestamp(uint64_t preciseTimestamp, TimeStampUnit unit);
uint64_t GetPreciseTimestampFromLogtailTime(LogtailTime logTime,
const PreciseTimestampConfig& preciseTimestampConfig);

void SetLogTime(sls_logs::Log* log, time_t second, long nanosecond);

} // namespace logtail
LogtailTime GetCurrentLogtailTime();

#if defined(_MSC_VER)
extern "C" const char* strptime(const char* buf, const char* fmt, struct tm* tm);
#endif
uint64_t GetPreciseTimestamp(uint64_t secondTimestamp,
const char* preciseTimeSuffix,
const PreciseTimestampConfig& preciseTimestampConfig);
} // namespace logtail
Loading

0 comments on commit 091241e

Please sign in to comment.