Skip to content

Commit

Permalink
More fixes of NR CIFs
Browse files Browse the repository at this point in the history
  • Loading branch information
mem48 committed Sep 11, 2024
1 parent 4c746de commit 1d82b11
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 36 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ Imports:
digest,
foreach,
future,
furrr,
future.apply,
geodist,
httr,
Expand Down
75 changes: 46 additions & 29 deletions R/atoc_export.R
Original file line number Diff line number Diff line change
Expand Up @@ -377,27 +377,34 @@ makeCalendar <- function(schedule, ncores = 1) {


if (ncores > 1) {
cl <- parallel::makeCluster(ncores)
# parallel::clusterExport(
# cl = cl,
# varlist = c("calendar", "UIDs"),
# envir = environment()
future::plan(future::multisession, workers = ncores)
res <- furrr::future_map(.x = calendar_split,
.f = makeCalendar.inner,
.progress = TRUE)
future::plan(future::sequential)

# cl <- parallel::makeCluster(ncores)
# # parallel::clusterExport(
# # cl = cl,
# # varlist = c("calendar", "UIDs"),
# # envir = environment()
# # )
# parallel::clusterEvalQ(cl, {
# loadNamespace("UK2GTFS")
# })
# pbapply::pboptions(use_lb = TRUE)
# res <- pbapply::pblapply(calendar_split,
# makeCalendar.inner,
# cl = cl
# )
parallel::clusterEvalQ(cl, {
loadNamespace("UK2GTFS")
})
pbapply::pboptions(use_lb = TRUE)
res <- pbapply::pblapply(calendar_split,
makeCalendar.inner,
cl = cl
)
parallel::stopCluster(cl)
rm(cl)
# parallel::stopCluster(cl)
# rm(cl)
} else {
res <- pbapply::pblapply(
calendar_split,
makeCalendar.inner)
res <- purrr::map(.x = calendar_split,
.f = makeCalendar.inner,
.progress = TRUE)
}
message("\n") # Newline break after progress bars

res.calendar <- lapply(res, `[[`, 1)
res.calendar <- data.table::rbindlist(res.calendar, use.names=FALSE) #performance, was taking 10 minutes to execute bind_rows
Expand All @@ -422,24 +429,34 @@ makeCalendar <- function(schedule, ncores = 1) {

#res.calendar.split <- split(res.calendar, seq(1, nrow(res.calendar)))
#performance - doing this split on 500k rows takes 60s - longer than the parallel execution below and consumes 3gb memory.
WEEKDAY_NAME_VECTOR <- c("monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday")
CHECKROWS_NAME_VECTOR <- c(WEEKDAY_NAME_VECTOR, "duration", "start_date", "end_date")


res.calendar.days <- res.calendar[,CHECKROWS_NAME_VECTOR]
res.calendar.days <- res.calendar[,..CHECKROWS_NAME_VECTOR]
res.calendar.days <- data.table::transpose(res.calendar.days)
#transpose on the same size runs in around 3s, but causes named dataframe with mixed datatypes to be coerced to unnamed vector of integer.


if (ncores > 1) {
cl <- parallel::makeCluster(ncores)
parallel::clusterEvalQ(cl, {
loadNamespace("UK2GTFS")
})
keep <- pbapply::pbsapply(res.calendar.days, checkrows,
cl = cl
)
parallel::stopCluster(cl)
rm(cl)
future::plan(future::multisession, workers = ncores)
keep <- furrr::future_map(.x = res.calendar.days,
.f = checkrows,
.progress = TRUE)
future::plan(future::sequential)
keep <- unlist(keep)

# cl <- parallel::makeCluster(ncores)
# parallel::clusterEvalQ(cl, {
# loadNamespace("UK2GTFS")
# })
# keep <- pbapply::pbsapply(res.calendar.days, checkrows,
# cl = cl
# )
# parallel::stopCluster(cl)
# rm(cl)
} else {
keep <- pbapply::pbsapply(res.calendar.days, checkrows)
keep <- purrr::map(res.calendar.days, checkrows, .progress = TRUE)
}

res.calendar <- res.calendar[keep, ]
Expand Down
14 changes: 8 additions & 6 deletions R/atoc_main.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,15 @@ schedule2routes <- function(stop_times, stops, schedule, silent = TRUE, ncores =
calendar$end_date <- as.character(calendar$end_date)
calendar$end_date <- gsub("-", "", calendar$end_date)

calendar_dates <- calendar_dates[, c("UID", "start_date")]
names(calendar_dates) <- c("service_id", "date")
calendar_dates$date <- as.character(calendar_dates$date)
calendar_dates$date <- gsub("-", "", calendar_dates$date)
calendar_dates$exception_type <- 2 # all events passed to calendar_dates are single day cancellations

if(nrow(calendar_dates) > 0){
calendar_dates <- calendar_dates[, c("UID", "start_date")]
names(calendar_dates) <- c("service_id", "date")
calendar_dates$date <- as.character(calendar_dates$date)
calendar_dates$date <- gsub("-", "", calendar_dates$date)
calendar_dates$exception_type <- 2 # all events passed to calendar_dates are single day cancellations
}

calendar_dates = as.data.frame(calendar_dates)

### SECTION 3: ###############################################################################
# When splitting the calendar rowIDs are duplicated
Expand Down
3 changes: 2 additions & 1 deletion README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ The UK has two main sources of public transport timetable data [**Traveline**](h

**Update November 2020**

The [Open Bus Data Service](https://data.bus-data.dft.gov.uk/downloads/) now offers a national GTFS download option based on [ITO World's](https://www.itoworld.com) TransXchange to GTFS converter. For non-expert users it will probably be easier to download there files. However this packages is still being maintained to support conversion of historical files, and because the conversion of TransXchange to GTFS is open to interpretation and having alternative converters is useful.
The [Open Bus Data Service](https://data.bus-data.dft.gov.uk/downloads/) now offers a national GTFS download option based on [ITO World's](https://www.itoworld.com) TransXchange to GTFS converter. These include bus and light rail but not heavy rail timetables. For non-expert users it will probably be easier to download their files. However, this package is still being maintained to support conversion of historical files, and because the conversion of TransXchange to GTFS is open to interpretation and having alternative converters is useful.

## Capabilities - why we need another package

Expand All @@ -51,6 +51,7 @@ There are a number of pre-existing options for converting data to GTFS. This pac
+ Compression
+ Validation
+ Subsetting
+ Analysis



Expand Down

0 comments on commit 1d82b11

Please sign in to comment.