Skip to content

Commit

Permalink
More fixes of NR CIFs
Browse files Browse the repository at this point in the history
  • Loading branch information
mem48 committed Sep 11, 2024
1 parent 4c746de commit 1d82b11
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 36 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ Imports:
digest,
foreach,
future,
furrr,
future.apply,
geodist,
httr,
Expand Down
75 changes: 46 additions & 29 deletions R/atoc_export.R
Original file line number Diff line number Diff line change
Expand Up @@ -377,27 +377,34 @@ makeCalendar <- function(schedule, ncores = 1) {


if (ncores > 1) {
cl <- parallel::makeCluster(ncores)
# parallel::clusterExport(
# cl = cl,
# varlist = c("calendar", "UIDs"),
# envir = environment()
future::plan(future::multisession, workers = ncores)
res <- furrr::future_map(.x = calendar_split,
.f = makeCalendar.inner,
.progress = TRUE)
future::plan(future::sequential)

# cl <- parallel::makeCluster(ncores)
# # parallel::clusterExport(
# # cl = cl,
# # varlist = c("calendar", "UIDs"),
# # envir = environment()
# # )
# parallel::clusterEvalQ(cl, {
# loadNamespace("UK2GTFS")
# })
# pbapply::pboptions(use_lb = TRUE)
# res <- pbapply::pblapply(calendar_split,
# makeCalendar.inner,
# cl = cl
# )
parallel::clusterEvalQ(cl, {
loadNamespace("UK2GTFS")
})
pbapply::pboptions(use_lb = TRUE)
res <- pbapply::pblapply(calendar_split,
makeCalendar.inner,
cl = cl
)
parallel::stopCluster(cl)
rm(cl)
# parallel::stopCluster(cl)
# rm(cl)
} else {
res <- pbapply::pblapply(
calendar_split,
makeCalendar.inner)
res <- purrr::map(.x = calendar_split,
.f = makeCalendar.inner,
.progress = TRUE)
}
message("\n") # Newline break after progress bars

res.calendar <- lapply(res, `[[`, 1)
res.calendar <- data.table::rbindlist(res.calendar, use.names=FALSE) #performance, was taking 10 minutes to execute bind_rows
Expand All @@ -422,24 +429,34 @@ makeCalendar <- function(schedule, ncores = 1) {

#res.calendar.split <- split(res.calendar, seq(1, nrow(res.calendar)))
#performance - doing this split on 500k rows takes 60s - longer than the parallel execution below and consumes 3gb memory.
WEEKDAY_NAME_VECTOR <- c("monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday")
CHECKROWS_NAME_VECTOR <- c(WEEKDAY_NAME_VECTOR, "duration", "start_date", "end_date")


res.calendar.days <- res.calendar[,CHECKROWS_NAME_VECTOR]
res.calendar.days <- res.calendar[,..CHECKROWS_NAME_VECTOR]
res.calendar.days <- data.table::transpose(res.calendar.days)
#transpose on the same size runs in around 3s, but causes named dataframe with mixed datatypes to be coerced to unnamed vector of integer.


if (ncores > 1) {
cl <- parallel::makeCluster(ncores)
parallel::clusterEvalQ(cl, {
loadNamespace("UK2GTFS")
})
keep <- pbapply::pbsapply(res.calendar.days, checkrows,
cl = cl
)
parallel::stopCluster(cl)
rm(cl)
future::plan(future::multisession, workers = ncores)
keep <- furrr::future_map(.x = res.calendar.days,
.f = checkrows,
.progress = TRUE)
future::plan(future::sequential)
keep <- unlist(keep)

# cl <- parallel::makeCluster(ncores)
# parallel::clusterEvalQ(cl, {
# loadNamespace("UK2GTFS")
# })
# keep <- pbapply::pbsapply(res.calendar.days, checkrows,
# cl = cl
# )
# parallel::stopCluster(cl)
# rm(cl)
} else {
keep <- pbapply::pbsapply(res.calendar.days, checkrows)
keep <- purrr::map(res.calendar.days, checkrows, .progress = TRUE)
}

res.calendar <- res.calendar[keep, ]
Expand Down
14 changes: 8 additions & 6 deletions R/atoc_main.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,15 @@ schedule2routes <- function(stop_times, stops, schedule, silent = TRUE, ncores =
calendar$end_date <- as.character(calendar$end_date)
calendar$end_date <- gsub("-", "", calendar$end_date)

calendar_dates <- calendar_dates[, c("UID", "start_date")]
names(calendar_dates) <- c("service_id", "date")
calendar_dates$date <- as.character(calendar_dates$date)
calendar_dates$date <- gsub("-", "", calendar_dates$date)
calendar_dates$exception_type <- 2 # all events passed to calendar_dates are single day cancellations

if(nrow(calendar_dates) > 0){
calendar_dates <- calendar_dates[, c("UID", "start_date")]
names(calendar_dates) <- c("service_id", "date")
calendar_dates$date <- as.character(calendar_dates$date)
calendar_dates$date <- gsub("-", "", calendar_dates$date)
calendar_dates$exception_type <- 2 # all events passed to calendar_dates are single day cancellations
}

calendar_dates = as.data.frame(calendar_dates)

### SECTION 3: ###############################################################################
# When splitting the calendar rowIDs are duplicated
Expand Down
3 changes: 2 additions & 1 deletion README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ The UK has two main sources of public transport timetable data [**Traveline**](h

**Update November 2020**

The [Open Bus Data Service](https://data.bus-data.dft.gov.uk/downloads/) now offers a national GTFS download option based on [ITO World's](https://www.itoworld.com) TransXchange to GTFS converter. For non-expert users it will probably be easier to download there files. However this packages is still being maintained to support conversion of historical files, and because the conversion of TransXchange to GTFS is open to interpretation and having alternative converters is useful.
The [Open Bus Data Service](https://data.bus-data.dft.gov.uk/downloads/) now offers a national GTFS download option based on [ITO World's](https://www.itoworld.com) TransXchange to GTFS converter. These include bus and light rail but not heavy rail timetables. For non-expert users it will probably be easier to download their files. However, this package is still being maintained to support conversion of historical files, and because the conversion of TransXchange to GTFS is open to interpretation and having alternative converters is useful.

## Capabilities - why we need another package

Expand All @@ -51,6 +51,7 @@ There are a number of pre-existing options for converting data to GTFS. This pac
+ Compression
+ Validation
+ Subsetting
+ Analysis



Expand Down

0 comments on commit 1d82b11

Please sign in to comment.