merge cube - partial correction

ammaciel · May 6, 2024 · f6c6422 · f6c6422
2 parents ce1d3f7 1ccc1ca
commit f6c6422
Show file tree

Hide file tree

Showing 13 changed files with 265 additions and 326 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -139,6 139,7 @@ Collate:
  'api_gdalcubes.R'
  'api_jobs.R'
  'api_label_class.R'
  'api_merge.R'
  'api_mixture_model.R'
  'api_ml_model.R'
  'api_mosaic.R'

diff --git a/R/api_colors.R b/R/api_colors.R
@@ -45,7 45,7 @@
  missing <- labels[!labels %in% names(colors)]
  if (.check_warnings()) {
  warning(.conf("messages", ".colors_get_missing"), toString(missing))
- warning(.conf("messages", ".colors_get_missing_palette", palette))
  warning(.conf("messages", ".colors_get_missing_palette"), palette)
  # grDevices does not work with one color missing
  }
  colors_pal <- grDevices::hcl.colors(

diff --git a/R/api_merge.R b/R/api_merge.R
@@ -0,0 1,24 @@
 # Absolute element-wise difference between two timelines.
 #
 # Both arguments are coerced with as.Date() before being subtracted, so
 # the result is the difftime produced by Date subtraction, with its sign
 # removed by abs().
 #
 # t1, t2: values accepted by as.Date() (e.g. Date or character vectors).
 .merge_diff_timelines <- function(t1, t2) {
     dates_first <- as.Date(t1)
     dates_second <- as.Date(t2)
     gap <- dates_first - dates_second
     abs(gap)
 }
 
 # Merge the file info of two cubes, pairing their elements in parallel.
 #
 # data1 and data2 are walked together by slider::slide2_dfr(); for each
 # pair the file info of both elements is stacked, sorted by date, band
 # and fid, and then reduced to one row per (band, date) combination.
 # The updated elements of data1 are row-bound and returned.
 .merge_fi <- function(data1, data2) {
     merge_pair <- function(tile_a, tile_b) {
         # stack both file infos and order them chronologically
         stacked_fi <- dplyr::bind_rows(.fi(tile_a), .fi(tile_b))
         .fi(tile_a) <- dplyr::arrange(
             stacked_fi,
             .data[["date"]],
             .data[["band"]],
             .data[["fid"]]
         )
         # keep a single entry per band/date (first row after sorting)
         .fi(tile_a) <- dplyr::distinct(
             .fi(tile_a),
             .data[["band"]],
             .data[["date"]],
             .keep_all = TRUE
         )
         tile_a
     }
     merged <- slider::slide2_dfr(data1, data2, merge_pair)
     merged
 }
diff --git a/R/api_tile.R b/R/api_tile.R
@@ -1491,14 1491,14 @@ NULL
 #'
 .tile_cog_sizes <- function(tile) {
  # run gdalinfo on file
- info <- capture.output(sf::gdal_utils(
  info <- utils::capture.output(sf::gdal_utils(
  source = .tile_path(tile),
  destination = NULL)
  )
  info2 <- stringr::str_split(info, pattern = "\n")
  # capture the line containing overview info
  over <- unlist(info2[grepl("Overview", info2)])
- if (!.has(over))
  if (!.has(over) || grepl("arbitrary", over))
  return(NULL)
  # get the value pairs
  over_values <- unlist(strsplit(over, split = ":", fixed = TRUE))[2]

diff --git a/R/api_view.R b/R/api_view.R
@@ -35,7 35,7 @@
  # find out if resampling is required (for big images)
  output_size <- .view_resample_size(
  cube = cube,
- ndates = length(dates)
  ndates = max(length(dates), 1)
  )
  # create a leaflet and add providers
  leaf_map <- .view_add_basic_maps()
@@ -136,7 136,7 @@
  # find out if resampling is required (for big images)
  output_size <- .view_resample_size(
  cube = cube,
- ndates = length(dates)
  ndates = max(length(dates), 1)
  )
  # create a leaflet and add providers
  leaf_map <- .view_add_basic_maps()

diff --git a/R/sits_merge.R b/R/sits_merge.R
@@ -11,6 11,17 @@
 #' To merge data cubes, they should share the same sensor, resolution,
 #' bounding box, timeline, and have different bands.
 #'
 #' When the user requests a sits_merge operation for two regular cubes with
 #' the same number of time intervals but with timelines that are not equal
 #' the system issues a warning and asks the user to provide a \code{tolerance}
 #' parameter which will be used in the merging operation.
 #' The temporal tolerance parameter should be less than the time interval
 #' between two images of both cubes.
 #' In this case the second cube will have its timeline and the image
 #' file names changed to match the timeline of the first cube.
 #' The images of the second cube will be written in \code{output_dir}
 #' directory.
 #'
 #' @param data1 Time series (tibble of class "sits")
 #' or data cube (tibble of class "raster_cube") .
 #' @param data2 Time series (tibble of class "sits")
@@ -23,6 34,14 @@
 #' @param suffix If there are duplicate bands in data1 and data2
 #' these suffixes will be added
 #' (character vector).
 #' @param tolerance A period tolerance to merge both cubes.
 #' ISO8601-compliant time period for regular data cubes,
 #' with number and unit, where "D", "M" and "Y" stand
 #' for days, months and years; e.g., "P16D" for 16 days.
 #' The temporal tolerance parameter should be less than
 #' the time interval between two images of both cubes.
 #' @param output_dir Valid directory for storing merged images.
 #'
 #' @return merged data sets (tibble of class "sits" or
 #' tibble of class "raster_cube")
 #' @examples
@@ -98,12 117,25 @@ sits_merge.raster_cube <- function(data1, data2, ...,
  # pre-condition - check cube type
  .check_is_raster_cube(data1)
  .check_is_raster_cube(data2)
  if (.has(tolerance)) {
  .check_period(tolerance)
  }
  if (.has(output_dir)) {
  .check_output_dir(output_dir)
  }
 
  # aligning tiles
  data1 <- dplyr::arrange(data1, .data[["tile"]])
  data2 <- dplyr::arrange(data2, .data[["tile"]])
  # Get cubes timeline
  d1_tl <- as.Date(unlist(.cube_timeline(data1)))
  d2_tl <- as.Date(unlist(.cube_timeline(data2)))
  # check timeline interval
  # tl_interval1 <- lubridate::int_diff()
  # join cube tiles
  common_tiles <- intersect(data1[["tile"]], data2[["tile"]])
  data1 <- dplyr::filter(data1, .data[["tile"]] %in% common_tiles)
  data2 <- dplyr::filter(data2, .data[["tile"]] %in% common_tiles)
  .check_that(all(sort(.cube_tiles(data1)) == sort(.cube_tiles(data2))))
  if (inherits(data1, "hls_cube") && inherits(data2, "hls_cube") &&
  (.cube_collection(data1) == "HLSS30" ||
@@ -115,29 147,40 @@ sits_merge.raster_cube <- function(data1, data2, ...,
  data1 <- .merge_fi(data1, data2)
  return(data1)
  }
- # Pre-conditions
- .check_period(tolerance)
- .check_output_dir(output_dir)
- warning(.conf("messages", "sits_raster_merge_cube_tolerance"),
- call. = FALSE)
  # Get difference in timelines
- diff_timelines <- .merge_diff_timeline(d1_tl, d2_tl)
  diff_timelines <- .merge_diff_timelines(d1_tl, d2_tl)
  # Verify the consistency of each difference
  if (!all(diff_timelines <= lubridate::period(tolerance))) {
- stop(.conf("messages", "sits_merge_raster_cube_error"))
  stop(.conf("messages", "sits_merge_raster_cube_error"),
  call. = FALSE
  )
  }
  if (!.has(output_dir)) {
  warning(
  paste("The images with the fixed timeline of the",
  "second cube will not be written. If you want",
  "to write it, use the `output_dir` parameter."
  ),
  call. = FALSE
  )
  }
 
  # Change file name to match reference timeline
  data2 <- slider::slide_dfr(data2, function(y) {
  fi_list <- purrr::map(.tile_bands(y), function(band) {
  fi_band <- .fi_filter_bands(.fi(y), bands = band)
  fi_band[["date"]] <- d1_tl
  if (!.has(output_dir)) {
  return(fi_band)
  }
  fi_paths <- .fi_paths(fi_band)
  file_names <- .file_eo_name(
- tile = y, band = band, date = d1_tl, output_dir = output_dir
  tile = y, band = band,
  date = d1_tl, output_dir = output_dir
  )
  file.copy(from = fi_paths, to = file_names)
  fi_band[["path"]] <- file_names
- fi_band[["date"]] <- d1_tl
- fi_band
  return(fi_band)
  })
  tile_fi <- dplyr::bind_rows(fi_list)
  tile_fi <- dplyr::arrange(
@@ -155,31 198,6 @@ sits_merge.raster_cube <- function(data1, data2, ...,
  return(data1)
 }
 
-.merge_diff_timeline <- function(t1, t2) {
- abs(as.Date(t1) - as.Date(t2))
-}
-
-.merge_fi <- function(data1, data2) {
- data1 <- slider::slide2_dfr(data1, data2, function(x, y) {
- .fi(x) <- dplyr::arrange(
- dplyr::bind_rows(.fi(x), .fi(y)),
- .data[["date"]],
- .data[["band"]],
- .data[["fid"]]
- )
- # remove duplicates
- .fi(x) <- dplyr::distinct(
- .fi(x),
- .data[["band"]],
- .data[["date"]],
- .keep_all = TRUE
- )
-
- return(x)
- })
- return(data1)
-}
-
 #' @rdname sits_merge
 #' @export
 sits_merge.default <- function(data1, data2, ...) {

diff --git a/contributing.md b/contributing.md
@@ -30,14 30,27 @@ their permission) for contributions that go beyond small typos in code or docume
 
 ### R programming models 
 
-- Most functions in `sits` use the S3 programming model with a strong emphasis on generic methods wich are specialized depending on the input data type. See for example the implementation of the `sits_bands()` function. Functions that use the `torch` package use the R6 model to be compatible with that package. See for example, the code in `sits_tempcnn.R` and `api_torch.R`. 
 - Most functions in `sits` use the S3 programming model with a strong emphasis on generic methods which are specialized depending on the input data type. See for example the implementation of the `sits_bands()` function. 
 
 - Please do not include contributed code using the S4 programming model. Doing so would break the structure and the logic of existing code. Convert your code from S4 to S3.
 
 - Use generic functions as much as possible, as they improve modularity and maintenance. If your code has decision points using `if-else` clauses, such as `if A, do X; else do Y` consider using generic functions. 
 
 - Functions that use the `torch` package use the R6 model to be compatible with that package. See, for example, the code in `sits_tempcnn.R` and `api_torch.R`. Converting `pyTorch` code to R and including it in the package is straightforward. Please see the [Technical Annex](https://e-sensing.github.io/sitsbook/technical-annex.html) of the sits on-line book.
 
 ### Adherence to the `tidyverse`, `sf` and `terra`
 
 The `sits` code relies on the packages of the `tidyverse` to work with tables and lists. We use `dplyr` and `tidyr` for data selection and wrangling, `purrr` and `slider` for loops on lists and tables, and `lubridate` to handle dates and times. 
 
 
 ### Adherence to the `sits` data types
 
 - The `sits` package is built on top of three data types: time series tibble, data cubes and models. Most `sits` functions have one or more of these types as inputs and one of them as return values.
 
 - The time series tibble contains data and metadata. The first six columns contain the metadata: spatial and temporal information, the label assigned to the sample, and the data cube from where the data has been extracted. The time_series column contains the time series data for each spatiotemporal location. All time series tibbles are objects of class `sits`. 
 
 - The `cube` data type is designed to store metadata about image files. In principle, images which are part of a data cube share the same geographical region, have the same bands, and have been regularized to fit into a pre-defined temporal interval. Data cubes in `sits` are organized by tiles. A tile is an element of a satellite's mission reference system, for example MGRS for Sentinel-2 and WRS2 for Landsat. A `cube` is a tibble where each row contains information about data covering one tile. Each row of the cube tibble contains a column named `file_info`; this column contains a list that stores a tibble 
 
 - The `cube` data type is specialised into `raster_cube` (ARD images), `vector_cube` (ARD cube with segmentation vectors), `probs_cube` (probabilities produced by classification algorithms on raster data), `probs_vector_cube` (probabilities generated by vector classification of segments), `uncertainty_cube` (cubes with uncertainty information), and `class_cube` (labelled maps). See the code in `sits_plot.R` as an example of specialisation of `plot` to handle different classes of raster data. 
 
 - All ML/DL models in `sits` which are the result of `sits_train` belong to the `ml_model` class. In addition, models are assigned a second class, which is unique to ML models (e.g., `rfor_model`, `svm_model`) and generic for all DL `torch` based models (`torch_model`). The class information is used for plotting models and for establishing if a model can run on GPUs. 

diff --git a/inst/extdata/config_messages.yml b/inst/extdata/config_messages.yml
@@ -372,7 372,7 @@ sits_labels_summary: "this function is deprecated; please use summary()"
 sits_lighttae: "wrong input parameters - see example in documentation"
 sits_list_collections: "invalid source parameter as data provider"
 sits_merge_default: "input should be objects of class sits or class raster_cube"
-sits_merge_raster_cube: "merge cubes requires same tiles in both cubes"
 sits_merge_raster_cube: "merge cubes requires same tiles and same timeline length in both cubes"
 sits_merge_raster_cube_tolerance: "cubes timeline are different; using tolerance to merge them"
 sits_merge_raster_cube_error: "temporal tolerance is smaller than difference between cube timelines"
 sits_merge_sits: "input data is NULL or has different number of rows"