Skip to content

Commit

Permalink
merge cube - partial correction
Browse files Browse the repository at this point in the history
  • Loading branch information
gilbertocamara committed May 6, 2024
2 parents ce1d3f7 1ccc1ca commit f6c6422
Show file tree
Hide file tree
Showing 13 changed files with 265 additions and 326 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 139,7 @@ Collate:
'api_gdalcubes.R'
'api_jobs.R'
'api_label_class.R'
'api_merge.R'
'api_mixture_model.R'
'api_ml_model.R'
'api_mosaic.R'
Expand Down
2 changes: 1 addition & 1 deletion R/api_colors.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 45,7 @@
missing <- labels[!labels %in% names(colors)]
if (.check_warnings()) {
warning(.conf("messages", ".colors_get_missing"), toString(missing))
warning(.conf("messages", ".colors_get_missing_palette", palette))
warning(.conf("messages", ".colors_get_missing_palette"), palette)
# grDevices does not work with one color missing
}
colors_pal <- grDevices::hcl.colors(
Expand Down
24 changes: 24 additions & 0 deletions R/api_merge.R
Original file line number Diff line number Diff line change
@@ -0,0 1,24 @@
#' @title Absolute difference between two timelines
#' @noRd
#' @keywords internal
#' @param t1 First timeline (any value accepted by \code{as.Date}).
#' @param t2 Second timeline (any value accepted by \code{as.Date}).
#' @return A \code{difftime} vector with the absolute element-wise
#'         distance between the dates of \code{t1} and \code{t2}.
.merge_diff_timelines <- function(t1, t2) {
    # coerce both timelines to Date before subtracting
    dates_first <- as.Date(t1)
    dates_second <- as.Date(t2)
    # element-wise distance, regardless of which date comes first
    abs(dates_first - dates_second)
}

#' @title Merge the file information of two cubes, row by row
#' @noRd
#' @keywords internal
#' @param data1 First cube; its rows are returned with updated file info.
#' @param data2 Second cube, traversed in parallel with \code{data1}.
#' @return The rows of \code{data1}, each holding the combined file
#'         information of the matching rows of both cubes.
#' @note \code{.fi()} and \code{.fi<-} are defined elsewhere in the
#'       package; presumably they get and set the file info of a tile
#'       (to be confirmed against their definitions).
.merge_fi <- function(data1, data2) {
    # combine the file info of one pair of rows
    merge_pair <- function(tile1, tile2) {
        stacked_fi <- dplyr::bind_rows(.fi(tile1), .fi(tile2))
        # order by date, then band, then fid
        .fi(tile1) <- dplyr::arrange(
            stacked_fi,
            .data[["date"]],
            .data[["band"]],
            .data[["fid"]]
        )
        # keep only the first entry of each band/date combination
        .fi(tile1) <- dplyr::distinct(
            .fi(tile1),
            .data[["band"]],
            .data[["date"]],
            .keep_all = TRUE
        )
        tile1
    }
    # apply to each pair of rows and bind the results by row
    slider::slide2_dfr(data1, data2, merge_pair)
}
4 changes: 2 additions & 2 deletions R/api_tile.R
Original file line number Diff line number Diff line change
Expand Up @@ -1491,14 1491,14 @@ NULL
#'
.tile_cog_sizes <- function(tile) {
# run gdalinfo on file
info <- capture.output(sf::gdal_utils(
info <- utils::capture.output(sf::gdal_utils(
source = .tile_path(tile),
destination = NULL)
)
info2 <- stringr::str_split(info, pattern = "\n")
# capture the line containing overview info
over <- unlist(info2[grepl("Overview", info2)])
if (!.has(over))
if (!.has(over) || grepl("arbitrary", over))
return(NULL)
# get the value pairs
over_values <- unlist(strsplit(over, split = ":", fixed = TRUE))[2]
Expand Down
4 changes: 2 additions & 2 deletions R/api_view.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 35,7 @@
# find out if resampling is required (for big images)
output_size <- .view_resample_size(
cube = cube,
ndates = length(dates)
ndates = max(length(dates), 1)
)
# create a leaflet and add providers
leaf_map <- .view_add_basic_maps()
Expand Down Expand Up @@ -136,7 136,7 @@
# find out if resampling is required (for big images)
output_size <- .view_resample_size(
cube = cube,
ndates = length(dates)
ndates = max(length(dates), 1)
)
# create a leaflet and add providers
leaf_map <- .view_add_basic_maps()
Expand Down
88 changes: 53 additions & 35 deletions R/sits_merge.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 11,17 @@
#' To merge data cubes, they should share the same sensor, resolution,
#' bounding box, timeline, and have different bands.
#'
#' When the user requests a sits_merge operation for two regular cubes with
#' the same number of time intervals but with timelines that are not equal,
#' the system issues a warning and asks the user to provide a \code{tolerance}
#' parameter, which will be used in the merging operation.
#' The temporal tolerance parameter should be less than the time interval
#' between two images of both cubes.
#' In this case the second cube will have its timeline and the image
#' file names changed to match the timeline of the first cube.
#' The images of the second cube will be written in \code{output_dir}
#' directory.
#'
#' @param data1 Time series (tibble of class "sits")
#' or data cube (tibble of class "raster_cube") .
#' @param data2 Time series (tibble of class "sits")
Expand All @@ -23,6 34,14 @@
#' @param suffix If there are duplicate bands in data1 and data2
#' these suffixes will be added
#' (character vector).
#' @param tolerance A period tolerance to merge both cubes.
#' ISO8601-compliant time period for regular data cubes,
#' with number and unit, where "D", "M" and "Y" stand
#' for days, months and years; e.g., "P16D" for 16 days.
#' The temporal tolerance parameter should be less than
#' the time interval between two images of both cubes.
#' @param output_dir Valid directory for storing merged images.
#'
#' @return merged data sets (tibble of class "sits" or
#' tibble of class "raster_cube")
#' @examples
Expand Down Expand Up @@ -98,12 117,25 @@ sits_merge.raster_cube <- function(data1, data2, ...,
# pre-condition - check cube type
.check_is_raster_cube(data1)
.check_is_raster_cube(data2)
if (.has(tolerance)) {
.check_period(tolerance)
}
if (.has(output_dir)) {
.check_output_dir(output_dir)
}

# aligning tiles
data1 <- dplyr::arrange(data1, .data[["tile"]])
data2 <- dplyr::arrange(data2, .data[["tile"]])
# Get cubes timeline
d1_tl <- as.Date(unlist(.cube_timeline(data1)))
d2_tl <- as.Date(unlist(.cube_timeline(data2)))
# check timeline interval
# tl_interval1 <- lubridate::int_diff()
# join cube tiles
common_tiles <- intersect(data1[["tile"]], data2[["tile"]])
data1 <- dplyr::filter(data1, .data[["tile"]] %in% common_tiles)
data2 <- dplyr::filter(data2, .data[["tile"]] %in% common_tiles)
.check_that(all(sort(.cube_tiles(data1)) == sort(.cube_tiles(data2))))
if (inherits(data1, "hls_cube") && inherits(data2, "hls_cube") &&
(.cube_collection(data1) == "HLSS30" ||
Expand All @@ -115,29 147,40 @@ sits_merge.raster_cube <- function(data1, data2, ...,
data1 <- .merge_fi(data1, data2)
return(data1)
}
# Pre-conditions
.check_period(tolerance)
.check_output_dir(output_dir)
warning(.conf("messages", "sits_raster_merge_cube_tolerance"),
call. = FALSE)
# Get difference in timelines
diff_timelines <- .merge_diff_timeline(d1_tl, d2_tl)
diff_timelines <- .merge_diff_timelines(d1_tl, d2_tl)
# Verify the consistency of each difference
if (!all(diff_timelines <= lubridate::period(tolerance))) {
stop(.conf("messages", "sits_merge_raster_cube_error"))
stop(.conf("messages", "sits_merge_raster_cube_error"),
call. = FALSE
)
}
if (!.has(output_dir)) {
warning(
paste("The images with the fixed timeline of the",
"second cube will not be written. If you want",
"to write it, use the `output_dir` parameter."
),
call. = FALSE
)
}

# Change file name to match reference timeline
data2 <- slider::slide_dfr(data2, function(y) {
fi_list <- purrr::map(.tile_bands(y), function(band) {
fi_band <- .fi_filter_bands(.fi(y), bands = band)
fi_band[["date"]] <- d1_tl
if (!.has(output_dir)) {
return(fi_band)
}
fi_paths <- .fi_paths(fi_band)
file_names <- .file_eo_name(
tile = y, band = band, date = d1_tl, output_dir = output_dir
tile = y, band = band,
date = d1_tl, output_dir = output_dir
)
file.copy(from = fi_paths, to = file_names)
fi_band[["path"]] <- file_names
fi_band[["date"]] <- d1_tl
fi_band
return(fi_band)
})
tile_fi <- dplyr::bind_rows(fi_list)
tile_fi <- dplyr::arrange(
Expand All @@ -155,31 198,6 @@ sits_merge.raster_cube <- function(data1, data2, ...,
return(data1)
}

#' @title Absolute difference between two timelines
#' @noRd
#' @keywords internal
#' @param t1 First timeline (any value accepted by \code{as.Date}).
#' @param t2 Second timeline (any value accepted by \code{as.Date}).
#' @return A \code{difftime} vector with the absolute element-wise
#'         distance between the dates of \code{t1} and \code{t2}.
.merge_diff_timeline <- function(t1, t2) {
    # coerce both timelines to Date before subtracting
    dates_first <- as.Date(t1)
    dates_second <- as.Date(t2)
    # element-wise distance, regardless of which date comes first
    abs(dates_first - dates_second)
}

#' @title Merge the file information of two cubes, row by row
#' @noRd
#' @keywords internal
#' @param data1 First cube; its rows are returned with updated file info.
#' @param data2 Second cube, traversed in parallel with \code{data1}.
#' @return The rows of \code{data1}, each holding the combined file
#'         information of the matching rows of both cubes.
#' @note \code{.fi()} and \code{.fi<-} are defined elsewhere in the
#'       package; presumably they get and set the file info of a tile
#'       (to be confirmed against their definitions).
.merge_fi <- function(data1, data2) {
    # combine the file info of one pair of rows
    merge_pair <- function(tile1, tile2) {
        stacked_fi <- dplyr::bind_rows(.fi(tile1), .fi(tile2))
        # order by date, then band, then fid
        .fi(tile1) <- dplyr::arrange(
            stacked_fi,
            .data[["date"]],
            .data[["band"]],
            .data[["fid"]]
        )
        # keep only the first entry of each band/date combination
        .fi(tile1) <- dplyr::distinct(
            .fi(tile1),
            .data[["band"]],
            .data[["date"]],
            .keep_all = TRUE
        )
        tile1
    }
    # apply to each pair of rows and bind the results by row
    slider::slide2_dfr(data1, data2, merge_pair)
}

#' @rdname sits_merge
#' @export
sits_merge.default <- function(data1, data2, ...) {
Expand Down
15 changes: 14 additions & 1 deletion contributing.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 30,27 @@ their permission) for contributions that go beyond small typos in code or docume

### R programming models

- Most functions in `sits` use the S3 programming model with a strong emphasis on generic methods which are specialized depending on the input data type. See, for example, the implementation of the `sits_bands()` function. Functions that use the `torch` package use the R6 model to be compatible with that package. See, for example, the code in `sits_tempcnn.R` and `api_torch.R`.
- Most functions in `sits` use the S3 programming model with a strong emphasis on generic methods which are specialized depending on the input data type. See, for example, the implementation of the `sits_bands()` function.

- Please do not include contributed code using the S4 programming model. Doing so would break the structure and the logic of existing code. Convert your code from S4 to S3.

- Use generic functions as much as possible, as they improve modularity and maintenance. If your code has decision points using `if-else` clauses, such as `if A, do X; else do Y` consider using generic functions.

- Functions that use the `torch` package use the R6 model to be compatible with that package. See, for example, the code in `sits_tempcnn.R` and `api_torch.R`. Converting `pyTorch` code to R and including it in `sits` is straightforward. Please see the [Technical Annex](https://e-sensing.github.io/sitsbook/technical-annex.html) of the sits on-line book.

### Adherence to the `tidyverse`, `sf` and `terra`

The `sits` code relies on the packages of the `tidyverse` to work with tables and lists. We use `dplyr` and `tidyr` for data selection and wrangling, `purrr` and `slider` for loops on lists and tables, and `lubridate` to handle dates and times.


### Adherence to the `sits` data types

- The `sits` package is built on top of three data types: time series tibble, data cubes and models. Most `sits` functions have one or more of these types as inputs and one of them as return values.

- The time series tibble contains data and metadata. The first six columns contain the metadata: spatial and temporal information, the label assigned to the sample, and the data cube from where the data has been extracted. The time_series column contains the time series data for each spatiotemporal location. All time series tibbles are objects of class `sits`.

- The `cube` data type is designed to store metadata about image files. In principle, images which are part of a data cube share the same geographical region, have the same bands, and have been regularized to fit into a pre-defined temporal interval. Data cubes in `sits` are organized by tiles. A tile is an element of a satellite's mission reference system, for example MGRS for Sentinel-2 and WRS2 for Landsat. A `cube` is a tibble where each row contains information about data covering one tile. Each row of the cube tibble contains a column named `file_info`; this column contains a list that stores a tibble.

- The `cube` data type is specialised in `raster_cube` (ARD images), `vector_cube` (ARD cube with segmentation vectors), `probs_cube` (probabilities produced by classification algorithms on raster data), `probs_vector_cube` (probabilities generated by vector classification of segments), `uncertainty_cube` (cubes with uncertainty information), and `class_cube` (labelled maps). See the code in `sits_plot.R` as an example of specialisation of `plot` to handle different classes of raster data.

- All ML/DL models in `sits` which are the result of `sits_train` belong to the `ml_model` class. In addition, models are assigned a second class, which is unique to ML models (e.g., `rfor_model`, `svm_model`) and generic for all DL `torch`-based models (`torch_model`). The class information is used for plotting models and for establishing if a model can run on GPUs.
Expand Down
2 changes: 1 addition & 1 deletion inst/extdata/config_messages.yml
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 372,7 @@ sits_labels_summary: "this function is deprecated; please use summary()"
sits_lighttae: "wrong input parameters - see example in documentation"
sits_list_collections: "invalid source parameter as data provider"
sits_merge_default: "input should be objects of class sits or class raster_cube"
sits_merge_raster_cube: "merge cubes requires same tiles in both cubes"
sits_merge_raster_cube: "merge cubes requires same tiles and same timeline length in both cubes"
sits_merge_raster_cube_tolerance: "cubes timeline are different; using tolerance to merge them"
sits_merge_raster_cube_error: "temporal tolerance is smaller than difference between cube timelines"
sits_merge_sits: "input data is NULL or has different number of rows"
Expand Down
Loading

0 comments on commit f6c6422

Please sign in to comment.