pointcoral workflow

pointcoral turns local CPCe point-count annotations into ecological summaries, QC overlays, and ML-ready datasets. The package is fully local and does not require MERMAID, CoralNet, or any closed platform.

The key label convention is that raw_label always holds the original short CPCe label from the .cpc file. You can run the bare workflow directly from those raw labels. A crosswalk is optional: supply one when you want to populate full_label, clean_label, label_class, major_category, ml_class, and project-specific class_id values. The bundled example crosswalk maps short labels such as SPO, CALG, and PEFL to full labels such as Sponge, Coralline algae, and Peyssonnelia flavescens, and to major classes such as SPONGES (S), CORALLINE ALGAE (CA), and PEYSSONNELIACEAE.

library(pointcoral)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union

1. Import CPCe data

example_dir <- system.file("extdata", package = "pointcoral")

points_raw <- read_cpce_folder(
  path = example_dir,
  image_root = example_dir,
  recursive = FALSE
)

dplyr::glimpse(points_raw)
#> Rows: 200
#> Columns: 36
#> $ project_id         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
#> $ site               <chr> "Hole in the Wall", "Hole in the Wall", "Hole in th…
#> $ transect           <chr> "HIW_158_W_U-1", "HIW_158_W_U-1", "HIW_158_W_U-1", …
#> $ survey_date        <date> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
#> $ image_id           <chr> "HIW_158_W_U-1", "HIW_158_W_U-1", "HIW_158_W_U-1", …
#> $ image_file         <chr> "HIW_158_W_U-1.jpg", "HIW_158_W_U-1.jpg", "HIW_158_…
#> $ image_path         <chr> "/private/var/folders/7j/dr505g_j3zd9z6m9qdykzc4w00…
#> $ point_id           <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, …
#> $ cpce_x             <dbl> 982, 351, 1336, 3497, 1292, 2156, 119, 3748, 2157, …
#> $ cpce_y             <dbl> 567, 5422, 8356, 10228, 12293, 15436, 18377, 22870,…
#> $ cpce_width         <dbl> 45581.55, 45581.55, 45581.55, 45581.55, 45581.55, 4…
#> $ cpce_height        <dbl> 28658.92, 28658.92, 28658.92, 28658.92, 28658.92, 2…
#> $ image_width        <int> 900, 900, 900, 900, 900, 900, 900, 900, 900, 900, 9…
#> $ image_height       <int> 566, 566, 566, 566, 566, 566, 566, 566, 566, 566, 5…
#> $ x_px               <int> 19, 7, 26, 69, 26, 43, 2, 74, 43, 18, 136, 118, 113…
#> $ y_px               <int> 11, 107, 165, 202, 243, 305, 363, 452, 469, 541, 48…
#> $ raw_code           <chr> "SPO", "S", "CALG", "SPO", "SPO", "SPO", "PEYS", "C…
#> $ raw_label          <chr> "SPO", "S", "CALG", "SPO", "SPO", "SPO", "PEYS", "C…
#> $ full_label         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
#> $ clean_label        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
#> $ label_class        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
#> $ major_category     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
#> $ ml_class           <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
#> $ class_id           <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
#> $ reviewer           <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
#> $ notes              <chr> "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA…
#> $ cpce_note_type     <chr> "Notes", "Notes", "Notes", "Notes", "Notes", "Notes…
#> $ cpc_file           <chr> "/private/var/folders/7j/dr505g_j3zd9z6m9qdykzc4w00…
#> $ codefile_path      <chr> "C:\\PROGRA~1\\CPCe36\\SHALLO~1.TXT", "C:\\PROGRA~1…
#> $ cpce_image_path    <chr> "E:\\Trasects June 09\\Deep Cres\\Hole in the Wall\…
#> $ cpce_header_width  <dbl> 45600, 45600, 45600, 45600, 45600, 45600, 45600, 45…
#> $ cpce_header_height <dbl> 28680, 28680, 28680, 28680, 28680, 28680, 28680, 28…
#> $ roi_x_min          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
#> $ roi_x_max          <dbl> 45581.55, 45581.55, 45581.55, 45581.55, 45581.55, 4…
#> $ roi_y_min          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
#> $ roi_y_max          <dbl> 28658.92, 28658.92, 28658.92, 28658.92, 28658.92, 2…

2. Match images

read_cpce_folder() matches images during import when image_root is provided, as in the call above. If you imported without image_root, you can match images afterwards with match_images():

points_raw <- match_images(points_raw, image_root = example_dir)

3. Bare workflow from raw CPCe labels

The .cpc files already contain point labels. In this bare workflow, summarize_images() and make_ml_points() fall back to raw_label because major_category and ml_class have not been populated by a crosswalk.

validate_points(points_raw)
#> # A tibble: 1 × 4
#>   check severity     n details                      
#>   <chr> <chr>    <int> <chr>                        
#> 1 ok    info         0 No validation issues detected
summarize_images(points_raw)
#> # A tibble: 21 × 7
#>    site             transect      image_id      raw_label     n n_points percent
#>    <chr>            <chr>         <chr>         <chr>     <int>    <int>   <dbl>
#>  1 Hole in the Wall HIW_158_W_U-1 HIW_158_W_U-1 AA            4      100       4
#>  2 Hole in the Wall HIW_158_W_U-1 HIW_158_W_U-1 CALG         18      100      18
#>  3 Hole in the Wall HIW_158_W_U-1 HIW_158_W_U-1 LOBO          8      100       8
#>  4 Hole in the Wall HIW_158_W_U-1 HIW_158_W_U-1 P             2      100       2
#>  5 Hole in the Wall HIW_158_W_U-1 HIW_158_W_U-1 PEFL          7      100       7
#>  6 Hole in the Wall HIW_158_W_U-1 HIW_158_W_U-1 PEYS          9      100       9
#>  7 Hole in the Wall HIW_158_W_U-1 HIW_158_W_U-1 S            21      100      21
#>  8 Hole in the Wall HIW_158_W_U-1 HIW_158_W_U-1 SPO          27      100      27
#>  9 Hole in the Wall HIW_158_W_U-1 HIW_158_W_U-1 SS            1      100       1
#> 10 Hole in the Wall HIW_158_W_U-1 HIW_158_W_U-1 TURF          3      100       3
#> # ℹ 11 more rows

points_split_raw <- split_ml_points(points_raw, split_by = "image", seed = 1)
ml_points_raw <- make_ml_points(points_split_raw)

dplyr::count(ml_points_raw, label, class_id, sort = TRUE)
#> # A tibble: 15 × 3
#>    label class_id     n
#>    <chr>    <int> <int>
#>  1 S           11    65
#>  2 SPO         12    44
#>  3 CALG         1    31
#>  4 PEYS        10    13
#>  5 LOBO         2     9
#>  6 P            6     9
#>  7 PEFL         7     7
#>  8 MICR         4     6
#>  9 AA           0     4
#> 10 MFRN         3     4
#> 11 TURF        14     3
#> 12 PEGI         8     2
#> 13 MME          5     1
#> 14 PEME         9     1
#> 15 SS          13     1

4. Optional: read a crosswalk table

crosswalk_path <- system.file(
  "extdata", "pointcoral_example_crosswalk.csv",
  package = "pointcoral"
)

crosswalk <- read_label_crosswalk(crosswalk_path)
dplyr::glimpse(crosswalk)
#> Rows: 116
#> Columns: 12
#> $ raw_code            <chr> "AC", "AP", "APR", "AA", "AF", "AG", "AH", "AT", "…
#> $ raw_label           <chr> "AC", "AP", "APR", "AA", "AF", "AG", "AH", "AT", "…
#> $ full_label          <chr> "Acropora cervicornis", "Acropora prolifera", "Acr…
#> $ clean_label         <chr> "Acropora cervicornis", "Acropora prolifera", "Acr…
#> $ label_class         <chr> "subcategory", "subcategory", "subcategory", "subc…
#> $ major_category      <chr> "CORAL (C)", "CORAL (C)", "CORAL (C)", "CORAL (C)"…
#> $ ml_class            <chr> "CORAL (C)", "CORAL (C)", "CORAL (C)", "CORAL (C)"…
#> $ class_id            <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
#> $ include_in_analysis <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TR…
#> $ include_in_ml       <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TR…
#> $ color_hex           <chr> "#ff4d4d", "#ff4d4d", "#ff4d4d", "#ff4d4d", "#ff4d…
#> $ notes               <chr> "Bundled example mapping for CPCe short labels and…

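5. Optional: check unmapped labels

check_crosswalk() compares the labels in the points table with the crosswalk. In the example output below, the rows shown have severity info and an issue_type beginning unused_in_poi… (truncated in the printout): they appear to list crosswalk codes that do not occur in the example points, not point labels that failed to map.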

check_crosswalk(points_raw, crosswalk)
#> # A tibble: 101 × 12
#>    issue_type     severity raw_code raw_label full_label clean_label label_class
#>    <chr>          <chr>    <chr>    <chr>     <chr>      <chr>       <chr>      
#>  1 unused_in_poi… info     AC       <NA>      <NA>       <NA>        <NA>       
#>  2 unused_in_poi… info     AP       <NA>      <NA>       <NA>        <NA>       
#>  3 unused_in_poi… info     APR      <NA>      <NA>       <NA>        <NA>       
#>  4 unused_in_poi… info     AF       <NA>      <NA>       <NA>        <NA>       
#>  5 unused_in_poi… info     AG       <NA>      <NA>       <NA>        <NA>       
#>  6 unused_in_poi… info     AH       <NA>      <NA>       <NA>        <NA>       
#>  7 unused_in_poi… info     AT       <NA>      <NA>       <NA>        <NA>       
#>  8 unused_in_poi… info     AU       <NA>      <NA>       <NA>        <NA>       
#>  9 unused_in_poi… info     AL       <NA>      <NA>       <NA>        <NA>       
#> 10 unused_in_poi… info     CB       <NA>      <NA>       <NA>        <NA>       
#> # ℹ 91 more rows
#> # ℹ 5 more variables: major_category <chr>, ml_class <chr>, class_id <int>,
#> #   n <int>, details <chr>

6. Optional: apply label standardization

points_clean <- standardize_labels(
  points_raw,
  crosswalk,
  unknown_action = "warn"
)

points_clean |>
  count(raw_label, full_label, label_class, major_category, class_id, sort = TRUE)
#> # A tibble: 15 × 6
#>    raw_label full_label              label_class major_category   class_id     n
#>    <chr>     <chr>                   <chr>       <chr>               <int> <int>
#>  1 S         Sand                    subcategory SAND, PAVEMENT,…        8    65
#>  2 SPO       Sponge                  subcategory SPONGES (S)             2    44
#>  3 CALG      Coralline algae         subcategory CORALLINE ALGAE…        7    31
#>  4 PEYS      Peyssonnelia            subcategory PEYSSONNELIACEAE        5    13
#>  5 LOBO      Lobophora variegata     subcategory MACROALGAE (MA)         4     9
#>  6 P         Pavement                subcategory SAND, PAVEMENT,…        8     9
#>  7 PEFL      Peyssonnelia flavescens subcategory PEYSSONNELIACEAE        5     7
#>  8 MICR      Microdictyon            subcategory MACROALGAE (MA)         4     6
#>  9 AA        Agaricia                subcategory CORAL (C)               0     4
#> 10 MFRN      Orbicella franksi       subcategory CORAL (C)               0     4
#> 11 TURF      Turf                    subcategory MACROALGAE (MA)         4     3
#> 12 PEGI      Peyssonnelia gigaspora  subcategory PEYSSONNELIACEAE        5     2
#> 13 MME       Meandrina meandrites    subcategory CORAL (C)               0     1
#> 14 PEME      Peyssonnelia megasorus  subcategory PEYSSONNELIACEAE        5     1
#> 15 SS        Siderastrea siderea     subcategory CORAL (C)               0     1

7. Validate standardized points

validate_points(points_clean)
#> # A tibble: 1 × 4
#>   check severity     n details                      
#>   <chr> <chr>    <int> <chr>                        
#> 1 ok    info         0 No validation issues detected

8. Create standardized ecological summary tables

summarize_images(points_clean, class_col = "major_category")
#> # A tibble: 12 × 7
#>    site             transect      image_id major_category     n n_points percent
#>    <chr>            <chr>         <chr>    <chr>          <int>    <int>   <dbl>
#>  1 Hole in the Wall HIW_158_W_U-1 HIW_158… CORAL (C)          5      100       5
#>  2 Hole in the Wall HIW_158_W_U-1 HIW_158… CORALLINE ALG…    18      100      18
#>  3 Hole in the Wall HIW_158_W_U-1 HIW_158… MACROALGAE (M…    11      100      11
#>  4 Hole in the Wall HIW_158_W_U-1 HIW_158… PEYSSONNELIAC…    16      100      16
#>  5 Hole in the Wall HIW_158_W_U-1 HIW_158… SAND, PAVEMEN…    23      100      23
#>  6 Hole in the Wall HIW_158_W_U-1 HIW_158… SPONGES (S)       27      100      27
#>  7 Hoyo Terrace     H_211_E_U-1   H_211_E… CORAL (C)          5      100       5
#>  8 Hoyo Terrace     H_211_E_U-1   H_211_E… CORALLINE ALG…    13      100      13
#>  9 Hoyo Terrace     H_211_E_U-1   H_211_E… MACROALGAE (M…     7      100       7
#> 10 Hoyo Terrace     H_211_E_U-1   H_211_E… PEYSSONNELIAC…     7      100       7
#> 11 Hoyo Terrace     H_211_E_U-1   H_211_E… SAND, PAVEMEN…    51      100      51
#> 12 Hoyo Terrace     H_211_E_U-1   H_211_E… SPONGES (S)       17      100      17
summarize_transects(points_clean, class_col = "major_category")
#> # A tibble: 12 × 6
#>    site             transect      major_category              n n_points percent
#>    <chr>            <chr>         <chr>                   <int>    <int>   <dbl>
#>  1 Hole in the Wall HIW_158_W_U-1 CORAL (C)                   5      100       5
#>  2 Hole in the Wall HIW_158_W_U-1 CORALLINE ALGAE (CA)       18      100      18
#>  3 Hole in the Wall HIW_158_W_U-1 MACROALGAE (MA)            11      100      11
#>  4 Hole in the Wall HIW_158_W_U-1 PEYSSONNELIACEAE           16      100      16
#>  5 Hole in the Wall HIW_158_W_U-1 SAND, PAVEMENT, RUBBLE…    23      100      23
#>  6 Hole in the Wall HIW_158_W_U-1 SPONGES (S)                27      100      27
#>  7 Hoyo Terrace     H_211_E_U-1   CORAL (C)                   5      100       5
#>  8 Hoyo Terrace     H_211_E_U-1   CORALLINE ALGAE (CA)       13      100      13
#>  9 Hoyo Terrace     H_211_E_U-1   MACROALGAE (MA)             7      100       7
#> 10 Hoyo Terrace     H_211_E_U-1   PEYSSONNELIACEAE            7      100       7
#> 11 Hoyo Terrace     H_211_E_U-1   SAND, PAVEMENT, RUBBLE…    51      100      51
#> 12 Hoyo Terrace     H_211_E_U-1   SPONGES (S)                17      100      17
summarize_sites(points_clean, class_col = "major_category")
#> # A tibble: 12 × 5
#>    site             major_category                   n n_points percent
#>    <chr>            <chr>                        <int>    <int>   <dbl>
#>  1 Hole in the Wall CORAL (C)                        5      100       5
#>  2 Hole in the Wall CORALLINE ALGAE (CA)            18      100      18
#>  3 Hole in the Wall MACROALGAE (MA)                 11      100      11
#>  4 Hole in the Wall PEYSSONNELIACEAE                16      100      16
#>  5 Hole in the Wall SAND, PAVEMENT, RUBBLE (SPR)    23      100      23
#>  6 Hole in the Wall SPONGES (S)                     27      100      27
#>  7 Hoyo Terrace     CORAL (C)                        5      100       5
#>  8 Hoyo Terrace     CORALLINE ALGAE (CA)            13      100      13
#>  9 Hoyo Terrace     MACROALGAE (MA)                  7      100       7
#> 10 Hoyo Terrace     PEYSSONNELIACEAE                 7      100       7
#> 11 Hoyo Terrace     SAND, PAVEMENT, RUBBLE (SPR)    51      100      51
#> 12 Hoyo Terrace     SPONGES (S)                     17      100      17

summarize_images(points_clean, class_col = "clean_label")
#> # A tibble: 21 × 7
#>    site             transect      image_id    clean_label     n n_points percent
#>    <chr>            <chr>         <chr>       <chr>       <int>    <int>   <dbl>
#>  1 Hole in the Wall HIW_158_W_U-1 HIW_158_W_… Agaricia        4      100       4
#>  2 Hole in the Wall HIW_158_W_U-1 HIW_158_W_… Coralline …    18      100      18
#>  3 Hole in the Wall HIW_158_W_U-1 HIW_158_W_… Lobophora …     8      100       8
#>  4 Hole in the Wall HIW_158_W_U-1 HIW_158_W_… Pavement        2      100       2
#>  5 Hole in the Wall HIW_158_W_U-1 HIW_158_W_… Peyssonnel…     9      100       9
#>  6 Hole in the Wall HIW_158_W_U-1 HIW_158_W_… Peyssonnel…     7      100       7
#>  7 Hole in the Wall HIW_158_W_U-1 HIW_158_W_… Sand           21      100      21
#>  8 Hole in the Wall HIW_158_W_U-1 HIW_158_W_… Siderastre…     1      100       1
#>  9 Hole in the Wall HIW_158_W_U-1 HIW_158_W_… Sponge         27      100      27
#> 10 Hole in the Wall HIW_158_W_U-1 HIW_158_W_… Turf            3      100       3
#> # ℹ 11 more rows

9. Create ML point-label CSVs

points_split <- split_ml_points(points_clean, split_by = "image", seed = 1)
ml_points <- make_ml_points(points_split, class_col = "ml_class")

out_dir <- tempfile("pointcoral-vignette-")
dir.create(out_dir)

write_ml_points_csv(ml_points, file.path(out_dir, "ml"))
#> $labels
#> [1] "/var/folders/7j/dr505g_j3zd9z6m9qdykzc4w0000gn/T//RtmpBT6Yh4/pointcoral-vignette-e95346b87b8/ml/labels.csv"
#> 
#> $class_lookup
#> [1] "/var/folders/7j/dr505g_j3zd9z6m9qdykzc4w0000gn/T//RtmpBT6Yh4/pointcoral-vignette-e95346b87b8/ml/class_lookup.csv"
#> 
#> $labels_train
#> [1] "/var/folders/7j/dr505g_j3zd9z6m9qdykzc4w0000gn/T//RtmpBT6Yh4/pointcoral-vignette-e95346b87b8/ml/labels_train.csv"
#> 
#> $labels_val
#> [1] "/var/folders/7j/dr505g_j3zd9z6m9qdykzc4w0000gn/T//RtmpBT6Yh4/pointcoral-vignette-e95346b87b8/ml/labels_val.csv"
#> 
#> $labels_test
#> [1] "/var/folders/7j/dr505g_j3zd9z6m9qdykzc4w0000gn/T//RtmpBT6Yh4/pointcoral-vignette-e95346b87b8/ml/labels_test.csv"

10. Extract point patches

Patch extraction writes image crops to disk. For large projects, set out_dir to a dedicated output folder.

extract_point_patches(
  points_split,
  image_root = example_dir,
  out_dir = file.path(out_dir, "patches"),
  patch_size = 224,
  class_col = "ml_class",
  edge = "skip"
)

11. Create sparse masks

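Sparse masks are weak labels: only the pixels inside the disk drawn around each annotated point (sized by radius) receive a class. By default, all pixels outside those disks keep the ignore_index value.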

make_sparse_masks(
  points_split,
  image_root = example_dir,
  out_dir = file.path(out_dir, "sparse_masks"),
  radius = 3
)

12. Create QC overlays

write_qc_overlays(
  points_split,
  image_root = example_dir,
  out_dir = file.path(out_dir, "qc"),
  label_col = "ml_class"
)

Full workflow wrapper

run_pointcoral(
  cpce_dir = example_dir,
  image_root = example_dir,
  out_dir = file.path(out_dir, "outputs"),
  make_patches = TRUE,
  make_masks = TRUE,
  make_qc = TRUE
)

Add crosswalk_path = crosswalk_path to the run_pointcoral() call when you want the standardized labels and major classes shown above.