
Commit 5981423
Author: feddelegrand7
Commit message: new comments_scrap function
Parent: dd03a62

38 files changed: +479, -207 lines

DESCRIPTION

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 Package: ralger
 Type: Package
 Title: Easy Web Scraping
-Version: 2.2.4
+Version: 2.3.0
 Authors@R: c(
     person("Mohamed El Fodil", "Ihaddaden", email = "ihaddaden.fodeil@gmail.com", role = c("aut", "cre")),
     person("Ezekiel", "Ogundepo", role = c("ctb")),

NAMESPACE

Lines changed: 1 addition & 0 deletions
@@ -1,6 +1,7 @@
 # Generated by roxygen2: do not edit by hand

 export(attribute_scrap)
+export(comments_scrap)
 export(csv_scrap)
 export(images_noalt_scrap)
 export(images_preview)

NEWS.md

Lines changed: 1 addition & 0 deletions
@@ -4,6 +4,7 @@
 - `xls_scrap`
 - `xlsx_scrap`
 - `csv_scrap`
+- `comments_scrap`

 # ralger 2.2.4


R/comments_scrap.R

Lines changed: 65 additions & 0 deletions
@@ -0,0 +1,65 @@
+#' Scrape HTML comments from a web page
+#'
+#' @description Extracts HTML comments (<!-- comment -->) from a webpage. Useful for detecting hidden notes, debug info, or developer messages.
+#'
+#' @param link Character. The URL of the web page to scrape.
+#' @param askRobot Logical. Should the function check robots.txt before scraping? Default is FALSE.
+#' @return A character vector of HTML comments found on the page.
+#'
+#' @examples
+#' \donttest{
+#' link <- "https://example.com"
+#' comments_scrap(link)
+#' }
+#'
+#' @export
+#' @importFrom xml2 read_html
+#' @importFrom rvest html_nodes
+#' @importFrom robotstxt paths_allowed
+#' @importFrom curl has_internet
+#' @importFrom crayon green bgRed
+comments_scrap <- function(link, askRobot = FALSE) {
+
+  ###################### Ask Robot part ######################################################
+
+  if (askRobot) {
+    if (paths_allowed(link)) {
+      message(green("robots.txt allows scraping this web page"))
+    } else {
+      message(bgRed("WARNING: robots.txt prohibits scraping this web page"))
+      return(NA)
+    }
+  }
+
+  ############################################################################################
+
+  tryCatch(
+    expr = {
+      if (!has_internet()) {
+        stop("No internet connection.")
+      }
+
+      html_content <- read_html(link)
+
+      raw_content <- as.character(html_content)
+
+      comments <- regmatches(
+        raw_content,
+        gregexpr("<!--(.*?)-->", raw_content, perl = TRUE)
+      )[[1]]
+
+      comments <- trimws(comments)
+
+      if (length(comments) == 0) {
+        message("No HTML comments found.")
+        return(NA)
+      }
+
+      return(comments)
+    },
+    error = function(cond) {
+      message("Error while scraping comments: ", cond$message)
+      return(NA)
+    }
+  )
+}
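The extraction above is plain regex matching on the serialized HTML, not DOM traversal. A minimal offline sketch of the same logic (the sample HTML string here is invented for illustration) also shows one caveat: without the `(?s)` modifier, `.` in a PCRE pattern does not match newlines, so a comment spanning several lines is silently skipped:

```r
html <- '<html><!-- Start header --><body><p>Hi</p><!-- TODO:
spans two lines --><!-- End footer --></body></html>'

# Same logic as comments_scrap(): lazy match between <!-- and -->
comments <- trimws(regmatches(html, gregexpr("<!--(.*?)-->", html, perl = TRUE))[[1]])
print(comments)
#> [1] "<!-- Start header -->" "<!-- End footer -->"

# With (?s), . also matches newlines, so the multi-line comment is captured too
all_comments <- regmatches(html, gregexpr("(?s)<!--(.*?)-->", html, perl = TRUE))[[1]]
print(length(all_comments))
#> [1] 3
```

Whether the committed pattern should use `(?s)` depends on whether multi-line comments matter for the intended use case; the behavior above is the one the commit ships.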

R/table_scrap.R

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,7 @@
 #' @examples \donttest{
 #' # Extracting premier ligue 2019/2020 top scorers
 #'
-#' link <- "https://www.topscorersfootball.com/premier-league"
+#' link <- "https://www.topscorersfootball.com/premier-league"
 #' table_scrap(link)
 #'
 #' }

README.Rmd

Lines changed: 9 additions & 0 deletions
@@ -305,6 +305,15 @@ xls_scrap(
 ```


+## `comments_scrap()`
+
+Useful when you want to extract the `HTML` comments within a webpage:
+
+```{r}
+head(comments_scrap("https://posit.co"))
+```
+
+

 # Accessibility related functions


README.md

Lines changed: 21 additions & 10 deletions
@@ -15,12 +15,10 @@ downloads](https://cranlogs.r-pkg.org/badges/grand-total/ralger)](https://cran.r
 <!-- [![license](https://img.shields.io/github/license/mashape/apistatus.svg)](https://choosealicense.com/licenses/mit/) -->
 [![R
 badge](https://img.shields.io/badge/Build%20with-♥%20and%20R-blue)](https://github.com/feddelegrand7/ralger)
-[![R
-badge](https://img.shields.io/badge/-Sponsor-brightgreen)](https://www.buymeacoffee.com/Fodil)
+
 [![R build
 status](https://github.com/feddelegrand7/ralger/workflows/R-CMD-check/badge.svg)](https://github.com/feddelegrand7/ralger/actions)
-[![Codecov test
-coverage](https://codecov.io/gh/feddelegrand7/ralger/branch/master/graph/badge.svg)](https://codecov.io/gh/feddelegrand7/ralger?branch=master)
+
 <!-- badges: end -->

 The goal of **ralger** is to facilitate web scraping in R. For a quick
@@ -248,9 +246,9 @@ easily extract the titles displayed within a specific web page :
 titles <- titles_scrap(link = "https://www.nytimes.com/")

 head(titles)
-#> [1] "New York Times - Top Stories" "More News"
-#> [3] "The AthleticSports coverage" "Well"
-#> [5] "Culture and Lifestyle" "AudioPodcasts and narrated articles"
+#> [1] "New York Times - Top Stories" "What to Watch and Read"
+#> [3] "More News" "The AthleticSports coverage"
+#> [5] "Well" "Culture and Lifestyle"
 ```

 Further, it’s possible to filter the results using the `contain`
@@ -399,6 +397,20 @@ xls_scrap(
 )
 ```

+## `comments_scrap()`
+
+Useful when you want to extract the `HTML` comments within a webpage:
+
+``` r
+head(comments_scrap("https://posit.co"))
+#> [1] "<!-- Start VWO Common Smartcode -->"
+#> [2] "<!-- End VWO Common Smartcode -->"
+#> [3] "<!-- Start VWO Async SmartCode -->"
+#> [4] "<!-- End VWO Async SmartCode -->"
+#> [5] "<!-- This site is optimized with the Yoast SEO plugin v25.2 - https://yoast.com/wordpress/plugins/seo/ -->"
+#> [6] "<!-- / Yoast SEO plugin. -->"
+```
+
 # Accessibility related functions

 ## `images_noalt_scrap()`
@@ -410,9 +422,8 @@ people using a screen reader:
 ``` r

 images_noalt_scrap(link = "https://www.r-consortium.org/")
-#> [1] <img loading="lazy" src="./posts/r-consortium-awards-first-round-of-2025-isc-grants/isc-grantees-2025-1.png" class="thumbnail-image card-img" style="height: 150px;">
-#> [2] <img loading="lazy" src="./posts/exploring-kuzco-making-computer-vision-for-r-easily-accessible/frankthull.png" class="thumbnail-image card-img" style="height: 150px;">
-#> [3] <img loading="lazy" src="./posts/quantifying-participation-risk-with-r-and-r-shiny-a-new-frontier-in-financial-risk-modeling/demo.png" class="thumbnail-image card-img" style="height: 150px;">
+#> No images without 'alt' attribute found at: https://www.r-consortium.org/
+#> NULL
 ```

 If no images without `alt` attributes are found, the function returns
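The `askRobot` branch of the new `comments_scrap()` relies on `robotstxt::paths_allowed()`. A small standalone sketch of the same politeness check (the wrapper name `check_robot` is invented here; the messages mirror the committed code):

```r
library(robotstxt)

# Hypothetical helper mirroring the askRobot branch of comments_scrap()
check_robot <- function(link) {
  if (isTRUE(paths_allowed(link))) {
    message("robots.txt allows scraping this web page")
    TRUE
  } else {
    message("robots.txt prohibits scraping this web page")
    FALSE
  }
}

# Requires internet access, so not run here:
# check_robot("https://example.com")
```

Calling it before any scraping function keeps the robots.txt check reusable across the package's `*_scrap()` helpers.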

docs/404.html
Lines changed: 1 addition & 1 deletion (generated file, not rendered)

docs/CODE_OF_CONDUCT.html
Lines changed: 1 addition & 1 deletion (generated file, not rendered)

docs/LICENSE-text.html
Lines changed: 1 addition & 1 deletion (generated file, not rendered)

0 commit comments