# lightsf - Collection of georeferenced and spatial datasets from different domains
# Version 0.1.0
# Copyright (C) 2025 Ingrid Romero Pinilla
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.



#' Mildly Clustered Points in North Carolina, United States
#'
#' This dataset, `nc_points`, is a data frame containing a set of spatial
#' point coordinates representing mildly clustered points in North Carolina,
#' United States. The dataset consists of 2,304 observations and 2 variables,
#' corresponding to the X and Y coordinates of the points. The data can be used
#' for examples of point pattern analysis, clustering, or spatial statistics.
#'
#' The dataset name has been kept as `nc_points` to avoid confusion with other
#' datasets in the R ecosystem. This naming convention helps distinguish this
#' dataset as part of the `lightsf` package and assists users in identifying its
#' specific characteristics. The name carries no `_df` suffix because the
#' dataset primarily represents a spatial point pattern rather than general
#' tabular survey data. The original content has not been modified in any way.
#'
#' @name nc_points
#' @format A data frame with 2,304 observations and 2 variables:
#' \describe{
#'   \item{X}{X coordinate (numeric)}
#'   \item{Y}{Y coordinate (numeric)}
#' }
#' @source Data taken from the \pkg{chopin} package version 0.9.4.
#' @keywords datasets
#' @usage data(nc_points)
#' @export
load("data/nc_points.rda")
# NOTE(review): the relative path above is resolved against the working
# directory in effect when this file is evaluated; confirm that is always the
# package root. Datasets are more commonly documented on a quoted name without
# `@export` -- confirm this pattern is intentional.
NULL



#' Washington, D.C. Census Tract Data (ACS 2020)
#'
#' This dataset, `dc_poly`, is an `sf` object containing population and median
#' household income information for census tracts in Washington, D.C., based on
#' the 2020 American Community Survey (ACS). It also includes spatial polygon
#' geometries, allowing the data to be used directly for mapping and spatial
#' analysis, such as creating choropleth maps of demographic and socioeconomic
#' indicators.
#'
#' The dataset consists of 206 observations (census tracts) and 5 variables.
#' The geometry column contains polygon boundaries for each tract.
#'
#' The dataset name has been kept as `dc_poly` to avoid confusion with other
#' datasets in the R ecosystem. This naming convention helps distinguish this
#' dataset as part of the `lightsf` package and assists users in identifying
#' its specific characteristics. The original content has not been modified in
#' any way.
#'
#' @name dc_poly
#' @format An `sf` data frame with 206 observations and 5 variables:
#' \describe{
#'   \item{GEOID}{Unique identifier for the census tract (character)}
#'   \item{NAME}{Census tract name and jurisdiction (character)}
#'   \item{geometry}{Polygon geometry representing the tract boundaries (\code{sfc_POLYGON})}
#'   \item{B01003_001}{Total population of the tract (numeric)}
#'   \item{B19013_001}{Median household income of the tract (numeric, in USD)}
#' }
#' @source Data taken from the \pkg{bivariateLeaflet} package version 0.1.0.
#' @keywords datasets
#' @usage data(dc_poly)
#' @export
load("data/dc_poly.rda")
# NOTE(review): the relative path above is resolved against the working
# directory in effect when this file is evaluated; confirm that is always the
# package root. Datasets are more commonly documented on a quoted name without
# `@export` -- confirm this pattern is intentional.
NULL



#' Spatial Patterns of Conflict in Africa (1966–1978)
#'
#' This dataset, `afcon_poly`, is a data frame summarizing spatial patterns of
#' conflict across 42 African countries between 1966 and 1978. The dataset was
#' originally used in Anselin (1995) to study spatial autocorrelation in
#' political conflict. It excludes South West Africa, Spanish Equatorial Africa,
#' and Spanish Sahara. The dataset includes centroid coordinates, country names,
#' and the total number of recorded conflicts during this period.
#'
#' The dataset consists of 42 observations (countries) and 5 variables.
#'
#' The dataset name has been kept as `afcon_poly` to avoid confusion with other
#' datasets in the R ecosystem. This naming convention helps distinguish this
#' dataset as part of the `lightsf` package and assists users in identifying its
#' specific characteristics. The original content has not been modified in any
#' way.
#'
#' @name afcon_poly
#' @format A data frame with 42 observations and 5 variables:
#' \describe{
#'   \item{x}{Longitude coordinate of the country centroid (numeric)}
#'   \item{y}{Latitude coordinate of the country centroid (numeric)}
#'   \item{totcon}{Total number of conflicts recorded, 1966–1978 (numeric)}
#'   \item{name}{Name of the country (factor with 42 levels)}
#'   \item{id}{Numeric country identifier (numeric)}
#' }
#' @source Data taken from the \pkg{spData} package version 2.3.4.
#' @references Anselin, L. (1995). Local Indicators of Spatial Association—LISA.
#' \emph{Geographical Analysis}, 27(2), 93–115.
#' @keywords datasets
#' @usage data(afcon_poly)
#' @export
load("data/afcon_poly.rda")
# NOTE(review): the relative path above is resolved against the working
# directory in effect when this file is evaluated; confirm that is always the
# package root. Datasets are more commonly documented on a quoted name without
# `@export` -- confirm this pattern is intentional.
NULL



#' Infant Mortality in Auckland, New Zealand (1977–1985)
#'
#' This dataset, `auckland_poly`, is a data frame containing information on
#' infant mortality in census area units (CAUs) of Auckland, New Zealand. The
#' dataset has 167 rows, each corresponding to a CAU, and 4 columns with
#' geographic coordinates and mortality-related statistics. It is often used in
#' spatial epidemiology studies and in demonstrations of spatial analysis
#' methods.
#'
#' In addition to the `auckland_poly` data frame, the original source also
#' provides two related spatial objects: `auckland.nb`, a neighbour list of CAUs
#' based on contiguity, and `auckpolys`, a polylist object representing polygon
#' boundaries. These are not included here, but can be generated from the
#' original dataset using spatial analysis workflows.
#'
#' The dataset name has been kept as `auckland_poly` to avoid confusion with
#' other datasets in the R ecosystem. This naming convention helps distinguish
#' this dataset as part of the `lightsf` package and assists users in identifying
#' its specific characteristics. The original content has not been modified in
#' any way.
#'
#' @name auckland_poly
#' @format A data frame with 167 observations and 4 variables:
#' \describe{
#'   \item{Easting}{Easting coordinate (numeric)}
#'   \item{Northing}{Northing coordinate (numeric)}
#'   \item{Deaths.1977.85}{Number of infant deaths between 1977 and 1985 (numeric)}
#'   \item{Under.5.1981}{Population under age 5 in 1981 (numeric)}
#' }
#' @source Data taken from the \pkg{spData} package version 2.3.4.
#' @keywords datasets
#' @usage data(auckland_poly)
#' @export
load("data/auckland_poly.rda")
# NOTE(review): the relative path above is resolved against the working
# directory in effect when this file is evaluated; confirm that is always the
# package root. Datasets are more commonly documented on a quoted name without
# `@export` -- confirm this pattern is intentional.
NULL

#' Housing Sales in Baltimore, Maryland (1978)
#'
#' This dataset, `baltimore_pts`, is a data frame containing housing sales data
#' and property characteristics for Baltimore, Maryland, in 1978. It has been
#' widely used in spatial econometrics and hedonic regression studies. Each row
#' corresponds to a house, including sale price, structural attributes, lot size,
#' and geographic coordinates (X, Y) on the Maryland grid (projection type
#' unknown).
#'
#' The dataset consists of 211 observations (houses) and 17 variables.
#'
#' The dataset name has been kept as `baltimore_pts` to avoid confusion with
#' other datasets in the R ecosystem. This naming convention helps distinguish
#' this dataset as part of the `lightsf` package and assists users in identifying
#' its specific characteristics. The original content has not been modified in
#' any way.
#'
#' @name baltimore_pts
#' @format A data frame with 211 observations and 17 variables:
#' \describe{
#'   \item{STATION}{Census tract station identifier (integer)}
#'   \item{PRICE}{House sale price (numeric)}
#'   \item{NROOM}{Number of rooms (numeric)}
#'   \item{DWELL}{Dwelling type indicator (numeric)}
#'   \item{NBATH}{Number of bathrooms (numeric)}
#'   \item{PATIO}{Presence of patio (numeric indicator)}
#'   \item{FIREPL}{Presence of fireplace (numeric indicator)}
#'   \item{AC}{Presence of air conditioning (numeric indicator)}
#'   \item{BMENT}{Presence of basement (numeric indicator)}
#'   \item{NSTOR}{Number of stories (numeric)}
#'   \item{GAR}{Presence of garage (numeric indicator)}
#'   \item{AGE}{Age of the dwelling (numeric)}
#'   \item{CITCOU}{City/county indicator (numeric)}
#'   \item{LOTSZ}{Lot size (numeric)}
#'   \item{SQFT}{Interior square footage (numeric)}
#'   \item{X}{X coordinate (numeric)}
#'   \item{Y}{Y coordinate (numeric)}
#' }
#' @source Data taken from the \pkg{spData} package version 2.3.4.
#' @keywords datasets
#' @usage data(baltimore_pts)
#' @export
load("data/baltimore_pts.rda")
# NOTE(review): the relative path above is resolved against the working
# directory in effect when this file is evaluated; confirm that is always the
# package root. Datasets are more commonly documented on a quoted name without
# `@export` -- confirm this pattern is intentional.
NULL



#' Boston Housing Data with Geographic Coordinates
#'
#' This dataset, \code{boston_pts}, is a data frame containing information on housing values
#' and neighborhood characteristics in the Boston area. It is based on the classic dataset
#' by Harrison and Rubinfeld (1978), corrected for minor errors and augmented with the latitude
#' and longitude of the observations. Gilley and Pace also note that the \code{MEDV} variable
#' is censored, with values at or over USD 50,000 set to USD 50,000.
#'
#' The dataset consists of 506 observations and 20 variables, including socio-economic,
#' environmental, and housing characteristics. Geographic coordinates (longitude and latitude)
#' are provided for spatial analysis. Related data objects include \code{boston.utm}, a matrix
#' of tract point coordinates projected to UTM zone 19, and \code{boston.soi}, a sphere of
#' influence neighbors list.
#'
#' The dataset name has been kept as \code{boston_pts} to avoid confusion with other datasets
#' in the R ecosystem. This naming convention helps distinguish this dataset as part of the
#' \code{lightsf} package and assists users in identifying its specific characteristics.
#' The suffix \code{pts} indicates that the dataset includes spatial point information.
#' The original content has not been modified in any way.
#'
#' @name boston_pts
#' @format A data frame with 506 observations and 20 variables:
#' \describe{
#'   \item{TOWN}{Town name (factor with 92 levels)}
#'   \item{TOWNNO}{Town number (integer)}
#'   \item{TRACT}{Census tract number (integer)}
#'   \item{LON}{Longitude (numeric)}
#'   \item{LAT}{Latitude (numeric)}
#'   \item{MEDV}{Median value of owner-occupied homes in USD 1,000s (numeric, censored at 50)}
#'   \item{CMEDV}{Corrected median value of owner-occupied homes (numeric)}
#'   \item{CRIM}{Per capita crime rate by town (numeric)}
#'   \item{ZN}{Proportion of residential land zoned for lots over 25,000 sq.ft. (numeric)}
#'   \item{INDUS}{Proportion of non-retail business acres per town (numeric)}
#'   \item{CHAS}{Charles River dummy variable (factor: "0" = not bounded, "1" = bounded)}
#'   \item{NOX}{Nitric oxides concentration (parts per 10 million, numeric)}
#'   \item{RM}{Average number of rooms per dwelling (numeric)}
#'   \item{AGE}{Proportion of owner-occupied units built prior to 1940 (numeric)}
#'   \item{DIS}{Weighted distances to five Boston employment centers (numeric)}
#'   \item{RAD}{Index of accessibility to radial highways (integer)}
#'   \item{TAX}{Full-value property-tax rate per USD 10,000 (integer)}
#'   \item{PTRATIO}{Pupil-teacher ratio by town (numeric)}
#'   \item{B}{Transformed proportion of Black residents, computed as 1000(Bk - 0.63)^2 (numeric)}
#'   \item{LSTAT}{Percentage of lower status of the population (numeric)}
#' }
#' @source Data taken from the \pkg{spData} package version 2.3.4.
#' @keywords datasets
#' @usage data(boston_pts)
#' @export
load("data/boston_pts.rda")
# NOTE(review): the relative path above is resolved against the working
# directory in effect when this file is evaluated; confirm that is always the
# package root. Datasets are more commonly documented on a quoted name without
# `@export` -- confirm this pattern is intentional.
NULL


#' World Coffee Production Data
#'
#' This dataset, \code{coffee_poly}, is a tibble containing estimates of global coffee production
#' by country. The data represent thousands of 60 kg bags of coffee produced in 2016 and 2017.
#' It is intended for teaching purposes only and not for research use.
#'
#' The dataset consists of 47 observations (countries) and 3 variables, including the country name
#' and production values for two years. The data provide a simple example of tabular international
#' production figures that can be used in spatial and non-spatial analyses.
#'
#' The dataset name has been kept as \code{coffee_poly} to avoid confusion with other datasets
#' in the R ecosystem. This naming convention helps distinguish this dataset as part of the
#' \code{lightsf} package and assists users in identifying its specific characteristics.
#' The suffix \code{poly} indicates that the dataset can be linked to polygon boundaries for mapping.
#' The original content has not been modified in any way.
#'
#' @name coffee_poly
#' @format A tibble with 47 observations and 3 variables:
#' \describe{
#'   \item{name_long}{Country name (character)}
#'   \item{coffee_production_2016}{Coffee production in 2016, in thousands of 60 kg bags (integer)}
#'   \item{coffee_production_2017}{Coffee production in 2017, in thousands of 60 kg bags (integer)}
#' }
#' @source Data taken from the \pkg{spData} package version 2.3.4.
#' @keywords datasets
#' @usage data(coffee_poly)
#' @export
load("data/coffee_poly.rda")
# NOTE(review): the relative path above is resolved against the working
# directory in effect when this file is evaluated; confirm that is always the
# package root. Datasets are more commonly documented on a quoted name without
# `@export` -- confirm this pattern is intentional.
NULL


#' Columbus Neighborhood Data (1980)
#'
#' This dataset, \code{columbus_poly}, is a data frame containing socioeconomic and housing
#' characteristics for 49 neighborhoods in Columbus, Ohio, based on 1980 data.
#' The dataset is widely used in spatial econometrics and geographic analysis.
#'
#' In addition to the attributes, the original dataset also included a polygon list
#' of neighborhood boundaries, a centroid matrix, and a neighbor list object, although
#' these are not part of \code{columbus_poly}. The matrix \code{bbs} is deprecated but
#' retained in other packages for compatibility.
#'
#' The dataset name has been kept as \code{columbus_poly} to avoid confusion with other datasets
#' in the R ecosystem. This naming convention helps distinguish this dataset as part of the
#' \code{lightsf} package and assists users in identifying its specific characteristics.
#' The suffix \code{poly} indicates that the dataset can be linked to polygon boundaries.
#' The original content has not been modified in any way.
#'
#' @name columbus_poly
#' @format A data frame with 49 observations and 22 variables:
#' \describe{
#'   \item{AREA}{Area of the neighborhood (numeric)}
#'   \item{PERIMETER}{Perimeter of the neighborhood (numeric)}
#'   \item{COLUMBUS.}{Identifier variable (integer)}
#'   \item{COLUMBUS.I}{Identifier variable (integer)}
#'   \item{POLYID}{Polygon ID (integer)}
#'   \item{NEIG}{Neighborhood ID (integer)}
#'   \item{HOVAL}{Housing value (numeric)}
#'   \item{INC}{Household income (numeric)}
#'   \item{CRIME}{Crime rate (numeric)}
#'   \item{OPEN}{Open space (numeric)}
#'   \item{PLUMB}{Plumbing quality (numeric)}
#'   \item{DISCBD}{Distance to central business district (numeric)}
#'   \item{X}{X coordinate of centroid (numeric)}
#'   \item{Y}{Y coordinate of centroid (numeric)}
#'   \item{AREA}{Area variable (numeric, duplicated)}
#'   \item{NSA}{Neighborhood spatial attribute A (numeric)}
#'   \item{NSB}{Neighborhood spatial attribute B (numeric)}
#'   \item{EW}{East/West indicator (numeric)}
#'   \item{CP}{Central place indicator (numeric)}
#'   \item{THOUS}{Thousands of dollars (numeric)}
#'   \item{NEIGNO}{Neighborhood number (numeric)}
#'   \item{PERIM}{Perimeter variable (numeric, duplicated)}
#' }
#' @source Data taken from the \pkg{spData} package version 2.3.4.
#' @keywords datasets
#' @usage data(columbus_poly)
#' @export
load("data/columbus_poly.rda")
# NOTE(review): the relative path above is resolved against the working
# directory in effect when this file is evaluated; confirm that is always the
# package root. Datasets are more commonly documented on a quoted name without
# `@export` -- confirm this pattern is intentional.
NULL


#' Cycle Hire Stations in London
#'
#' This dataset, \code{cyclehire_pts}, is an \code{sf} object containing point locations
#' of cycle hire stations across London. Each observation represents a hire point with
#' information about its name, area, number of available bikes, and number of empty
#' docking slots at the time of data collection.
#'
#' The dataset name has been kept as \code{cyclehire_pts} to avoid confusion with other datasets
#' in the R ecosystem. This naming convention helps distinguish this dataset as part of the
#' \code{lightsf} package and assists users in identifying its specific characteristics.
#' The suffix \code{pts} indicates that the dataset contains point geometries.
#' The original content has not been modified in any way.
#'
#' @name cyclehire_pts
#' @format An \code{sf} object with 742 observations and 6 variables:
#' \describe{
#'   \item{id}{Station identifier (integer)}
#'   \item{name}{Name of the station (factor)}
#'   \item{area}{Area of London where the station is located (factor with 121 levels)}
#'   \item{nbikes}{Number of bikes available (integer)}
#'   \item{nempty}{Number of empty docking slots (integer)}
#'   \item{geometry}{Point geometry in XY coordinates (\code{sfc_POINT})}
#' }
#' @source Data taken from the \pkg{spData} package version 2.3.4.
#' @keywords datasets
#' @usage data(cyclehire_pts)
#' @export
load("data/cyclehire_pts.rda")
# NOTE(review): the relative path above is resolved against the working
# directory in effect when this file is evaluated; confirm that is always the
# package root. Datasets are more commonly documented on a quoted name without
# `@export` -- confirm this pattern is intentional.
NULL


#' World Bank Socioeconomic Indicators by Country
#'
#' This dataset, \code{worldbank_poly}, is a data frame containing selected socioeconomic indicators
#' compiled from the World Bank. The dataset includes 177 observations (countries) and 7 variables
#' such as Human Development Index (HDI), urban population percentage, unemployment rate,
#' population growth, and literacy rate. Some values may be missing.
#'
#' The dataset name has been kept as \code{worldbank_poly} to avoid confusion with other datasets
#' in the R ecosystem. This naming convention helps distinguish this dataset as part of the
#' \code{lightsf} package and assists users in identifying its specific characteristics.
#' The original content has not been modified in any way.
#'
#' @name worldbank_poly
#' @format A data frame (tibble) with 177 observations and 7 variables:
#' \describe{
#'   \item{name}{Country name (character)}
#'   \item{iso_a2}{ISO 2-letter country code (character)}
#'   \item{HDI}{Human Development Index (numeric)}
#'   \item{urban_pop}{Urban population percentage (numeric)}
#'   \item{unemployment}{Unemployment rate (numeric)}
#'   \item{pop_growth}{Population growth rate (numeric)}
#'   \item{literacy}{Literacy rate (numeric)}
#' }
#' @source Data taken from the \pkg{spData} package version 2.3.4.
#' @keywords datasets
#' @usage data(worldbank_poly)
#' @export
load("data/worldbank_poly.rda")
# NOTE(review): the relative path above is resolved against the working
# directory in effect when this file is evaluated; confirm that is always the
# package root. Datasets are more commonly documented on a quoted name without
# `@export` -- confirm this pattern is intentional.
NULL



#' Bacterial Production Sampling Points in Lake St. Pierre (2005)
#'
#' This dataset, \code{bacprodxy_pts}, is a data frame containing the geographical coordinates
#' (longitude and latitude) of 25 sampling locations where bacterial production was measured
#' in Lake St. Pierre (Québec, Canada). The samples were collected on August 18, 2005.
#'
#' The dataset name has been kept as \code{bacprodxy_pts} to avoid confusion with other datasets
#' in the R ecosystem. This naming convention helps distinguish this dataset as part of the
#' \code{lightsf} package and assists users in identifying its specific characteristics.
#' The original content has not been modified in any way.
#'
#' @name bacprodxy_pts
#' @format A data frame with 25 observations and 2 variables:
#' \describe{
#'   \item{Longitude}{Longitude coordinate of the sampling point (numeric)}
#'   \item{Latitude}{Latitude coordinate of the sampling point (numeric)}
#' }
#' @source Data taken from the \pkg{adespatial} package version 0.3-28.
#' @keywords datasets
#' @usage data(bacprodxy_pts)
#' @export
load("data/bacprodxy_pts.rda")
# NOTE(review): the relative path above is resolved against the working
# directory in effect when this file is evaluated; confirm that is always the
# package root. Datasets are more commonly documented on a quoted name without
# `@export` -- confirm this pattern is intentional.
NULL


#' Mastigouche Lake Network Data Set
#'
#' This dataset, \code{mastigouche_poly}, is a list containing spatial and network information
#' for 42 lakes in the Mastigouche region. The dataset includes the XY geographical coordinates
#' of the lakes and a site-by-edge matrix describing how the lakes influence each other.
#' The network is defined by 66 directional edges of influence between the lakes.
#'
#' The dataset name has been kept as \code{mastigouche_poly} to avoid confusion with other datasets
#' in the R ecosystem. This naming convention helps distinguish this dataset as part of the
#' \code{lightsf} package and assists users in identifying its specific characteristics.
#' The original content has not been modified in any way.
#'
#' @name mastigouche_poly
#' @format A list with 2 elements:
#' \describe{
#'   \item{xy}{A data frame with 42 observations and 2 variables: X (numeric), Y (numeric) coordinates of the lakes}
#'   \item{siteEdge}{An integer site-by-edge matrix describing 66 edges of influence among lakes}
#' }
#' @source Data taken from the \pkg{adespatial} package version 0.3-28.
#' @keywords datasets
#' @usage data(mastigouche_poly)
#' @export
load("data/mastigouche_poly.rda")
# NOTE(review): the relative path above is resolved against the working
# directory in effect when this file is evaluated; confirm that is always the
# package root. Datasets are more commonly documented on a quoted name without
# `@export` -- confirm this pattern is intentional.
NULL



#' Countries Latitude-Longitude Dataset
#'
#' This dataset, \code{countries_pts}, is a data frame containing information on 245 countries,
#' including their names and geographical coordinates (latitude and longitude).
#' It provides a simple reference for mapping and spatial analysis.
#'
#' The dataset name has been kept as \code{countries_pts} to avoid confusion with other datasets
#' in the R ecosystem. This naming convention helps distinguish this dataset as part of the
#' \code{lightsf} package and assists users in identifying its specific characteristics.
#' The original content has not been modified in any way.
#'
#' @name countries_pts
#' @format A data frame with 245 observations and 4 variables:
#' \describe{
#'   \item{country}{Country code or identifier (character)}
#'   \item{latitude}{Latitude of the country (numeric)}
#'   \item{longitude}{Longitude of the country (numeric)}
#'   \item{name}{Country name (character)}
#' }
#' @source Data taken from Kaggle: \url{https://www.kaggle.com/datasets/arviinndn/countries}
#' @keywords datasets
#' @usage data(countries_pts)
#' @export
load("data/countries_pts.rda")
# NOTE(review): the relative path above is resolved against the working
# directory in effect when this file is evaluated; confirm that is always the
# package root. Datasets are more commonly documented on a quoted name without
# `@export` -- confirm this pattern is intentional.
NULL



#' Georeferenced Pedestrian Car Collisions (2015, Santiago de Chile)
#'
#' This dataset, \code{atropellados_pts}, is a data frame containing information on pedestrian
#' car collisions that occurred in Santiago de Chile in 2015. Each record includes
#' the geographical coordinates of the accident, location description, and the number
#' of victims categorized by severity (fatal, serious, less serious, and minor).
#'
#' The dataset name has been kept as \code{atropellados_pts} to avoid confusion with other datasets
#' in the R ecosystem. This naming convention helps distinguish this dataset as part of the
#' \code{lightsf} package and assists users in identifying its specific characteristics.
#' The original content has not been modified in any way.
#'
#' @name atropellados_pts
#' @format A data frame with 1,841 observations and 8 variables:
#' \describe{
#'   \item{X}{Longitude coordinate of the accident (numeric)}
#'   \item{Y}{Latitude coordinate of the accident (numeric)}
#'   \item{Ubicacion}{Location description of the accident (character)}
#'   \item{Fallecidos}{Number of fatalities (integer)}
#'   \item{Graves}{Number of serious injuries (integer)}
#'   \item{MenosGrave}{Number of less serious injuries (integer)}
#'   \item{Leve}{Number of minor injuries (integer)}
#'   \item{Accidentes}{Total number of accidents at the location (integer)}
#' }
#' @source Data taken from Kaggle: \url{https://www.kaggle.com/datasets/sandorabad/georeferenced-car-accidents-santiago-de-chile?select=AtropellosGS2015.csv}
#' @keywords datasets
#' @usage data(atropellados_pts)
#' @export
load("data/atropellados_pts.rda")
# NOTE(review): the relative path above is resolved against the working
# directory in effect when this file is evaluated; confirm that is always the
# package root. Datasets are more commonly documented on a quoted name without
# `@export` -- confirm this pattern is intentional.
NULL



#' Georeferenced Forest Fires in Chile (2016–2017 Season)
#'
#' This dataset, `conafchile_pts`, is a data frame containing georeferenced forest fire records and
#' associated characteristics between July 1, 2016, and June 30, 2017. The dataset includes detailed
#' information such as location, administrative codes, fire causes, vegetation affected, and surface
#' area impacted. The data were compiled by CONAF and correspond to forest fires recorded in Chile.
#'
#' The dataset name has been kept as `conafchile_pts` to avoid confusion with other datasets
#' in the R ecosystem. This naming convention helps distinguish this dataset as part of the
#' `lightsf` package and assists users in identifying its specific characteristics.
#' The suffix `pts` indicates that the dataset contains georeferenced point data.
#' The original content has not been modified in any way.
#'
#' @name conafchile_pts
#' @format A data frame with 5,234 observations and 30 variables:
#' \describe{
#'   \item{X}{Index of the fire record (integer)}
#'   \item{temporada}{Fire season (character, e.g., "2016-2017")}
#'   \item{codreg}{Region code (integer)}
#'   \item{codprov}{Province code (integer)}
#'   \item{codcom}{Commune code (integer)}
#'   \item{ambito}{Institutional scope (character, e.g., "Conaf")}
#'   \item{numero}{Fire identification number (numeric)}
#'   \item{nombre_inc}{Name of the fire incident (character)}
#'   \item{utm_este}{UTM Easting coordinate (numeric)}
#'   \item{utm_norte}{UTM Northing coordinate (numeric)}
#'   \item{inicio_c}{Location of ignition (character)}
#'   \item{combus_i}{Initial fuel type (character)}
#'   \item{causa_gene}{General cause code (numeric)}
#'   \item{causa_espe}{Specific cause code (character)}
#'   \item{pino_0010}{Surface with pine (0–10 years old) affected (numeric)}
#'   \item{pino_11_17}{Surface with pine (11–17 years old) affected (numeric)}
#'   \item{pino_18}{Surface with pine (18+ years old) affected (numeric)}
#'   \item{eucalipto}{Surface with eucalyptus affected (numeric)}
#'   \item{otras_plan}{Surface with other plantations affected (numeric)}
#'   \item{total_plan}{Total surface of plantations affected (numeric)}
#'   \item{arbolado}{Surface of woodland affected (numeric)}
#'   \item{matorral}{Surface of shrubland affected (numeric)}
#'   \item{pastizal}{Surface of grassland affected (numeric)}
#'   \item{total_veg}{Total surface of vegetation affected (numeric)}
#'   \item{agricola}{Surface of agricultural land affected (numeric)}
#'   \item{desechos}{Surface of waste material affected (numeric)}
#'   \item{total_otra}{Total surface of other land use affected (numeric)}
#'   \item{sup_t_a}{Total affected surface area (numeric)}
#'   \item{long}{Longitude or projected coordinate (numeric)}
#'   \item{lat}{Latitude or projected coordinate (numeric)}
#' }
#' @source Data taken from Kaggle:
#' \url{https://www.kaggle.com/datasets/sandorabad/georeferenced-forestfires-2017-chile}
#' @keywords datasets
#' @usage data(conafchile_pts)
#' @export
load("data/conafchile_pts.rda")
# NOTE(review): the relative path above is resolved against the working
# directory in effect when this file is evaluated; confirm that is always the
# package root. Datasets are more commonly documented on a quoted name without
# `@export` -- confirm this pattern is intentional.
NULL



#' California Housing Prices (1990 Census)
#'
#' This dataset, `housing_pts`, is a data frame containing information on median house prices
#' for California districts, derived from the 1990 census. It includes geographic coordinates,
#' demographic and housing characteristics, and district-level income and housing attributes.
#' The dataset consists of 20,640 observations and 10 variables. Missing values may be present
#' in some variables.
#'
#' The dataset name has been kept as `housing_pts` to avoid confusion with other datasets
#' in the R ecosystem. This naming convention helps distinguish this dataset as part of the
#' `lightsf` package and assists users in identifying its specific characteristics. The suffix
#' `pts` indicates that the dataset contains georeferenced point data. The original content
#' has not been modified in any way.
#'
#' @name housing_pts
#' @format A data frame with 20,640 observations and 10 variables:
#' \describe{
#'   \item{longitude}{Longitude coordinate of the district (numeric)}
#'   \item{latitude}{Latitude coordinate of the district (numeric)}
#'   \item{housing_median_age}{Median age of houses in the district (numeric)}
#'   \item{total_rooms}{Total number of rooms in the district (numeric)}
#'   \item{total_bedrooms}{Total number of bedrooms in the district (numeric)}
#'   \item{population}{Population of the district (numeric)}
#'   \item{households}{Number of households in the district (numeric)}
#'   \item{median_income}{Median income in the district (numeric)}
#'   \item{median_house_value}{Median house value in the district (numeric, in US dollars)}
#'   \item{ocean_proximity}{Proximity of the district to the ocean (character string categories)}
#' }
#' @source Data taken from Kaggle:
#'   \url{https://www.kaggle.com/datasets/camnugent/california-housing-prices}
#' @keywords datasets
#' @usage data(housing_pts)
#' @export
load("data/housing_pts.rda")
# NOTE(review): the relative path above is resolved against the working
# directory in effect when this file is evaluated; confirm that is always the
# package root. Datasets are more commonly documented on a quoted name without
# `@export` -- confirm this pattern is intentional.
NULL


