{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Extract random points\n",
    "\n",
    "This notebook allows to randomly select a certain number of samples (points) from a categorical raster."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "import random\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import geopandas as gpd\n",
    "import rasterio\n",
    "from pathlib import Path"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Set directory**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "computer_path = '/export/miro/ndeffense/LBRAT2104/'\n",
    "grp_nb        = '99'\n",
    "\n",
    "data_path = f'{computer_path}data/'                        # Directory with data shared by the assistant\n",
    "work_path = f'{computer_path}STUDENTS/GROUP_{grp_nb}/TP/'  # Directory for all work files\n",
    "\n",
    "\n",
    "# Input directory\n",
    "land_cover_path = f'{work_path}LAND_COVER/'\n",
    "\n",
    "# Output directory\n",
    "points_path = f'{work_path}SAMPLES_POINTS/'\n",
    "\n",
    "Path(points_path).mkdir(parents=True, exist_ok=True)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Set filenames**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "categorical_tif = f'{land_cover_path}Corine_Land_Cover_32631_ROI.tif'\n",
    "\n",
    "randomly_selected_points_shp =  f'{land_cover_path}Corine_Land_Cover_selected_points_ROI.shp'"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Set parameters**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "no_data = 999\n",
    "\n",
    "epsg = '32631'  # Projection of the categorical GeoTIFF\n",
    "\n",
    "# Select a different number of points per class\n",
    "classes_list = [1,2]\n",
    "nb_points_per_class_list = [50,75]\n",
    "\n",
    "# Select the same number of points for every classes\n",
    "classes_list = []\n",
    "np_points_for_all_classes = 10"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Build dataframe with the number of points to select for each class"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Class</th>\n",
       "      <th>Nb points</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>7</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>10</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>11</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>12</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>16</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>20</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>21</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>23</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>25</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>40</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Class  Nb points\n",
       "0       1         10\n",
       "1       2         10\n",
       "2       3         10\n",
       "3       4         10\n",
       "4       7         10\n",
       "5      10         10\n",
       "6      11         10\n",
       "7      12         10\n",
       "8      16         10\n",
       "9      20         10\n",
       "10     21         10\n",
       "11     23         10\n",
       "12     25         10\n",
       "13     40         10"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "if not classes_list:\n",
    "    src = rasterio.open(categorical_tif)\n",
    "    im_arr = src.read(1)\n",
    "    src.close()\n",
    "    \n",
    "    classes_list = np.unique(im_arr[im_arr != no_data])\n",
    "    nb_points_per_class_list = [np_points_for_all_classes] * len(classes_list)\n",
    "\n",
    "\n",
    "# Create a dictionary from the two lists\n",
    "data = {'Class': classes_list, 'Nb points': nb_points_per_class_list}\n",
    "\n",
    "# Create a DataFrame from the dictionary\n",
    "point_df = pd.DataFrame(data)\n",
    "\n",
    "point_df"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Select random points/pixels in each class"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Class : 1\n",
      "Pixels total for class 1 : 82\n",
      "Randomly selected points/pixels : 10\n",
      "---------------------\n",
      "Class : 2\n",
      "Pixels total for class 2 : 2079\n",
      "Randomly selected points/pixels : 10\n",
      "---------------------\n",
      "Class : 3\n",
      "Pixels total for class 3 : 241\n",
      "Randomly selected points/pixels : 10\n",
      "---------------------\n",
      "Class : 4\n",
      "Pixels total for class 4 : 90\n",
      "Randomly selected points/pixels : 10\n",
      "---------------------\n",
      "Class : 7\n",
      "Pixels total for class 7 : 70\n",
      "Randomly selected points/pixels : 10\n",
      "---------------------\n",
      "Class : 10\n",
      "Pixels total for class 10 : 40\n",
      "Randomly selected points/pixels : 10\n",
      "---------------------\n",
      "Class : 11\n",
      "Pixels total for class 11 : 45\n",
      "Randomly selected points/pixels : 10\n",
      "---------------------\n",
      "Class : 12\n",
      "Pixels total for class 12 : 880\n",
      "Randomly selected points/pixels : 10\n",
      "---------------------\n",
      "Class : 16\n",
      "Pixels total for class 16 : 22\n",
      "Randomly selected points/pixels : 10\n",
      "---------------------\n",
      "Class : 20\n",
      "Pixels total for class 20 : 327\n",
      "Randomly selected points/pixels : 10\n",
      "---------------------\n",
      "Class : 21\n",
      "Pixels total for class 21 : 327\n",
      "Randomly selected points/pixels : 10\n",
      "---------------------\n",
      "Class : 23\n",
      "Pixels total for class 23 : 188\n",
      "Randomly selected points/pixels : 10\n",
      "---------------------\n",
      "Class : 25\n",
      "Pixels total for class 25 : 270\n",
      "Randomly selected points/pixels : 10\n",
      "---------------------\n",
      "Class : 40\n",
      "Pixels total for class 40 : 71\n",
      "Randomly selected points/pixels : 10\n",
      "---------------------\n"
     ]
    }
   ],
   "source": [
    "src = rasterio.open(categorical_tif)\n",
    "im_arr = src.read(1)\n",
    "src.close()\n",
    "\n",
    "bounds    = src.bounds\n",
    "transform = src.transform\n",
    "\n",
    "upper_left_x = transform[2]\n",
    "upper_left_y = transform[5]\n",
    "x_size       = transform[0]\n",
    "y_size       = transform[4]\n",
    "\n",
    "\n",
    "df = pd.DataFrame(columns=['x','y','class_nb'])\n",
    "\n",
    "n = 0\n",
    "\n",
    "for j in range(0,len(point_df)):\n",
    "\n",
    "\tclass_nb = point_df.loc[j]['Class']\n",
    "\tpoint_nb = point_df.loc[j]['Nb points']\n",
    "\n",
    "\t(y_index, x_index) = np.nonzero(im_arr == class_nb)\n",
    "\n",
    "\tprint(f'Class : {class_nb}')\n",
    "\tprint(f'Pixels total for class {class_nb} : {len(y_index)}')\n",
    "\tprint(f'Randomly selected points/pixels : {point_nb}')\n",
    "\n",
    "\n",
    "\trandom.seed(10)\n",
    "\trandom_pixels_to_add = random.sample(range(0, len(y_index)), point_nb)\n",
    "\n",
    "\tfor i in random_pixels_to_add:\n",
    "\t\t\n",
    "\t\tx = x_index[i] * x_size + upper_left_x + (x_size / 2) #add half the cell size\n",
    "\t\ty = y_index[i] * y_size + upper_left_y + (y_size / 2) #to centre the point\n",
    "\n",
    "\t\tdf.loc[n] = [x,y,class_nb]\n",
    "\n",
    "\t\tn += 1\n",
    "\n",
    "\tprint('---------------------')"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Save selected points to shapefile"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "class_nb\n",
      "1     10\n",
      "2     10\n",
      "3     10\n",
      "4     10\n",
      "7     10\n",
      "10    10\n",
      "11    10\n",
      "12    10\n",
      "16    10\n",
      "20    10\n",
      "21    10\n",
      "23    10\n",
      "25    10\n",
      "40    10\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.x, df.y), crs=\"EPSG:\" + epsg)\n",
    "\n",
    "\n",
    "gdf['geometry'] = gdf.geometry\n",
    "\n",
    "gdf['class_nb'] = gdf['class_nb'].astype('int16')\n",
    "\n",
    "\n",
    "gdf = gdf[['class_nb','geometry']]\n",
    "\n",
    "print(gdf.groupby(['class_nb']).size())\n",
    "\n",
    "gdf.to_file(randomly_selected_points_shp)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}