{ "cells": [ { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# Extract random points\n", "\n", "This notebook allows to randomly select a certain number of samples (points) from a categorical raster." ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "import random\n", "import numpy as np\n", "import pandas as pd\n", "import geopandas as gpd\n", "import rasterio\n", "from pathlib import Path" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "**Set directory**" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "computer_path = '/export/miro/ndeffense/LBRAT2104/'\n", "grp_nb = '99'\n", "\n", "data_path = f'{computer_path}data/' # Directory with data shared by the assistant\n", "work_path = f'{computer_path}STUDENTS/GROUP_{grp_nb}/TP/' # Directory for all work files\n", "\n", "\n", "# Input directory\n", "land_cover_path = f'{work_path}LAND_COVER/'\n", "\n", "# Output directory\n", "points_path = f'{work_path}SAMPLES_POINTS/'\n", "\n", "# Create the output directory (and any missing parents) if it does not exist yet\n", "Path(points_path).mkdir(parents=True, exist_ok=True)" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "**Set filenames**" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [], "source": [ "# Input: categorical raster to sample\n", "categorical_tif = f'{land_cover_path}Corine_Land_Cover_32631_ROI.tif'\n", "\n", "# Output: shapefile of the randomly selected points\n", "# NOTE(review): this file is placed under land_cover_path, while points_path is the directory created for outputs - confirm the intended location\n", "randomly_selected_points_shp = f'{land_cover_path}Corine_Land_Cover_selected_points_ROI.shp'" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "**Set parameters**" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "no_data = 999 # Pixel value left out when the classes are listed from the raster\n", "\n", "epsg = '32631' # Projection of the categorical GeoTIFF\n", "\n", "# Option 1: select a different number of points per class (one entry per class in both lists)\n", "classes_list = [1,2]\n", "nb_points_per_class_list = [50,75]\n", "\n", "# Option 2: select the same number of points for every class\n", "# An empty classes_list (it overrides option 1 above) means: use every class found in the raster\n", "classes_list = 
[]\n", "np_points_for_all_classes = 10" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "## 1. Build dataframe with the number of points to select for each class" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ClassNb points
0110
1210
2310
3410
4710
51010
61110
71210
81610
92010
102110
112310
122510
134010
\n", "
" ], "text/plain": [ " Class Nb points\n", "0 1 10\n", "1 2 10\n", "2 3 10\n", "3 4 10\n", "4 7 10\n", "5 10 10\n", "6 11 10\n", "7 12 10\n", "8 16 10\n", "9 20 10\n", "10 21 10\n", "11 23 10\n", "12 25 10\n", "13 40 10" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# An empty class list means: sample every class found in the raster.\n",
  "# len() is used instead of truthiness so that this cell can be re-run after\n",
  "# classes_list has been replaced by a NumPy array (the truth value of an\n",
  "# array with several elements is ambiguous and raises ValueError).\n",
  "if len(classes_list) == 0:\n",
  "    # The context manager closes the dataset even if the read fails\n",
  "    with rasterio.open(categorical_tif) as src:\n",
  "        im_arr = src.read(1)\n",
  "\n",
  "    # Every distinct pixel value except no_data is a class\n",
  "    classes_list = np.unique(im_arr[im_arr != no_data])\n",
  "    nb_points_per_class_list = [np_points_for_all_classes] * len(classes_list)\n",
  "\n",
  "\n",
  "# Create a dictionary from the two lists (they must have the same length)\n",
  "data = {'Class': classes_list, 'Nb points': nb_points_per_class_list}\n",
  "\n",
  "# One row per class with the number of points to draw for it\n",
  "point_df = pd.DataFrame(data)\n",
  "\n",
  "point_df"
 ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "## 2. Select random points/pixels in each class" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Class : 1\n", "Pixels total for class 1 : 82\n", "Randomly selected points/pixels : 10\n", "---------------------\n", "Class : 2\n", "Pixels total for class 2 : 2079\n", "Randomly selected points/pixels : 10\n", "---------------------\n", "Class : 3\n", "Pixels total for class 3 : 241\n", "Randomly selected points/pixels : 10\n", "---------------------\n", "Class : 4\n", "Pixels total for class 4 : 90\n", "Randomly selected points/pixels : 10\n", "---------------------\n", "Class : 7\n", "Pixels total for class 7 : 70\n", "Randomly selected points/pixels : 10\n", "---------------------\n", "Class : 10\n", "Pixels total for class 10 : 40\n", "Randomly selected points/pixels : 10\n", "---------------------\n", "Class : 11\n", "Pixels total for class 11 : 45\n", "Randomly selected points/pixels : 10\n", "---------------------\n", "Class : 12\n", "Pixels total for class 12 : 880\n", "Randomly selected 
points/pixels : 10\n", "---------------------\n", "Class : 16\n", "Pixels total for class 16 : 22\n", "Randomly selected points/pixels : 10\n", "---------------------\n", "Class : 20\n", "Pixels total for class 20 : 327\n", "Randomly selected points/pixels : 10\n", "---------------------\n", "Class : 21\n", "Pixels total for class 21 : 327\n", "Randomly selected points/pixels : 10\n", "---------------------\n", "Class : 23\n", "Pixels total for class 23 : 188\n", "Randomly selected points/pixels : 10\n", "---------------------\n", "Class : 25\n", "Pixels total for class 25 : 270\n", "Randomly selected points/pixels : 10\n", "---------------------\n", "Class : 40\n", "Pixels total for class 40 : 71\n", "Randomly selected points/pixels : 10\n", "---------------------\n" ] } ], "source": [
  "# Read the class raster. The context manager closes the file even on error,\n",
  "# and the geotransform is fetched while the dataset is still open.\n",
  "with rasterio.open(categorical_tif) as src:\n",
  "    im_arr = src.read(1)\n",
  "    transform = src.transform\n",
  "\n",
  "# Affine coefficients: pixel size and coordinates of the upper-left corner\n",
  "upper_left_x = transform[2]\n",
  "upper_left_y = transform[5]\n",
  "x_size = transform[0]\n",
  "y_size = transform[4]\n",
  "\n",
  "seed = 10  # Fixed seed, re-applied for every class, so the selection is reproducible\n",
  "\n",
  "# Selected points are collected here and turned into a DataFrame once at the end\n",
  "# (appending to a DataFrame row by row re-allocates it at every step)\n",
  "records = []\n",
  "\n",
  "for class_nb, point_nb in zip(point_df['Class'], point_df['Nb points']):\n",
  "\n",
  "    # Row / column indices of every pixel belonging to the class\n",
  "    (y_index, x_index) = np.nonzero(im_arr == class_nb)\n",
  "    pixel_count = len(y_index)\n",
  "\n",
  "    # random.sample() raises ValueError when asked for more items than available,\n",
  "    # so the request is capped to the number of pixels of the class\n",
  "    nb_selected = min(int(point_nb), pixel_count)\n",
  "\n",
  "    print(f'Class : {class_nb}')\n",
  "    print(f'Pixels total for class {class_nb} : {pixel_count}')\n",
  "    if nb_selected < point_nb:\n",
  "        print(f'WARNING : {point_nb} points requested but only {pixel_count} pixels available')\n",
  "    print(f'Randomly selected points/pixels : {nb_selected}')\n",
  "\n",
  "    random.seed(seed)\n",
  "    random_pixels_to_add = random.sample(range(0, pixel_count), nb_selected)\n",
  "\n",
  "    for i in random_pixels_to_add:\n",
  "\n",
  "        x = x_index[i] * x_size + upper_left_x + (x_size / 2) #add half the cell size\n",
  "        y = y_index[i] * y_size + upper_left_y + (y_size / 2) #to centre the point\n",
  "\n",
  "        records.append((x, y, class_nb))\n",
  "\n",
  "    print('---------------------')\n",
  "\n",
  "# One row per selected point: map coordinates of the pixel centre and class code\n",
  "df = pd.DataFrame(records, columns=['x','y','class_nb'])"
 ] }, { "attachments": {}, 
"cell_type": "markdown", "metadata": {}, "source": [ "## 3. Save selected points to shapefile" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "class_nb\n", "1 10\n", "2 10\n", "3 10\n", "4 10\n", "7 10\n", "10 10\n", "11 10\n", "12 10\n", "16 10\n", "20 10\n", "21 10\n", "23 10\n", "25 10\n", "40 10\n", "dtype: int64\n" ] } ], "source": [
  "# Keep only the class code (stored as 16-bit integer) and attach one point\n",
  "# geometry per selected pixel centre. The GeoDataFrame is built from a casted\n",
  "# copy of the class column, so df itself is never modified and this cell can\n",
  "# be re-run on its own.\n",
  "gdf = gpd.GeoDataFrame(\n",
  "    df[['class_nb']].astype('int16'),\n",
  "    geometry=gpd.points_from_xy(df.x, df.y),\n",
  "    crs='EPSG:' + epsg,\n",
  ")\n",
  "\n",
  "# Sanity check: number of selected points per class\n",
  "print(gdf.groupby(['class_nb']).size())\n",
  "\n",
  "gdf.to_file(randomly_selected_points_shp)"
 ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.8" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }