Extract random points
This notebook allows to randomly select a certain number of samples (points) from a categorical raster.
[15]:
import random
import numpy as np
import pandas as pd
import geopandas as gpd
import rasterio
from pathlib import Path
Set directory
[6]:
computer_path = '/export/miro/ndeffense/LBRAT2104/'
grp_nb = '99'
data_path = f'{computer_path}data/' # Directory with data shared by the assistant
work_path = f'{computer_path}STUDENTS/GROUP_{grp_nb}/TP/' # Directory for all work files
# Input directory
land_cover_path = f'{work_path}LAND_COVER/'
# Output directory
points_path = f'{work_path}SAMPLES_POINTS/'
Path(points_path).mkdir(parents=True, exist_ok=True)
Set filenames
[33]:
categorical_tif = f'{land_cover_path}Corine_Land_Cover_32631_ROI.tif'
randomly_selected_points_shp = f'{land_cover_path}Corine_Land_Cover_selected_points_ROI.shp'
Set parameters
[32]:
no_data = 999
epsg = '32631' # Projection of the categorical GeoTIFF
# Select a different number of points per class
classes_list = [1,2]
nb_points_per_class_list = [50,75]
# Select the same number of points for every classes
classes_list = []
np_points_for_all_classes = 10
1. Build dataframe with the number of points to select for each class
[23]:
if not classes_list:
src = rasterio.open(categorical_tif)
im_arr = src.read(1)
src.close()
classes_list = np.unique(im_arr[im_arr != no_data])
nb_points_per_class_list = [np_points_for_all_classes] * len(classes_list)
# Create a dictionary from the two lists
data = {'Class': classes_list, 'Nb points': nb_points_per_class_list}
# Create a DataFrame from the dictionary
point_df = pd.DataFrame(data)
point_df
[23]:
Class | Nb points | |
---|---|---|
0 | 1 | 10 |
1 | 2 | 10 |
2 | 3 | 10 |
3 | 4 | 10 |
4 | 7 | 10 |
5 | 10 | 10 |
6 | 11 | 10 |
7 | 12 | 10 |
8 | 16 | 10 |
9 | 20 | 10 |
10 | 21 | 10 |
11 | 23 | 10 |
12 | 25 | 10 |
13 | 40 | 10 |
2. Select random points/pixels in each class
[43]:
src = rasterio.open(categorical_tif)
im_arr = src.read(1)
src.close()
bounds = src.bounds
transform = src.transform
upper_left_x = transform[2]
upper_left_y = transform[5]
x_size = transform[0]
y_size = transform[4]
df = pd.DataFrame(columns=['x','y','class_nb'])
n = 0
for j in range(0,len(point_df)):
class_nb = point_df.loc[j]['Class']
point_nb = point_df.loc[j]['Nb points']
(y_index, x_index) = np.nonzero(im_arr == class_nb)
print(f'Class : {class_nb}')
print(f'Pixels total for class {class_nb} : {len(y_index)}')
print(f'Randomly selected points/pixels : {point_nb}')
random.seed(10)
random_pixels_to_add = random.sample(range(0, len(y_index)), point_nb)
for i in random_pixels_to_add:
x = x_index[i] * x_size + upper_left_x + (x_size / 2) #add half the cell size
y = y_index[i] * y_size + upper_left_y + (y_size / 2) #to centre the point
df.loc[n] = [x,y,class_nb]
n += 1
print('---------------------')
Class : 1
Pixels total for class 1 : 82
Randomly selected points/pixels : 10
---------------------
Class : 2
Pixels total for class 2 : 2079
Randomly selected points/pixels : 10
---------------------
Class : 3
Pixels total for class 3 : 241
Randomly selected points/pixels : 10
---------------------
Class : 4
Pixels total for class 4 : 90
Randomly selected points/pixels : 10
---------------------
Class : 7
Pixels total for class 7 : 70
Randomly selected points/pixels : 10
---------------------
Class : 10
Pixels total for class 10 : 40
Randomly selected points/pixels : 10
---------------------
Class : 11
Pixels total for class 11 : 45
Randomly selected points/pixels : 10
---------------------
Class : 12
Pixels total for class 12 : 880
Randomly selected points/pixels : 10
---------------------
Class : 16
Pixels total for class 16 : 22
Randomly selected points/pixels : 10
---------------------
Class : 20
Pixels total for class 20 : 327
Randomly selected points/pixels : 10
---------------------
Class : 21
Pixels total for class 21 : 327
Randomly selected points/pixels : 10
---------------------
Class : 23
Pixels total for class 23 : 188
Randomly selected points/pixels : 10
---------------------
Class : 25
Pixels total for class 25 : 270
Randomly selected points/pixels : 10
---------------------
Class : 40
Pixels total for class 40 : 71
Randomly selected points/pixels : 10
---------------------
3. Save selected points to shapefile
[42]:
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.x, df.y), crs="EPSG:" + epsg)
gdf['geometry'] = gdf.geometry
gdf['class_nb'] = gdf['class_nb'].astype('int16')
gdf = gdf[['class_nb','geometry']]
print(gdf.groupby(['class_nb']).size())
gdf.to_file(randomly_selected_points_shp)
class_nb
1 10
2 10
3 10
4 10
7 10
10 10
11 10
12 10
16 10
20 10
21 10
23 10
25 10
40 10
dtype: int64