Extract random points

This notebook allows to randomly select a certain number of samples (points) from a categorical raster.

[15]:
import random
import numpy as np
import pandas as pd
import geopandas as gpd
import rasterio
from pathlib import Path

Set directory

[6]:
computer_path = '/export/miro/ndeffense/LBRAT2104/'
grp_nb        = '99'

data_path = f'{computer_path}data/'                        # Directory with data shared by the assistant
work_path = f'{computer_path}STUDENTS/GROUP_{grp_nb}/TP/'  # Directory for all work files


# Input directory
land_cover_path = f'{work_path}LAND_COVER/'

# Output directory
points_path = f'{work_path}SAMPLES_POINTS/'

Path(points_path).mkdir(parents=True, exist_ok=True)

Set filenames

[33]:
categorical_tif = f'{land_cover_path}Corine_Land_Cover_32631_ROI.tif'

randomly_selected_points_shp =  f'{land_cover_path}Corine_Land_Cover_selected_points_ROI.shp'

Set parameters

[32]:
no_data = 999

epsg = '32631'  # Projection of the categorical GeoTIFF

# Select a different number of points per class
classes_list = [1,2]
nb_points_per_class_list = [50,75]

# Select the same number of points for every classes
classes_list = []
np_points_for_all_classes = 10

1. Build dataframe with the number of points to select for each class

[23]:
if not classes_list:
    src = rasterio.open(categorical_tif)
    im_arr = src.read(1)
    src.close()

    classes_list = np.unique(im_arr[im_arr != no_data])
    nb_points_per_class_list = [np_points_for_all_classes] * len(classes_list)


# Create a dictionary from the two lists
data = {'Class': classes_list, 'Nb points': nb_points_per_class_list}

# Create a DataFrame from the dictionary
point_df = pd.DataFrame(data)

point_df
[23]:
Class Nb points
0 1 10
1 2 10
2 3 10
3 4 10
4 7 10
5 10 10
6 11 10
7 12 10
8 16 10
9 20 10
10 21 10
11 23 10
12 25 10
13 40 10

2. Select random points/pixels in each class

[43]:
src = rasterio.open(categorical_tif)
im_arr = src.read(1)
src.close()

bounds    = src.bounds
transform = src.transform

upper_left_x = transform[2]
upper_left_y = transform[5]
x_size       = transform[0]
y_size       = transform[4]


df = pd.DataFrame(columns=['x','y','class_nb'])

n = 0

for j in range(0,len(point_df)):

    class_nb = point_df.loc[j]['Class']
    point_nb = point_df.loc[j]['Nb points']

    (y_index, x_index) = np.nonzero(im_arr == class_nb)

    print(f'Class : {class_nb}')
    print(f'Pixels total for class {class_nb} : {len(y_index)}')
    print(f'Randomly selected points/pixels : {point_nb}')


    random.seed(10)
    random_pixels_to_add = random.sample(range(0, len(y_index)), point_nb)

    for i in random_pixels_to_add:

            x = x_index[i] * x_size + upper_left_x + (x_size / 2) #add half the cell size
            y = y_index[i] * y_size + upper_left_y + (y_size / 2) #to centre the point

            df.loc[n] = [x,y,class_nb]

            n += 1

    print('---------------------')
Class : 1
Pixels total for class 1 : 82
Randomly selected points/pixels : 10
---------------------
Class : 2
Pixels total for class 2 : 2079
Randomly selected points/pixels : 10
---------------------
Class : 3
Pixels total for class 3 : 241
Randomly selected points/pixels : 10
---------------------
Class : 4
Pixels total for class 4 : 90
Randomly selected points/pixels : 10
---------------------
Class : 7
Pixels total for class 7 : 70
Randomly selected points/pixels : 10
---------------------
Class : 10
Pixels total for class 10 : 40
Randomly selected points/pixels : 10
---------------------
Class : 11
Pixels total for class 11 : 45
Randomly selected points/pixels : 10
---------------------
Class : 12
Pixels total for class 12 : 880
Randomly selected points/pixels : 10
---------------------
Class : 16
Pixels total for class 16 : 22
Randomly selected points/pixels : 10
---------------------
Class : 20
Pixels total for class 20 : 327
Randomly selected points/pixels : 10
---------------------
Class : 21
Pixels total for class 21 : 327
Randomly selected points/pixels : 10
---------------------
Class : 23
Pixels total for class 23 : 188
Randomly selected points/pixels : 10
---------------------
Class : 25
Pixels total for class 25 : 270
Randomly selected points/pixels : 10
---------------------
Class : 40
Pixels total for class 40 : 71
Randomly selected points/pixels : 10
---------------------

3. Save selected points to shapefile

[42]:
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.x, df.y), crs="EPSG:" + epsg)


gdf['geometry'] = gdf.geometry

gdf['class_nb'] = gdf['class_nb'].astype('int16')


gdf = gdf[['class_nb','geometry']]

print(gdf.groupby(['class_nb']).size())

gdf.to_file(randomly_selected_points_shp)
class_nb
1     10
2     10
3     10
4     10
7     10
10    10
11    10
12    10
16    10
20    10
21    10
23    10
25    10
40    10
dtype: int64