In situ data preparation
[1]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
from rasterio import features
import rasterio.plot
from IPython.display import display
import glob
import plotly.express as px
import plotly.offline
plotly.offline.init_notebook_mode()
from pathlib import Path
print('All libraries successfully imported!')
print(f'Pandas : {pd.__version__}')
print(f'GeoPandas : {gpd.__version__}')
All libraries successfully imported!
Pandas : 1.5.2
GeoPandas : 0.12.2
Set directory
[2]:
computer_path = 'X:/'
grp_nb = '99'
# Directory for all work files
work_path = f'{computer_path}STUDENTS/GROUP_{grp_nb}/'
data_path = f'{work_path}data/'
reflectance_path = f'{work_path}3_L2A_MASKED/'
in_situ_path = f'{data_path}A_IN_SITU/'
in_situ_prepro_path = f'{work_path}B_IN_SITU_PREPRO/'
Path(in_situ_prepro_path).mkdir(parents=True, exist_ok=True)
Set parameters
[3]:
site = 'NAMUR'
year = '2020'
no_data = -999
field_id = 'id'
field_classif_code = 'grp_1_nb'
field_classif_name = 'grp_1'
buffer_size = -10
pix_min = 3
pix_ratio_min = 0.0002
poly_min = 4
Set filenames
[4]:
# Input file
in_situ_shp = f'{in_situ_path}{site}_{year}_IN_SITU_ROI.shp'
# Output files
in_situ_tif = f'{in_situ_prepro_path}{site}_{year}_IN_SITU_ROI_buffer.tif'
in_situ_prepared_shp = f'{in_situ_prepro_path}{site}_{year}_IN_SITU_ROI_prepared.shp'
1. Apply 10m inner buffer to each polygons
[6]:
gdf = gpd.read_file(in_situ_shp)
print(f'The Coordinates Reference System is {gdf.crs} \n')
print(f'There are {len(gdf)} polygons BEFORE applying the {buffer_size}m buffer.')
gdf.geometry = gdf.geometry.buffer(buffer_size)
gdf = gdf[~gdf.geometry.is_empty] # Remove empty geometries
print(f'There are {len(gdf)} polygons AFTER applying the {buffer_size}m buffer.')
display(gdf.head())
The Coordinates Reference System is epsg:32631
There are 733 polygons BEFORE applying the -10m buffer.
There are 628 polygons AFTER applying the -10m buffer.
| id | lc_nb | lc | grp_nb | grp | class_nb | class | sub_nb | sub | grp_1_nb | grp_1 | grp_A_nb | grp_A | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 3 | Grassland and meadows | 31 | Grassland and meadows | 319 | Grassland and meadows | 3199 | Grassland and meadows | 3 | Grassland and meadows | 3 | Grassland and meadows | MULTIPOLYGON (((636113.254 5595481.323, 636113... |
| 1 | 2 | 3 | Grassland and meadows | 31 | Grassland and meadows | 319 | Grassland and meadows | 3199 | Grassland and meadows | 3 | Grassland and meadows | 3 | Grassland and meadows | POLYGON ((629775.101 5593157.573, 629778.482 5... |
| 2 | 3 | 3 | Grassland and meadows | 31 | Grassland and meadows | 319 | Grassland and meadows | 3199 | Grassland and meadows | 3 | Grassland and meadows | 3 | Grassland and meadows | POLYGON ((636748.981 5595728.374, 636750.028 5... |
| 3 | 4 | 3 | Grassland and meadows | 31 | Grassland and meadows | 319 | Grassland and meadows | 3199 | Grassland and meadows | 3 | Grassland and meadows | 3 | Grassland and meadows | MULTIPOLYGON (((630003.573 5594258.004, 630003... |
| 4 | 5 | 3 | Grassland and meadows | 31 | Grassland and meadows | 319 | Grassland and meadows | 3199 | Grassland and meadows | 3 | Grassland and meadows | 3 | Grassland and meadows | POLYGON ((636962.589 5595674.757, 636966.536 5... |
[7]:
fig, ax = plt.subplots(1, 1, figsize=(15,15))
gdf.plot(ax=ax,
column=field_classif_name,
categorical=True,
cmap='tab20',
linewidth=.6,
edgecolor='0.2',
legend=True)
ax.set_title(f'In situ with {buffer_size}m buffer - {site}, {year}',fontsize=20)
plt.box(False)
2. Add polygon area
[8]:
gdf['area'] = gdf.geometry.area.astype(int)
display(gdf.head())
| id | lc_nb | lc | grp_nb | grp | class_nb | class | sub_nb | sub | grp_1_nb | grp_1 | grp_A_nb | grp_A | geometry | area | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 3 | Grassland and meadows | 31 | Grassland and meadows | 319 | Grassland and meadows | 3199 | Grassland and meadows | 3 | Grassland and meadows | 3 | Grassland and meadows | MULTIPOLYGON (((636113.254 5595481.323, 636113... | 0 |
| 1 | 2 | 3 | Grassland and meadows | 31 | Grassland and meadows | 319 | Grassland and meadows | 3199 | Grassland and meadows | 3 | Grassland and meadows | 3 | Grassland and meadows | POLYGON ((629775.101 5593157.573, 629778.482 5... | 235 |
| 2 | 3 | 3 | Grassland and meadows | 31 | Grassland and meadows | 319 | Grassland and meadows | 3199 | Grassland and meadows | 3 | Grassland and meadows | 3 | Grassland and meadows | POLYGON ((636748.981 5595728.374, 636750.028 5... | 251 |
| 3 | 4 | 3 | Grassland and meadows | 31 | Grassland and meadows | 319 | Grassland and meadows | 3199 | Grassland and meadows | 3 | Grassland and meadows | 3 | Grassland and meadows | MULTIPOLYGON (((630003.573 5594258.004, 630003... | 1735 |
| 4 | 5 | 3 | Grassland and meadows | 31 | Grassland and meadows | 319 | Grassland and meadows | 3199 | Grassland and meadows | 3 | Grassland and meadows | 3 | Grassland and meadows | POLYGON ((636962.589 5595674.757, 636966.536 5... | 14331 |
3. Add pixel count
3.1 Select one raster clipped to the extent of the ROI as a template to rasterize in situ polygons
[9]:
img_temp_tif = glob.glob(f'{reflectance_path}*.tif')[0]
print(f'Raster template file : {img_temp_tif}')
Raster template file : /export/miro/ndeffense/LBRAT2104/STUDENTS/GROUP_99/TP/3_L2A_MASKED/T31UFS_20200116T105309_B02_10m_ROI_SCL.tif
3.2 Rasterize in situ polygons
[10]:
# Open the raster file you want to use as a template for rasterize
src = rasterio.open(img_temp_tif, "r")
# Update metadata
out_meta = src.meta
out_meta.update(dtype='int16',
nodata=no_data)
# Burn the features into the raster and write it out
dst = rasterio.open(in_situ_tif, 'w+', **out_meta)
dst_arr = dst.read(1)
# this is where we create a generator of geom, value pairs to use in rasterizing
geom_col = gdf.geometry
code_col = gdf[field_id].astype(int)
shapes = ((geom,value) for geom, value in zip(geom_col, code_col))
in_situ_arr = features.rasterize(shapes=shapes,
fill=no_data,
out=dst_arr,
transform=dst.transform,
all_touched=False) # If false, only pixels whose center is within the polygon will be burned in.
dst.write_band(1, in_situ_arr)
# Close rasterio objects
src.close()
dst.close()
print(f'Rasterize is done : {in_situ_tif}')
Rasterize is done : /export/miro/ndeffense/LBRAT2104/STUDENTS/GROUP_99/TP/B_IN_SITU_PREPRO/NAMUR_2020_IN_SITU_ROI_buffer.tif
[11]:
fig, ax = plt.subplots(1, 1, figsize=(10,10))
rasterio.plot.show(in_situ_arr, ax=ax)
plt.box(False)
3.3 Count number of pixels for each polygons
[12]:
unique, counts = np.unique(in_situ_arr, return_counts=True)
pix_count_df = pd.DataFrame(zip(unique, counts), columns=['id','pix_count'], dtype='int16')
display(pix_count_df)
/tmp/ipykernel_20119/83254786.py:2: FutureWarning:
Values are too large to be losslessly cast to int16. In a future version this will raise OverflowError. To retain the old behavior, use pd.Series(values).astype(int16)
| id | pix_count | |
|---|---|---|
| 0 | -999 | -12677 |
| 1 | 2 | 2 |
| 2 | 4 | 17 |
| 3 | 5 | 146 |
| 4 | 6 | 18 |
| ... | ... | ... |
| 600 | 729 | 77 |
| 601 | 730 | 23 |
| 602 | 731 | 82 |
| 603 | 732 | 47 |
| 604 | 733 | 44 |
605 rows × 2 columns
3.4 Merge pixels count with polygons informations
[13]:
pix_count_gdf = gdf.merge(pix_count_df, on='id', how="left")
display(pix_count_gdf.head())
print(f'There are {len(pix_count_gdf)} polygons')
| id | lc_nb | lc | grp_nb | grp | class_nb | class | sub_nb | sub | grp_1_nb | grp_1 | grp_A_nb | grp_A | geometry | area | pix_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 3 | Grassland and meadows | 31 | Grassland and meadows | 319 | Grassland and meadows | 3199 | Grassland and meadows | 3 | Grassland and meadows | 3 | Grassland and meadows | MULTIPOLYGON (((636113.254 5595481.323, 636113... | 0 | NaN |
| 1 | 2 | 3 | Grassland and meadows | 31 | Grassland and meadows | 319 | Grassland and meadows | 3199 | Grassland and meadows | 3 | Grassland and meadows | 3 | Grassland and meadows | POLYGON ((629775.101 5593157.573, 629778.482 5... | 235 | 2.0 |
| 2 | 3 | 3 | Grassland and meadows | 31 | Grassland and meadows | 319 | Grassland and meadows | 3199 | Grassland and meadows | 3 | Grassland and meadows | 3 | Grassland and meadows | POLYGON ((636748.981 5595728.374, 636750.028 5... | 251 | NaN |
| 3 | 4 | 3 | Grassland and meadows | 31 | Grassland and meadows | 319 | Grassland and meadows | 3199 | Grassland and meadows | 3 | Grassland and meadows | 3 | Grassland and meadows | MULTIPOLYGON (((630003.573 5594258.004, 630003... | 1735 | 17.0 |
| 4 | 5 | 3 | Grassland and meadows | 31 | Grassland and meadows | 319 | Grassland and meadows | 3199 | Grassland and meadows | 3 | Grassland and meadows | 3 | Grassland and meadows | POLYGON ((636962.589 5595674.757, 636966.536 5... | 14331 | 146.0 |
There are 628 polygons
4. Remove unnecessary in situ polygons
4.1 Remove polygons with less than pix_min pixels
[14]:
gdf = pix_count_gdf.loc[pix_count_gdf['pix_count'] >= pix_min]
gdf = gdf.astype({"pix_count": int})
print(f'There are {len(gdf)} polygons with more (or equal) than {pix_min} pixels.')
There are 583 polygons with more (or equal) than 3 pixels.
4.2 Remove polygons from under represented and unwanted classes
Under represented classes mean the classes with :
not enough pixels - less than
pix_ratio_min% of the total number of pixels (default : 0.0002%)not enough polygons - less than
poly_minpolygons (default : 5 polygons)
Unwanted classes mean the classes we don’t want to work with
[15]:
# Get number of pixels per class
pix_per_class_df = gdf.groupby([field_classif_code, field_classif_name])['pix_count'].agg('sum').to_frame().reset_index()
pix_per_class_df = pix_per_class_df.sort_values(by='pix_count', ascending=False)
pix_per_class_df['ratio'] = pix_per_class_df['pix_count'] / pix_per_class_df['pix_count'].sum()
# Get number of polygons per class
poly_per_class_df = gdf.groupby([field_classif_code, field_classif_name])[field_classif_code].agg('count').reset_index(name='poly_count')
poly_per_class_df = poly_per_class_df.sort_values(by='poly_count', ascending=False)
# Merge 2 previous dataframe in a single one
pix_poly_per_class_df = pix_per_class_df.merge(poly_per_class_df, on=[field_classif_code, field_classif_name], how="outer")
display(pix_poly_per_class_df)
| grp_1_nb | grp_1 | pix_count | ratio | poly_count | |
|---|---|---|---|---|---|
| 0 | 1111 | Winter wheat | 29633 | 0.255640 | 74 |
| 1 | 3 | Grassland and meadows | 23889 | 0.206087 | 221 |
| 2 | 1121 | Maize | 8667 | 0.074769 | 36 |
| 3 | 1811 | Sugar beet | 7400 | 0.063839 | 21 |
| 4 | 1152 | Barley six-row | 6659 | 0.057446 | 20 |
| 5 | 1511 | Potatoes | 6439 | 0.055548 | 14 |
| 6 | 1192 | Other cereals | 6117 | 0.052771 | 17 |
| 7 | 0 | Remove | 4068 | 0.035094 | 28 |
| 8 | 1771 | Peas | 3606 | 0.031108 | 10 |
| 9 | 69 | Forest | 3109 | 0.026821 | 35 |
| 10 | 1923 | Flax hemp and other similar crops | 2810 | 0.024241 | 8 |
| 11 | 21 | Fruits trees | 2239 | 0.019316 | 7 |
| 12 | 1435 | Rapeseed | 1826 | 0.015753 | 6 |
| 13 | 81 | Urban | 1795 | 0.015485 | 36 |
| 14 | 1151 | Barley two-row | 1660 | 0.014321 | 3 |
| 15 | 121 | Leafy or stem vegetables | 1351 | 0.011655 | 4 |
| 16 | 1711 | Beans | 835 | 0.007203 | 2 |
| 17 | 1171 | Oats | 776 | 0.006694 | 8 |
| 18 | 1231 | Carrots | 723 | 0.006237 | 2 |
| 19 | 1911 | Alfalfa | 554 | 0.004779 | 7 |
| 20 | 1112 | Spring wheat | 405 | 0.003494 | 3 |
| 21 | 84 | Greenhouses | 252 | 0.002174 | 5 |
| 22 | 1161 | Rye | 230 | 0.001984 | 2 |
| 23 | 9212 | Permanent rivers and streams | 208 | 0.001794 | 4 |
| 24 | 22 | Vineyards | 178 | 0.001536 | 4 |
| 25 | 1115 | Triticale | 149 | 0.001285 | 1 |
| 26 | 1761 | Lupins | 140 | 0.001208 | 2 |
| 27 | 1931 | Medicinal aromatic pesticidal or similar crops | 138 | 0.001191 | 1 |
| 28 | 72 | Bare soils | 61 | 0.000526 | 2 |
[16]:
list_all_classes = set(pix_poly_per_class_df[field_classif_code].values)
list_under_represented_class = set(pix_poly_per_class_df.loc[(pix_poly_per_class_df['ratio'] < pix_ratio_min) | (pix_poly_per_class_df['poly_count'] < poly_min)][field_classif_code].values)
list_unwanted_class = {0, 1992}
list_well_represented_class = set(list_all_classes - list_under_represented_class - list_unwanted_class)
print(f'---> {len(list_well_represented_class)} well-represented classes (>= {pix_ratio_min}% & >= {poly_min} polygons) :\n {sorted(list_well_represented_class)}')
print(f'---> {len(list_under_represented_class)} under-represented classes (< {pix_ratio_min}% & < {poly_min} polygons) :\n {sorted(list_under_represented_class)}')
print(f'---> {len(list_unwanted_class)} unwanted classes :\n {sorted(list_unwanted_class)}')
gdf = gdf.loc[gdf[field_classif_code].isin(list_well_represented_class)]
display(gdf.head())
print(f'There are {len(gdf)} polygons from well-represented classes.')
---> 19 well-represented classes (>= 0.0002% & >= 4 polygons) :
[3, 21, 22, 69, 81, 84, 121, 1111, 1121, 1152, 1171, 1192, 1435, 1511, 1771, 1811, 1911, 1923, 9212]
---> 9 under-represented classes (< 0.0002% & < 4 polygons) :
[72, 1112, 1115, 1151, 1161, 1231, 1711, 1761, 1931]
---> 2 unwanted classes :
[0, 1992]
| id | lc_nb | lc | grp_nb | grp | class_nb | class | sub_nb | sub | grp_1_nb | grp_1 | grp_A_nb | grp_A | geometry | area | pix_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 3 | 4 | 3 | Grassland and meadows | 31 | Grassland and meadows | 319 | Grassland and meadows | 3199 | Grassland and meadows | 3 | Grassland and meadows | 3 | Grassland and meadows | MULTIPOLYGON (((630003.573 5594258.004, 630003... | 1735 | 17 |
| 4 | 5 | 3 | Grassland and meadows | 31 | Grassland and meadows | 319 | Grassland and meadows | 3199 | Grassland and meadows | 3 | Grassland and meadows | 3 | Grassland and meadows | POLYGON ((636962.589 5595674.757, 636966.536 5... | 14331 | 146 |
| 5 | 6 | 3 | Grassland and meadows | 31 | Grassland and meadows | 319 | Grassland and meadows | 3199 | Grassland and meadows | 3 | Grassland and meadows | 3 | Grassland and meadows | POLYGON ((635692.119 5593303.601, 635688.405 5... | 1768 | 18 |
| 6 | 7 | 3 | Grassland and meadows | 31 | Grassland and meadows | 319 | Grassland and meadows | 3199 | Grassland and meadows | 3 | Grassland and meadows | 3 | Grassland and meadows | POLYGON ((627911.368 5595749.375, 627942.033 5... | 7828 | 79 |
| 7 | 9 | 3 | Grassland and meadows | 31 | Grassland and meadows | 319 | Grassland and meadows | 3199 | Grassland and meadows | 3 | Grassland and meadows | 3 | Grassland and meadows | POLYGON ((633236.525 5596165.100, 633397.286 5... | 3197 | 39 |
There are 537 polygons from well-represented classes.
Plot histogram
[17]:
df = pix_poly_per_class_df.loc[pix_poly_per_class_df[field_classif_code].isin(list_well_represented_class)]
display(df)
| grp_1_nb | grp_1 | pix_count | ratio | poly_count | |
|---|---|---|---|---|---|
| 0 | 1111 | Winter wheat | 29633 | 0.255640 | 74 |
| 1 | 3 | Grassland and meadows | 23889 | 0.206087 | 221 |
| 2 | 1121 | Maize | 8667 | 0.074769 | 36 |
| 3 | 1811 | Sugar beet | 7400 | 0.063839 | 21 |
| 4 | 1152 | Barley six-row | 6659 | 0.057446 | 20 |
| 5 | 1511 | Potatoes | 6439 | 0.055548 | 14 |
| 6 | 1192 | Other cereals | 6117 | 0.052771 | 17 |
| 8 | 1771 | Peas | 3606 | 0.031108 | 10 |
| 9 | 69 | Forest | 3109 | 0.026821 | 35 |
| 10 | 1923 | Flax hemp and other similar crops | 2810 | 0.024241 | 8 |
| 11 | 21 | Fruits trees | 2239 | 0.019316 | 7 |
| 12 | 1435 | Rapeseed | 1826 | 0.015753 | 6 |
| 13 | 81 | Urban | 1795 | 0.015485 | 36 |
| 15 | 121 | Leafy or stem vegetables | 1351 | 0.011655 | 4 |
| 17 | 1171 | Oats | 776 | 0.006694 | 8 |
| 19 | 1911 | Alfalfa | 554 | 0.004779 | 7 |
| 21 | 84 | Greenhouses | 252 | 0.002174 | 5 |
| 23 | 9212 | Permanent rivers and streams | 208 | 0.001794 | 4 |
| 24 | 22 | Vineyards | 178 | 0.001536 | 4 |
[18]:
fig = px.bar(df,
x=df[field_classif_name],
y=df['pix_count'],
text='poly_count',
labels={'pix_count': 'Number of pixels'})
fig.update_layout(xaxis_title=None)
fig.show()
Write geodataframe into a shapefile
[19]:
gdf.to_file(in_situ_prepared_shp)
print(f'New shapefile has been created : {in_situ_prepared_shp}')
New shapefile has been created : /export/miro/ndeffense/LBRAT2104/STUDENTS/GROUP_99/TP/B_IN_SITU_PREPRO/NAMUR_2020_IN_SITU_ROI_prepared.shp