Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
F
Federated Learning
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
CHAUDHARY Akshat
Federated Learning
Commits
998bf353
Commit
998bf353
authored
1 year ago
by
CHAUDHARY Akshat
Browse files
Options
Downloads
Patches
Plain Diff
all functions have been added
parent
531494af
Branches
main
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
partitions.py
+168
-14
168 additions, 14 deletions
partitions.py
with
168 additions
and
14 deletions
partitions.py
+
168
−
14
View file @
998bf353
import math
import random
from typing import Tuple

import matplotlib.pyplot as plt
import numpy as np
from sklearn.utils import shuffle

from dataset.data import Dataset
from dataset.nfv2 import load_data

# Single seed shared by every load_data call so the train/test splits are
# reproducible across runs.
SEED = 1138


def _load_sampled(name: str) -> Tuple[Dataset, Dataset]:
    """Load one pre-sampled dataset as a single (train, test) partition.

    All four datasets below are loaded with identical settings; only the
    file name differs, so the call is factored out here.
    """
    return load_data(
        f"data/sampled/{name}_sampled.csv.gz",
        seed=SEED,
        n_partition=1,
        only_benign=False,
    )


# Loading various datasets — each value is a (train, test) pair.
cicids: Tuple[Dataset, Dataset] = _load_sampled("cicids")
botiot: Tuple[Dataset, Dataset] = _load_sampled("botiot")
nb15: Tuple[Dataset, Dataset] = _load_sampled("nb15")
toniot: Tuple[Dataset, Dataset] = _load_sampled("toniot")

cicids_train, cicids_test = cicids
botiot_train, botiot_test = botiot
nb15_train, nb15_test = nb15
toniot_train, toniot_test = toniot
...
...
@@ -116,7 +104,7 @@ def partition_3(dataset_train):
return
partitions
#Partition 4 : One Client per dataset
#Partition 4 : One Client per dataset
: All labels
def
partition_4
(
datasets
):
partitions
=
[]
...
...
@@ -142,6 +130,172 @@ def partition_4(datasets):
return
partitions
#Partition 5 : One client per dataset : Some labels : `NUM_REMOVED` labels randomly removed from each dataset
def partition_5(datasets, NUM_REMOVED=2, plot=True):
    """Build one client partition per dataset with NUM_REMOVED labels dropped.

    Parameters
    ----------
    datasets : iterable of (X, y, metadata) triples, where metadata["Attack"]
        holds the per-row attack label (a pandas Series — TODO confirm against
        load_data's return type).
    NUM_REMOVED : int, optional
        Number of label classes randomly removed from each dataset. Must not
        exceed the number of distinct classes in any dataset, otherwise
        random.sample raises ValueError.
    plot : bool, optional
        When True (default, the original behavior), show a bar chart of each
        partition's label distribution. Pass False for headless use.

    Returns
    -------
    list of (X, y, m) triples, one per input dataset, with the selected
    classes filtered out. Uses the module-level `random` state.
    """
    partitions = []
    for i, dataset in enumerate(datasets):
        X = dataset[0]
        y = dataset[1]
        m = dataset[2]["Attack"]
        # Randomly select NUM_REMOVED classes to remove from this dataset.
        classes_to_remove = random.sample(m.unique().tolist(), NUM_REMOVED)
        # Keep only the rows whose label survives the removal.
        mask = ~m.isin(classes_to_remove)
        partition = (X[mask], y[mask], m[mask])
        partitions.append(partition)
        if plot:
            # Plot the partition's label distribution.
            label_counts = partition[2].value_counts()
            plt.figure(figsize=(6, 4))
            plt.bar(label_counts.index, label_counts.values)
            plt.title(f"Partition {i+1}")
            plt.xlabel("Attack Label")
            plt.ylabel("Count")
            plt.xticks(rotation=90)
            plt.show()
    return partitions
#Partition 6 : Multiple client per dataset : 1 label per client within each dataset
def partition_6(datasets, plot=True):
    """Build one client partition per (label, dataset) pair that has rows.

    Fix: labels are iterated in sorted order instead of raw `set` order, so
    the partition order and numbering are reproducible across runs (string
    set iteration order depends on Python's hash randomization).

    Parameters
    ----------
    datasets : iterable of (X, y, metadata) triples, where metadata["Attack"]
        holds the per-row attack label.
    plot : bool, optional
        When True (default, the original behavior), show a bar chart per
        partition. Pass False for headless use.

    Returns
    -------
    list of (X, y, m) triples; each contains the rows of one dataset that
    carry exactly one attack label. Datasets without a given label are
    skipped for that label.
    """
    partitions = []
    unique_labels = set()
    # Collect the union of labels over all datasets.
    for dataset in datasets:
        m = dataset[2]["Attack"]
        unique_labels.update(m.unique())
    # One partition per (label, dataset) combination that is non-empty.
    partition_number = 1
    for label in sorted(unique_labels, key=str):  # key=str: safe if labels mix types
        for i, dataset in enumerate(datasets):
            X = dataset[0]
            y = dataset[1]
            m = dataset[2]["Attack"]
            mask = m == label
            if mask.sum() > 0:
                partition = (X[mask], y[mask], m[mask])
                partitions.append(partition)
                if plot:
                    # Plot the partition's (single-label) distribution.
                    label_counts = partition[2].value_counts()
                    plt.figure(figsize=(6, 4))
                    plt.bar(label_counts.index, label_counts.values)
                    plt.title(f"Partition {partition_number}: {label}")
                    plt.xlabel("Attack Label")
                    plt.ylabel("Count")
                    plt.xticks(rotation=90)
                    plt.show()
                partition_number += 1
    return partitions
#Partition 7 : Multiple client per dataset : Each client has all the labels of the dataset
def partition_7(datasets, NUM_CLIENTS=10, plot=True):
    """Split every dataset into NUM_CLIENTS equally sized client partitions.

    Parameters
    ----------
    datasets : iterable of (X, y, metadata) triples, where metadata["Attack"]
        holds the per-row attack label.
    NUM_CLIENTS : int, optional
        Number of equal shards per dataset. Rows beyond
        NUM_CLIENTS * (len(X) // NUM_CLIENTS) are silently discarded.
    plot : bool, optional
        When True (default, the original behavior), show one row of bar
        charts per dataset with each shard's label distribution.

    Returns
    -------
    list of (X, y, m) triples, NUM_CLIENTS per input dataset, in dataset
    order. No shuffling: shards follow the dataset's existing row order.
    """
    partitions = []
    for dataset_idx, dataset in enumerate(datasets):
        X = dataset[0]
        y = dataset[1]
        m = dataset[2]["Attack"]
        # Split the dataset into NUM_CLIENTS contiguous, equal-size parts.
        partition_size = len(X) // NUM_CLIENTS
        dataset_partitions = []
        for i in range(NUM_CLIENTS):
            idx_from, idx_to = i * partition_size, (i + 1) * partition_size
            X_part = X[idx_from:idx_to]
            y_part = y[idx_from:idx_to]
            m_part = m[idx_from:idx_to]
            dataset_partitions.append((X_part, y_part, m_part))
        partitions.extend(dataset_partitions)
        if plot:
            # Plot the label distribution of every shard of this dataset.
            fig, axes = plt.subplots(1, NUM_CLIENTS, figsize=(15, 5))
            for i, ax in enumerate(axes):
                ax.set_title(f"Dataset {dataset_idx+1}, Partition {i+1}")
                ax.set_ylabel("Count")
                ax.set_xlabel("Attack Label")
                ax.tick_params(axis='x', rotation=90)
                counts = dataset_partitions[i][2].value_counts()
                ax.bar(counts.index, counts)
            plt.show()
    return partitions
#Partition 8 : Multiple client per dataset : Each client has {TOTAL-NUM_REMOVED_CLASSES} number of labels of that dataset
def partition_8(dataset_train, NUM_REMOVED_CLASSES, NUM_CLIENTS=10, plot=True):
    """Split every dataset into NUM_CLIENTS shards, each missing some labels.

    Parameters
    ----------
    dataset_train : iterable of (X, y, metadata) triples, where
        metadata["Attack"] holds the per-row attack label.
    NUM_REMOVED_CLASSES : int
        Number of label classes randomly dropped from each shard. Must not
        exceed the number of classes present in a shard, otherwise
        np.random.choice raises ValueError.
    NUM_CLIENTS : int, optional
        Number of equal shards per dataset; rows beyond
        NUM_CLIENTS * (len(X) // NUM_CLIENTS) are discarded.
    plot : bool, optional
        When True (default, the original behavior), show one row of bar
        charts per dataset with each shard's label distribution.

    Returns
    -------
    list of (X, y, m) triples, NUM_CLIENTS per input dataset.
    """
    partitions = []
    for dataset_idx, dataset in enumerate(dataset_train):
        X = dataset[0]
        y = dataset[1]
        m = dataset[2]["Attack"]
        # Shuffle so each shard mixes labels.
        # NOTE(review): shuffle() is called without random_state, so shard
        # contents differ between runs — consider random_state=SEED.
        X, y, m = shuffle(X, y, m)
        # Split the data into NUM_CLIENTS contiguous, equal-size parts.
        partition_size = math.floor(len(X) / NUM_CLIENTS)
        dataset_partitions = []
        for i in range(NUM_CLIENTS):
            idx_from, idx_to = i * partition_size, (i + 1) * partition_size
            X_part = X[idx_from:idx_to]
            y_part = y[idx_from:idx_to]
            m_part = m[idx_from:idx_to]
            # Randomly remove NUM_REMOVED_CLASSES labels from this shard.
            unique_labels = m_part.unique()
            classes_to_remove = np.random.choice(unique_labels, NUM_REMOVED_CLASSES, replace=False)
            mask = ~m_part.isin(classes_to_remove)
            X_part = X_part[mask]
            y_part = y_part[mask]
            m_part = m_part[mask]
            dataset_partitions.append((X_part, y_part, m_part))
        partitions.extend(dataset_partitions)
        if plot:
            # Plot the label distribution of every shard of this dataset.
            fig, axes = plt.subplots(1, NUM_CLIENTS, figsize=(15, 5))
            for i, ax in enumerate(axes):
                ax.set_title(f"Dataset {dataset_idx+1}, Partition {i+1}")
                ax.set_ylabel("Count")
                ax.set_xlabel("Attack Label")
                ax.tick_params(axis='x', rotation=90)
                counts = dataset_partitions[i][2].value_counts()
                ax.bar(counts.index, counts)
            plt.show()
    return partitions
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment