Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
I
INT-HACK-Groupe7
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
PC-AP-INT446-N-2023
Hacking Health
INT-HACK-Groupe7
Commits
34ab0bea
Commit
34ab0bea
authored
2 years ago
by
BIRK Renaud
Browse files
Options
Downloads
Patches
Plain Diff
Mise à jour de l'identification et ajout de tests
parent
bbb61ce8
No related branches found
No related tags found
No related merge requests found
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
poc/_identification.py
+32
-16
32 additions, 16 deletions
poc/_identification.py
poc/_models.py
+2
-2
2 additions, 2 deletions
poc/_models.py
poc/_ocr.py
+4
-0
4 additions, 0 deletions
poc/_ocr.py
test.py
+30
-17
30 additions, 17 deletions
test.py
with
68 additions
and
35 deletions
poc/_identification.py
+
32
−
16
View file @
34ab0bea
...
...
@@ -5,8 +5,8 @@ import csv, re
class
Identification
:
"""
Classe de gestion de l
'
identification et du regroupement des données.
:param groups:
La
liste de groupes trouvés à l
'
issue de l
'
OCR
:param csv_path:
Le
chemin vers le fichier CSV contenant les informations
:param groups:
Une
liste de groupes trouvés à l
'
issue de l
'
OCR
:param csv_path:
Un
chemin vers le fichier CSV contenant les informations
sur les marques référencées.
"""
...
...
@@ -15,7 +15,16 @@ class Identification:
self
.
csv_path
=
csv_path
self
.
device
=
Device
()
def
extract_column
(
input_file_path
:
str
,
output_file_path
:
str
,
column_index
:
int
,
distinct
=
True
,
case_insensitive
=
True
):
def
extract_column
(
self
,
input_file_path
:
str
,
output_file_path
:
str
,
column_index
:
int
,
drop_duplicates
=
True
,
case_insensitive
=
True
)
->
None
:
"""
Extrait une colonne d
'
un fichier séparé par des virgules
(comma-separated values)
:param input_file_path: Un chemin vers le fichier à traiter
:param output_file_path: Le chemin où générer le fichier
:param column_index: Indice de la colonne (en partant de 0)
:param drop_duplicates: Enlève les répétitions
:param case_insensitive: Ne respecte pas la casse
"""
added_rows
=
set
()
with
open
(
input_file_path
,
'
r
'
,
encoding
=
'
utf-8
'
)
as
input_csv
,
open
(
output_file_path
,
'
w
'
,
encoding
=
'
utf-8
'
,
newline
=
''
)
as
output_csv
:
...
...
@@ -28,7 +37,7 @@ class Identification:
for
row
in
reader
:
value
=
row
[
column_index
].
lower
()
if
case_insensitive
else
row
[
column_index
]
if
d
istinct
:
if
d
rop_duplicates
:
if
value
not
in
added_rows
:
writer
.
writerow
([
value
])
added_rows
.
add
(
value
)
...
...
@@ -37,16 +46,13 @@ class Identification:
def
value_in_column
(
self
,
file_path
:
str
,
value
:
str
,
column_index
=
0
)
->
bool
:
"""
Vérifie si une valeur apparaît dans une colonne particulière d
'
un fichier CSV.
La casse n
'
est pas prise en compte.
Args:
file_path (str): Le chemin du fichier CSV.
column_index (int): L
'
indice de la colonne à vérifier (commençant à 0).
value (str): La valeur à rechercher dans la colonne.
Returns:
bool: True si la valeur apparaît dans la colonne, False sinon.
Vérifie si une valeur fait partie d
'
une colonne d
'
un fichier séparé par
des virgules (comma-separated values)
:param input_file_path: Un chemin vers le fichier à traiter
:param output_file_path: Le chemin où générer le fichier
:param column_index: Indice de la colonne (en partant de 0)
:param drop_duplicates: Enlève les répétitions
:param case_insensitive: Ne respecte pas la casse
"""
with
open
(
file_path
,
'
r
'
,
encoding
=
'
utf-8
'
)
as
file
:
reader
=
csv
.
reader
(
file
)
...
...
@@ -58,10 +64,20 @@ class Identification:
return
True
return
False
def
is_brand_name
(
self
,
word
:
str
):
def
is_brand_name
(
self
,
word
:
str
)
->
bool
:
"""
Renvoie `True` si le mot passé en entrée correspond à une marque
présente dans la base de données des marques connues d
'
Oxyledger.
:word: Un mot à chercher
"""
return
self
.
value_in_column
(
self
.
csv_path
+
"
/device_model_extracted_brand_name.csv
"
,
word
)
def
is_manufacturer_name
(
self
,
word
:
str
):
def
is_manufacturer_name
(
self
,
word
:
str
)
->
bool
:
"""
Renvoie `True` si le mot passé en entrée correspond à un fabricant
présent dans la base de données des fabricants connus d
'
Oxyledger.
:word: Un mot à chercher
"""
return
self
.
value_in_column
(
self
.
csv_path
+
"
/device_model_extracted_manufacturer_name.csv
"
,
word
)
def
est_code_barre
(
self
,
chaine
:
str
,
prefix
=
""
)
->
bool
:
...
...
This diff is collapsed.
Click to expand it.
poc/_models.py
+
2
−
2
View file @
34ab0bea
...
...
@@ -30,11 +30,11 @@ class Device:
:param useful_groups: La liste de groupes à trier
"""
def
__init__
(
self
,
manufacturer_name
=
""
,
brand_name
=
""
,
description
=
""
,
uid
=
""
,
ref
=
""
,
useful_groups
:
List
[
Group
]
=
list
())
->
None
:
def
__init__
(
self
,
uid
=
""
,
manufacturer_name
=
""
,
brand_name
=
""
,
description
=
""
,
ref
=
""
,
useful_groups
:
List
[
Group
]
=
list
())
->
None
:
self
.
uid
=
uid
self
.
manufacturer_name
=
manufacturer_name
self
.
brand_name
=
brand_name
self
.
description
=
description
self
.
uid
=
uid
self
.
ref
=
ref
self
.
useful_groups
=
useful_groups
...
...
This diff is collapsed.
Click to expand it.
poc/_ocr.py
+
4
−
0
View file @
34ab0bea
...
...
@@ -44,6 +44,10 @@ class OCR:
def
save_ocr
(
self
,
groups
:
List
[
Group
],
img_path
:
str
,
output_img_path
:
str
,
font_path
:
str
)
->
Image
:
"""
Génère une image d
'
illustration des résultats et la renvoie.
:param groups: Une liste de groupes trouvés
:param img_path: Un chemin vers l
'
image à analyser
:param output_img_path: Un chemin vers l
'
image de sortie
:param font_path: Un chemin vers une police d
'
écriture (au format TrueType Font)
"""
image
=
Image
.
open
(
img_path
).
convert
(
'
RGB
'
)
boxes
=
[
line
.
box
for
line
in
groups
]
...
...
This diff is collapsed.
Click to expand it.
test.py
+
30
−
17
View file @
34ab0bea
...
...
@@ -14,6 +14,9 @@ def main():
# 1. Phase de reconnaissance optique de caractères
ocr
=
OCR
(
use_gpu
=
True
)
groups
=
ocr
.
recognize
(
TEST_IMAGES_DIR
+
"
/biomet.jpg
"
)
file
=
open
(
OUTPUT_DIR
+
"
/biomet_export_step1.txt
"
,
"
w
"
)
file
.
write
(
str
(
groups
))
file
.
close
()
# 1 bis. Génération des images
ocr
.
save_ocr
(
groups
,
TEST_IMAGES_DIR
+
"
/biomet.jpg
"
,
OUTPUT_DIR
+
"
/biomet_export_step1.jpg
"
,
FONT_DIR
+
"
/
"
+
FONT_FILENAME
)
...
...
@@ -21,28 +24,38 @@ def main():
# 2. Identification du dispositif
identification
=
Identification
(
groups
)
device
=
identification
.
identify
()
print
(
device
)
output_file
=
open
(
OUTPUT_DIR
+
"
/biomet_export_step2.txt
"
,
"
w
"
)
output_file
.
write
(
str
(
device
))
output_file
.
close
()
# 2 bis. Génération des images
ocr
.
save_ocr
(
device
.
useful_groups
,
TEST_IMAGES_DIR
+
"
/biomet.jpg
"
,
OUTPUT_DIR
+
"
/biomet_export_step2.jpg
"
,
FONT_DIR
+
"
/
"
+
FONT_FILENAME
)
# 3. Interface graphique
# groups2 = ocr.recognize(TEST_IMAGES_DIR + "/stryker.jpg")
# ocr.save_ocr(groups2, TEST_IMAGES_DIR + "/stryker.jpg", OUTPUT_DIR + "/stryker_export_step1.jpg", FONT_DIR + "/" + FONT_FILENAME)
# identification2 = Identification(groups2)
# device2, newgroups2 = identification2.identify()
# print(device2)
# ocr.save_ocr(newgroups2, TEST_IMAGES_DIR + "/stryker.jpg", OUTPUT_DIR + "/stryker_export_step2.jpg", FONT_DIR + "/" + FONT_FILENAME)
# groups3 = ocr.recognize(TEST_IMAGES_DIR + "/passeo-18.jpg")
# ocr.save_ocr(groups3, TEST_IMAGES_DIR + "/passeo-18.jpg", OUTPUT_DIR + "/passeo-18_export_step1.jpg", FONT_DIR + "/" + FONT_FILENAME)
# identification3 = Identification(groups3)
# device3, newgroups3 = identification3.identify()
# print(device3)
# ocr.save_ocr(newgroups3, TEST_IMAGES_DIR + "/passeo-18.jpg", OUTPUT_DIR + "/passeo-18_export_step2.jpg", FONT_DIR + "/" + FONT_FILENAME)
# Autres exemples
groups2
=
ocr
.
recognize
(
TEST_IMAGES_DIR
+
"
/passeo-18.jpg
"
)
ocr
.
save_ocr
(
groups2
,
TEST_IMAGES_DIR
+
"
/passeo-18.jpg
"
,
OUTPUT_DIR
+
"
/passeo-18_export_step1.jpg
"
,
FONT_DIR
+
"
/
"
+
FONT_FILENAME
)
file2
=
open
(
OUTPUT_DIR
+
"
/passeo-18_export_step1.txt
"
,
"
w
"
)
file2
.
write
(
str
(
groups2
))
file2
.
close
()
identification2
=
Identification
(
groups2
)
device2
=
identification2
.
identify
()
output_file2
=
open
(
OUTPUT_DIR
+
"
/passeo-18_export_step2.txt
"
,
"
w
"
)
output_file2
.
write
(
str
(
device2
))
output_file2
.
close
()
ocr
.
save_ocr
(
device2
.
useful_groups
,
TEST_IMAGES_DIR
+
"
/passeo-18.jpg
"
,
OUTPUT_DIR
+
"
/passeo-18_export_step2.jpg
"
,
FONT_DIR
+
"
/
"
+
FONT_FILENAME
)
groups3
=
ocr
.
recognize
(
TEST_IMAGES_DIR
+
"
/stryker.jpg
"
)
ocr
.
save_ocr
(
groups3
,
TEST_IMAGES_DIR
+
"
/stryker.jpg
"
,
OUTPUT_DIR
+
"
/stryker_export_step1.jpg
"
,
FONT_DIR
+
"
/
"
+
FONT_FILENAME
)
file3
=
open
(
OUTPUT_DIR
+
"
/stryker_export_step1.txt
"
,
"
w
"
)
file3
.
write
(
str
(
groups3
))
file3
.
close
()
identification3
=
Identification
(
groups3
)
device3
=
identification3
.
identify
()
output_file3
=
open
(
OUTPUT_DIR
+
"
/stryker_export_step2.txt
"
,
"
w
"
)
output_file3
.
write
(
str
(
device3
))
output_file3
.
close
()
ocr
.
save_ocr
(
device3
.
useful_groups
,
TEST_IMAGES_DIR
+
"
/stryker.jpg
"
,
OUTPUT_DIR
+
"
/stryker_export_step2.jpg
"
,
FONT_DIR
+
"
/
"
+
FONT_FILENAME
)
if
__name__
==
'
__main__
'
:
#Identification.extract_column("data/csv/device_model.csv", "data/csv/device_model_extracted_brand_name.csv", 6)
#Identification.extract_column("data/csv/device_model.csv", "data/csv/device_model_extracted_manufacturer_name.csv", 8)
main
()
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment