Mise à jour de l'identification et ajout de tests

34ab0bea · BIRK Renaud · bbb61ce8 · 34ab0bea · 34ab0bea · 34ab0bea
Commit 34ab0bea authored 2 years ago by BIRK Renaud
--- a/poc/_identification.py
+++ b/poc/_identification.py
@@ -5,8 +5,8 @@ import csv, re
 class Identification:
    """
    Classe de gestion de l'identification et du regroupement des données.
-    :param groups: La liste de groupes trouvés à l'issue de l'OCR
-    :param csv_path: Le chemin vers le fichier CSV contenant les informations
+    :param groups: Une liste de groupes trouvés à l'issue de l'OCR
+    :param csv_path: Un chemin vers le fichier CSV contenant les informations
    sur les marques référencées.
    """

@@ -15,7 +15,16 @@ class Identification:
        self.csv_path = csv_path
        self.device = Device()

-    def extract_column(input_file_path: str, output_file_path: str, column_index: int, distinct=True, case_insensitive=True):
+    def extract_column(self, input_file_path: str, output_file_path: str, column_index: int, drop_duplicates=True, case_insensitive=True) -> None:
+        """
+        Extrait une colonne d'un fichier séparé par des virgules
+        (comma-separated values)
+        :param input_file_path: Un chemin vers le fichier à traiter
+        :param output_file_path: Le chemin où générer le fichier
+        :param column_index: Indice de la colonne (en partant de 0)
+        :param drop_duplicates: Enlève les répétitions
+        :param case_insensitive: Ne respecte pas la casse
+        """
        added_rows = set()

        with open(input_file_path, 'r', encoding='utf-8') as input_csv, open(output_file_path, 'w', encoding='utf-8', newline='') as output_csv:
@@ -28,7 +37,7 @@ class Identification:

            for row in reader:
                value = row[column_index].lower() if case_insensitive else row[column_index]
-                if distinct:
+                if drop_duplicates:
                    if value not in added_rows:
                        writer.writerow([value])
                        added_rows.add(value)
@@ -37,16 +46,13 @@ class Identification:
        
    def value_in_column(self, file_path: str, value: str, column_index=0) -> bool:
        """
-        Vérifie si une valeur apparaît dans une colonne particulière d'un fichier CSV.
-        La casse n'est pas prise en compte.
-
-        Args:
-            file_path (str): Le chemin du fichier CSV.
-            column_index (int): L'indice de la colonne à vérifier (commençant à 0).
-            value (str): La valeur à rechercher dans la colonne.
-
-        Returns:
-            bool: True si la valeur apparaît dans la colonne, False sinon.
+        Vérifie si une valeur fait partie d'une colonne d'un fichier séparé par
+        des virgules (comma-separated values)
+        :param file_path: Un chemin vers le fichier à traiter
+        :param value: La valeur à rechercher dans la colonne
+        :param column_index: Indice de la colonne (en partant de 0)
+        :return: `True` si la valeur apparaît dans la colonne, `False` sinon
+        (la casse n'est pas prise en compte)
        """
        with open(file_path, 'r', encoding='utf-8') as file:
            reader = csv.reader(file)
@@ -58,10 +64,20 @@ class Identification:
                        return True
        return False
    
-    def is_brand_name(self, word: str):
+    def is_brand_name(self, word: str) -> bool:
+        """
+        Renvoie `True` si le mot passé en entrée correspond à une marque
+        présente dans la base de données des marques connues d'Oxyledger.
+        :param word: Un mot à chercher
+        """
        return self.value_in_column(self.csv_path + "/device_model_extracted_brand_name.csv", word)
    
-    def is_manufacturer_name(self, word: str):
+    def is_manufacturer_name(self, word: str) -> bool:
+        """
+        Renvoie `True` si le mot passé en entrée correspond à un fabricant
+        présent dans la base de données des fabricants connus d'Oxyledger.
+        :param word: Un mot à chercher
+        """
        return self.value_in_column(self.csv_path + "/device_model_extracted_manufacturer_name.csv", word)
    
    def est_code_barre(self, chaine: str, prefix="") -> bool:

--- a/poc/_models.py
+++ b/poc/_models.py
@@ -30,11 +30,11 @@ class Device:
    :param useful_groups: La liste de groupes à trier
    """

-    def __init__(self, manufacturer_name="", brand_name="", description="", uid="", ref="", useful_groups: List[Group]=list()) -> None:
+    def __init__(self, uid="", manufacturer_name="", brand_name="", description="", ref="", useful_groups: List[Group]=list()) -> None:
+        self.uid = uid
        self.manufacturer_name = manufacturer_name
        self.brand_name = brand_name
        self.description = description
-        self.uid = uid
        self.ref = ref
        self.useful_groups = useful_groups
    

--- a/poc/_ocr.py
+++ b/poc/_ocr.py
@@ -44,6 +44,10 @@ class OCR:
    def save_ocr(self, groups: List[Group], img_path: str, output_img_path: str, font_path: str) -> Image:
        """
        Génère une image d'illustration des résultats et la renvoie.
+        :param groups: Une liste de groupes trouvés
+        :param img_path: Un chemin vers l'image à analyser
+        :param output_img_path: Un chemin vers l'image de sortie
+        :param font_path: Un chemin vers une police d'écriture (au format TrueType Font)
        """
        image = Image.open(img_path).convert('RGB')
        boxes = [line.box for line in groups]

--- a/test.py
+++ b/test.py
@@ -14,6 +14,9 @@ def main():
    # 1. Phase de reconnaissance optique de caractères
    ocr = OCR(use_gpu=True)
    groups = ocr.recognize(TEST_IMAGES_DIR + "/biomet.jpg")
+    file = open(OUTPUT_DIR + "/biomet_export_step1.txt", "w") 
+    file.write(str(groups))
+    file.close()

    # 1 bis. Génération des images
    ocr.save_ocr(groups, TEST_IMAGES_DIR + "/biomet.jpg", OUTPUT_DIR + "/biomet_export_step1.jpg", FONT_DIR + "/" + FONT_FILENAME)
@@ -21,28 +24,38 @@ def main():
    # 2. Identification du dispositif
    identification = Identification(groups)
    device = identification.identify()
-    print(device)
+    output_file = open(OUTPUT_DIR + "/biomet_export_step2.txt", "w") 
+    output_file.write(str(device))
+    output_file.close()

    # 2 bis. Génération des images
    ocr.save_ocr(device.useful_groups, TEST_IMAGES_DIR + "/biomet.jpg", OUTPUT_DIR + "/biomet_export_step2.jpg", FONT_DIR + "/" + FONT_FILENAME)

-    # 3. Interface graphique
-
-    # groups2 = ocr.recognize(TEST_IMAGES_DIR + "/stryker.jpg")
-    # ocr.save_ocr(groups2, TEST_IMAGES_DIR + "/stryker.jpg", OUTPUT_DIR + "/stryker_export_step1.jpg", FONT_DIR + "/" + FONT_FILENAME)
-    # identification2 = Identification(groups2)
-    # device2, newgroups2 = identification2.identify()
-    # print(device2)
-    # ocr.save_ocr(newgroups2, TEST_IMAGES_DIR + "/stryker.jpg", OUTPUT_DIR + "/stryker_export_step2.jpg", FONT_DIR + "/" + FONT_FILENAME)
-    
-    # groups3 = ocr.recognize(TEST_IMAGES_DIR + "/passeo-18.jpg")
-    # ocr.save_ocr(groups3, TEST_IMAGES_DIR + "/passeo-18.jpg", OUTPUT_DIR + "/passeo-18_export_step1.jpg", FONT_DIR + "/" + FONT_FILENAME)
-    # identification3 = Identification(groups3)
-    # device3, newgroups3 = identification3.identify()
-    # print(device3)
-    # ocr.save_ocr(newgroups3, TEST_IMAGES_DIR + "/passeo-18.jpg", OUTPUT_DIR + "/passeo-18_export_step2.jpg", FONT_DIR + "/" + FONT_FILENAME)
+    # Autres exemples
+    
+    groups2 = ocr.recognize(TEST_IMAGES_DIR + "/passeo-18.jpg")
+    ocr.save_ocr(groups2, TEST_IMAGES_DIR + "/passeo-18.jpg", OUTPUT_DIR + "/passeo-18_export_step1.jpg", FONT_DIR + "/" + FONT_FILENAME)
+    file2 = open(OUTPUT_DIR + "/passeo-18_export_step1.txt", "w") 
+    file2.write(str(groups2))
+    file2.close()
+    identification2 = Identification(groups2)
+    device2 = identification2.identify()
+    output_file2 = open(OUTPUT_DIR + "/passeo-18_export_step2.txt", "w") 
+    output_file2.write(str(device2))
+    output_file2.close()
+    ocr.save_ocr(device2.useful_groups, TEST_IMAGES_DIR + "/passeo-18.jpg", OUTPUT_DIR + "/passeo-18_export_step2.jpg", FONT_DIR + "/" + FONT_FILENAME)
+
+    groups3 = ocr.recognize(TEST_IMAGES_DIR + "/stryker.jpg")
+    ocr.save_ocr(groups3, TEST_IMAGES_DIR + "/stryker.jpg", OUTPUT_DIR + "/stryker_export_step1.jpg", FONT_DIR + "/" + FONT_FILENAME)
+    file3 = open(OUTPUT_DIR + "/stryker_export_step1.txt", "w") 
+    file3.write(str(groups3))
+    file3.close()
+    identification3 = Identification(groups3)
+    device3 = identification3.identify()
+    output_file3 = open(OUTPUT_DIR + "/stryker_export_step2.txt", "w") 
+    output_file3.write(str(device3))
+    output_file3.close()
+    ocr.save_ocr(device3.useful_groups, TEST_IMAGES_DIR + "/stryker.jpg", OUTPUT_DIR + "/stryker_export_step2.jpg", FONT_DIR + "/" + FONT_FILENAME)

 if __name__ == '__main__':
-    #Identification.extract_column("data/csv/device_model.csv", "data/csv/device_model_extracted_brand_name.csv", 6)
-    #Identification.extract_column("data/csv/device_model.csv", "data/csv/device_model_extracted_manufacturer_name.csv", 8)
    main()