From a32928643b1ece2f03f277b2e4de826c3fc25a11 Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Tue, 21 Jun 2022 16:15:18 +0200
Subject: [PATCH] fix: [cld3] enable cld3

---
 bin/lib/objects/Items.py | 28 ++++++++++++++--------------
 bin/packages/Item.py     | 28 ++++++++++++++--------------
 2 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/bin/lib/objects/Items.py b/bin/lib/objects/Items.py
index e34315d7..d31a8315 100755
--- a/bin/lib/objects/Items.py
+++ b/bin/lib/objects/Items.py
@@ -273,20 +273,20 @@ def remove_all_urls_from_content(item_id, item_content=None):
 
 def get_item_languages(item_id, min_len=600, num_langs=3, min_proportion=0.2, min_probability=0.7):
     all_languages = []
-    # ## CLEAN CONTENT ##
-    # content = get_item_content_html2text(item_id, ignore_links=True)
-    # content = remove_all_urls_from_content(item_id, item_content=content)
-    #
-    # # REMOVE USELESS SPACE
-    # content = ' '.join(content.split())
-    # #- CLEAN CONTENT -#
-    #
-    # #print(content)
-    # #print(len(content))
-    # if len(content) >= min_len:
-    #     for lang in cld3.get_frequent_languages(content, num_langs=num_langs):
-    #         if lang.proportion >= min_proportion and lang.probability >= min_probability and lang.is_reliable:
-    #             all_languages.append(lang)
+    ## CLEAN CONTENT ##
+    content = get_item_content_html2text(item_id, ignore_links=True)
+    content = remove_all_urls_from_content(item_id, item_content=content)
+
+    # REMOVE USELESS SPACE
+    content = ' '.join(content.split())
+    #- CLEAN CONTENT -#
+
+    #print(content)
+    #print(len(content))
+    if len(content) >= min_len:
+        for lang in cld3.get_frequent_languages(content, num_langs=num_langs):
+            if lang.proportion >= min_proportion and lang.probability >= min_probability and lang.is_reliable:
+                all_languages.append(lang)
     return all_languages
 
 # API
diff --git a/bin/packages/Item.py b/bin/packages/Item.py
index 40765a39..ab634073 100755
--- a/bin/packages/Item.py
+++ b/bin/packages/Item.py
@@ -148,20 +148,20 @@ def remove_all_urls_from_content(item_id, item_content=None):
 
 def get_item_languages(item_id, min_len=600, num_langs=3, min_proportion=0.2, min_probability=0.7):
     all_languages = []
-    # ## CLEAN CONTENT ##
-    # content = get_item_content_html2text(item_id, ignore_links=True)
-    # content = remove_all_urls_from_content(item_id, item_content=content)
-    #
-    # # REMOVE USELESS SPACE
-    # content = ' '.join(content.split())
-    # #- CLEAN CONTENT -#
-    #
-    # #print(content)
-    # #print(len(content))
-    # if len(content) >= min_len:
-    #     for lang in cld3.get_frequent_languages(content, num_langs=num_langs):
-    #         if lang.proportion >= min_proportion and lang.probability >= min_probability and lang.is_reliable:
-    #             all_languages.append(lang)
+    ## CLEAN CONTENT ##
+    content = get_item_content_html2text(item_id, ignore_links=True)
+    content = remove_all_urls_from_content(item_id, item_content=content)
+
+    # REMOVE USELESS SPACE
+    content = ' '.join(content.split())
+    #- CLEAN CONTENT -#
+
+    #print(content)
+    #print(len(content))
+    if len(content) >= min_len:
+        for lang in cld3.get_frequent_languages(content, num_langs=num_langs):
+            if lang.proportion >= min_proportion and lang.probability >= min_probability and lang.is_reliable:
+                all_languages.append(lang)
     return all_languages
 
 # API