documents: complete the data conversion

* Implements transformation from Marc21 to JSON RERO ILS for: * frequency (L32). * bf:usageAndAccessPolicy (L74). * document relations (L28). * publication_place link from field 752 (L47). * closes rero#1617. * closes rero#1951. * closes rero#1987. * closes rero#1996. Co-Authored-by: Gianni Pante <[email protected]>
reropag · Jun 13, 2021 · 74529eb · 74529eb
1 parent d98129e
commit 74529eb
Show file tree

Hide file tree

Showing 8 changed files with 724 additions and 70 deletions.
diff --git a/data/pid_dependencies_big.json b/data/pid_dependencies_big.json
@@ -149,6 +149,16 @@
         "name": "relatedTo",
         "ref": "document",
         "optional": "True"
+      },
+      {
+        "name": "hasReproduction",
+        "ref": "document",
+        "optional": "True"
+      },
+      {
+        "name": "reproductionOf",
+        "ref": "document",
+        "optional": "True"
       }
     ]
   },
@@ -299,4 +309,4 @@
       }
     ]
   }
-]
+]
diff --git a/data/pid_dependencies_small.json b/data/pid_dependencies_small.json
@@ -294,4 +294,4 @@
       }
     ]
   }
-]
+]
diff --git a/rero_ils/dojson/utils.py b/rero_ils/dojson/utils.py
@@ -17,14 +17,18 @@
 
 """Dojson utils."""
 
+import os
 import re
 import sys
 import traceback
 from copy import deepcopy
 
 import click
+import requests
 from dojson import Overdo, utils
 
+from rero_ils.modules.utils import requests_retry_session
+
 _UNIMARC_LANGUAGES_SCRIPTS = {
     'ba': 'latn',  # Latin
     'ca': 'cyrl',  # Cyrillic
@@ -286,6 +290,8 @@
     'z': 'Not applicable'
 }
 
+_CONTRIBUTION_TAGS = ['100', '600', '610', '611', '630', '650', '651', '655',
+                      '700', '710', '711']
 
 re_identified = re.compile(r'\((.*)\)(.*)')
 
@@ -379,6 +385,35 @@ def remove_trailing_punctuation(
         '',
         data.rstrip()).rstrip()
 
+def get_contribution_link(bibid, reroid, id, key):
+    """Build the MEF contribution link for an identified marc field.
+
+    :params bibid: Bib id from the record.
+    :params reroid: RERO id from the record.
+    :params id: $0 from the marc field.
+    :params key: Tag from the marc field.
+    :returns: MEF url on the production host, None when nothing is found.
+    """
+    # lookups go to RERO_ILS_MEF_HOST, returned links always use production
+    public_host = 'mef.rero.ch'
+    lookup_host = os.environ.get('RERO_ILS_MEF_HOST', 'mef.rero.ch')
+    found = re_identified.search(id)
+    if not (found and len(found.groups()) == 2
+            and key[:3] in _CONTRIBUTION_TAGS):
+        error_print('ERROR GET MEF CONTRIBUTION:', bibid, reroid, key, id)
+        return None
+    source = found.group(1).lower()
+    if source != 'idref':
+        # only IdRef identifiers are resolved against MEF
+        return None
+    url = f'https://{lookup_host}/api/{source}/{found.group(2)}'
+    status_code = requests_retry_session().get(url).status_code
+    if status_code == requests.codes.ok:
+        return url.replace(lookup_host, public_host)
+    error_print('WARNING GET MEF CONTRIBUTION:',
+                bibid, reroid, key, id, url, status_code)
+    return None
+
 
 def add_note(new_note, data):
     """Add a new note to the data avoiding duplicate notes.
@@ -913,6 +948,7 @@ class ReroIlsMarc21Overdo(ReroIlsOverdo):
     has_field_490 = False
     has_field_580 = False
     content_media_carrier_type = None
+    links_from_752 = []
 
     def __init__(self, bases=None, entry_point_group=None):
         """Reroilsmarc21overdo init."""
@@ -962,8 +998,11 @@ def do(self, blob, ignore_missing=True, exception_handlers=None):
             self.field_008_data = ''
             self.date1_from_008 = None
             self.date2_from_008 = None
+            self.original_date_from_008 = None
             self.date_type_from_008 = ''
             self.date = {'start_date': None}
+            self.serial_type = ''
+            self.is_top_level_record = False
             fields_008 = self.get_fields(tag='008')
             if fields_008:
                 self.field_008_data = self.get_control_field_data(
@@ -995,9 +1034,9 @@ def do(self, blob, ignore_missing=True, exception_handlers=None):
             # identify a top level record (has 019 $a Niveau supérieur)
             regexp = re.compile(r'Niveau sup[eé]rieur', re.IGNORECASE)
             fields_019 = self.get_fields(tag='019')
-            note = ''
             notes_from_019_and_351 = []
             for field_019 in fields_019:
+                note = ''
                 for subfield_a in self.get_subfields(field_019, 'a'):
                     note += ' | ' + subfield_a
                     if regexp.search(subfield_a):
@@ -1037,7 +1076,18 @@ def do(self, blob, ignore_missing=True, exception_handlers=None):
                 if description_conventions:
                     self.admin_meta_data['descriptionConventions'] = \
                         description_conventions
-            # check presence of specific fields
+
+            # build the list of links from field 752
+            self.links_from_752 = []
+            fields_752 = self.get_fields(tag='752')
+            for field_752 in fields_752:
+                subfields_d = self.get_subfields(field_752, 'd')
+                if subfields_d:
+                    identifier = build_identifier(field_752['subfields'])
+                    if identifier:
+                        self.links_from_752.append(identifier)
+
+            # check presence of specific fields
             self.has_field_490 = len(self.get_fields(tag='490')) > 0
             self.has_field_580 = len(self.get_fields(tag='580')) > 0
             result = super().do(
@@ -1107,7 +1157,7 @@ def init_lang_from(fields_041, code):
                         langs_from_041.append(lang_from_041)
             return langs_from_041
 
-        self.lang_from_008 = ""
+        self.lang_from_008 = None
         self.langs_from_041_a = []
         self.langs_from_041_h = []
         try:
-Original file line number
+Diff line change
@@ Expand Up / @@ -294,4 +294,4 @@ @@
           }
         ]
       }
-    ]
+    ]