import: add new sources to import from the web

* Adds Library of Congress catalog. * Adds DNB catalog. * Adds SLSP catalog. * Adds UGent catalog. * Adds KULeuven catalog. * Adds import record serializer factory. * Fixes import subtype facets. * Updates unimarc mapping for BNF import (Tag 464). * Adds API endpoint to export external source configuration. * Closes rero#2065. * Closes rero#1825. Co-Authored-by: Benoit Erken <[email protected]> Co-Authored-by: Laurent Dubois <[email protected]> Co-Authored-by: Renaud Michotte <[email protected]>
benerken · Nov 5, 2021 · c00a06f · c00a06f
1 parent 70ab240
commit c00a06f
Show file tree

Hide file tree

Showing 51 changed files with 46,762 additions and 96 deletions.
diff --git a/rero_ils/config.py b/rero_ils/config.py
@@ -2808,14  2808,65 @@ def _(x):
     'markdown_captions'
 ))
 
-# IMPORT
-# ====
-RERO_IMPORT_REST_ENDPOINTS = dict(
-    bnf=dict(
 # IMPORT FROM EXTERNAL SOURCE CONFIGURATION
 # =============================================================================
 #    Endpoint to load data from external repository. Each endpoint must be
 #    defined as a dict with the following keys:
 #      * key: (required) the endpoint key (used to build the API endpoint)
 #      * import_class: (required) the class used to import the external
 #                      document from this source.
 #      * import_size: (required) the maximum number of documents returned
 #                     when searching this source.
 #      * label: (required) the label used in the professional interface for
 #               this source. This label is not translated.
 #      * weight: (optional) used to sort the sources in the professional
 #                interface. Default value is 100. The lower the weight, the
 #                higher the priority.
 
 RERO_IMPORT_REST_ENDPOINTS = [
     dict(
         key='loc',
         import_class='rero_ils.modules.imports.api:LoCImport',
         import_size=50,
         label='Library of Congress',
         weight=70
     ),
     dict(
         key='bnf',
         import_class='rero_ils.modules.imports.api:BnfImport',
-        import_size=50
         import_size=50,
         label='BNF',
         weight=20
     ),
     dict(
         key='dnb',
         import_class='rero_ils.modules.imports.api:DNBImport',
         import_size=50,
         label='DNB',
         weight=20
     ),
     dict(
         key='slsp',
         import_class='rero_ils.modules.imports.api:SLSPImport',
         import_size=50,
         label='SLSP',
         weight=15
     ),
     dict(
         key='ugent',
         import_class='rero_ils.modules.imports.api:UGentImport',
         import_size=50,
         label='UGent',
         weight=30
     ),
     dict(
         key='kul',
         import_class='rero_ils.modules.imports.api:KULImport',
         import_size=50,
         label='KULeuven',
         weight=30
     )
-)
 ]
 
 # SRU
 # ====

diff --git a/rero_ils/dojson/utils.py b/rero_ils/dojson/utils.py
@@ -351,7  351,9 @@ def get_field_items(value):
 def build_string_from_subfields(value, subfield_selection, separator=' '):
     """Build a string parsing the selected subfields in order."""
     items = get_field_items(value)
-    parts = [value for key, value in items if key in subfield_selection]
     # strip the U+0098 and U+009C control characters from each subfield value
     parts = [value.replace('\u0098', '').replace('\u009C', '')
         for key, value in items if key in subfield_selection]
     return separator.join(parts)
 
 
@@ -399,12  401,34 @@ def get_contribution_link(bibid, reroid, id, key):
     prod_host = 'mef.rero.ch'
     test_host = os.environ.get('RERO_ILS_MEF_HOST', 'mef.rero.ch')
     mef_url = f'https://{test_host}/api/'
-
-    match = re_identified.search(id)
     if type(id) is str:
         match = re_identified.search(id)
     else:
         match = re_identified.search(id[0])
     if match and len(match.groups()) == 2 and key[:3] in _CONTRIBUTION_TAGS:
         match_type = match.group(1).lower()
         match_value = match.group(2)
-        if match_type == 'idref':
         match_type.replace('de-588', 'gnd')
         # if we have a viafid, look for the contributor
         if match_type == "viaf":
             url = f'{mef_url}/mef/?q=viaf_pid:{match_value}'
             response = requests_retry_session().get(url)
             status_code = response.status_code
             if status_code == requests.codes.ok:
                 try:
                     if response.json()['hits']['hits'][0]\
                     ['metadata']['idref']['pid']:
                         match_value = response.json()['hits']['hits'][0]\
                         ['metadata']['idref']['pid']
                         match_type = 'idref'
                     elif response.json()['hits']['hits'][0]\
                     ['metadata']['gnd']['pid']:
                         match_value = response.json()['hits']['hits'][0]\
                         ['metadata']['idref']['pid']
                         match_type = 'gnd'
                 except Exception as err:
                     pass
         if match_type == 'idref' or match_type == 'gnd':
             url = f'{mef_url}{match_type}/{match_value}'
             response = requests_retry_session().get(url)
             status_code = response.status_code
@@ -680,6  704,7 @@ def clean_punctuation(value, punct, spaced_punct):
         data = []
         value = clean_punctuation(label, punct, spaced_punct).strip()
         if value:
             value = value.replace('\u0098', '').replace('\u009C', '')
             data = [{'value': value}]
         else:
             error_print('WARNING NO VALUE:', self.bib_id, self.rero_id, tag,
@@ -817,7  842,7 @@ def extract_description_from_marc_field(self, key, value, data):
                 book_formats.append(book_format)
             dim = remove_trailing_punctuation(
                 data=dimension.rstrip(),
-                punctuation=' ,:;&'
                 punctuation=' ,:;&.'
             )
             if dim:
                 add_data_and_sort_list(
@@ -1141,9  1166,12 @@ def init_country(self):
                                 self.rero_id, cantons_codes)
             if self.cantons:
                 self.country = 'sz'
             if self.country is None:
                 self.country = self.field_008_data[15:18].rstrip()
         else:
             try:
                 self.country = self.field_008_data[15:18].rstrip()
 
             except Exception as err:
                 pass
 
@@ -1853,7  1881,8 @@ def build_identifier(data):
         'RERO': 'RERO',
         'RERO-RAMEAU': 'RERO-RAMEAU',
         'IDREF': 'IdRef',
-        'GND': 'GND'
         'GND': 'GND',
         'DE-588': 'GND'
     }
     result = {}
     data_0 = utils.force_list(data.get('0'))

diff --git a/rero_ils/modules/documents/dojson/contrib/marc21tojson/model.py b/rero_ils/modules/documents/dojson/contrib/marc21tojson/model.py
@@ -2,6  2,7 @@
 #
 # RERO ILS
 # Copyright (C) 2019 RERO
 # Copyright (C) 2021 UCLOUVAIN
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as published by
@@ -1282,13  1283,18 @@ def marc21_to_identifiedBy_from_field_035(self, key, value):
     """Get identifier from field 035."""
     subfield_a = not_repetitive(marc21.bib_id, marc21.rero_id,
                                 key, value, 'a', default='').strip()
     identifiedBy = self.get('identifiedBy', [])
     if subfield_a:
         source = 'RERO'
         # search for the source between parentheses
         match = re.search(r'\(([^()] )\)', subfield_a)
         if match:
             source = match.group(1)
         identifier = {
             'value': subfield_a,
             'type': 'bf:Local',
-            'source': 'RERO'
             'source': source,
         }
-        identifiedBy = self.get('identifiedBy', [])
         identifiedBy.append(identifier)
     return identifiedBy or None
 

diff --git a/rero_ils/modules/documents/dojson/contrib/marc21tojson_dnb/__init__.py b/rero_ils/modules/documents/dojson/contrib/marc21tojson_dnb/__init__.py
@@ -0,0  1,23 @@
 # -*- coding: utf-8 -*-
 #
 # RERO ILS
 # Copyright (C) 2021 RERO
 # Copyright (C) 2021 UCLOUVAIN
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as published by
 # the Free Software Foundation, version 3 of the License.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 # GNU Affero General Public License for more details.
 #
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 
 """MARC21 RERO to JSON."""
 
 from .model import marc21
 
 # ``__all__`` must be a sequence of names. Without the trailing comma,
 # ``('marc21')`` is just the string 'marc21', and ``from <package> import *``
 # would iterate over its characters and fail to find names such as 'm'.
 __all__ = ('marc21',)