Skip to content

Commit

Permalink
documents: complete the data conversion
Browse files Browse the repository at this point in the history
    * Implements transformation from Marc21 to JSON RERO ILS for:
        * frequency (L32).
        * bf:usageAndAccessPolicy (L74).
        * document relations (L28).
        * publication_place link from field 752 (L47).
    * closes rero#1617.
    * closes rero#1951.
    * closes rero#1987.
    * closes rero#1996.
Co-Authored-by: Gianni Pante <[email protected]>
  • Loading branch information
reropag committed Jun 13, 2021
1 parent d98129e commit 74529eb
Show file tree
Hide file tree
Showing 8 changed files with 724 additions and 70 deletions.
12 changes: 11 additions & 1 deletion data/pid_dependencies_big.json
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,16 @@
"name": "relatedTo",
"ref": "document",
"optional": "True"
},
{
"name": "hasReproduction",
"ref": "document",
"optional": "True"
},
{
"name": "reproductionOf",
"ref": "document",
"optional": "True"
}
]
},
Expand Down Expand Up @@ -299,4 +309,4 @@
}
]
}
]
]
2 changes: 1 addition & 1 deletion data/pid_dependencies_small.json
Original file line number Diff line number Diff line change
Expand Up @@ -294,4 +294,4 @@
}
]
}
]
]
56 changes: 53 additions & 3 deletions rero_ils/dojson/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,18 @@

"""Dojson utils."""

import os
import re
import sys
import traceback
from copy import deepcopy

import click
import requests
from dojson import Overdo, utils

from rero_ils.modules.utils import requests_retry_session

_UNIMARC_LANGUAGES_SCRIPTS = {
'ba': 'latn', # Latin
'ca': 'cyrl', # Cyrillic
Expand Down Expand Up @@ -286,6 +290,8 @@
'z': 'Not applicable'
}

# MARC tags eligible for a MEF contribution link: get_contribution_link()
# checks the first three characters of a field key against this list.
_CONTRIBUTION_TAGS = ['100', '600', '610', '611', '630', '650', '651',
'655', '700', '710', '711']

# Matches identifiers written as "(source)value": group 1 is the text inside
# the parentheses, group 2 is whatever follows the closing parenthesis.
re_identified = re.compile(r'\((.*)\)(.*)')

Expand Down Expand Up @@ -379,6 +385,35 @@ def remove_trailing_punctuation(
'',
data.rstrip()).rstrip()

def get_contribution_link(bibid, reroid, id, key):
"""Get the MEF contribution link for an identified MARC field.

The identifier is parsed with ``re_identified`` as ``(type)value``. A lookup
is attempted only when the first three characters of ``key`` are listed in
``_CONTRIBUTION_TAGS`` and the lower-cased type is ``idref``. In that case
``https://<host>/api/idref/<value>`` is requested, where ``<host>`` is read
from the ``RERO_ILS_MEF_HOST`` environment variable (default
``mef.rero.ch``).

:param bibid: Bib id from the record (only passed to ``error_print``).
:param reroid: RERO id from the record (only passed to ``error_print``).
:param id: $0 from the marc field.
:param key: Tag from the marc field.
:returns: The requested URL with the queried host replaced by
``mef.rero.ch`` when the response status is OK. In every other case
the function falls through and implicitly returns ``None``.
"""
# https://mef.test.rero.ch/api/mef/?q=rero.rero_pid:A012327677
# The returned link always points at prod_host; the lookup itself is sent to
# test_host, which can be overridden through RERO_ILS_MEF_HOST.
prod_host = 'mef.rero.ch'
test_host = os.environ.get('RERO_ILS_MEF_HOST', 'mef.rero.ch')
mef_url = f'https://{test_host}/api/'

# Split "(type)value" into its two regex groups.
match = re_identified.search(id)
if match and len(match.groups()) == 2 and key[:3] in _CONTRIBUTION_TAGS:
match_type = match.group(1).lower()
match_value = match.group(2)
# Only IdRef identifiers are looked up on the MEF server.
if match_type == 'idref':
url = f'{mef_url}{match_type}/{match_value}'
response = requests_retry_session().get(url)
status_code = response.status_code
if status_code == requests.codes.ok:
# Hand back the production URL, not the one that was queried.
return url.replace(test_host, prod_host)
# The lookup did not answer with an OK status: report it with the URL and
# the status code received.
error_print('WARNING GET MEF CONTRIBUTION:',
bibid, reroid, key, id, url, status_code)
# NOTE(review): indentation is not visible in this view, so it is unclear
# whether this else pairs with `if match_type == 'idref'` or with the outer
# `if match and ...` -- confirm against the repository before editing.
else:
error_print('ERROR GET MEF CONTRIBUTION:', bibid, reroid, key, id)


def add_note(new_note, data):
"""Add a new note to the data avoiding duplicate notes.
Expand Down Expand Up @@ -913,6 +948,7 @@ class ReroIlsMarc21Overdo(ReroIlsOverdo):
has_field_490 = False
has_field_580 = False
content_media_carrier_type = None
links_from_752 = []

def __init__(self, bases=None, entry_point_group=None):
"""Reroilsmarc21overdo init."""
Expand Down Expand Up @@ -962,8 +998,11 @@ def do(self, blob, ignore_missing=True, exception_handlers=None):
self.field_008_data = ''
self.date1_from_008 = None
self.date2_from_008 = None
self.original_date_from_008 = None
self.date_type_from_008 = ''
self.date = {'start_date': None}
self.serial_type = ''
self.is_top_level_record = False
fields_008 = self.get_fields(tag='008')
if fields_008:
self.field_008_data = self.get_control_field_data(
Expand Down Expand Up @@ -995,9 +1034,9 @@ def do(self, blob, ignore_missing=True, exception_handlers=None):
# identify a top level record (has 019 $a Niveau supérieur)
regexp = re.compile(r'Niveau sup[eé]rieur', re.IGNORECASE)
fields_019 = self.get_fields(tag='019')
note = ''
notes_from_019_and_351 = []
for field_019 in fields_019:
note = ''
for subfield_a in self.get_subfields(field_019, 'a'):
note += ' | ' + subfield_a
if regexp.search(subfield_a):
Expand Down Expand Up @@ -1037,7 +1076,18 @@ def do(self, blob, ignore_missing=True, exception_handlers=None):
if description_conventions:
self.admin_meta_data['descriptionConventions'] = \
description_conventions
# check presence of specific fields

# build the list of links from field 752
self.links_from_752 = []
fields_752 = self.get_fields(tag='752')
for field_752 in fields_752:
subfields_d = self.get_subfields(field_752, 'd')
if subfields_d:
identifier = build_identifier(field_752['subfields'])
if identifier:
self.links_from_752.append(identifier)

# check presence of specific fields
self.has_field_490 = len(self.get_fields(tag='490')) > 0
self.has_field_580 = len(self.get_fields(tag='580')) > 0
result = super().do(
Expand Down Expand Up @@ -1107,7 +1157,7 @@ def init_lang_from(fields_041, code):
langs_from_041.append(lang_from_041)
return langs_from_041

self.lang_from_008 = ""
self.lang_from_008 = None
self.langs_from_041_a = []
self.langs_from_041_h = []
try:
Expand Down
Loading

0 comments on commit 74529eb

Please sign in to comment.