Data feeder for SOLR
$ git clone https://github.com/vysokeskoly/SolrFeeder.git
$ cd SolrFeeder
$ composer install --no-dev
php8.1
bin/build-deb-app
bin/Solr-feeder-console list
bin/Solr-feeder-console [command] [arguments]
help Displays help for a command
list Lists commands
Solr-feeder
Solr-feeder:feed Feed data from database to SOLR by xml configuration
Feed data from database
to SOLR
by xml configuration
bin/Solr-feeder-console Solr-feeder:feed [configPath]
Arguments:
config Path to xml config file.
Options:
-h, --help Display this help message
-q, --quiet Do not output any message
-V, --version Display this application version
--ansi Force ANSI output
--no-ansi Disable ANSI output
-n, --no-interaction Do not ask any interactive question
-v|vv|vvv, --verbose Increase the verbosity of messages: 1 for normal output, 2 for more verbose output and 3 for debug
You can find different examples in Fixtures
<?xml version="1.0" encoding="UTF-8"?>
<ConnectorConfig xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="file:///C:/apps/jaxb-ri-2.2.7/bin/config.xsd">
<!-- Example configuration for feeding data from a database to Solr. -->
<!-- lockFile: path to the file which prevents the command from executing simultaneously. -->
<lockFile>var/tmp/vysokeskoly.txt</lockFile>
<!-- statusReportFile: path to the file updated on every run; holds exit status, timestamp and result (OK or an error message). -->
<statusReportFile>var/status/status-report-vysokeskoly.txt</statusReportFile>
<!-- NOTE(review): log4jConfig is not described in the node table of this document; confirm whether it is still read. -->
<log4jConfig>etc/log4j-vysokeskoly.properties</log4jConfig>
<db>
<!-- driver, connection, user, password: database connection information (currently supported: mysql, postgresql). -->
<driver>org.postgresql.Driver</driver>
<connection>jdbc:postgresql://dbvysokeskoly:5432/vysokeskoly</connection>
<user>vysokeskoly</user>
<password>vysokeskoly</password>
<!-- NOTE(review): credentialsFile is not described in the node table; its relation to user/password above should be confirmed. -->
<credentialsFile>etc/db-credentials-vysokeskoly.xml</credentialsFile>
<!-- timestamps/@file: path to the file containing the timestamps of the last executed batch. -->
<!-- Each timestamp declares placeholders which are replaced in the sql queries by previous values (or by the default). -->
<timestamps file="var/timestamp/last-timestamps.xml" >
<timestamp type="datetime" name="timestamp" column="ts" lastValuePlaceholder="%%LAST_TIMESTAMP%%" currValuePlaceholder="%%CURRENT_TIMESTAMP%%" default="1970-01-01 00:00:00"/>
<timestamp type="datetime" name="updated" column="updated" lastValuePlaceholder="%%LAST_UPDATED%%" currValuePlaceholder="%%CURRENT_UPDATED%%" default="1970-01-01 00:00:00"/>
<timestamp type="datetime" name="deleted" column="deleted" lastValuePlaceholder="%%LAST_DELETED%%" currValuePlaceholder="%%CURRENT_DELETED%%" default="1970-01-01 00:00:00"/>
</timestamps>
<feeding>
<!-- feedingBatch: definition of one batch sent to Solr (currently supported types: add, delete). -->
<feedingBatch name="add" type="add">
<!-- idColumn: column in the sql result which holds the Solr primary key. -->
<idColumn>study_id</idColumn>
<!-- mainSelect: sql query producing the batch data; it is advised to order it by the column defined in timestamps. -->
<mainSelect><![CDATA[
SELECT *
FROM studies_solr
WHERE updated >= %%LAST_UPDATED%%
ORDER BY updated ASC
]]>
</mainSelect>
<!-- columnMap: sql data mappings, mainly used for separating values for Solr multi-valued fields. -->
<!-- map/@src is the database column, map/@dst the Solr field, map/@separator (optional) splits the src value. -->
<!-- A mapped src column is NOT sent to Solr under its own name unless it also appears as a dst. -->
<columnMap>
<map src="study_keyword" dst="study_keyword" separator="\|" />
<map src="study_name" dst="study_name" />
<map src="study_name" dst="study_name_str" />
<map src="updated" dst="_ignored" /><!-- will be stored in lastmodified field - see the SQL query above -->
</columnMap>
</feedingBatch>
<!-- Second batch: type "delete", selecting rows whose deleted column is at or after the last stored value. -->
<feedingBatch name="delete" type="delete">
<idColumn>study_id</idColumn>
<mainSelect><![CDATA[
SELECT study_id, deleted FROM studies_solr WHERE deleted >= %%LAST_DELETED%%
]]></mainSelect>
</feedingBatch>
</feeding>
</db>
<feeder>
<solr>
<!-- url: Solr collection url. -->
<url>http://solr:8983/solr/vysokeskoly</url>
<!-- NOTE(review): connectionType is not described in the node table of this document. -->
<connectionType>http</connectionType>
<!-- readTimeout: timeout for the Solr client. -->
<readTimeout>200000</readTimeout>
<!-- batchSizeDocs: max number of documents sent to Solr in one batch. -->
<batchSizeDocs>100</batchSizeDocs>
</solr>
</feeder>
</ConnectorConfig>
Node | Description |
---|---|
/lockFile |
Path to the lock file which prevents the command from being executed simultaneously |
/statusReportFile |
Path to the file which is updated every time the command runs and holds its result (0 2017-08-11T17:59:14 OK ) |
0 |
is exit status of command |
timestamp |
of execution |
OK |
is result (or error message) |
/db/driver,connection,user,password |
Holds information about database connection (currently supported: mysql , postgresql ) |
/db/timestamps |
|
@file |
Path to file containing timestamps of last executed batch (see syntax) |
/timestamp |
Placeholders which will be replaced in sql query by previous values (or by defaults) |
/db/feeding/feedingBatch |
Definition of one batch to Solr (currently supported types: add , delete ) |
/idColumn |
Column in sql which holds Solr primary key |
/mainSelect |
sql query representing the batch data (it is advised to order it by the column defined in timestamps ) |
/columnMap |
sql data mappings |
/map |
Mapping is mainly used for separating values for Solr multi-valued field |
@src |
column in row from database, which should be mapped (mapped columns will NOT be sent to Solr, unless they appear in dst too) |
@dst |
field name in Solr document |
@separator |
[OPTIONAL] separator for data in src (if it is not set, mapping will simply pass the value) |
/feeder/solr |
|
/url |
Solr collection url |
/readTimeout |
Timeout for Solr client |
/batchSizeDocs |
Max number of documents sent to Solr in one batch |
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<!-- Example of the timestamps file referenced by /db/timestamps/@file: it holds the timestamps of the last executed batch. -->
<timestamps updatedOn="2017-08-07 21:12:03.347 +0200">
<!-- Each name corresponds to a timestamp/@name declared in the feeder configuration. -->
<timestamp name="deleted">2017-07-13 09:08:59.78</timestamp>
<timestamp name="updated">2017-08-07 04:11:27.855</timestamp>
<timestamp name="timestamp">1970-01-01 00:00:00.0</timestamp>
</timestamps>