Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support Point-in-time Data Operation #343

Merged
merged 34 commits into from
Mar 10, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
34 commits
Select commit Hold shift click to select a range
3e92169
add period ops class
bxdd Mar 9, 2021
fead243
black format
bxdd Mar 9, 2021
61720c2
add pit data read
bxdd Mar 10, 2021
a0959a9
fix bug in period ops
bxdd Mar 10, 2021
bd46d14
update ops runnable
bxdd Mar 10, 2021
9f1cc64
update PIT test example
bxdd Mar 10, 2021
63e4895
black format
bxdd Mar 10, 2021
88b7926
update PIT test
bxdd Mar 10, 2021
a2dae5c
update tets_PIT
bxdd Mar 12, 2021
99db80d
update code format
bxdd Mar 12, 2021
c4bbe6b
add check_feature_exist
bxdd Mar 12, 2021
20bcf25
black format
bxdd Mar 12, 2021
6e23ff7
optimize the PIT Algorithm
bxdd Mar 12, 2021
88a0d3d
fix bug
bxdd Mar 12, 2021
f52462a
update example
bxdd Mar 12, 2021
b794e65
update test_PIT name
bxdd Mar 17, 2021
255ed0b
Merge https://github.com/microsoft/qlib
bxdd Apr 7, 2021
9df1fbd
add pit collector
bxdd Apr 7, 2021
71d5640
black format
bxdd Apr 7, 2021
ebe277b
fix bugs
bxdd Apr 8, 2021
655ff51
fix try
bxdd Apr 8, 2021
f6ca4d2
fix bug & add dump_pit.py
bxdd Apr 9, 2021
566a8f9
Successfully run and understand PIT
you-n-g Mar 4, 2022
63b5ed4
Merge remote-tracking branch 'origin/main' into PIT
you-n-g Mar 4, 2022
4997389
Add some docs and remove a bug
you-n-g Mar 4, 2022
561be64
Merge remote-tracking branch 'origin/main' into PIT
you-n-g Mar 8, 2022
6811a07
Merge remote-tracking branch 'origin/main' into PIT
you-n-g Mar 8, 2022
cf77cd0
mv crypto collector
you-n-g Mar 8, 2022
79422a1
black format
you-n-g Mar 8, 2022
48ea2c5
Run succesfully after merging master
you-n-g Mar 8, 2022
9c67303
Pass test and fix code
you-n-g Mar 10, 2022
69cf2ab
remove useless PIT code
you-n-g Mar 10, 2022
de8d6cb
fix PYlint
you-n-g Mar 10, 2022
2671dc2
Rename
you-n-g Mar 10, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
add pit data read
  • Loading branch information
bxdd committed Mar 10, 2021
commit 61720c29d43efc8a8ef8ce902f12112bba593744
13 changes: 8 additions & 5 deletions qlib/data/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,8 +360,8 @@ def load(self, instrument, start_index, end_index, freq):

resample_series = pd.Series(index=pd.RangeIndex(start_index, end_index + 1), dtype="float32", name=str(self))
for cur_index in range(start_index, end_index + 1):
start_offset, end_offset = self.get_period_offset(cur_index)
resample_data[cur_index] = self.load_period_data(instrument, start_offset, end_offset, cur_index).iloc[-1]
start_offset = self.get_period_offset(cur_index)
resample_data[cur_index] = self.load_period_data(instrument, start_offset, 0, cur_index).iloc[-1]

H["f"][args] = resample_series
return resample_data
Expand All @@ -381,14 +381,17 @@ def __init__(self, name=None):
self._name = type(self).__name__.lower()

def __str__(self):
return "$" + self._name
return "$$" + self._name

def load_period_data(self, instrument, start_offset, end_offset, cur_index):
### Zhou Code
return pd.Series([1, 2, 3])
from .data import FeatureD

return FeatureD.period_feature(instrument, str(self), start_offset, end_offset, cur_index)
# return pd.Series([1, 2, 3]) # for test

def get_period_offset(self, cur_index):
return 0
return 0, 0


class PExpressionOps(PExpression):
Expand Down
32 changes: 32 additions & 0 deletions qlib/data/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -632,6 +632,14 @@ def _uri_data(self):
"""Static feature file uri."""
return os.path.join(C.get_data_path(), "features", "{}", "{}.{}.bin")

@property
def _uri_period_index(self):
    """Path template of the period index file.

    The two ``{}`` placeholders are returned unfilled; ``period_feature``
    fills them with the lower-cased instrument name and the field name.
    """
    data_root = C.get_data_path()
    return os.path.join(data_root, "financial", "{}", "{}.index")

@property
def _uri_period_data(self):
    """Path template of the period data file.

    The two ``{}`` placeholders are returned unfilled; ``period_feature``
    fills them with the lower-cased instrument name and the field name.
    """
    data_root = C.get_data_path()
    return os.path.join(data_root, "financial", "{}", "{}.data")

def feature(self, instrument, field, start_index, end_index, freq):
# validate
field = str(field).lower()[1:]
Expand All @@ -645,6 +653,30 @@ def feature(self, instrument, field, start_index, end_index, freq):
series = read_bin(uri_data, start_index, end_index)
return series

def period_feature(self, instrument, field, start_offset, end_offset, cur_index):
    """Load period (point-in-time) data of one field for one instrument.

    Parameters
    ----------
    instrument : str
        Instrument code. It is converted with ``code_to_fname`` and
        lower-cased before being substituted into the file path templates.
    field : str
        Field name. The first two characters are dropped (presumably the
        ``$$`` prefix of period expressions) and the remainder must start
        with ``q_`` or ``a_``; ``q_`` selects quarterly handling.
    start_offset, end_offset : int
        Offsets counted backwards from the located record; the revision
        records in ``[loc - start_offset, loc - end_offset)`` are inspected.
    cur_index : int
        Forwarded unchanged to ``read_period_interval_data``.

    Returns
    -------
    ``float('nan')`` when no record lies before the current date, otherwise
    the result of ``read_period_interval_data``.

    Raises
    ------
    ValueError
        If the stripped field name starts with neither ``q_`` nor ``a_``.
    """
    DATA_RECORDS = [("date", "I"), ("period", "I"), ("value", "d"), ("_next", "I")]
    NA_VALUE = float("NAN")

    # Validate the field first so that a bad name fails before any file access.
    field = str(field).lower()[2:]
    if not field.startswith(("q_", "a_")):
        raise ValueError("period field must start with 'q_' or 'a_'")
    quarterly = field.startswith("q_")

    instrument = code_to_fname(instrument).lower()
    index_path = self._uri_period_index.format(instrument, field)
    data_path = self._uri_period_data.format(instrument, field)

    data = np.fromfile(data_path, dtype=DATA_RECORDS)
    # find all revision periods before `cur_date`
    # NOTE(review): `cur_date` is not defined in this method or its parameters.
    # It presumably has to be derived from `cur_index` in the same encoding as
    # the stored `date` column -- confirm and define it before merging.
    loc = np.searchsorted(data["date"], cur_date, side="left")
    if loc <= 0:
        return NA_VALUE

    # Both bounds come from the same window of revision records.
    periods = data["period"][loc - start_offset : loc - end_offset]
    last_period = periods.max()  # the latest period in the window
    first_period = periods.min()

    return read_period_interval_data(index_path, data_path, last_period, first_period, cur_index, quarterly)


class LocalExpressionProvider(ExpressionProvider):
"""Local expression data provider class
Expand Down
Loading