Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support Point-in-time Data Operation #343

Merged
merged 34 commits into from
Mar 10, 2022
Merged
Changes from 1 commit
Commits
Show all changes
34 commits
Select commit Hold shift click to select a range
3e92169
add period ops class
bxdd Mar 9, 2021
fead243
black format
bxdd Mar 9, 2021
61720c2
add pit data read
bxdd Mar 10, 2021
a0959a9
fix bug in period ops
bxdd Mar 10, 2021
bd46d14
update ops runnable
bxdd Mar 10, 2021
9f1cc64
update PIT test example
bxdd Mar 10, 2021
63e4895
black format
bxdd Mar 10, 2021
88b7926
update PIT test
bxdd Mar 10, 2021
a2dae5c
update tets_PIT
bxdd Mar 12, 2021
99db80d
update code format
bxdd Mar 12, 2021
c4bbe6b
add check_feature_exist
bxdd Mar 12, 2021
20bcf25
black format
bxdd Mar 12, 2021
6e23ff7
optimize the PIT Algorithm
bxdd Mar 12, 2021
88a0d3d
fix bug
bxdd Mar 12, 2021
f52462a
update example
bxdd Mar 12, 2021
b794e65
update test_PIT name
bxdd Mar 17, 2021
255ed0b
Merge https://github.com/microsoft/qlib
bxdd Apr 7, 2021
9df1fbd
add pit collector
bxdd Apr 7, 2021
71d5640
black format
bxdd Apr 7, 2021
ebe277b
fix bugs
bxdd Apr 8, 2021
655ff51
fix try
bxdd Apr 8, 2021
f6ca4d2
fix bug & add dump_pit.py
bxdd Apr 9, 2021
566a8f9
Successfully run and understand PIT
you-n-g Mar 4, 2022
63b5ed4
Merge remote-tracking branch 'origin/main' into PIT
you-n-g Mar 4, 2022
4997389
Add some docs and remove a bug
you-n-g Mar 4, 2022
561be64
Merge remote-tracking branch 'origin/main' into PIT
you-n-g Mar 8, 2022
6811a07
Merge remote-tracking branch 'origin/main' into PIT
you-n-g Mar 8, 2022
cf77cd0
mv crypto collector
you-n-g Mar 8, 2022
79422a1
black format
you-n-g Mar 8, 2022
48ea2c5
Run succesfully after merging master
you-n-g Mar 8, 2022
9c67303
Pass test and fix code
you-n-g Mar 10, 2022
69cf2ab
remove useless PIT code
you-n-g Mar 10, 2022
de8d6cb
fix PYlint
you-n-g Mar 10, 2022
2671dc2
Rename
you-n-g Mar 10, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
black format
  • Loading branch information
bxdd committed Apr 7, 2021
commit 71d5640036bce2cad685a0b69a4ef50e7595b53c
95 changes: 52 additions & 43 deletions scripts/data_collector/pit/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from data_collector.base import BaseCollector, BaseRun
from data_collector.utils import get_calendar_list, get_hs_stock_symbols


class PitCollector(BaseCollector):

DEFAULT_START_DATETIME_QUARTER = pd.Timestamp("2000-01-01")
Expand Down Expand Up @@ -76,103 +77,109 @@ def normalize_symbol(self, symbol):
symbol_s = symbol.split(".")
symbol = f"sh{symbol_s[0]}" if symbol_s[-1] == "ss" else f"sz{symbol_s[0]}"
return symbol

def get_instrument_list(self):
logger.info("get cn stock symbols......")
symbols = get_hs_stock_symbols()
logger.info(f"get {len(symbols)} symbols.")
return symbols

def _get_data_from_baostock(self, symbol, interval, start_datetime, end_datetime):
error_msg = f"{symbol}-{interval}-{start_datetime}-{end_datetime}"
try:
symbol = f"{symbol[7:]}.{symbol[:6]}"
print(symbol)
rs_report = bs.query_performance_express_report(code=symbol, start_date=str(start_datetime.date()), end_date=str(end_datetime.date()))
rs_report = bs.query_performance_express_report(
code=symbol, start_date=str(start_datetime.date()), end_date=str(end_datetime.date())
)
report_list = []
while (rs_report.error_code == '0') & rs_report.next():
while (rs_report.error_code == "0") & rs_report.next():
report_list.append(rs_report.get_row_data())
# 获取一条记录,将记录合并在一起
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use English.

df_report = pd.DataFrame(report_list, columns=rs_report.fields)
if not df_report.empty:
df_report = df_report[['performanceExpPubDate', 'performanceExpStatDate', 'performanceExpressROEWa']]
df_report = df_report[["performanceExpPubDate", "performanceExpStatDate", "performanceExpressROEWa"]]
df_report.rename(
columns = {
"performanceExpPubDate": "date",
columns={
"performanceExpPubDate": "date",
"performanceExpStatDate": "period",
"performanceExpressROEWa":"value"
},
"performanceExpressROEWa": "value",
},
inplace=True,
)
df_report['value'] = df_report['value'].astype('float32') / 100.0
df_report['field'] = "roeWa"
df_report["value"] = df_report["value"].astype("float32") / 100.0
df_report["field"] = "roeWa"

profit_list = []
for year in range(start_datetime.year - 1, end_datetime.year + 1):
for q_num in range(0, 4):
rs_profit = bs.query_profit_data(code=symbol, year=year, quarter=q_num + 1)
while (rs_profit.error_code == '0') & rs_profit.next():
while (rs_profit.error_code == "0") & rs_profit.next():
row_data = rs_profit.get_row_data()
pub_date = pd.Timestamp(row_data[rs_profit.fields.index('pubDate')])
pub_date = pd.Timestamp(row_data[rs_profit.fields.index("pubDate")])
if pub_date >= start_datetime and pub_date <= end_datetime:
profit_list.append(row_data)

df_profit = pd.DataFrame(profit_list, columns=rs_profit.fields)
if not df_profit.empty:
df_profit = df_profit[['pubDate', 'statDate', 'roeAvg']]
df_profit = df_profit[["pubDate", "statDate", "roeAvg"]]
df_profit.rename(
columns = {
"pubDate": "date",
"statDate": "period",
"roeAvg":"value"
},
columns={"pubDate": "date", "statDate": "period", "roeAvg": "value"},
inplace=True,
)
df_profit['value'] = df_profit['value'].astype('float32')
df_profit['field'] = "roeWa"
df_profit["value"] = df_profit["value"].astype("float32")
df_profit["field"] = "roeWa"

forecast_list = []
rs_forecast = bs.query_forecast_report(code=symbol, start_date=str(start_datetime.date()), end_date=str(end_datetime.date()))

while (rs_forecast.error_code == '0') & rs_forecast.next():
rs_forecast = bs.query_forecast_report(
code=symbol, start_date=str(start_datetime.date()), end_date=str(end_datetime.date())
)

while (rs_forecast.error_code == "0") & rs_forecast.next():
forecast_list.append(rs_forecast.get_row_data())

df_forecast = pd.DataFrame(forecast_list, columns=rs_forecast.fields)
if not df_forecast.empty:
df_forecast = df_forecast[['profitForcastExpPubDate', 'profitForcastExpStatDate', 'profitForcastChgPctUp', 'profitForcastChgPctDwn']]
df_forecast = df_forecast[
[
"profitForcastExpPubDate",
"profitForcastExpStatDate",
"profitForcastChgPctUp",
"profitForcastChgPctDwn",
]
]
df_forecast.rename(
columns = {
"profitForcastExpPubDate": "date",
columns={
"profitForcastExpPubDate": "date",
"profitForcastExpStatDate": "period",
},
},
inplace=True,
)
df_forecast['value'] = (df_forecast['profitForcastChgPctUp'].astype('float32') + df_forecast['profitForcastChgPctDwn'].astype('float32')) / 200
df_forecast['field'] = "YOYNI"
df_forecast.drop(['profitForcastChgPctUp', 'profitForcastChgPctDwn'], axis=1, inplace=True)
df_forecast["value"] = (
df_forecast["profitForcastChgPctUp"].astype("float32")
+ df_forecast["profitForcastChgPctDwn"].astype("float32")
) / 200
df_forecast["field"] = "YOYNI"
df_forecast.drop(["profitForcastChgPctUp", "profitForcastChgPctDwn"], axis=1, inplace=True)

growth_list = []
for year in range(start_datetime.year - 1, end_datetime.year + 1):
for q_num in range(0, 4):
rs_growth = bs.query_growth_data(code=symbol, year=year, quarter=q_num + 1)
while (rs_growth.error_code == '0') & rs_growth.next():
while (rs_growth.error_code == "0") & rs_growth.next():
row_data = rs_growth.get_row_data()
pub_date = pd.Timestamp(row_data[rs_growth.fields.index('pubDate')])
pub_date = pd.Timestamp(row_data[rs_growth.fields.index("pubDate")])
if pub_date >= start_datetime and pub_date <= end_datetime:
growth_list.append(row_data)
df_growth = pd.DataFrame(growth_list, columns=rs_growth.fields)[['pubDate', 'statDate', 'YOYNI']]
df_growth = pd.DataFrame(growth_list, columns=rs_growth.fields)[["pubDate", "statDate", "YOYNI"]]
if not df_growth.empty:
df_growth = df_growth[['pubDate', 'statDate', 'YOYNI']]
df_growth = df_growth[["pubDate", "statDate", "YOYNI"]]
df_growth.rename(
columns = {
"pubDate": "date",
"statDate": "period",
"YOYNI":"value"
},
columns={"pubDate": "date", "statDate": "period", "YOYNI": "value"},
inplace=True,
)
df_growth['value'] = df_growth['value'].astype('float32')
df_growth['field'] = "YOYNI"
df_growth["value"] = df_growth["value"].astype("float32")
df_growth["field"] = "YOYNI"

df_merge = df_report.append([df_profit, df_forecast, df_growth])
return df_merge
Expand All @@ -193,6 +200,7 @@ def get_data(
def min_numbers_trading(self):
pass


class Run(BaseRun):
def __init__(self, source_dir=None, max_workers=4, interval="quarterly"):
"""
Expand Down Expand Up @@ -258,6 +266,7 @@ def download_data(
def normalize_class_name(self):
pass


if __name__ == "__main__":
bs.login()
fire.Fire(Run)
Expand Down