import numpy as np
import pandas as pd
import random
# Synthetic experiment: build rolling "RSI-like" indicators from random price
# data, quantile-bin a random selection of them, and one-hot encode the bins.

NUM_DAYS = 10000   # length of the synthetic price series
MIN_WINDOW = 5     # shortest look-back window, in days
MAX_WINDOW = 49    # longest look-back window, in days


def rolling_mean(values, window, count):
    """Return the first *count* rolling means of *values* over *window* items.

    Element ``i`` of the result is the mean of ``values[i:i + window]``.
    The means are computed from a prefix sum, so the whole series costs
    O(len(values)) instead of one ``np.mean`` call per window.  For integer
    input (such as a 0/1 indicator) the window sums are exact, so the result
    equals ``np.mean(values[i:i + window])`` for every ``i``.

    Args:
        values: 1-D sequence of numbers.
        window: Number of consecutive items per mean; must be positive.
        count: Number of leading windows to return; must be non-negative.

    Returns:
        A list of ``count`` floats.

    Raises:
        ValueError: If ``window``/``count`` are out of range or *values* is
            too short to supply ``count`` full windows.
    """
    if window <= 0 or count < 0 or window + count > len(values) + 1:
        raise ValueError(
            f"cannot take {count} windows of size {window} "
            f"from {len(values)} values"
        )
    # prefix[k] == sum(values[:k]); a window sum is a difference of two entries.
    prefix = np.concatenate(([0], np.cumsum(values)))
    window_sums = prefix[window:window + count] - prefix[:count]
    return (window_sums / window).tolist()


stock_price = np.random.rand(NUM_DAYS).tolist()  # NUM_DAYS days of stock data

# 1 when the price on day x is greater than the price on day x+1, else 0.
# NOTE(review): this marks days followed by a *fall*; if 1 is meant to mean
# "price went up", the comparison is inverted -- confirm the intended meaning.
up_or_down = [
    1 if today > tomorrow else 0
    for today, tomorrow in zip(stock_price, stock_price[1:])
]
print(up_or_down[:100])

# Generate a bunch of RSI indicators, one per window length from MIN_WINDOW
# to MAX_WINDOW inclusive, so rsis[i] is the indicator for MIN_WINDOW + i days.
# Every series is cut to the length the longest window can supply so that all
# of them line up (9950 samples for 10000 days), throwing away some data.
common_len = len(up_or_down) - MAX_WINDOW
rsis = [
    rolling_mean(up_or_down, days, common_len)
    for days in range(MIN_WINDOW, MAX_WINDOW + 1)
]
print(rsis[-1])  # the 49 day RSI - should almost always be about .5 naturally

num_to_generate = 100
# Real window lengths in days (random.randint is inclusive on both ends).
rsi_lengths = [
    random.randint(MIN_WINDOW, MAX_WINDOW) for _ in range(num_to_generate)
]
num_bins = [random.randint(2, 100) for _ in range(num_to_generate)]
histograms_to_generate = list(zip(rsi_lengths, num_bins))

# Quantile-bin each selected indicator.  A window length maps to its position
# in `rsis` by subtracting MIN_WINDOW; using the length directly as an index
# would skip the shortest windows and mislabel the rest.  duplicates='drop'
# merges repeated quantile edges, so fewer than `q` bins may result.
features_to_add = np.array([
    pd.qcut(
        np.array(rsis[rsi_len - MIN_WINDOW]),
        q,
        duplicates='drop',
        labels=False,
    )
    for rsi_len, q in histograms_to_generate
])
print(features_to_add.shape)
print(rsi_lengths[:10])
print(num_bins[:10])
print(features_to_add[:10, :25])

# One-hot encode every binned feature and place the indicator columns side
# by side: one row per sample, one column per (feature, bin) pair.
dummies_list = [pd.get_dummies(row) for row in features_to_add]
one_hot_encoded = np.hstack(dummies_list)
print(one_hot_encoded.shape)