import datetime

import praw
import psaw
import tqdm

# Pushshift client (via psaw) used to run the submission search.
api = psaw.PushshiftAPI()

# All posts until the end of 2017, as an integer epoch timestamp.
# The cutoff is pinned to UTC: a naive datetime's .timestamp() is interpreted
# in the local timezone, which would shift the boundary depending on the
# machine running the script.
end_time = int(
    datetime.datetime(2018, 1, 1, tzinfo=datetime.timezone.utc).timestamp()
)

# Search parameters for the submission query: only submissions created before
# end_time, non-self (link) posts, not marked over_18, with a score filter of
# '>2'; only the 'url' and 'score' fields are requested.
search_kwargs = {
    'before': end_time,
    'filter': ['url', 'score'],
    'sort': 'desc',
    'score': '>2',
    'is_self': False,
    'over_18': False,
}
query = api.search_submissions(**search_kwargs)

# Download links from submissions: iterate the query results and write one URL
# per line to urls.txt, advancing the progress bar once per URL written.
# The file is opened as UTF-8 explicitly so that URLs containing non-ASCII
# characters do not fail under a non-UTF-8 default locale encoding.
with tqdm.tqdm() as pbar, open('urls.txt', 'w', encoding='utf-8') as fh:
    for subm in query:
        # weird issue with psaw/pushshift that breaks score=">2", so the
        # score threshold is re-checked here
        if subm.score < 3:
            continue
        pbar.update(1)
        fh.write(subm.url + '\n')
    # No explicit flush needed: leaving the with-block closes (and flushes) fh.