The following script wraps the export API and generates a JSON Lines (.jsonl) file containing the desired data.

The contents of the file come from paged requests whose response bodies carry the desired data, so the same approach can feed any file type or downstream system. A sketch of the assumed response shape follows.
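For orientation, each page of the response is assumed (based on how the script below parses it) to nest the documents under payload.docs, with keys in the stream_fid$$analysis_fid.field format; the values here are purely illustrative:

{
  "payload": {
    "docs": [
      {
        "5e84b2bb6bc310001921bb75$$5e84b2bb6bc310001921bb75._iso_date": 1592870400,
        "5e84b2bb6bc310001921bb75$$5ee7c2d62f86610017d302b3.taxonomy.labels": ["Billing"],
        "5e84b2bb6bc310001921bb75$$5ee7c2d65dc5110018239bb3.neural_sentiment_en.score": 0.82
      }
    ]
  }
}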

You will need to specify the ID of the stream to export from, as well as the ID of any analyses whose results you want to include.

The stream ID can be found inside the stream editor by clicking on the "information" icon next to the stream name.

Click on the name of any analysis attached to the stream to get its analysis ID.
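The script reads its credentials from a file named apikey.json. Judging from the headers it builds, that file is expected to contain at least a key, token, and domain (placeholder values shown):

{
  "key": "<your-user-id>",
  "token": "<your-api-key>",
  "domain": "<your-domain>"
}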

'''
This is a basic example of how to export data from Stratifyd.
A logged-in user can download an API key to make authenticated requests.
Please contact Stratifyd to make sure you are using the correct base URL for the API.
'''

import json
import calendar
from datetime import datetime, timedelta

import requests


with open("apikey.json") as f:
credential = json.load(f)
print("credential")
print(credential)


# Update this URL to your workspace endpoint - contact Stratifyd to get your endpoint
BASE_URL = "https://uatapi.stratifyd.com:443"

# Name of the output file to export to
outputFileName = 'sample_export.jsonl'


# Generate headers to use in the requests GET/POST calls
headers = {
    'Accept': "application/json",
    'X-Taste-User': credential['key'],
    'X-Taste-Key': credential['token'],
    'X-Taste-App': credential['domain']
}

# Insert the stream FID for the data stream of your choice
stream_fid = '5e84b2bb6bc310001921bb75'

# Insert the original data field names that you want to export
export_fields = [
    "_iso_date",
    # '_data_source',
    # ...
]

# Insert the taxonomy analysis FID
analysis_fid = '5ee7c2d62f86610017d302b3'

# Insert the taxonomy field names that you want to export
analysis_output_fields = [
    "taxonomy.labels"
]

# Insert the sentiment analysis FID
sentiment_fid = '5ee7c2d65dc5110018239bb3'

# Insert the sentiment field names that you want to export
sentiment_output_fields = [
    "neural_sentiment_en.score"
]


# Generate proper export field names
formatted_export_fields = [('%s$$%s.%s' % (stream_fid, stream_fid, field)) for field in export_fields]
formatted_sentiment_fields = [('%s$$%s.%s' % (stream_fid, sentiment_fid, field)) for field in sentiment_output_fields]
formatted_output_fields = [('%s$$%s.%s' % (stream_fid, analysis_fid, field)) for field in analysis_output_fields]


# Combine all formatted fields into a single list to export
formatted_export_fields += formatted_sentiment_fields
formatted_export_fields += formatted_output_fields


# Number of records to pull per API request
doc_count = 500

# Number of days' worth of data to export; 0 exports everything
numOfDays = 0
date_field = "_iso_date"  # name of the field that holds the date
end_timestamp = datetime.today()
start_timestamp = datetime.today() - timedelta(days=numOfDays)
print('End time is: {0} Start time is: {1}'.format(end_timestamp, start_timestamp))

# Convert the datetimes into UNIX timestamps
end_timestamp = calendar.timegm(end_timestamp.timetuple())
start_timestamp = calendar.timegm(start_timestamp.timetuple())
print('End time is: {0} Start time is: {1}'.format(str(end_timestamp), str(start_timestamp)))


body = {'return': formatted_export_fields}


# If the date field exists and a number of days was specified, filter by date range
if date_field and numOfDays != 0:
    formatted_filter = {"%s$$%s.%s$temporal" % (stream_fid, stream_fid, date_field): {"$lt": end_timestamp, "$gte": start_timestamp}}
    body = {'filter': formatted_filter, 'return': formatted_export_fields}
    body['filter']['$'] = {'$index': {'$gte': 0, '$lt': doc_count}}

# Otherwise export all of the data with no date filter
else:
    body = {'return': formatted_export_fields, 'filter': {'$': {'$index': {'$gte': 0, '$lt': doc_count}}}}


countme = 0

# Create the export file and request the first page of documents
with open(outputFileName, 'w') as fout:
    url = '{0}/actions/streams/{1}/reconstruct'.format(BASE_URL, stream_fid)

    # verify=False skips TLS certificate verification; remove it in production
    response = requests.post(url, json=body, verify=False, headers=headers)

    payload = response.json().get('payload', {})
    current_page_length = len(payload.get('docs', []))

    # Map each document's long-form keys back to the short field names
    for doc in payload.get('docs', []):
        record = {}
        for i, field in enumerate(export_fields):
            if formatted_export_fields[i] in doc:
                record[field] = doc[formatted_export_fields[i]]
        for i, field in enumerate(analysis_output_fields):
            if formatted_output_fields[i] in doc:
                record[field] = doc[formatted_output_fields[i]]
        for i, field in enumerate(sentiment_output_fields):
            if formatted_sentiment_fields[i] in doc:
                record[field] = doc[formatted_sentiment_fields[i]]
        fout.write(json.dumps(record) + "\n")

    # Repeat until a page comes back with fewer documents than doc_count
    while current_page_length >= doc_count:

        countme += 1
        print('Loop number: {0}'.format(countme))

        # Advance the $index window to the next page of documents
        body['filter']['$'] = {'$index': {'$gte': doc_count * countme, '$lt': doc_count * (countme + 1)}}

        response = requests.post(url, verify=False, headers=headers, json=body)
        payload = response.json().get('payload', {})
        current_page_length = len(payload.get('docs', []))
        print('current_page_length is: {0}'.format(current_page_length))

        for doc in payload.get('docs', []):
            record = {}
            for i, field in enumerate(export_fields):
                if formatted_export_fields[i] in doc:
                    record[field] = doc[formatted_export_fields[i]]
            for i, field in enumerate(analysis_output_fields):
                if formatted_output_fields[i] in doc:
                    record[field] = doc[formatted_output_fields[i]]
            for i, field in enumerate(sentiment_output_fields):
                if formatted_sentiment_fields[i] in doc:
                    record[field] = doc[formatted_sentiment_fields[i]]
            fout.write(json.dumps(record) + "\n")
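
Once the script finishes, sample_export.jsonl holds one JSON object per line. A minimal sketch for reading it back (field names taken from the lists above):

import json

with open('sample_export.jsonl') as fin:
    for line in fin:
        record = json.loads(line)
        # Each record uses the short field names, e.g. "_iso_date" or "taxonomy.labels"
        print(record.get('_iso_date'), record.get('neural_sentiment_en.score'))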