如何创建 Jsonpath 文件以在 redshift 中加载数据
Posted
技术标签:
【中文标题】如何创建 Jsonpath 文件以在 redshift 中加载数据【英文标题】:how to create Jsonpath file to load data in redshift 【发布时间】:2017-10-16 11:18:00 【问题描述】:我的 JSON 示例记录之一:
{
  "viewerId": "Ext-04835139",
  "sid5": "269410578:2995631181:2211755370:3307088398:33879957",
  "firstHbTimems": 1.506283958371E12,
  "ipAddress": "74.58.57.31",
  "streamUrl": "https://dc3-ll-livedazn-dznlivejp.hs.llnwd.net/live/channel/1007/all/stream.m3u8?event_id=61824040049&h=c912885e2a69ffa7ea84f45dc18c004d",
  "asset": "[nlq9biy7trxl1cjceg70rogvd] Saints @ Panthers",
  "os": "ios",
  "osVersion": "10.3.3",
  "deviceModel": "iPhone",
  "geoInfo": {
    "city": 63666,
    "state": 3851,
    "isp": 120,
    "longitudeTimes1K": -73562,
    "country": 37,
    "dma": 0,
    "asn": 5769,
    "latitudeTimes1K": 45502,
    "publicIP": 1245329695
  },
  "totalPlayingTime": 4.097,
  "totalBufferingTime": 0.0,
  "VST": 1.411,
  "avgBitrate": 202.0,
  "playStateSwitch": [
    "'seqNum': 0, 'eventNum': 0, 'sessionTimeMs': 7, 'startPlayState': 'eUnknown', 'endPlayState': 'eBuffering'",
    "'seqNum': 1, 'eventNum': 5, 'sessionTimeMs': 1411, 'startPlayState': 'eBuffering', 'endPlayState': 'ePlaying'"
  ],
  "bitrateSwitch": [],
  "errorEvent": [],
  "tags": {
    "LSsportName": "Football",
    "c3.device.model": "iPhone+6+Plus",
    "LSvideoType": "LIVE",
    "c3.device.ua": "DAZN%2F5560+CFNetwork%2F811.5.4+Darwin%2F16.7.0",
    "LSfixtureId": "5trxst8tv7slixckvawmtf949",
    "genre": "Sport",
    "LScompetitionName": "NFL+Game+Pass",
    "show": "NFL+Game+Pass",
    "c3.cmp.0._type": "DEVATLAS",
    "c3.protocol.type": "cws",
    "LSsportId": "9ita1e50vxttzd1xll3iyaulu",
    "stageId": "8hm0ew6b8m7907ty8vy8tu4tl",
    "LSvenueId": "na",
    "syndicator": "None",
    "applicationVersion": "2.0.8",
    "deviceConnectionType": "wifi",
    "c3.client.marketingName": "iPhone+6+Plus",
    "playerVersion": "1.2.6.0",
    "c3.cmp.0._id": "da",
    "drmType": "AES128",
    "c3.sh": "dc3-ll-livedazn-dznlivejp.hs.llnwd.net",
    "c3.pt.ver": "10.3.3",
    "applicationType": "ios",
    "c3.viewer.id": "Ext-04835139",
    "LSinterfaceLanguage": "en",
    "c3.pt.os": "IOS",
    "playerVendor": "Open+Source",
    "c3.client.brand": "Apple",
    "c3.cws.sf": "7",
    "c3.cmp.0._ver": "1",
    "c3.client.hwType": "Mobile+Phone",
    "c3.pt.os.ver": "10.3.3",
    "isAd": "false",
    "c3.device.cver.bld": "2.124.0.33357",
    "stageName": "Regular+Season",
    "c3.client.osName": "iOS",
    "contentType": "Live",
    "c3.device.cver": "2.124.0",
    "LScompetitionId": "wy3kluvb4efae1of0d8146c1",
    "expireDate": "na",
    "c3.client.model": "iPhone+6+Plus",
    "c3.client.manufacturer": "Apple",
    "LSproductionValue": "na",
    "pubDate": "2017-09-23",
    "c3.cluster.name": "production",
    "accountType": "FreeTrial",
    "c3.adaptor.type": "eCws1_7",
    "c3.device.brand": "iPhone",
    "c3.pt.br": "Non-Browser+Apps",
    "contentId": "nlq9biy7trxl1cjceg70rogvd",
    "streamingProtocol": "FairPlay",
    "LSvenueName": "na",
    "c3.device.type": "Mobile",
    "c3.protocol.level": "2.4",
    "c3.player.name": "AVPlayer",
    "contentName": "Saints+%40+Panthers",
    "c3.device.manufacturer": "Apple",
    "c3.framework": "AVFoundation",
    "c3.pt": "iOS",
    "c3.device.ver": "6+Plus",
    "c3.video.isLive": "T",
    "c3.cmp.0._cfg_ver": "1504808821",
    "c3.cws.clv": "2.124.0.33357",
    "LScountryCode": "America%2FEl_Salvador"
  },
  "playername": "AVPlayer",
  "isLive": "T",
  "playerVersion": "1.2.6.0"
}
如何创建 jsonpath 文件以在 redshift 中加载它?
谢谢
【问题讨论】:
【参考方案1】:您的 JSON 中包含嵌套数组,而 JSONPaths 不会自动为您展开数组中的元素。
接下来您有两个选择:
1. 在更高的层级加载数据(例如加载整个 playStateSwitch,而不是其中的 seqNum),然后再尝试在 Redshift 中处理这些数据。这可能比较棘手,因为 Redshift 无法直接展开(explode)JSON 数组中的数据。
2. 使用 AWS Glue、Python、PySpark 或其他能够处理嵌套数组的 ETL 工具,先对数据进行预处理。
【讨论】:
【参考方案2】:这一切都取决于最终目标,从上面的描述中并不清楚。 我将按以下顺序处理解决方案
定义需要加载到 Redshift 中的字段和数组值。如果需要复制所有记录,那么接下来检查的是如何处理多个数组记录。
如果 JSON 源中缺少某个数组或键/值,则 JSONPath 将无法按原样工作。因此,最好在将数据集复制到 Redshift 之前先更新 JSON,补上缺少的数组。JSON 的更新可以使用 Linux 命令或 JP 等外部工具完成(另请参阅原文中的附加参考链接)。
如果需要嵌套数组中的所有值,则另一种解决方法是使用外部表(原文此处附有示例链接)。
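否则,可以按如下格式编写 JSONPaths 文件。JSONPaths 文件本身是一个 JSON 对象,因此不能包含 `///` 这样的注释;每个路径都以 `$` 开头,并用点号(或方括号)表示层级:
{
  "jsonpaths": [
    "$.viewerId",
    "$.geoInfo.city",
    "$.playStateSwitch[0].seqNum"
  ]
}
其中 "$.viewerId" 是根级字段(其余根级字段依此类推);"$.geoInfo.city" 按对象层级访问嵌套字段(其余嵌套字段依此类推);"$.playStateSwitch[0].seqNum" 通过下标指定所需的数组元素。jsonpaths 数组中路径的顺序必须与目标表的列顺序(或 COPY 命令中的列列表)一致。注意:如果数组元素在源数据中是字符串而不是对象,则只能用 "$.playStateSwitch[0]" 把整个字符串加载进来,无法再用路径取出其中的 seqNum。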
希望这会有所帮助。
【讨论】:
以上是关于如何创建 Jsonpath 文件以在 redshift 中加载数据的主要内容,如果未能解决你的问题,请参考以下文章
如何创建一个 .INI 文件以在 Java 中存储一些设置?
如何创建“另存为”和“加载”对话框以在 javascript 中创建/加载 json 文件/数据?