SQL 语句中的 Databricks 错误:ParseException:不匹配的输入“Service_Date”

Posted

技术标签:

【中文标题】SQL 语句中的 Databricks 错误:ParseException:不匹配的输入“Service_Date”【英文标题】:Databricks Error in SQL statement: ParseException: mismatched input 'Service_Date 【发布时间】:2021-08-10 20:00:53 【问题描述】:

我正在使用 spark SQL 在 Azure Databricks 中运行此脚本,收到此错误:

SQL 语句错误:ParseException: 不匹配的输入 'Service_Date' 期望 '(', 'DESC', 'DESCRIBE', 'FROM', 'MAP', 'REDUCE', 'SELECT', 'TABLE', 'VALUES', 'WITH'(第 16 行, 位置 0)

我是 Databricks 的新手,所以想知道有关解决此问题的任何提示。

 
CREATE OR REPLACE VIEW operations_staging.v_claims AS (


/
WITH Snapshot_Date AS
(
SELECT T1.claim_number,
T1.source_system,
MAX(T1.snapshot_date) snapshot_date
FROM bhc_claim.medical_claim T1
GROUP BY T1.claim_number,
T1.source_system
),
/

Service_Date AS
(
SELECT T1.claim_number,
T1.source_system,
MIN(T1.service_from_date) claim_service_date
FROM bhc_claim.medical_claim_detail T1
GROUP BY T1.claim_number,
T1.source_system
),

Pend_Step1 AS
(
SELECT T1.claim_num Claim_Number,
T1.tax_id,
T1.provider provider_name,
TO_DATE(T1.incurred,"MM/dd/yyyy") Service_Date,
TO_DATE(T1.received,"MM/dd/yyyy") Received_Date,
TO_DATE(T1.report_date,"MM/dd/yyyy") Report_Date,
T1.pending_amount Pend_Amount,
T1.pend_code Pend_Code,
T1.pend_code_description Pend_Code_Desc,
T1.hold_reason_code Hold_Code,
T1.hold_code_description Hold_Code_Desc
FROM loomis_2021.pu T1 -- 277,011
GROUP BY T1.claim_num,
T1.tax_id,
T1.provider,
T1.incurred,
T1.received,
T1.report_date
T1.pending_amount,
T1.pend_code,
T1.pend_code_description,
T1.hold_reason_code,
T1.hold_code_description
),

Pend_Step2 AS
(
SELECT Claim_Number,
concat_ws(",", collect_set(DISTINCT T1.Hold_Code)) Hold_Code,
concat_ws(",", collect_set(DISTINCT T1.Pend_Code)) Pend_Code
FROM Pend_Step1 T1
GROUP BY Claim_Number
),

Pend_Step3 AS
(
SELECT T1.Claim_Number,
T1.tax_id,
T1.provider_name,
T1.Service_Date,
T1.Received_Date,
T1.Report_Date,
T2.Hold_Code,
T2.Pend_Code,
T1.Pend_Amount
FROM Pend_Step1 T1
LEFT JOIN Pend_Step2 T2
ON T1.Claim_Number = T2.Claim_Number
GROUP BY T1.Claim_Number,
T1.tax_id,
T1.provider_name,
T1.Service_Date,
T1.Received_Date,
T1.Report_Date,
T2.Hold_Code,
T2.Pend_Code,
T1.Pend_Amount
),

Pend_Step4 AS
(
SELECT T1.Claim_Number,
T1.tax_id,
T1.provider_name,
T1.Service_Date,
T1.Received_Date,
T1.Hold_Code,
T1.Pend_Code,
SUM(T1.Pend_Amount) Pend_Amount
FROM Pend_Step3 T1 -- 277,011
GROUP BY T1.Claim_Number,
T1.tax_id,
T1.provider_name,
T1.Service_Date,
T1.Received_Date,
T1.Hold_Code,
T1.Pend_Code
),

Paid_Previous_Step1 AS
(
SELECT MAX(claim_received_date) Max_Received_Date
FROM bhc_claim.medical_claim
WHERE DAYOFWEEK(claim_received_date) = 1
AND claim_received_date < NOW()
),

Paid_Previous_Step2 AS
(
SELECT T1.`claim_#` Claim_Number,
T1.tax_id Tax_ID,
CASE WHEN T1.provider_group_name IS NOT NULL THEN T1.provider_group_name
ELSE CONCAT(T1.provider_first_name,T1.provider_last_name)
END provider_name,
TO_DATE(T1.last_refresh_date,"yyyyMMdd") Refresh_Date,
TO_DATE(T1.received_date,"yyyyMMdd") Received_Date,
TO_DATE(T1.processed_date,"yyyyMMdd") Processed_Date,
MIN(TO_DATE(T1.from_dos,"yyyyMMdd")) Service_Date,
'Issued' Status,
SUM(T1.covered) Paid_Amount,
SUM(T1.billed) Billed_Amount
FROM Loomis_2021.paid_previous T1
CROSS JOIN Paid_Previous_Step1 T2
WHERE TO_DATE(T1.last_refresh_date,"yyyyMMdd") >= T2.Max_Received_Date
GROUP BY T1.`claim_#`,
T1.tax_id,
CASE WHEN T1.provider_group_name IS NOT NULL THEN T1.provider_group_name
ELSE CONCAT(T1.provider_first_name,T1.provider_last_name)
END,
T1.last_refresh_date,
T1.received_date,
T1.processed_date
),

Paid_Previous_Step3 AS
(
SELECT T1.Claim_Number,
MAX(T1.Refresh_Date) Refresh_Date
FROM Paid_Previous_Step2 T1
GROUP BY T1.Claim_Number
),

Paid_Previous_Step4 AS
(
SELECT T1.Claim_Number,
T1.Tax_ID,
T1.provider_name,
T1.Refresh_Date,
T1.Received_Date,
T1.Processed_Date,
T1.Service_Date,
T1.Status,
T1.Paid_Amount,
T1.Billed_Amount
FROM Paid_Previous_Step2 T1 -- 60,746
INNER JOIN Paid_Previous_Step3 T2 -- 60,746
ON T1.Claim_Number = T2.Claim_Number --3,816,359
AND T1.Refresh_Date = T2.Refresh_Date
),

Claim_Detail_Step1 AS
(
SELECT T1.claim_number,
T1.source_system,
MAX(T1.snapshot_date) snapshot_date
FROM bhc_claim.medical_claim_detail T1 -- 277,011
GROUP BY T1.claim_number,
T1.source_system
),

Revenue_Code_Step1 AS
(
SELECT T1.claim_number,
T1.source_system,
T1.snapshot_date,
concat_ws(",", collect_set(DISTINCT T2.revenue_code)) Revenue_Code
FROM Claim_Detail_Step1 T1
INNER JOIN bhc_claim.medical_claim_detail T2
ON T1.claim_number = T2.claim_number
AND T1.source_system = T2.source_system
AND T1.snapshot_date = T2.snapshot_date -- 277,011
GROUP BY T1.claim_number,
T1.source_system,
T1.snapshot_date
),

Provider_Detail_Step1 AS
(
SELECT T1.claim_number,
T1.source_system,
T2.provider_npi,
T2.provider_tin,
T2.provider_type,
CASE WHEN provider_group_name IS NOT NULL THEN provider_group_name
ELSE CONCAT(T2.provider_first_name,T2.provider_last_name)
END provider_name,
T2.sequence_number
FROM Claim_Detail_Step1 T1
INNER JOIN bhc_claim.medical_claim_detail_provider T2
ON T1.claim_number = T2.claim_number
AND T1.source_system = T2.source_system
AND T1.snapshot_date = T2.snapshot_date
WHERE T2.provider_type = 'BILLING'
GROUP BY T1.claim_number,
T1.source_system,
T2.provider_npi,
T2.provider_tin,
T2.provider_type,
CASE WHEN provider_group_name IS NOT NULL THEN provider_group_name
ELSE CONCAT(T2.provider_first_name,T2.provider_last_name)
END,
T2.sequence_number
),

Market_Detail_Step1 AS
(
SELECT REPLACE(T1.hios_plan_id_standard_component_variant, '-', '') Hios_ID,
MAX(T1.plan_year) plan_year
FROM bdp.plans T1
GROUP BY 1
),

Market_Detail_Step2 AS
(
SELECT T1.Hios_ID,
T1.plan_year,
TRIM(T2.market) market
FROM Market_Detail_Step1 T1
INNER JOIN bdp.plans T2
ON T1.Hios_ID = REPLACE(T2.hios_plan_id_standard_component_variant, '-', '')
AND T1.plan_year = T2.plan_year
),

Market_Detail_Step3 AS
(
SELECT T1.member_id,
TO_DATE(T1.source_start_date) source_start_date,
TO_DATE(T1.source_end_date) source_end_date,
T2.market
FROM dev.enrollment__base_enrollment T1 -- 3,568,717 | 3,568,717
LEFT JOIN Market_Detail_Step2 T2
ON T2.Hios_ID = T1.plan_id
WHERE T1.source_start_date <> T1.source_end_date
AND T1.effectuation_date_utc IS NOT NULL
GROUP BY T1.member_id,
TO_DATE(T1.source_start_date),
TO_DATE(T1.source_end_date),
T2.market
),

Remark_Code_Step1 AS
(
SELECT T1.claim_number,
T1.source_system,
concat_ws(",", collect_set(DISTINCT T1.remark_code)) Remark_Code
FROM bhc_claim.medical_claim_detail_remark T1 -- 3,731,653 | 3,731,653
INNER JOIN bhc_claim.medical_claim_detail T2
ON T1.claim_number = T2.claim_number
AND T1.source_system = T2.source_system
AND T1.snapshot_date = T2.snapshot_date
GROUP BY T1.claim_number,
T1.source_system
),

Integration_Step1 AS
(
SELECT CASE WHEN T1.claim_number IS NOT NULL THEN T1.claim_number
WHEN T7.Claim_Number IS NOT NULL THEN T7.Claim_Number
WHEN T10.Claim_Number IS NOT NULL THEN T10.Claim_Number
END Claim_Number,
TO_DATE(CASE WHEN T1.claim_received_date IS NOT NULL THEN T1.claim_received_date
WHEN T7.Received_Date IS NOT NULL THEN T7.Received_Date
WHEN T10.Received_Date IS NOT NULL THEN T10.Received_Date
END) Received_Date,
TO_DATE(CASE WHEN T1.claim_processed_date IS NOT NULL THEN T1.claim_processed_date
WHEN T10.Processed_Date IS NOT NULL THEN T10.Processed_Date
END) Processed_Date,
TO_DATE(CASE WHEN T3.claim_service_date IS NOT NULL THEN T3.claim_service_date
WHEN T7.Service_Date IS NOT NULL THEN T7.Service_Date
WHEN T10.Service_Date IS NOT NULL THEN T10.Service_Date
END) Service_Date,
TO_DATE(T1.check_date) Check_Date,
CASE WHEN T7.Claim_Number IS NOT NULL THEN 'Loomis'
WHEN T10.Claim_Number IS NOT NULL THEN 'Loomis'
ELSE T1.source_system
END Source_System,
CASE WHEN T1.claim_status_description = 'P' AND T1.total_excluded_amount = T1.total_original_claim_amount THEN 'Denied'
WHEN T10.status = 'Issued' THEN 'Issued'
ELSE T1.claim_status_description
END Status,
T1.payment_status_description Payment_Status,
CASE WHEN T7.Claim_Number IS NOT NULL
THEN 'Pending' END Pend_Status,
CASE WHEN T10.Claim_Number IS NOT NULL
THEN 'Paid Previous' END Paid_Previous_Status,
T1.claim_submission_type_description Submission_Type,
T1.line_of_business Segment,
T7.Hold_Code,
T7.Pend_Code,
T5.Remark_Code,
T9.Revenue_Code,
CASE WHEN T7.Pend_Code IN('17','18','19','44','60','63','86','89','97')
OR T7.Hold_Code LIKE('%13%')
OR T7.Hold_Code LIKE('%70%')
THEN 'Bright'
ELSE 'Loomis'
END Bucket_Owner,
T1.member_id Member_ID,
T1.subscriber_id Subscriber_ID,
T1.subscriber_group_number Subscriber_Group,
T4.provider_npi,
CASE WHEN T4.provider_tin IS NOT NULL THEN T4.provider_tin
WHEN T7.tax_id IS NOT NULL THEN T7.tax_id
WHEN T10.Tax_ID IS NOT NULL THEN T10.Tax_ID
END provider_tin,
CASE WHEN T4.provider_name IS NOT NULL THEN T4.provider_name
WHEN T7.provider_name IS NOT NULL THEN T7.provider_name
WHEN T10.provider_name IS NOT NULL THEN T10.provider_name
END provider_name,
CASE WHEN T10.Billed_Amount IS NOT NULL THEN T10.Billed_Amount
ELSE (T1.total_billed_amount/100)
END Billed_Amount,
(T1.total_processed_amount/100) Processed_Amount,
CASE WHEN T10.Paid_Amount IS NOT NULL THEN T10.Paid_Amount
ELSE (T1.total_paid_amount/100)
END Paid_Amount,
(T1.total_interest_paid_amount/100) Interest_Paid_Amount,
T7.Pend_Amount
FROM bhc_claim.medical_claim T1 -- 3,472,165
INNER JOIN Snapshot_Date T2 -- 3,472,165
ON T1.claim_number = T2.claim_number
AND T1.source_system = T2.source_system
AND T1.snapshot_date = T2.snapshot_date
LEFT JOIN Service_Date T3 -- 3,472,165
ON T1.claim_number = T3.claim_number
AND T1.source_system = T3.source_system
LEFT JOIN Provider_Detail_Step1 T4 -- 3,498,170 MATCH
ON T1.claim_number = T4.claim_number
AND T1.source_system = T4.source_system
LEFT JOIN Remark_Code_Step1 T5
ON T1.claim_number = T5.claim_number
AND T1.source_system = T5.source_system
FULL OUTER JOIN Pend_Step4 T7 -- 3,472,419 | Match Disctinct 3,472,419
ON LEFT(T1.claim_number,10) = T7.Claim_Number
LEFT JOIN Revenue_Code_Step1 T9
ON T1.claim_number = T9.claim_number
AND T1.source_system = T9.source_system
FULL OUTER JOIN Paid_Previous_Step4 T10 --3,816,359 | Match Disctinct 3,472,419
ON LEFT(T1.claim_number,10) = T10.Claim_Number
),

FINAL AS
(
SELECT T1.Claim_Number,
T1.Received_Date,
T1.Processed_Date,
T1.Service_Date,
T1.Report_Date
T1.Check_Date,
T1.Source_System,
T1.Status,
T1.Payment_Status,
T1.Pend_Status,
T1.Paid_Previous_Status,
T1.Submission_Type,
T1.Segment,
MIN(T2.Market) Market,
T1.Hold_Code,
T1.Pend_Code,
T1.Remark_Code,
T1.Revenue_Code,
T1.Bucket_Owner,
T1.Member_ID,
T1.Subscriber_ID,
T1.Subscriber_Group,
T1.provider_npi,
T1.provider_tin,
T1.provider_name,
T1.Billed_Amount,
T1.Processed_Amount,
T1.Paid_Amount,
T1.Interest_Paid_Amount,
T1.Pend_Amount
FROM Integration_Step1 T1
LEFT JOIN Market_Detail_Step3 T2
ON T1.member_id = T2.member_id
AND T1.Service_Date >= T2.source_start_date
AND T1.Service_Date <= T2.source_end_date
GROUP BY T1.Claim_Number,
T1.Received_Date,
T1.Processed_Date,
T1.Service_Date,
T1.Report_Date,
T1.Check_Date,
T1.Source_System,
T1.Status,
T1.Payment_Status,
T1.Pend_Status,
T1.Paid_Previous_Status,
T1.Submission_Type,
T1.Segment,
T1.Hold_Code,
T1.Pend_Code,
T1.Remark_Code,
T1.Revenue_Code,
T1.Bucket_Owner,
T1.Member_ID,
T1.Subscriber_ID,
T1.Subscriber_Group,
T1.provider_npi,
T1.provider_tin,
T1.provider_name,
T1.Billed_Amount,
T1.Processed_Amount,
T1.Paid_Amount,
T1.Interest_Paid_Amount,
T1.Pend_Amount
)

SELECT T1.Claim_Number,
T1.Received_Date,
T1.Processed_Date,
T1.Service_Date,
T1.Report_Date,
T1.Check_Date,
T1.Source_System,
T1.Status,
T1.Payment_Status,
T1.Pend_Status,
T1.Paid_Previous_Status,
T1.Submission_Type,
T1.Segment,
T1.Market,
T1.Hold_Code,
T1.Pend_Code,
T1.Remark_Code,
T1.Revenue_Code,
T1.Bucket_Owner,
T1.Member_ID,
T1.Subscriber_ID,
T1.Subscriber_Group,
T1.provider_npi,
T1.provider_tin,
T1.provider_name,
T1.Billed_Amount,
T1.Processed_Amount,
T1.Paid_Amount,
T1.Interest_Paid_Amount,
T1.Pend_Amount
FROM FINAL T1-- 3,789,713
)
;


【问题讨论】:

【参考方案1】:

我认为您打算评论和使用“/”和“/”是不正确的,因此是错误的。能否请您删除它们并测试查询?

【讨论】:

以上是关于SQL 语句中的 Databricks 错误:ParseException:不匹配的输入“Service_Date”的主要内容,如果未能解决你的问题,请参考以下文章

如何从 Databricks 中的 SQL 语句输出创建变量

spark sql如何在databricks中创建表

如何使用 Spark sql 在 Databricks 中使用内部联接更新 Databricks Delta 表

SQL 错误:“嵌套的 INSERT、UPDATE、DELETE 或 MERGE 语句必须具有 OUTPUT 子句。” - 在 Azure Databricks 中执行时

如何计算 Spark SQL(Databricks)中表中的列数?

在 Python 中读取 SQL 文件以通过 databricks 对 JDBC 运行