SELECT
[DISTINCT] [* | [ expression [[AS] column_name_alias] [, ...] ]
[INTO #temp_table_name]
[FROM table_name]
[WHERE condition [, ...] ]
[GROUP BY expression [, ...] ]
[HAVING condition [, ...] ]
[ORDER BY expression [ASC | DESC] [, ...] ]
[LIMIT row_count]
[WITH (option_name=option_value] [, ...]) ] [;]
option_name:=IDENTIFIER
option_value:= NUMBER | STRING
SELECT [DISTINCT] [* | [ expression [[AS] column_name_alias] [, ...] ]
FROM #temp_table_name
[[INNER | OUTER | CROSS] JOIN #temp_table_name [alias] ON condition
[join_statement]
[join_statement]
...
]
[WHERE condition [, ...] ]
[GROUP BY expression [, ...] ]
[HAVING condition [, ...] ]
[ORDER BY expression [ASC | DESC] [, ...] ]
[LIMIT row_count]
select_into_temp;
[select_into_temp;]
[select_into_temp;]
...
select_from_temp_table;
--some comment in single line
|
/* some block comment */
select_from_temp_table_1
UNION | UNION ALL
select_from_temp_table_2
[UNION | UNION ALL]
select_from_temp_table_3
...
...
CREATE PROCEDURE proc_name
[@parameter_name [= 'Some Default Value'] ]
[@parameter_name [= 'Some Default Value'] ]
...
...
AS
sql-statement;
[sql-statement | sql-temp-table-usage] ;
[sql-statement | sql-temp-table-usage] ;
...
...
Executing PROCEDURE
EXEC proc_name [value1] [,value2] ... [,valueN]
Deleting PROCEDURE
DROP PROCEDURE proc_name
SELECT * FROM $ WITH (SRC='zappysys-public-bucket/cust-1.xml')
SELECT * FROM $
WITH (
SRC='zappysys-public-bucket/cust*-?.xml'
--,RECURSIVE='True' --Include files from sub folder
)
SELECT * FROM [data.row]
WITH (
--enter path in SRC or use static value in DATA'
--SRC='zappysys-public-bucket/api/data.xml'
--SRC='zappysys-public-bucket/somefile.xml'
--SRC='zappysys-public-bucket/some*.xml'
DATA='
<data>
<row><id>1</id><name>AAA</name></row>
<row><id>2</id><name>BBB</name></row>
</data>'
,ElementsToTreatAsArray='row' --if you don't specify this option then your query may fail when only 1 <row> tag is found (use comma for multiple names e.g. order,item,row).
)
SELECT * FROM [data.row]
WITH
(DATA='
<data>
<row><id>1</id><checknumber>0001</checknumber><name>AAA</name></row>
<row><id>2</id><checknumber>0002</checknumber><name>BBB</name></row>
</data>'
--//Method-1: >>>DIRECT META (compact mode)<<<
,META='id:int;checknumber:string(10);name:string(10)'
-- Add: @OverrideMode:1 in the column list to override meta for only specific column(s) - In below example we are changing default datatype/length for id and checknumber, all other columns are dynamic so dont have to supply in META clause
--,META='@OverrideMode:1;id:int;checknumber:string(10)'
--//Method-2: >>>DIRECT META (xml mode)<<<
--,META='[{Name:"id",Type:"Int32"},{Name:"checknumber",Type:"String",Length:10},{Name:"name",Type:"String",Length:10}]'
-- Add: @OverrideMode entry in the column list to override meta for only specific column(s). No need to set Type for that entry - In below example we are changing default datatype/length for id and checknumber, all other columns are dynamic so dont have to supply in META clause
--,META='[{Name:"@OverrideMode"}, {Name:"id",Type:"Int32"},{Name:"checknumber",Type:"String",Length:10}]'
--//Method-3: >>>META from File <<<
--,META='c:/data/meta.xml'
--//Method-4: >>>META from Internal Storage - Just refer entry by name <<<
--,META='MY_META_STORAGE_NAME'
)
SELECT * FROM [data.row]
WITH (
--enter path in SRC or use static value in DATA'
--SRC='zappysys-public-bucket/api/data.xml'
--SRC='zappysys-public-bucket/somefile.xml'
--SRC='zappysys-public-bucket/some*.xml'
DATA=@'<data>\r\n<row><id>1</id><name>AA\u0041</name></row>\r\n<row><id>2</id><name>BBB</name></row>\r\n</data>'
,ElementsToTreatAsArray='row' --if you don't specify this option then your query may fail when only 1 <row> tag is found (use comma for multiple names e.g. order,item,row).
)
SELECT c.title,o.url,o.data
FROM $
WITH(
Src='http://google.com'
,EnableRawOutputModeSingleRow='True'
,RawOutputFilterExpr='<title>([^<]*)<\/title>{{0,1}}||content="([^"]*)"{{0,1}}' --can be Regex, JsonPath or XPath. Use double pipe to split multiple columns
,RawOutputDataRowTemplate=@'{title:"[$1]", content:"[$2]" }' --must be Json template. Comment this line to generate default columns e.g. col1, col2...If you comment this then use c.col1 and c.col2 instread of c.title and c.content in Select and join1_xxxxx attributes
,RawOutputExtractMode='Regex' --can be Regex, Xml, Json or None. -- Comment this and RawOutputFilterExpr, RawOutputDataRowTemplate to extract Raw data without column parsing (i.e. returns raw text of URL/Path)
,Alias='c'
,join1_RequestMethod='GET'
,join1_src='mybucket/myservice/[$c.title$]/log.ext'
,join1_RequestContentTypeCode = 'TextPlain'
,join1_alias='o'
,join1_EnableRawOutputModeSingleRow='False'
)
SELECT
substr('ZappySys', 1, 5)
, printf('%s %s %.2f', 'v1', CompanyName, CompanyId)
, ('Str1' || CompanyName || 'Str2') as ConcatExample
, upper('ZappySys')
, lower('ZappySys')
, replace('ZappySys', 'Sys', 'XYZ')
, instr('ZappySys', 'Sys')
, trim(' Zappy Sys ') trim1
, trim('@@@ZappySys@@@', '@') trim2
, rtrim(' ZappySys ')
, ltrim(' ZappySys ')
, length( CompanyName )
FROM $ WITH (
FILTER='$.Info.Rows[*]',
DATA='
<Info>
<Rows><CompanyId>1000</CompanyId><CompanyName>ZappySys</CompanyName></Rows>
<Rows><CompanyId>1001</CompanyId><CompanyName>Microsoft</CompanyName></Rows>
<Rows><CompanyId>1002</CompanyId><CompanyName>Amazon</CompanyName></Rows>
</Info>
')
SELECT
COALESCE(10,20) coalesce1 --returns first non-null arg
,COALESCE(null,20) coalesce2 --returns first non-null arg
,COALESCE(null,20,30,40) coalesce3 --returns first non-null arg
,IFNULL(null,20) --returns first non-null arg. Same function as COALESCE but simple (just two arguments allowed)
,NULLIF(20,20) nullif1 --returns null if both argument same
,NULLIF(10,20) nullif2 --returns null if both argument same
FROM $ WITH (
FILTER='$.Info.Rows[*]',
DATA='
<Info>
<Rows><CompanyId>1000</CompanyId><CompanyName>ZappySys</CompanyName></Rows>
<Rows><CompanyId>1001</CompanyId><CompanyName>Microsoft</CompanyName></Rows>
</Info>
')
SELECT
DATE('now') date_now
,STRFTIME('%Y-%m-%dT%H:%M:%f','now') formatted_Date_time
,DATETIME('now') datetime_now
,DATE('now','localtime') datetime_now_local
,TIME('now') time_now
,JULIANDAY('now')
,DATE('now', 'start of month', '+1 month' , '-1 day' ) dt_modify1
,DATE('now', '+5 day') dt_modify2
FROM $ WITH (
FILTER='$.Info.Rows[*]',
DATA='
<Info>
<Rows><CompanyId>1000</CompanyId><CompanyName>ZappySys</CompanyName></Rows>
<Rows><CompanyId>1001</CompanyId><CompanyName>Microsoft</CompanyName></Rows>
<Rows><CompanyId>1002</CompanyId><CompanyName>Amazon</CompanyName></Rows>
</Info>
')
SELECT
abs(-500)
,random()
,round(1000.236, 2)
FROM $ WITH (
FILTER='$.Info.Rows[*]',
DATA='
<Info>
<Rows><CompanyId>1000</CompanyId><CompanyName>ZappySys</CompanyName></Rows>
<Rows><CompanyId>1001</CompanyId><CompanyName>Microsoft</CompanyName></Rows>
</Info>
')
SELECT
substr('ZappySys', 1, 5)
, printf('%s %s %.2f', 'v1', CompanyName, CompanyId)
, ('Str1' || CompanyName || 'Str2') as ConcatExample
, upper('ZappySys')
, lower('ZappySys')
, replace('ZappySys', 'Sys', 'XYZ')
, instr('ZappySys', 'Sys')
, trim(' Zappy Sys ') trim1
, trim('@@@ZappySys@@@', '@') trim2
, rtrim(' ZappySys ')
, ltrim(' ZappySys ')
, length( CompanyName )
FROM $ WITH (
FILTER='$.Info.Rows[*]',
DATA='
<Info>
<Rows><CompanyId>1000</CompanyId><CompanyName>ZappySys</CompanyName></Rows>
<Rows><CompanyId>1001</CompanyId><CompanyName>Microsoft</CompanyName></Rows>
<Rows><CompanyId>1002</CompanyId><CompanyName>Amazon</CompanyName></Rows>
</Info>
')
SELECT
regex_ismatch(SSN, '\d{3}-\d{3}-\d{4}') IsValidSSN, --check pattern found
regex_replace(SSN, '\d','*') MaskedSSN, --search/replace pattern
regex_search(SSN, '\d{4}') SSNLast4Digits, --extract substring using pattern search
regex_search(Email, '(?<name>\w+)@(?<domain>\w+.com)',0, 2) DomainOf1stEmailByIndex, --extract 1st email (zero based index) and get domain name (by group name). For 2nd argument you can use numeric index or named argument. Both arguments are zero based index.
regex_search(Email, '(?<name>\w+)@(?<domain>\w+.com)',2, 2) DomainOf3rdEmailByIndex, --extract 3rd email (zero based index) and get 2nd part of match (i.e. domain name). Both arguments are zero based index.
regex_search('a@x.com | b@y.com | c@z.com', '(?<name>\w+)@(?<domain>\w+.com)','-0', 2) DomainOfLastEmail, --extract last email (use negative sign in quote - zero based index) and get 2nd part of match (i.e. domain name). Both arguments are zero based index.
regex_search(
'a@x.com | b@y.com | c@z.com'
, '(?<name>\w+)@(?<domain>\w+.com)'
, '*' --occurrence match index ( * , N or -N ) negative number will do reverse find (e.g. to get last match use -0 , to get second last use -1)
, 2 --group index from match (default is full match or -1)
--, 0 -- ignore case for pattern match (0=strict match, 1=ignore case, default is 0)
--, 0 -- throw error if not matched 0=don't throw error, 1=throw error
--, '#' -- separator for multi match
) GetAllDomains --extract domains from all emails and show as email (zero based index) and get 2nd part of match (i.e. domain name). Both arguments are zero based index.
FROM $ WITH (Filter='$.data.row[*]', DATA= '<data>
<row><SSN>111-222-4444</SSN><Email>a@x.com | b@y.com | c@z.com</Email></row>
<row><SSN>111-X2Z-AB44</SSN><Email>a@x.com | b@y.com | c@z.com</Email></row>
</data>')
SELECT * FROM $ WITH(SRC='mybucket/myservice/myfile_<<yyyy-MM-dd,FUN_TODAY>>.dat') --read today's file
SELECT RTRIM('trimmed ') Col1,
DATE('now', '+5 day') Col2
SELECT * FROM [data.row]
WITH (
--enter path in SRC or use static value in DATA'
--SRC='zappysys-public-bucket/api/data.xml'
--SRC='zappysys-public-bucket/somefile.xml'
--SRC='zappysys-public-bucket/some*.xml'
DATA='
<data>
<row><_id>1</_id><_name>AAA</_name></row>
<row><_id>2</_id><_name>BBB</_name></row>
</data>'
,ElementsToTreatAsArray='row' --if you don't specify this option then your query may fail when only 1 <row> tag is found (use comma for multiple names e.g. order,item,row).
,EnableCustomReplace=1
,SearchFor='<(/)?_(\w+)>--regex' --//or use --regex-ic (case-insensitive) or no suffix for plain text search.
,ReplaceWith='<$1$2>'
)
SELECT * FROM $
WITH (
--enter path in SRC or use static value in DATA'
--SRC='zappysys-public-bucket/api/data.xml'
--SRC='zappysys-public-bucket/somefile.xml'
--SRC='zappysys-public-bucket/some*.xml'
DATA='
<data parentCol1="p1" parentCol2="p2">
<row><id>1</id><name>AAA</name></row>
<row><id>2</id><name>BBB</name></row>
</data>'
,Filter='$.data.row[*]' -- for more information check this https://zappysys.com/blog/xmlpath-examples-expression-cheetsheet/
--,Filter='$.data.row[?(@name=='AAA')]' --//equal match example
--,Filter='$..[?(@name=='AAA')]' --//equal match example (scan all levels - parent or child)
--,Filter='$.data.row[?(@name=~/AAA|BBB/)]' --//regex match example
,ElementsToTreatAsArray='row' --if you don't specify this option then your query may fail when only 1 <row> tag is found (use comma for multiple names e.g. order,item,row)
,IncludeParentColumns='true' --this will include any columns found above filtered node (e.g. rows).
,ParentColumnPrefix='p_'
)
SELECT
Pivot_Name as ProjectName,
Pivot_Value_id as Id,
Pivot_Value_code as Code
FROM $
WITH (
Filter='$.data',
--enter path in SRC or use static value in DATA'
--SRC='zappysys-public-bucket/api/data.xml'
--SRC='zappysys-public-bucket/some*.xml'
--SRC='zappysys-public-bucket/somefile.xml'
DATA='
<data>
<project1><id>1</id><code>AAA</code></project1>
<project2><id>2</id><code>BBB</code></project2>
<project3><id>3</id><code>CCC</code></project3>
</data>',
EnablePivot='True'
)
SELECT * FROM $
WITH(
Src='zappysys-public-bucket/cust-1.xml'
,TempStorageMode='Disk' --or 'Memory'
--OR
--,TempStorageMode=1 --//Memory=0, Disk=1
)
SELECT * FROM "Root.value"
WHERE OrderDate<=DATETIME('1997-12-31 00:00:00') -- OR use DATE('1997-12-31') DateTime column can be queried this way. You must wrap DATETIME function around it. Also must use 'yyyy-MM-dd' or 'yyyy-MM-dd HH:mm:ss' or 'yyyy-MM-dd HH:mm:ss.fff' format (where fff is milliseconds)
ORDER BY DATETIME(OrderDate)
WITH (SRC='zappysys-public-bucket/invoices.xml')
SELECT
Country AS Invoice_Country
, SUM(UnitPrice * Quantity) AS Invoice_Total
FROM "Root.value"
WHERE Discount > 0
--AND OrderDate<=DATETIME('1997-12-31 00:00:00') -- OR use DATE('1997-12-31') DateTime column can be queried this way. You must wrap DATETIME function around it. Also must use 'yyyy-MM-dd' or 'yyyy-MM-dd HH:mm:ss' or 'yyyy-MM-dd HH:mm:ss.fff' format (where fff is milliseconds)
GROUP BY Country
HAVING SUM(UnitPrice * Quantity) > 1000
ORDER BY Invoice_Total DESC --,DATETIME(OrderDate)
LIMIT 3
WITH (SRC='zappysys-public-bucket/invoices.xml')
SELECT
name
, CASE id
WHEN 1 THEN 1+1
WHEN 2 THEN 2+2
ELSE 0
END ThisIsCaseColumn
FROM [data.row]
WITH (
--enter path in SRC or use static value in DATA'
--SRC='zappysys-public-bucket/api/data.xml'
--SRC='zappysys-public-bucket/some*.xml'
--SRC='zappysys-public-bucket/somefile.xml'
DATA='
<data>
<row><id>1</id><name>AAA</name></row>
<row><id>2</id><name>BBB</name></row>
</data>'
,ElementsToTreatAsArray='row' --if you don't specify this option then your query may fail when only 1 <row> tag is found (use comma for multiple names e.g. order,item,row).
)
SELECT * INTO #tmp FROM [data.row]
WITH (
--enter path in SRC or use static value in DATA
--SRC='zappysys-public-bucket/api/data.xml'
--SRC='zappysys-public-bucket/some*.xml'
--SRC='zappysys-public-bucket/somefile.xml'
DATA='
<data>
<row><id>1</id><name>AAA</name></row>
<row><id>2</id><name>BBB</name></row>
<row><id>3</id><name>CCC</name></row>
</data>'
,ElementsToTreatAsArray='row' --if you don't specify this option then your query may fail when only 1 <row> tag is found (use comma for multiple names e.g. order,item,row).
);
--//second statement in the same batch
select * from #tmp
where id > 1
--AND SomeDateColumn=DATETIME('2012-12-31 00:00:00') --DateTime column can be queried this way... Must use 'yyyy-MM-dd HH:mm:ss' format
;
SELECT * INTO #cust FROM [data.row]
WITH (
--enter path in SRC or use static value in DATA
--SRC='zappysys-public-bucket/api/customers.xml'
--SRC='zappysys-public-bucket/cust*.xml'
--SRC='zappysys-public-bucket/customers.xml'
DATA='
<data>
<row><customerid>1</customerid><name>AAA</name></row>
<row><customerid>2</customerid><name>BBB</name></row>
<row><customerid>3</customerid><name>CCC</name></row>
</data>'
,ElementsToTreatAsArray='row' --if you don't specify this option then your query may fail when only 1 <row> tag is found (use comma for multiple names e.g. order,item,row).
);
SELECT * INTO #ord FROM [data.row]
WITH (
DATA='
<data>
<row><orderid>1000</orderid><customerid>1</customerid><orderdate>2012-01-01T00:00:00</orderdate><total>2000.50</total></row>
<row><orderid>1001</orderid><customerid>1</customerid><orderdate>2012-01-02T00:00:00</orderdate><total>1000.50</total></row>
<row><orderid>1002</orderid><customerid>2</customerid><orderdate>2013-01-01T00:00:00</orderdate><total>5000.70</total></row>
<row><orderid>1003</orderid><customerid>2</customerid><orderdate>2013-01-02T00:00:00</orderdate><total>6000.70</total></row>
</data>'
,ElementsToTreatAsArray='row' --if you don't specify this option then your query may fail when only 1 <row> tag is found (use comma for multiple names e.g. order,item,row).
);
--//second statement in the same batch
SELECT c.customerid,o.orderid,o.orderdate,o.total
FROM #cust c
INNER JOIN #ord o ON c.customerid=o.customerid
--LEFT OUTER JOIN #ord o ON c.customerid=o.customerid
--WHERE o.orderdate='2012-01-01 00:00:00' --for datetime column format must be queried in 'yyyy-MM-dd HH:mm:ss' format only
--To Use greater than less than must need to use DATETIME function (e.g. WHERE o.OrderDate<=DATETIME('1997-12-31 23:12:59')
;
SELECT * into #tbl1 FROM [data.row]
WITH (
--enter path in SRC or use static value in DATA'
--SRC='zappysys-public-bucket/api/data.xml'
--SRC='zappysys-public-bucket/some*.xml'
--SRC='zappysys-public-bucket/somefile.xml'
DATA='
<data>
<row><id>1</id><name>AAA</name></row>
<row><id>2</id><name>BBB</name></row>
</data>'
,ElementsToTreatAsArray='row' --if you don't specify this option then your query may fail when only 1 <row> tag is found (use comma for multiple names e.g. order,item,row).
);
SELECT * into #tbl2 FROM [data.row]
WITH (
--enter path in SRC or use static value in DATA
--SRC='zappysys-public-bucket/api/data.xml'
--SRC='zappysys-public-bucket/some*.xml'
--SRC='zappysys-public-bucket/somefile.xml'
DATA='
<data>
<row><id>1</id><name>CCC</name></row>
<row><id>2</id><name>DDD</name></row>
</data>'
,ElementsToTreatAsArray='row' --if you don't specify this option then your query may fail when only 1 <row> tag is found (use comma for multiple names e.g. order,item,row).
);
select * from #tbl1
UNION ALL
select * from #tbl2;
--Whenever Possible try to write Simple Query. When you use WHERE / GROUP BY / ORDER BY Expressions. Driver may invoke Client side processing if server side doesnt support that construct.
--For comment use line comment with two dash (e.g. --my comment) or use block comment for multi line comment (e.g. /*my comment*/ )
--Any query must contain at least two required parts
--data source (or table) names and a list of column names
--column names come first (preceded by SELECT keyword)
--table names follow (preceded by FROM keyword)
--Here is the query that returns all rows with all columns (* stands for all columns) from 'Accounts' table
SELECT * FROM Accounts
--Use quoted identifiers when you have space or other character in names (e.g. Column or Table name contains space)
SELECT ID, "Account Name" FROM "Closed Accounts"
SELECT ID, [Account Name] FROM [Closed Accounts]
--returns all rows from 'Accounts' table, each row will have Id and Amount columns
SELECT Id, Amount FROM Accounts
--returns only those rows of 'Accounts' table that meet next condition: Amount is greater or equal to 1000.0
SELECT Id, Amount FROM Accounts WHERE Amount >= 1000.0
--returns only those rows of 'Accounts' table that meet next condition: Amount is greater than 1000 or DepartamentID is equal to 123
SELECT Id, Amount FROM Accounts WHERE (Amount > 1000.0 OR DepartamentID = 123)
--returns only those rows of 'Accounts' table that meet next condition: DepartamentID is equal to 1, 101 or 202
SELECT Id, Amount FROM Accounts WHERE DepartamentID IN (1, 101, 202)
--returns only those rows of 'Accounts' table that meet next condition: DepartamentID is equal to 321 and Amount is less than 100
SELECT Id, Amount FROM Accounts WHERE DepartamentID = 321 AND Amount < 100
--returns only those rows of 'Persons' table that meet next condition: Name column value starts with 'John'
SELECT * FROM Person WHERE Name LIKE 'John%'
--case statement
SELECT
FirstName,
LastName,
CASE Country
WHEN 'US' THEN 'NA'
WHEN 'Canada' THEN 'NA'
WHEN 'India' THEN 'Asia'
WHEN 'China' THEN 'Asia'
ELSE 'Unknown'
END ColRegion
FROM Person
--escaping string values (use of /n /r /t /v allowed)
SELECT * FROM MyTable WITH( Body=@'Line1/nLine2')