Check out the new tutorial at https://blog.powp.co/my-power-query-web-contents-cheat-sheet-6a5bbfdce5eb
Web.Contents without Refresh Errors
The main cause of Web.Contents not refreshing can be fixed by adding the options[Query] and options[RelativeaPath] parameters. (The UI doesn’t create them for you)
/* web request, act based on the HTTP Status Code returned
see more:
override default error handling: https://docs.microsoft.com/en-us/power-query/handlingstatuscodes
example wait-for loop: https://docs.microsoft.com/en-us/power-query/waitretry#manualstatushandling
*/let
WikiRequest =(pageName astext)asany=>let
BaseUrl ="https://en.wikipedia.org/wiki",
Options =[
RelativePath = pageName,
ManualStatusHandling ={400,404}],// wrap 'Response' in 'Binary.Buffer' if you are using it multiple times
response =Web.Contents(BaseUrl, Options),
buffered =Binary.Buffer(response),
response_metadata =Value.Metadata(response),
status_code = response_metadata[Response.Status],
final_result =[
buffered = buffered,
response_metadata = response_metadata
]in
final_result,
Queries ={"Cat","DoesNot Exist fake page"},
Items =List.Transform(
Queries,eachWikiRequest( _ )),
ResponseTable =Table.FromRecords(
Items,typetable[buffered =binary, response_metadata =record],MissingField.Error),#"Expanded HTTP Status Codes"=Table.ExpandRecordColumn(ResponseTable,"response_metadata",{"Response.Status"},{"Response.Status"}),#"Changed Type"=Table.TransformColumnTypes(#"Expanded HTTP Status Codes",{{"Response.Status",Int64.Type}})in#"Changed Type"
PowerQuery
WebRequest: Wrapper with Better Defaults
You can get the full file with extra comments: WebRequest.pq
let/*
Example using this url:
(https://www.metaweather.com/api/location/search?lattlong=36.96,-122.02)
WebRequest(
"https://www.metaweather.com",
"api/location/search",
[ lattlong = "36.96,-122.02" ]
)
Details on preventing "Refresh Errors", using 'Query' and 'RelativePath':
- Not using Query and Relative path cause refresh errors:
(https://blog.crossjoin.co.uk/2016/08/23/web-contents-m-functions-and-dataset-refresh-errors-in-power-bi/)
- You can opt-in to Skip-Test:
(https://blog.crossjoin.co.uk/2019/04/25/skip-test-connection-power-bi-refresh-failures/)
- Debugging and tracing the HTTP requests
(https://blog.crossjoin.co.uk/2019/11/17/troubleshooting-web-service-refresh-problems-in-power-bi-with-the-power-query-diagnostics-feature/)
*/
WebRequest =(
staticPath astext,// domain
relativePath astext,// basically use everything after ".com" to "?"optional query asnullablerecord,// url query stringoptional asRaw asnullablelogical,// use true if content is not Jsonoptional headers asnullablerecord// optional HTTP headers as a record)asany=>let
query = query ??[],
asRaw = asRaw ??false,// toggles calling Json.Document() or not
headers = headers ??[
Accept="application/json"],
baseUrl = staticPath,
options =[
RelativePath = relativePath,
Headers = headers,
Query = query
// optionally toggle handling errors for specific HTTP Status codes// ManualStatusHandling = {400, 404}],// wrap 'Response' in 'Binary.Buffer' if you are using it multiple times
response =Web.Contents(staticPath, options),
metadata =Value.Metadata(response),
buffered =Binary.Buffer(response),
result =Json.Document(buffered)in[
response =if asRaw then buffered else result,
status_code = metadata[Response.Status],
metadata = metadata
]in
WebRequest
Select a Single Element: an Image, url, text, etc…
This fetches the current heading text on the blog
let
Url ="https://powerbi.microsoft.com/en-us/blog/",
Response =Web.Contents( Url ),/*
note: normally do not pass dynamic urls like this, see cheatsheet on preventing refresh errors
Non-tabular scraping like Images or any single elements, does not use a "RowSelector"
This CSS Selector finds exactly one element, the Page's Header Text
.section-featured-post .text-heading1 a
*/
HeaderText =Html.Table(
Response,{{"Page Header",".section-featured-post .text-heading1 a"}})in
HeaderText
PowerQuery
Parsing Element’s Attributes
let
Url ="https://powerbi.microsoft.com/en-us/blog/",
Response =Web.Contents( Url ),/*
The 3rd argument in "columnNameSelectorPairs" is the transformation function.
by default it uses:
each _[TextContent]
*/
HeaderAsElement =Html.Table(
Response,{{"Link",".section-featured-post .text-heading1 a",each _ }}),
ElementRecord =Table.ExpandRecordColumn(
HeaderAsElement,"Link",{"TagName","TextContent","Attributes"},{"TagName","TextContent","Attributes"}),
ExpandedAttributes =Table.ExpandRecordColumn(
ElementRecord,"Attributes",{"href","rel","title"},{"attr.href","attr.rel","attr.title"})in
ExpandedAttributes
PowerQuery
Select Tables using your own CSS Selectors
Results of the RowSelectorResults of columnNameSelectorPairs
// Docs on Enumlet
Source ="https://docs.microsoft.com/en-us/previous-versions/dynamics/ax-2012/reference/gg841505(v=ax.60)",// note: normally do not pass dynamic urls like this, see cheatsheet on preventing refresh errors
Response =Web.BrowserContents( Source ),/*
Think of "RowSelector" as selecting a table row
Then for every row, you select "columns" using the "columnNameSelectorPairs" selector
The combination gives you a table cell.
For more on CSS Selectors, see: <https://developer.mozilla.org/en-US/docs/Web/CSS/CSS_Selectors>
*/
columnNameSelectorPairs ={// column names don't matter here, since I'm using .PromoteHeaders{"Column1","TABLE.table > * > TR > :nth-child(1)"},{"Column2","TABLE.table > * > TR > :nth-child(2)"},{"Column3","TABLE.table > * > TR > :nth-child(3)"}},
t1 =Html.Table(
Response, columnNameSelectorPairs,[RowSelector ="TABLE.table > * > TR"]),
t2 =Table.PromoteHeaders( t1,[PromoteAllScalars =true]),
FinalTable =Table.TransformColumnTypes(
t2,{{"Name",typetext},{"Value",Int64.Type},{"Description",typetext}})in
FinalTable
PowerQuery
SQL Native Query
Parameterized SQL queries with Value.NativeQuery()
let
Source =Sql.Database("localhost","Adventure Works DW"),
Test =Value.NativeQuery(
Source,
"SELECT * FROM DimDate
WHERE EnglishMonthName=@MonthName",[
MonthName ="March",
DayName ="Tuesday"])in
Test
PowerQuery
Other
Cartesian Product
let
#"Add Column Pair2"= Table.AddColumn(
Table_Pairs1,"Pairs2",
each Table_Pairs2,
Table.Type
),
#"Expanded Pairs"= Table.ExpandTableColumn(
#"Add Column Pair2","Pairs2",{"Color","Property"},{"Color","Property"})in
#"Expanded Pairs"
let// Converts a list of any type to text. Works well on most types// although to support all cases, it requires more logic
mixedList ={4,"cat",#date(1999,5,9),0.4},
ListAsText = List.Transform(mixedList, each Text.From(_)),
CsvText = Text.Combine( ListAsText,", ")in//output: "4, cat, 5/9/1999, 0.4"
CsvText
TypeScript
Functions
Using Optional Parameters with Default Values
The Null_coalescing_operator ?? simplifies default values. encoding will be set to what the user passed, unless it’s null. In that case, it is set to TextEncoding.Utf8
let
Read_Json =(json asany, encoding as nullable number)asany=>// calls Json.Document, using UTF8 by defaultlet
encoding = encoding ?? TextEncoding.Utf8,
result = Json.Document(json, encoding)in
result
in
Read_Json
TypeScript
Mapping Function Calls based on Type
Caller chooses which type of conversioin to use, based on type names { date, datetime, datetimezone }
let// 1] get a `type` from caller// 2] return a difference function based on the typeGetTransformByType=(_type as type)asfunction=>let// originally from: <https://docs.microsoft.com/en-us/power-query/helperfunctions#tablechangetype>
func_transform =if(Type.Is(_type,typedate)) then Date.From
elseif(Type.Is(_type,typedatetime)) then DateTime.From
elseif(Type.Is(_type,typedatetimezone)) then DateTimeZone.From
else(t)=> t // else return selfin
func_transform,
nowDtz = DateTimeZone.LocalNow(),// invoke in 2 steps
toDate =GetTransformByType(typedate),
callDate =toDate( DateTimeZone.FixedLocalNow()),// create, and invoke functions
Results =[
asDate =(GetTransformByType(typedate))( nowDtz ),
asDateTime =(GetTransformByType(typedatetime))( nowDtz ),
asDateTimeZone =(GetTransformByType(typedatetimezone))( nowDtz )]in
Results