Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
cld
workloads
tpcds
Commits
a4d09780
Commit
a4d09780
authored
Apr 11, 2017
by
Jonathan Mace
Browse files
Minor changes to workload generator so I know what's going on
parent
c44fd11f
Changes
2
Show whitespace changes
Inline
Side-by-side
src/main/resources/reference.conf
View file @
a4d09780
tpcds
{
scaleFactor
=
1
dataLocation
=
"hdfs://
127.0.0.1
:9000/tpcds"
scaleFactor
=
1
00
dataLocation
=
"hdfs://
namenode
:9000/tpcds
100
"
dataFormat
=
"parquet"
overwrite
=
false
partitionTables
=
fals
e
partitionTables
=
tru
e
useDoubleForDecimal
=
false
clusterByPartitionColumns
=
false
filterOutNullPartitionValues
=
false
...
...
src/main/scala/com/databricks/spark/sql/perf/tpcds/Tables.scala
View file @
a4d09780
...
...
@@ -32,7 +32,7 @@ class Tables(sqlContext: SQLContext, scaleFactor: Int) extends Serializable {
case
class
Table
(
name
:
String
,
partitionColumns
:
Seq
[
String
],
fields
:
StructField*
)
{
val
schema
=
StructType
(
fields
)
val
partitions
=
if
(
partitionColumns
.
isEmpty
)
1
else
10
0
val
partitions
=
if
(
partitionColumns
.
isEmpty
)
1
else
2
0
def
nonPartitioned
:
Table
=
{
Table
(
name
,
Nil
,
fields
:
_
*
)
...
...
@@ -113,13 +113,17 @@ class Tables(sqlContext: SQLContext, scaleFactor: Int) extends Serializable {
overwrite
:
Boolean
,
clusterByPartitionColumns
:
Boolean
,
filterOutNullPartitionValues
:
Boolean
)
:
Unit
=
{
println
(
s
"Begin genData for table $name in database to $location"
)
val
mode
=
if
(
overwrite
)
SaveMode
.
Overwrite
else
SaveMode
.
Ignore
val
data
=
df
(
format
!=
"text"
)
println
(
s
"Got dataframe for format $format"
)
val
tempTableName
=
s
"${name}_text"
data
.
registerTempTable
(
tempTableName
)
println
(
s
"Registered temp table $tempTableName"
)
val
writer
=
if
(
partitionColumns
.
nonEmpty
)
{
println
(
s
"Getting writer for nonEmpty partitionColumns"
)
if
(
clusterByPartitionColumns
)
{
val
columnString
=
data
.
schema
.
fields
.
map
{
field
=>
field
.
name
...
...
@@ -150,16 +154,21 @@ class Tables(sqlContext: SQLContext, scaleFactor: Int) extends Serializable {
}
}
else
{
// If the table is not partitioned, coalesce the data to a single file.
println
(
"Coalesce to a single file"
)
data
.
coalesce
(
1
).
write
}
println
(
"A"
)
writer
.
format
(
format
).
mode
(
mode
)
if
(
partitionColumns
.
nonEmpty
)
{
println
(
"B"
)
writer
.
partitionBy
(
partitionColumns
:
_
*
)
}
println
(
s
"Generating table $name in database to $location with save mode $mode."
)
log
.
info
(
s
"Generating table $name in database to $location with save mode $mode."
)
writer
.
save
(
location
)
println
(
"Saved"
)
sqlContext
.
dropTempTable
(
tempTableName
)
println
(
"Dropped temp table"
)
}
def
createExternalTable
(
location
:
String
,
format
:
String
,
databaseName
:
String
,
overwrite
:
Boolean
)
:
Unit
=
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment