blob: 67ed571ee609d430fa0aa5099fe7a7c0f51f68ac (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
|
#!/usr/bin/perl
use strict;
use warnings;
my $filename = $ARGV[0];
my $outputfile= $ARGV[1];
open my $fh_input, '<', $filename or die "Cannot open $filename: $!";
open my $fh_output, '>', $outputfile or die "Cannot open $outputfile: $!";
while ( my $line = <$fh_input> ) {
chomp ($line);
if ( $line =~ /(.*)(\".*\")(.*)/ ) {
# we have seen examples of the status field containing quoted comma-delimited
# strings which is messing up parsing of the record data which is supposed to be
# comma-separated at the field level. This little block converts sections of
# this type of data into a single-quoted-string with a semi-colon delimiter instead.
my $beforeBadStr = $1;
my $badStr = $2;
my $afterBadStr = $3;
$badStr =~ s/,/;/g;
$badStr =~ s/"/'/g;
$line = $beforeBadStr . $badStr . $afterBadStr ;
}
my @row = split(",", $line);
print $fh_output "{\"index\":{\"_index\":\"topographicalsearchindex-localhost\",\"_type\":\"default\"}\n";
print $fh_output "{\"pkey\": \"$row[0]\", \"entityType\": \"$row[1]\", \"location\" : {\"lat\": \"$row[3]\", \"lon\": \"$row[2]\"}, \"selfLink\": \"$row[4]\"}\n";
}
close($fh_input);
close($fh_output);
|